Update Loki dependency version and remove unused Thanos configurations
This commit is contained in:
parent
7ad6e392ef
commit
7040788d01
@ -7,5 +7,5 @@ appVersion: "3.5.7"
|
||||
|
||||
dependencies:
|
||||
- name: loki
|
||||
version: 6.45.2
|
||||
version: 6.46.0
|
||||
repository: https://grafana.github.io/helm-charts
|
||||
|
||||
@ -1,4 +0,0 @@
|
||||
{{- range .Values.extraObjects }}
|
||||
---
|
||||
{{ toYaml . }}
|
||||
{{- end }}
|
||||
@ -1,15 +1,12 @@
|
||||
grafana:
|
||||
# Admin credentials
|
||||
|
||||
adminUser: admin
|
||||
adminPassword: changeme # TODO: Use secret management
|
||||
|
||||
# Persistence
|
||||
|
||||
# Disable local persistence - using PostgreSQL database
|
||||
persistence:
|
||||
enabled: true
|
||||
storageClassName: ceph-block
|
||||
size: 10Gi
|
||||
|
||||
# Resources
|
||||
enabled: false
|
||||
|
||||
resources:
|
||||
requests:
|
||||
cpu: 100m
|
||||
@ -17,23 +14,26 @@ grafana:
|
||||
limits:
|
||||
memory: 512Mi
|
||||
|
||||
# Datasources
|
||||
extraSecretMounts:
|
||||
- name: db-secret
|
||||
secretName: grafana-pg-cluster-app
|
||||
mountPath: /secrets/my-db
|
||||
readOnly: true
|
||||
|
||||
datasources:
|
||||
datasources.yaml:
|
||||
apiVersion: 1
|
||||
datasources:
|
||||
# Thanos datasource
|
||||
- name: Thanos
|
||||
- name: Prometheus
|
||||
type: prometheus
|
||||
access: proxy
|
||||
url: http://thanos-query-frontend.monitoring.svc.cluster.local:9090
|
||||
url: http://prometheus-kube-prometheus-prometheus.monitoring.svc.cluster.local:9090
|
||||
isDefault: true
|
||||
editable: false
|
||||
jsonData:
|
||||
timeInterval: 30s
|
||||
queryTimeout: 60s
|
||||
|
||||
# Loki datasource
|
||||
- name: Loki
|
||||
type: loki
|
||||
access: proxy
|
||||
@ -42,12 +42,11 @@ grafana:
|
||||
jsonData:
|
||||
maxLines: 1000
|
||||
derivedFields:
|
||||
- datasourceUid: Thanos
|
||||
- datasourceUid: Prometheus
|
||||
matcherRegex: "traceID=(\\w+)"
|
||||
name: TraceID
|
||||
url: "$${__value.raw}"
|
||||
|
||||
# Dashboard providers
|
||||
|
||||
dashboardProviders:
|
||||
dashboardproviders.yaml:
|
||||
apiVersion: 1
|
||||
@ -68,42 +67,42 @@ grafana:
|
||||
editable: true
|
||||
options:
|
||||
path: /var/lib/grafana/dashboards/kubernetes
|
||||
|
||||
# Preload dashboards
|
||||
|
||||
dashboards:
|
||||
default:
|
||||
# Node exporter dashboard
|
||||
node-exporter:
|
||||
gnetId: 1860
|
||||
revision: 37
|
||||
datasource: Thanos
|
||||
datasource: Prometheus
|
||||
|
||||
# Kubernetes cluster monitoring
|
||||
k8s-cluster:
|
||||
gnetId: 7249
|
||||
revision: 1
|
||||
datasource: Thanos
|
||||
datasource: Prometheus
|
||||
|
||||
kubernetes:
|
||||
# Kubernetes pods
|
||||
k8s-pods:
|
||||
gnetId: 6417
|
||||
revision: 1
|
||||
datasource: Thanos
|
||||
datasource: Prometheus
|
||||
|
||||
# Loki logs dashboard
|
||||
loki-logs:
|
||||
gnetId: 13639
|
||||
revision: 2
|
||||
datasource: Loki
|
||||
|
||||
# Grafana config
|
||||
|
||||
grafana.ini:
|
||||
server:
|
||||
root_url: https://grafana.noxxos.nl
|
||||
serve_from_sub_path: false
|
||||
|
||||
# Authentication - Authentik OIDC
|
||||
database:
|
||||
type: postgres
|
||||
host: "$__file{/secrets/my-db/host}:$__file{/secrets/my-db/port}"
|
||||
name: "$__file{/secrets/my-db/dbname}"
|
||||
user: "$__file{/secrets/my-db/user}"
|
||||
password: "$__file{/secrets/my-db/password}"
|
||||
|
||||
auth.generic_oauth:
|
||||
enabled: false # Enable after configuring secret
|
||||
name: Authentik
|
||||
@ -127,51 +126,98 @@ grafana:
|
||||
users:
|
||||
auto_assign_org: true
|
||||
auto_assign_org_role: Viewer
|
||||
|
||||
# Service Monitor
|
||||
|
||||
serviceMonitor:
|
||||
enabled: true
|
||||
|
||||
# Plugins
|
||||
enabled: false
|
||||
|
||||
plugins:
|
||||
- grafana-piechart-panel
|
||||
- grafana-clock-panel
|
||||
|
||||
# Gateway API HTTPRoute
|
||||
extraObjects:
|
||||
# ReferenceGrant
|
||||
- apiVersion: gateway.networking.k8s.io/v1beta1
|
||||
kind: ReferenceGrant
|
||||
metadata:
|
||||
name: traefik-gateway-access
|
||||
namespace: monitoring
|
||||
spec:
|
||||
from:
|
||||
- group: gateway.networking.k8s.io
|
||||
kind: HTTPRoute
|
||||
namespace: monitoring
|
||||
to:
|
||||
- group: ""
|
||||
kind: Service
|
||||
|
||||
# Grafana HTTPRoute
|
||||
- apiVersion: gateway.networking.k8s.io/v1
|
||||
kind: HTTPRoute
|
||||
metadata:
|
||||
name: grafana
|
||||
namespace: monitoring
|
||||
spec:
|
||||
route:
|
||||
main:
|
||||
enabled: true
|
||||
hostnames:
|
||||
- grafana.noxxos.nl
|
||||
parentRefs:
|
||||
- name: traefik-gateway
|
||||
namespace: traefik
|
||||
sectionName: websecure
|
||||
hostnames:
|
||||
- "grafana.noxxos.nl"
|
||||
rules:
|
||||
- matches:
|
||||
- path:
|
||||
type: PathPrefix
|
||||
value: /
|
||||
backendRefs:
|
||||
- name: grafana
|
||||
port: 80
|
||||
|
||||
extraObjects:
|
||||
- apiVersion: postgresql.cnpg.io/v1
|
||||
kind: Cluster
|
||||
metadata:
|
||||
name: grafana-pg-cluster
|
||||
namespace: monitoring
|
||||
spec:
|
||||
instances: 2
|
||||
postgresql:
|
||||
parameters:
|
||||
max_connections: "20"
|
||||
shared_buffers: "25MB"
|
||||
effective_cache_size: "75MB"
|
||||
maintenance_work_mem: "6400kB"
|
||||
checkpoint_completion_target: "0.9"
|
||||
wal_buffers: "768kB"
|
||||
default_statistics_target: "100"
|
||||
random_page_cost: "1.1"
|
||||
effective_io_concurrency: "300"
|
||||
work_mem: "640kB"
|
||||
huge_pages: "off"
|
||||
max_wal_size: "128MB"
|
||||
bootstrap:
|
||||
initdb:
|
||||
database: grafana
|
||||
owner: grafana
|
||||
storage:
|
||||
size: 1Gi
|
||||
storageClass: ceph-block
|
||||
resources:
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 100Mi
|
||||
limits:
|
||||
memory: 512Mi
|
||||
backup:
|
||||
method: plugin
|
||||
pluginConfiguration:
|
||||
name: barman-cloud.cloudnative-pg.io
|
||||
retentionPolicy: "30d"
|
||||
barmanObjectStore:
|
||||
destinationPath: s3://postgresql-backups/grafana
|
||||
endpointURL: http://rook-ceph-rgw-ceph-objectstore.rook-ceph.svc:80
|
||||
s3Credentials:
|
||||
accessKeyId:
|
||||
name: grafana-pg-backup-creds
|
||||
key: AWS_ACCESS_KEY_ID
|
||||
secretAccessKey:
|
||||
name: grafana-pg-backup-creds
|
||||
key: AWS_SECRET_ACCESS_KEY
|
||||
wal:
|
||||
compression: bzip2
|
||||
data:
|
||||
compression: bzip2
|
||||
scheduledBackups:
|
||||
- name: daily-backup
|
||||
schedule: "0 2 * * *" # 2 AM daily
|
||||
backupOwnerReference: self
|
||||
- apiVersion: objectbucket.io/v1alpha1
|
||||
kind: ObjectBucketClaim
|
||||
metadata:
|
||||
name: grafana-pg-backups
|
||||
namespace: monitoring
|
||||
spec:
|
||||
bucketName: postgresql-backups
|
||||
storageClassName: ceph-bucket
|
||||
additionalConfig:
|
||||
maxSize: "50Gi"
|
||||
- apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: grafana-pg-backup-creds
|
||||
namespace: monitoring
|
||||
type: Opaque
|
||||
stringData:
|
||||
AWS_ACCESS_KEY_ID: placeholder
|
||||
AWS_SECRET_ACCESS_KEY: placeholder
|
||||
@ -1,5 +1,26 @@
|
||||
kube-prometheus-stack:
|
||||
# Prometheus Operator
|
||||
|
||||
crds:
|
||||
enabled: true
|
||||
|
||||
defaultRules:
|
||||
create: false
|
||||
|
||||
alertmanager:
|
||||
enabled: false
|
||||
|
||||
grafana:
|
||||
enabled: false
|
||||
|
||||
kubeProxy:
|
||||
enabled: false
|
||||
|
||||
kubeControllerManager:
|
||||
enabled: false
|
||||
|
||||
kubeEtcd:
|
||||
enabled: false
|
||||
|
||||
prometheusOperator:
|
||||
enabled: true
|
||||
resources:
|
||||
@ -8,25 +29,56 @@ kube-prometheus-stack:
|
||||
memory: 128Mi
|
||||
limits:
|
||||
memory: 256Mi
|
||||
|
||||
# Prometheus configuration
|
||||
networkPolicy:
|
||||
enabled: true
|
||||
flavor: Cilium
|
||||
|
||||
prometheus:
|
||||
enabled: true
|
||||
networkPolicy:
|
||||
enabled: true
|
||||
flavor: Cilium
|
||||
cilium: {}
|
||||
|
||||
# Disable Thanos integration
|
||||
thanosService:
|
||||
enabled: false
|
||||
thanosServiceMonitor:
|
||||
enabled: false
|
||||
thanosServiceExternal:
|
||||
enabled: false
|
||||
thanosIngress:
|
||||
enabled: false
|
||||
|
||||
route:
|
||||
main:
|
||||
enabled: true
|
||||
hostnames:
|
||||
- prometheus.noxxos.nl
|
||||
parentRefs:
|
||||
- name: traefik-gateway
|
||||
namespace: traefik
|
||||
sectionName: websecure
|
||||
serviceMonitor:
|
||||
selfMonitor: false
|
||||
prometheusSpec:
|
||||
# Retention
|
||||
retention: 24h
|
||||
retentionSize: 15GB
|
||||
# Enable compaction (was disabled for Thanos)
|
||||
disableCompaction: false
|
||||
scrapeInterval: 30s
|
||||
|
||||
# Resources
|
||||
# 3 months retention (~90 days)
|
||||
retention: 90d
|
||||
retentionSize: 100GB
|
||||
|
||||
replicas: 1
|
||||
resources:
|
||||
requests:
|
||||
cpu: 200m
|
||||
memory: 1Gi
|
||||
cpu: 100m
|
||||
memory: 400Mi
|
||||
limits:
|
||||
memory: 2Gi
|
||||
|
||||
# Storage
|
||||
# Increased storage for 3 month retention
|
||||
storageSpec:
|
||||
volumeClaimTemplate:
|
||||
spec:
|
||||
@ -34,26 +86,10 @@ kube-prometheus-stack:
|
||||
accessModes: ["ReadWriteOnce"]
|
||||
resources:
|
||||
requests:
|
||||
storage: 20Gi
|
||||
|
||||
# Thanos sidecar configuration
|
||||
thanos:
|
||||
image: quay.io/thanos/thanos:v0.37.2
|
||||
version: v0.37.2
|
||||
objectStorageConfig:
|
||||
name: thanos-objstore-secret
|
||||
key: objstore.yml
|
||||
|
||||
# External labels for Thanos
|
||||
externalLabels:
|
||||
cluster: homelab
|
||||
prometheus: monitoring/prometheus
|
||||
|
||||
# Replicas
|
||||
replicas: 1
|
||||
replicaExternalLabelName: prometheus_replica
|
||||
storage: 150Gi
|
||||
|
||||
# Service monitors
|
||||
scrapeConfigSelectorNilUsesHelmValues: false
|
||||
serviceMonitorSelectorNilUsesHelmValues: false
|
||||
podMonitorSelectorNilUsesHelmValues: false
|
||||
ruleSelectorNilUsesHelmValues: false
|
||||
@ -61,30 +97,6 @@ kube-prometheus-stack:
|
||||
# Additional scrape configs
|
||||
additionalScrapeConfigs: []
|
||||
|
||||
# Alertmanager
|
||||
alertmanager:
|
||||
enabled: true
|
||||
alertmanagerSpec:
|
||||
replicas: 1
|
||||
storage:
|
||||
volumeClaimTemplate:
|
||||
spec:
|
||||
storageClassName: ceph-block
|
||||
accessModes: ["ReadWriteOnce"]
|
||||
resources:
|
||||
requests:
|
||||
storage: 5Gi
|
||||
resources:
|
||||
requests:
|
||||
cpu: 50m
|
||||
memory: 128Mi
|
||||
limits:
|
||||
memory: 256Mi
|
||||
|
||||
# Grafana (disabled - using separate Grafana deployment)
|
||||
grafana:
|
||||
enabled: false
|
||||
|
||||
# Node Exporter
|
||||
nodeExporter:
|
||||
enabled: true
|
||||
@ -104,35 +116,3 @@ kube-prometheus-stack:
|
||||
memory: 128Mi
|
||||
limits:
|
||||
memory: 256Mi
|
||||
|
||||
# Default rules
|
||||
defaultRules:
|
||||
create: true
|
||||
rules:
|
||||
alertmanager: true
|
||||
etcd: false
|
||||
configReloaders: true
|
||||
general: true
|
||||
k8s: true
|
||||
kubeApiserverAvailability: true
|
||||
kubeApiserverBurnrate: true
|
||||
kubeApiserverHistogram: true
|
||||
kubeApiserverSlos: true
|
||||
kubeControllerManager: true
|
||||
kubelet: true
|
||||
kubeProxy: true
|
||||
kubePrometheusGeneral: true
|
||||
kubePrometheusNodeRecording: true
|
||||
kubernetesApps: true
|
||||
kubernetesResources: true
|
||||
kubernetesStorage: true
|
||||
kubernetesSystem: true
|
||||
kubeSchedulerAlerting: true
|
||||
kubeSchedulerRecording: true
|
||||
kubeStateMetrics: true
|
||||
network: true
|
||||
node: true
|
||||
nodeExporterAlerting: true
|
||||
nodeExporterRecording: true
|
||||
prometheus: true
|
||||
prometheusOperator: true
|
||||
|
||||
@ -1,11 +0,0 @@
|
||||
apiVersion: v2
|
||||
name: thanos
|
||||
description: Thanos distributed metrics wrapper chart
|
||||
type: application
|
||||
version: 1.0.0
|
||||
appVersion: "0.40.1"
|
||||
|
||||
dependencies:
|
||||
- name: thanos
|
||||
version: 1.22.0
|
||||
repository: oci://ghcr.io/stevehipwell/helm-charts
|
||||
@ -1,30 +0,0 @@
|
||||
apiVersion: argoproj.io/v1alpha1
|
||||
kind: Application
|
||||
metadata:
|
||||
name: thanos
|
||||
namespace: argocd
|
||||
annotations:
|
||||
argocd.argoproj.io/sync-wave: "1"
|
||||
finalizers:
|
||||
- resources-finalizer.argocd.argoproj.io
|
||||
spec:
|
||||
project: default
|
||||
source:
|
||||
repoURL: https://git.mvzijl.nl/marco/veda.git
|
||||
targetRevision: applicationset-rewrite
|
||||
path: apps/monitoring/thanos
|
||||
helm:
|
||||
releaseName: thanos
|
||||
valueFiles:
|
||||
- values.yaml
|
||||
destination:
|
||||
server: https://kubernetes.default.svc
|
||||
namespace: monitoring
|
||||
syncPolicy:
|
||||
automated:
|
||||
prune: true
|
||||
selfHeal: true
|
||||
syncOptions:
|
||||
- CreateNamespace=true
|
||||
- ServerSideApply=true
|
||||
- SkipDryRunOnMissingResource=true
|
||||
@ -1,4 +0,0 @@
|
||||
{{- range .Values.extraObjects }}
|
||||
---
|
||||
{{ toYaml . }}
|
||||
{{- end }}
|
||||
@ -1,130 +0,0 @@
|
||||
thanos:
|
||||
# Object storage configuration
|
||||
objstoreConfig:
|
||||
create: false # We create the secret via extraObjects
|
||||
name: thanos-objstore-secret
|
||||
key: objstore.yml
|
||||
|
||||
# Image configuration
|
||||
image:
|
||||
registry: quay.io
|
||||
repository: thanos/thanos
|
||||
tag: v0.40.1
|
||||
|
||||
# Query component
|
||||
query:
|
||||
enabled: true
|
||||
replicaCount: 2
|
||||
resources:
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 256Mi
|
||||
limits:
|
||||
memory: 512Mi
|
||||
stores:
|
||||
- dnssrv+_grpc._tcp.thanos-storegateway.monitoring.svc.cluster.local
|
||||
- dnssrv+_grpc._tcp.thanos-receive.monitoring.svc.cluster.local
|
||||
|
||||
# Query Frontend
|
||||
queryFrontend:
|
||||
enabled: true
|
||||
replicaCount: 1
|
||||
resources:
|
||||
requests:
|
||||
cpu: 50m
|
||||
memory: 128Mi
|
||||
limits:
|
||||
memory: 256Mi
|
||||
|
||||
# Store Gateway
|
||||
storegateway:
|
||||
enabled: true
|
||||
replicaCount: 1
|
||||
persistence:
|
||||
enabled: true
|
||||
storageClass: ceph-block
|
||||
size: 10Gi
|
||||
resources:
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 512Mi
|
||||
limits:
|
||||
memory: 1Gi
|
||||
|
||||
# Compactor
|
||||
compactor:
|
||||
enabled: true
|
||||
persistence:
|
||||
enabled: true
|
||||
storageClass: ceph-block
|
||||
size: 10Gi
|
||||
retentionResolutionRaw: 14d
|
||||
retentionResolution5m: 90d
|
||||
retentionResolution1h: 2y
|
||||
resources:
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 512Mi
|
||||
limits:
|
||||
memory: 1Gi
|
||||
extraFlags:
|
||||
- --deduplication.replica-label=prometheus_replica
|
||||
- --deduplication.replica-label=replica
|
||||
- --downsampling.disable=false
|
||||
- --compact.enable-vertical-compaction
|
||||
|
||||
# Receive (for remote write from Prometheus)
|
||||
receive:
|
||||
enabled: true
|
||||
replicaCount: 1
|
||||
persistence:
|
||||
enabled: true
|
||||
storageClass: ceph-block
|
||||
size: 20Gi
|
||||
resources:
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 512Mi
|
||||
limits:
|
||||
memory: 1Gi
|
||||
|
||||
# Metrics and caching
|
||||
# Note: Memcached configuration would be added here if using external caching
|
||||
|
||||
# Metrics
|
||||
metrics:
|
||||
enabled: true
|
||||
serviceMonitor:
|
||||
enabled: true
|
||||
|
||||
# S3 Bucket and credentials provisioning
|
||||
extraObjects:
|
||||
# ObjectBucketClaim for Thanos metrics
|
||||
- apiVersion: objectbucket.io/v1alpha1
|
||||
kind: ObjectBucketClaim
|
||||
metadata:
|
||||
name: thanos-metrics
|
||||
namespace: monitoring
|
||||
spec:
|
||||
bucketName: thanos-metrics
|
||||
storageClassName: ceph-bucket
|
||||
additionalConfig:
|
||||
maxSize: "500Gi"
|
||||
|
||||
# Secret with S3 credentials (will be populated by Rook)
|
||||
# This is a placeholder - actual credentials come from the OBC
|
||||
- apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: thanos-objstore-secret
|
||||
namespace: monitoring
|
||||
type: Opaque
|
||||
stringData:
|
||||
objstore.yml: |-
|
||||
type: S3
|
||||
config:
|
||||
bucket: thanos-metrics
|
||||
endpoint: rook-ceph-rgw-ceph-objectstore.rook-ceph.svc:80
|
||||
insecure: true
|
||||
access_key: ${AWS_ACCESS_KEY_ID}
|
||||
secret_key: ${AWS_SECRET_ACCESS_KEY}
|
||||
Loading…
Reference in New Issue
Block a user