Update Loki dependency version and remove unused Thanos configurations

Marco van Zijl 2025-11-09 19:26:18 +01:00
parent 7ad6e392ef
commit 7040788d01
8 changed files with 178 additions and 331 deletions

View File

@@ -7,5 +7,5 @@ appVersion: "3.5.7"
dependencies:
- name: loki
version: 6.45.2
version: 6.46.0
repository: https://grafana.github.io/helm-charts

View File

@@ -1,4 +0,0 @@
{{- range .Values.extraObjects }}
---
{{ toYaml . }}
{{- end }}

View File

@@ -1,15 +1,12 @@
grafana:
# Admin credentials
adminUser: admin
adminPassword: changeme # TODO: Use secret management
# Persistence
# Disable local persistence - using PostgreSQL database
persistence:
enabled: true
storageClassName: ceph-block
size: 10Gi
# Resources
enabled: false
resources:
requests:
cpu: 100m
@@ -17,23 +14,26 @@ grafana:
limits:
memory: 512Mi
# Datasources
extraSecretMounts:
- name: db-secret
secretName: grafana-pg-cluster-app
mountPath: /secrets/my-db
readOnly: true
datasources:
datasources.yaml:
apiVersion: 1
datasources:
# Thanos datasource
- name: Thanos
- name: Prometheus
type: prometheus
access: proxy
url: http://thanos-query-frontend.monitoring.svc.cluster.local:9090
url: http://prometheus-kube-prometheus-prometheus.monitoring.svc.cluster.local:9090
isDefault: true
editable: false
jsonData:
timeInterval: 30s
queryTimeout: 60s
# Loki datasource
- name: Loki
type: loki
access: proxy
@@ -42,12 +42,11 @@ grafana:
jsonData:
maxLines: 1000
derivedFields:
- datasourceUid: Thanos
- datasourceUid: Prometheus
matcherRegex: "traceID=(\\w+)"
name: TraceID
url: "$${__value.raw}"
# Dashboard providers
dashboardProviders:
dashboardproviders.yaml:
apiVersion: 1
@@ -68,42 +67,42 @@ grafana:
editable: true
options:
path: /var/lib/grafana/dashboards/kubernetes
# Preload dashboards
dashboards:
default:
# Node exporter dashboard
node-exporter:
gnetId: 1860
revision: 37
datasource: Thanos
datasource: Prometheus
# Kubernetes cluster monitoring
k8s-cluster:
gnetId: 7249
revision: 1
datasource: Thanos
datasource: Prometheus
kubernetes:
# Kubernetes pods
k8s-pods:
gnetId: 6417
revision: 1
datasource: Thanos
datasource: Prometheus
# Loki logs dashboard
loki-logs:
gnetId: 13639
revision: 2
datasource: Loki
# Grafana config
grafana.ini:
server:
root_url: https://grafana.noxxos.nl
serve_from_sub_path: false
# Authentication - Authentik OIDC
database:
type: postgres
host: "$__file{/secrets/my-db/host}:$__file{/secrets/my-db/port}"
name: "$__file{/secrets/my-db/dbname}"
user: "$__file{/secrets/my-db/user}"
password: "$__file{/secrets/my-db/password}"
auth.generic_oauth:
enabled: false # Enable after configuring secret
name: Authentik
@@ -127,51 +126,98 @@ grafana:
users:
auto_assign_org: true
auto_assign_org_role: Viewer
# Service Monitor
serviceMonitor:
enabled: true
# Plugins
enabled: false
plugins:
- grafana-piechart-panel
- grafana-clock-panel
# Gateway API HTTPRoute
extraObjects:
# ReferenceGrant
- apiVersion: gateway.networking.k8s.io/v1beta1
kind: ReferenceGrant
metadata:
name: traefik-gateway-access
namespace: monitoring
spec:
from:
- group: gateway.networking.k8s.io
kind: HTTPRoute
namespace: monitoring
to:
- group: ""
kind: Service
# Grafana HTTPRoute
- apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
name: grafana
namespace: monitoring
spec:
route:
main:
enabled: true
hostnames:
- grafana.noxxos.nl
parentRefs:
- name: traefik-gateway
namespace: traefik
sectionName: websecure
hostnames:
- "grafana.noxxos.nl"
rules:
- matches:
- path:
type: PathPrefix
value: /
backendRefs:
- name: grafana
port: 80
extraObjects:
- apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
name: grafana-pg-cluster
namespace: monitoring
spec:
instances: 2
postgresql:
parameters:
max_connections: "20"
shared_buffers: "25MB"
effective_cache_size: "75MB"
maintenance_work_mem: "6400kB"
checkpoint_completion_target: "0.9"
wal_buffers: "768kB"
default_statistics_target: "100"
random_page_cost: "1.1"
effective_io_concurrency: "300"
work_mem: "640kB"
huge_pages: "off"
max_wal_size: "128MB"
bootstrap:
initdb:
database: grafana
owner: grafana
storage:
size: 1Gi
storageClass: ceph-block
resources:
requests:
cpu: 100m
memory: 100Mi
limits:
memory: 512Mi
backup:
method: plugin
pluginConfiguration:
name: barman-cloud.cloudnative-pg.io
retentionPolicy: "30d"
barmanObjectStore:
destinationPath: s3://postgresql-backups/grafana
endpointURL: http://rook-ceph-rgw-ceph-objectstore.rook-ceph.svc:80
s3Credentials:
accessKeyId:
name: grafana-pg-backup-creds
key: AWS_ACCESS_KEY_ID
secretAccessKey:
name: grafana-pg-backup-creds
key: AWS_SECRET_ACCESS_KEY
wal:
compression: bzip2
data:
compression: bzip2
scheduledBackups:
- name: daily-backup
schedule: "0 2 * * *" # 2 AM daily
backupOwnerReference: self
- apiVersion: objectbucket.io/v1alpha1
kind: ObjectBucketClaim
metadata:
name: grafana-pg-backups
namespace: monitoring
spec:
bucketName: postgresql-backups
storageClassName: ceph-bucket
additionalConfig:
maxSize: "50Gi"
- apiVersion: v1
kind: Secret
metadata:
name: grafana-pg-backup-creds
namespace: monitoring
type: Opaque
stringData:
AWS_ACCESS_KEY_ID: placeholder
AWS_SECRET_ACCESS_KEY: placeholder
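
Note on the database settings above: the grafana.ini database block reads host, port, dbname, user and password from files under /secrets/my-db, which is where extraSecretMounts mounts the grafana-pg-cluster-app secret generated by CloudNativePG for the grafana-pg-cluster Cluster. Each secret key is exposed as a file at /secrets/my-db/<key>, which Grafana's $__file provider resolves at startup. A minimal sketch of the secret shape those references assume (key names mirror the paths used above; values are illustrative placeholders, the real secret is created and managed by the operator):

# Hypothetical illustration only - CloudNativePG creates this secret itself.
apiVersion: v1
kind: Secret
metadata:
  name: grafana-pg-cluster-app
  namespace: monitoring
stringData:
  host: grafana-pg-cluster-rw.monitoring.svc   # assumed CNPG read-write service
  port: "5432"
  dbname: grafana
  user: grafana
  password: managed-by-the-operator            # placeholder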

View File

@@ -1,5 +1,26 @@
kube-prometheus-stack:
# Prometheus Operator
crds:
enabled: true
defaultRules:
create: false
alertmanager:
enabled: false
grafana:
enabled: false
kubeProxy:
enabled: false
kubeControllerManager:
enabled: false
kubeEtcd:
enabled: false
prometheusOperator:
enabled: true
resources:
@@ -8,25 +29,56 @@ kube-prometheus-stack:
memory: 128Mi
limits:
memory: 256Mi
# Prometheus configuration
networkPolicy:
enabled: true
flavor: Cilium
prometheus:
enabled: true
networkPolicy:
enabled: true
flavor: Cilium
cilium: {}
# Disable Thanos integration
thanosService:
enabled: false
thanosServiceMonitor:
enabled: false
thanosServiceExternal:
enabled: false
thanosIngress:
enabled: false
route:
main:
enabled: true
hostnames:
- prometheus.noxxos.nl
parentRefs:
- name: traefik-gateway
namespace: traefik
sectionName: websecure
serviceMonitor:
selfMonitor: false
prometheusSpec:
# Retention
retention: 24h
retentionSize: 15GB
# Enable compaction (was disabled for Thanos)
disableCompaction: false
scrapeInterval: 30s
# Resources
# 3 months of retention (~90 days)
retention: 90d
retentionSize: 100GB
replicas: 1
resources:
requests:
cpu: 200m
memory: 1Gi
cpu: 100m
memory: 400Mi
limits:
memory: 2Gi
# Storage
# Increased storage for 3-month retention
storageSpec:
volumeClaimTemplate:
spec:
@@ -34,26 +86,10 @@ kube-prometheus-stack:
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 20Gi
# Thanos sidecar configuration
thanos:
image: quay.io/thanos/thanos:v0.37.2
version: v0.37.2
objectStorageConfig:
name: thanos-objstore-secret
key: objstore.yml
# External labels for Thanos
externalLabels:
cluster: homelab
prometheus: monitoring/prometheus
# Replicas
replicas: 1
replicaExternalLabelName: prometheus_replica
storage: 150Gi
# Service monitors
scrapeConfigSelectorNilUsesHelmValues: false
serviceMonitorSelectorNilUsesHelmValues: false
podMonitorSelectorNilUsesHelmValues: false
ruleSelectorNilUsesHelmValues: false
@@ -61,30 +97,6 @@ kube-prometheus-stack:
# Additional scrape configs
additionalScrapeConfigs: []
# Alertmanager
alertmanager:
enabled: true
alertmanagerSpec:
replicas: 1
storage:
volumeClaimTemplate:
spec:
storageClassName: ceph-block
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 5Gi
resources:
requests:
cpu: 50m
memory: 128Mi
limits:
memory: 256Mi
# Grafana (disabled - using separate Grafana deployment)
grafana:
enabled: false
# Node Exporter
nodeExporter:
enabled: true
@@ -104,35 +116,3 @@ kube-prometheus-stack:
memory: 128Mi
limits:
memory: 256Mi
# Default rules
defaultRules:
create: true
rules:
alertmanager: true
etcd: false
configReloaders: true
general: true
k8s: true
kubeApiserverAvailability: true
kubeApiserverBurnrate: true
kubeApiserverHistogram: true
kubeApiserverSlos: true
kubeControllerManager: true
kubelet: true
kubeProxy: true
kubePrometheusGeneral: true
kubePrometheusNodeRecording: true
kubernetesApps: true
kubernetesResources: true
kubernetesStorage: true
kubernetesSystem: true
kubeSchedulerAlerting: true
kubeSchedulerRecording: true
kubeStateMetrics: true
network: true
node: true
nodeExporterAlerting: true
nodeExporterRecording: true
prometheus: true
prometheusOperator: true
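
Note on the selector flags above: with serviceMonitorSelectorNilUsesHelmValues, podMonitorSelectorNilUsesHelmValues, scrapeConfigSelectorNilUsesHelmValues and ruleSelectorNilUsesHelmValues all set to false, the chart configures Prometheus with empty selectors, so it discovers monitors and rules cluster-wide instead of only those labelled for this Helm release (namespace scope still follows the chart's default namespace selectors). A minimal sketch of a ServiceMonitor that would now be scraped without any release label (all names are illustrative):

apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: example-app            # illustrative
  namespace: example           # no "release: ..." label required
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: example-app
  endpoints:
    - port: http-metrics       # must match a named port on the target Service
      interval: 30s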

View File

@@ -1,11 +0,0 @@
apiVersion: v2
name: thanos
description: Thanos distributed metrics wrapper chart
type: application
version: 1.0.0
appVersion: "0.40.1"
dependencies:
- name: thanos
version: 1.22.0
repository: oci://ghcr.io/stevehipwell/helm-charts

View File

@@ -1,30 +0,0 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: thanos
namespace: argocd
annotations:
argocd.argoproj.io/sync-wave: "1"
finalizers:
- resources-finalizer.argocd.argoproj.io
spec:
project: default
source:
repoURL: https://git.mvzijl.nl/marco/veda.git
targetRevision: applicationset-rewrite
path: apps/monitoring/thanos
helm:
releaseName: thanos
valueFiles:
- values.yaml
destination:
server: https://kubernetes.default.svc
namespace: monitoring
syncPolicy:
automated:
prune: true
selfHeal: true
syncOptions:
- CreateNamespace=true
- ServerSideApply=true
- SkipDryRunOnMissingResource=true

View File

@@ -1,4 +0,0 @@
{{- range .Values.extraObjects }}
---
{{ toYaml . }}
{{- end }}

View File

@@ -1,130 +0,0 @@
thanos:
# Object storage configuration
objstoreConfig:
create: false # We create the secret via extraObjects
name: thanos-objstore-secret
key: objstore.yml
# Image configuration
image:
registry: quay.io
repository: thanos/thanos
tag: v0.40.1
# Query component
query:
enabled: true
replicaCount: 2
resources:
requests:
cpu: 100m
memory: 256Mi
limits:
memory: 512Mi
stores:
- dnssrv+_grpc._tcp.thanos-storegateway.monitoring.svc.cluster.local
- dnssrv+_grpc._tcp.thanos-receive.monitoring.svc.cluster.local
# Query Frontend
queryFrontend:
enabled: true
replicaCount: 1
resources:
requests:
cpu: 50m
memory: 128Mi
limits:
memory: 256Mi
# Store Gateway
storegateway:
enabled: true
replicaCount: 1
persistence:
enabled: true
storageClass: ceph-block
size: 10Gi
resources:
requests:
cpu: 100m
memory: 512Mi
limits:
memory: 1Gi
# Compactor
compactor:
enabled: true
persistence:
enabled: true
storageClass: ceph-block
size: 10Gi
retentionResolutionRaw: 14d
retentionResolution5m: 90d
retentionResolution1h: 2y
resources:
requests:
cpu: 100m
memory: 512Mi
limits:
memory: 1Gi
extraFlags:
- --deduplication.replica-label=prometheus_replica
- --deduplication.replica-label=replica
- --downsampling.disable=false
- --compact.enable-vertical-compaction
# Receive (for remote write from Prometheus)
receive:
enabled: true
replicaCount: 1
persistence:
enabled: true
storageClass: ceph-block
size: 20Gi
resources:
requests:
cpu: 100m
memory: 512Mi
limits:
memory: 1Gi
# Metrics and caching
# Note: Memcached configuration would be added here if using external caching
# Metrics
metrics:
enabled: true
serviceMonitor:
enabled: true
# S3 Bucket and credentials provisioning
extraObjects:
# ObjectBucketClaim for Thanos metrics
- apiVersion: objectbucket.io/v1alpha1
kind: ObjectBucketClaim
metadata:
name: thanos-metrics
namespace: monitoring
spec:
bucketName: thanos-metrics
storageClassName: ceph-bucket
additionalConfig:
maxSize: "500Gi"
# Secret with S3 credentials (will be populated by Rook)
# This is a placeholder - actual credentials come from the OBC
- apiVersion: v1
kind: Secret
metadata:
name: thanos-objstore-secret
namespace: monitoring
type: Opaque
stringData:
objstore.yml: |-
type: S3
config:
bucket: thanos-metrics
endpoint: rook-ceph-rgw-ceph-objectstore.rook-ceph.svc:80
insecure: true
access_key: ${AWS_ACCESS_KEY_ID}
secret_key: ${AWS_SECRET_ACCESS_KEY}