Compare commits


No commits in common. "applicationset-rewrite" and "main" have entirely different histories.

96 changed files with 28811 additions and 4878 deletions

@@ -1,25 +0,0 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: root
namespace: argocd
finalizers:
- resources-finalizer.argocd.argoproj.io
spec:
project: default
source:
repoURL: https://git.mvzijl.nl/marco/veda.git
targetRevision: main
path: appset
destination:
server: https://kubernetes.default.svc
namespace: argocd
syncPolicy:
automated:
prune: false
selfHeal: true
syncOptions:
- CreateNamespace=true
- ApplyOutOfSyncOnly=true
- PruneLast=true
- PrunePropagationPolicy=foreground

@@ -0,0 +1,63 @@
apiVersion: v1
kind: Namespace
metadata:
name: app-test-1
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: app-test-1-deploy
namespace: app-test-1
spec:
replicas: 1
selector:
matchLabels:
app: app-test-1
template:
metadata:
labels:
app: app-test-1
spec:
containers:
- name: app-test-1
image: nginx
ports:
- name: web
containerPort: 80
---
apiVersion: v1
kind: Service
metadata:
name: app-test-1-svc
namespace: app-test-1
labels:
app: app-test-1
spec:
type: LoadBalancer
ports:
- name: http
port: 80
targetPort: 80
protocol: TCP
selector:
app: app-test-1
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: app-test-1-ingress
namespace: app-test-1
annotations:
traefik.ingress.kubernetes.io/router.entrypoints: web
spec:
rules:
- host: test.noxxos.nl
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: app-test-1-svc
port:
name: http

@@ -1,7 +0,0 @@
apiVersion: v2
name: argocd
version: 1.0.0
dependencies:
- name: argo-cd
version: 9.1.0
repository: https://argoproj.github.io/argo-helm

@@ -1,41 +0,0 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: argocd
namespace: argocd
annotations:
argocd.argoproj.io/sync-wave: "-1" # Sync before other apps
finalizers:
- resources-finalizer.argocd.argoproj.io
spec:
project: default
source:
repoURL: https://git.mvzijl.nl/marco/veda.git
targetRevision: applicationset-rewrite
path: apps/argocd
helm:
releaseName: argocd
valueFiles:
- values.yaml
destination:
server: https://kubernetes.default.svc
namespace: argocd
syncPolicy:
automated:
prune: false # Be careful with pruning ArgoCD itself
selfHeal: true # Auto-fix configuration drift
syncOptions:
- CreateNamespace=true
- PruneLast=true
- PrunePropagationPolicy=foreground
ignoreDifferences:
# Ignore certain fields that change frequently
- group: apps
kind: Deployment
jsonPointers:
- /spec/replicas # If using HPA
- group: ""
kind: Secret
name: argocd-initial-admin-secret
jsonPointers:
- /data # Don't sync the initial password secret

@@ -1,27 +0,0 @@
argo-cd:
global:
domain: argocd.noxxos.nl
logging:
format: json
level: info
configs:
params:
server.insecure: true
server:
ingress:
enabled: false
httproute:
enabled: true
parentRefs:
- name: traefik-gateway
namespace: traefik
sectionName: websecure
hostnames:
- argocd.noxxos.nl
rules:
- matches:
- path:
type: PathPrefix
value: /

@@ -1,7 +0,0 @@
apiVersion: v2
name: authentik
version: 0.0.0
dependencies:
- name: authentik
version: 2025.10.0
repository: https://charts.goauthentik.io

@@ -1,53 +0,0 @@
authentik:
global:
priorityClassName: homelab-critical
authentik:
secret_key: bGd7nChCpPQmypR64rgF
postgresql:
host: file:///postgres-creds/host
name: file:///postgres-creds/dbname
user: file:///postgres-creds/username
password: file:///postgres-creds/password
server:
metrics:
enabled: true
serviceMonitor:
enabled: true
ingress:
enabled: true
hosts:
- authentik.noxxos.nl
- auth.noxxos.nl
- sso.noxxos.nl
annotations:
traefik.ingress.kubernetes.io/router.entrypoints: websecure
volumes:
- name: postgres-creds
secret:
secretName: pg-authentik-cluster-app
volumeMounts:
- name: postgres-creds
mountPath: /postgres-creds
readOnly: true
worker:
volumes:
- name: postgres-creds
secret:
secretName: pg-authentik-cluster-app
volumeMounts:
- name: postgres-creds
mountPath: /postgres-creds
readOnly: true
postgresql:
enabled: false
redis:
enabled: true
image:
repository: redis
tag: 8.2
master:
persistence:
enabled: false
prometheus:
rules:
enabled: true

@@ -1,7 +0,0 @@
apiVersion: v2
name: authentik
version: 1.0.0
dependencies:
- name: authentik
version: 2025.10.1
repository: https://charts.goauthentik.io

@@ -1,40 +0,0 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: authentik
namespace: argocd
annotations:
argocd.argoproj.io/sync-wave: "1"
finalizers:
- resources-finalizer.argocd.argoproj.io
spec:
project: default
source:
repoURL: https://git.mvzijl.nl/marco/veda.git
targetRevision: applicationset-rewrite
path: apps/authentik
helm:
releaseName: authentik
valueFiles:
- values.yaml
destination:
server: https://kubernetes.default.svc
namespace: authentik
syncPolicy:
automated:
prune: true
selfHeal: true
syncOptions:
- CreateNamespace=true
- ServerSideApply=true
managedNamespaceMetadata:
labels:
pod-security.kubernetes.io/enforce: baseline
pod-security.kubernetes.io/audit: baseline
pod-security.kubernetes.io/warn: baseline
ignoreDifferences:
- group: gateway.networking.k8s.io
kind: HTTPRoute
jqPathExpressions:
- .spec.parentRefs[] | .group, .kind
- .spec.rules[].backendRefs[] | .group, .kind, .weight

@@ -1,148 +0,0 @@
authentik:
authentik:
secret_key: "wgAt4swhmThtdOGZAqWHoXb1fLpcBeZvLy5X4RY6z6oAbvZCNLEzeH+ovbvG3mg2vy+zMLv0fpJxIhMy"
postgresql:
host: file:///postgres-creds/host
name: file:///postgres-creds/dbname
username: file:///postgres-creds/username
password: file:///postgres-creds/password
port: file:///postgres-creds/port
server:
route:
main:
enabled: true
hostnames:
- "auth.noxxos.nl"
- "authentik.noxxos.nl"
- "sso.noxxos.nl"
parentRefs:
- name: traefik-gateway
namespace: traefik
sectionName: websecure
resources:
requests:
cpu: 100m
memory: 512Mi
limits:
memory: 1Gi
volumes:
- name: postgres-creds
secret:
secretName: authentik-pg-cluster-app
volumeMounts:
- name: postgres-creds
mountPath: /postgres-creds
readOnly: true
worker:
replicas: 2
resources:
requests:
cpu: 100m
memory: 512Mi
limits:
memory: 1Gi
volumes:
- name: postgres-creds
secret:
secretName: authentik-pg-cluster-app
volumeMounts:
- name: postgres-creds
mountPath: /postgres-creds
readOnly: true
blueprints:
configMaps:
- authentik-grafana-blueprint
additionalObjects:
- apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
name: authentik-pg-cluster
namespace: authentik
spec:
instances: 2
postgresql:
parameters:
max_connections: "200"
shared_buffers: "25MB"
effective_cache_size: "75MB"
maintenance_work_mem: "6400kB"
checkpoint_completion_target: "0.9"
wal_buffers: "768kB"
default_statistics_target: "100"
random_page_cost: "1.1"
effective_io_concurrency: "300"
work_mem: "640kB"
huge_pages: "off"
max_wal_size: "128MB"
bootstrap:
initdb:
database: authentik
owner: authentik
storage:
storageClass: local-path
size: 10Gi
resources:
requests:
cpu: 100m
memory: 100Mi
limits:
memory: 512Mi
plugins:
- enabled: true
name: barman-cloud.cloudnative-pg.io
isWALArchiver: true
parameters:
barmanObjectName: authentik-backup-store
- apiVersion: barmancloud.cnpg.io/v1
kind: ObjectStore
metadata:
name: authentik-backup-store
namespace: authentik
spec:
retentionPolicy: "30d"
configuration:
destinationPath: s3://postgresql-backups/authentik
endpointURL: http://rook-ceph-rgw-ceph-objectstore.rook-ceph.svc:80
s3Credentials:
accessKeyId:
name: authentik-pg-backups
key: AWS_ACCESS_KEY_ID
secretAccessKey:
name: authentik-pg-backups
key: AWS_SECRET_ACCESS_KEY
wal:
compression: bzip2
data:
compression: bzip2
- apiVersion: postgresql.cnpg.io/v1
kind: ScheduledBackup
metadata:
name: authentik-pg-backup
namespace: authentik
spec:
method: plugin
immediate: true
schedule: "0 30 3 * * *" # 03:30 daily
backupOwnerReference: self
cluster:
name: authentik-pg-cluster
pluginConfiguration:
name: barman-cloud.cloudnative-pg.io
- apiVersion: objectbucket.io/v1alpha1
kind: ObjectBucketClaim
metadata:
name: authentik-pg-backups
namespace: authentik
spec:
bucketName: postgresql-backups
storageClassName: ceph-bucket
additionalConfig:
maxSize: "50Gi"

@@ -1,7 +0,0 @@
apiVersion: v2
name: rook-ceph-cluster
version: 1.0.0
dependencies:
- name: rook-ceph-cluster
version: v1.18.6
repository: https://charts.rook.io/release

@@ -1,38 +0,0 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: rook-ceph-cluster
namespace: argocd
annotations:
argocd.argoproj.io/sync-wave: "-3"
finalizers:
- resources-finalizer.argocd.argoproj.io
spec:
project: default
source:
repoURL: https://git.mvzijl.nl/marco/veda.git
targetRevision: applicationset-rewrite
path: apps/ceph/cluster
helm:
releaseName: rook-ceph-cluster
valueFiles:
- values.yaml
destination:
server: https://kubernetes.default.svc
namespace: rook-ceph
ignoreDifferences:
- group: gateway.networking.k8s.io
kind: HTTPRoute
jsonPointers:
- /spec/parentRefs/0/group
- /spec/parentRefs/0/kind
- /spec/rules/0/backendRefs/0/group
- /spec/rules/0/backendRefs/0/kind
- /spec/rules/0/backendRefs/0/weight
syncPolicy:
automated:
prune: true
selfHeal: true
syncOptions:
- CreateNamespace=true
- ServerSideApply=true

@@ -1,206 +0,0 @@
rook-ceph-cluster:
operatorNamespace: rook-ceph
toolbox:
enabled: true
resources:
requests:
cpu: 100m
memory: 128Mi
monitoring:
enabled: true
ingress:
dashboard: {}
route:
dashboard:
enabled: true
host:
name: ceph.noxxos.nl
path: "/"
pathType: PathPrefix
parentRefs:
- name: traefik-gateway
namespace: traefik
sectionName: websecure
cephClusterSpec:
crashCollector:
disable: true
mgr:
modules:
- name: pg_autoscaler
enabled: true
- name: devicehealth
enabled: true
- name: diskprediction_local
enabled: true
- name: rook
enabled: true
dashboard:
enabled: true
ssl: false
port: 7000
resources:
mgr:
limits:
memory: "1Gi"
requests:
cpu: "100m"
memory: "256Mi"
mon:
limits:
memory: "2Gi"
requests:
cpu: "200m"
memory: "1Gi"
osd:
limits:
memory: "4Gi"
requests:
cpu: "100m"
memory: "2Gi"
prepareosd:
requests:
cpu: "100m"
memory: "50Mi"
mgr-sidecar:
limits:
memory: "100Mi"
requests:
cpu: "100m"
memory: "40Mi"
crashcollector:
limits:
memory: "60Mi"
requests:
cpu: "100m"
memory: "60Mi"
logcollector:
limits:
memory: "1Gi"
requests:
cpu: "100m"
memory: "100Mi"
cleanup:
limits:
memory: "1Gi"
requests:
cpu: "100m"
memory: "100Mi"
cephBlockPools:
- name: ceph-blockpool
spec:
replicated:
size: 2
failureDomain: host
deviceClass: hdd
parameters:
min_size: "1"
storageClass:
name: ceph-block
enabled: true
isDefault: true
reclaimPolicy: Delete
allowVolumeExpansion: true
volumeBindingMode: "Immediate"
parameters:
imageFeatures: "layering,exclusive-lock,object-map,fast-diff"
csi.storage.k8s.io/fstype: ext4
csi.storage.k8s.io/provisioner-secret-name: rook-csi-rbd-provisioner
csi.storage.k8s.io/provisioner-secret-namespace: rook-ceph
csi.storage.k8s.io/controller-expand-secret-name: rook-csi-rbd-provisioner
csi.storage.k8s.io/controller-expand-secret-namespace: rook-ceph
csi.storage.k8s.io/node-stage-secret-name: rook-csi-rbd-node
csi.storage.k8s.io/node-stage-secret-namespace: rook-ceph
cephFileSystems:
- name: ceph-filesystem
spec:
metadataPool:
failureDomain: host
replicated:
size: 2
deviceClass: hdd
parameters:
min_size: "1"
dataPools:
- failureDomain: host
replicated:
size: 2
deviceClass: hdd
parameters:
min_size: "1"
name: data0
preserveFilesystemOnDelete: true
metadataServer:
activeCount: 1
activeStandby: true
resources:
requests:
cpu: "250m"
memory: "1Gi"
priorityClassName: system-cluster-critical
storageClass:
name: ceph-filesystem
enabled: true
isDefault: false
pool: data0
reclaimPolicy: Retain
allowVolumeExpansion: true
volumeBindingMode: "Immediate"
parameters:
csi.storage.k8s.io/fstype: ext4
csi.storage.k8s.io/provisioner-secret-name: rook-csi-cephfs-provisioner
csi.storage.k8s.io/provisioner-secret-namespace: rook-ceph
csi.storage.k8s.io/controller-expand-secret-name: rook-csi-cephfs-provisioner
csi.storage.k8s.io/controller-expand-secret-namespace: rook-ceph
csi.storage.k8s.io/node-stage-secret-name: rook-csi-cephfs-node
csi.storage.k8s.io/node-stage-secret-namespace: rook-ceph
cephObjectStores:
- name: ceph-objectstore
spec:
metadataPool:
failureDomain: host
replicated:
size: 2
deviceClass: hdd
parameters:
min_size: "1"
dataPool:
failureDomain: host
replicated:
size: 2
deviceClass: hdd
parameters:
min_size: "1"
preservePoolsOnDelete: true
gateway:
port: 80
instances: 1
resources:
requests:
cpu: "200m"
memory: "512Mi"
storageClass:
name: ceph-bucket
enabled: true
reclaimPolicy: Delete
volumeBindingMode: "Immediate"
route:
enabled: true
host:
name: s3.noxxos.nl
path: "/"
pathType: PathPrefix
parentRefs:
- name: traefik-gateway
namespace: traefik
sectionName: websecure

@@ -1,7 +0,0 @@
apiVersion: v2
name: rook-ceph-operator
version: 1.0.0
dependencies:
- name: rook-ceph
version: v1.18.6
repository: https://charts.rook.io/release

@@ -1,34 +0,0 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: rook-ceph-operator
namespace: argocd
annotations:
argocd.argoproj.io/sync-wave: "-4"
finalizers:
- resources-finalizer.argocd.argoproj.io
spec:
project: default
source:
repoURL: https://git.mvzijl.nl/marco/veda.git
targetRevision: applicationset-rewrite
path: apps/ceph/operator
helm:
releaseName: rook-ceph-operator
valueFiles:
- values.yaml
destination:
server: https://kubernetes.default.svc
namespace: rook-ceph
syncPolicy:
automated:
prune: true
selfHeal: true
syncOptions:
- CreateNamespace=true
- ServerSideApply=true
managedNamespaceMetadata:
labels:
pod-security.kubernetes.io/enforce: privileged
pod-security.kubernetes.io/audit: privileged
pod-security.kubernetes.io/warn: privileged

@@ -1,24 +0,0 @@
rook-ceph:
crds:
enabled: true
monitoring:
enabled: true
priorityClassName: system-cluster-critical
csi:
enableMetadata: true
serviceMonitor:
enabled: true
enableDiscoveryDaemon: true
discoveryDaemonInterval: 6h
resources:
requests:
cpu: 100m
memory: 128Mi
limits:
cpu: 500m
memory: 512Mi

@@ -1,7 +0,0 @@
apiVersion: v2
name: cert-manager
version: 1.0.0
dependencies:
- name: cert-manager
version: v1.19.1
repository: oci://quay.io/jetstack/charts

@@ -1,31 +0,0 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: cert-manager
namespace: argocd
annotations:
argocd.argoproj.io/sync-wave: "-5"
finalizers:
- resources-finalizer.argocd.argoproj.io
spec:
project: default
source:
repoURL: https://git.mvzijl.nl/marco/veda.git
targetRevision: applicationset-rewrite
path: apps/cert-manager
helm:
releaseName: cert-manager
valueFiles:
- values.yaml
destination:
server: https://kubernetes.default.svc
namespace: cert-manager
syncPolicy:
automated:
prune: true
selfHeal: true
syncOptions:
- CreateNamespace=true
- PruneLast=true
- PrunePropagationPolicy=foreground
- ServerSideApply=true

@@ -1,93 +0,0 @@
cert-manager:
installCRDs: true
global:
leaderElection:
namespace: cert-manager
prometheus:
enabled: true
resources:
requests:
cpu: 10m
memory: 32Mi
limits:
cpu: 100m
memory: 128Mi
webhook:
resources:
requests:
cpu: 10m
memory: 32Mi
limits:
cpu: 100m
memory: 128Mi
cainjector:
resources:
requests:
cpu: 10m
memory: 32Mi
limits:
cpu: 100m
memory: 128Mi
extraObjects:
- |
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
name: selfsigned-issuer
spec:
selfSigned: {}
# CA Certificate (acts as root CA)
- |
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: selfsigned-ca
namespace: cert-manager
spec:
isCA: true
commonName: noxxos.nl
secretName: selfsigned-ca-secret
privateKey:
algorithm: ECDSA
size: 256
issuerRef:
name: selfsigned-issuer
kind: ClusterIssuer
# CA ClusterIssuer (uses the CA cert above)
- |
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
name: ca-issuer
spec:
ca:
secretName: selfsigned-ca-secret
# Wildcard certificate for *.noxxos.nl
- |
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: wildcard-noxxos-nl
namespace: traefik
spec:
secretName: wildcard-noxxos-nl-tls
issuerRef:
name: ca-issuer
kind: ClusterIssuer
dnsNames:
- "*.noxxos.nl"
- "noxxos.nl"
duration: 2160h # 90 days
renewBefore: 360h # 15 days

@@ -1,132 +0,0 @@
# Mirroring CloudNativePG Barman Plugin
## Setup Mirror Repository
1. **Clone the upstream repository:**
```bash
cd /tmp
git clone --mirror https://github.com/cloudnative-pg/plugin-barman-cloud.git
cd plugin-barman-cloud.git
```
2. **Push to your Git server:**
```bash
# Create repo on your Git server first (git.mvzijl.nl)
# Then push:
git push --mirror https://git.mvzijl.nl/marco/plugin-barman-cloud.git
```
3. **Set up periodic sync (optional):**
```bash
# Create a script to sync weekly
cat > /usr/local/bin/sync-barman-plugin.sh <<'EOF'
#!/bin/bash
cd /var/git/mirrors/plugin-barman-cloud.git
git fetch --prune origin
git push --mirror https://git.mvzijl.nl/marco/plugin-barman-cloud.git
EOF
chmod +x /usr/local/bin/sync-barman-plugin.sh
# Add to cron (weekly on Sunday at 2 AM)
echo "0 2 * * 0 /usr/local/bin/sync-barman-plugin.sh" | crontab -
```
## Update Application Reference
After mirroring, update the application.yaml to use your mirror:
```yaml
spec:
source:
repoURL: https://git.mvzijl.nl/marco/plugin-barman-cloud.git
targetRevision: main # or specific tag like v1.0.0
path: deployments/manifests
```
## Version Pinning Strategy
Instead of tracking `main`, pin to specific releases:
```yaml
spec:
source:
repoURL: https://git.mvzijl.nl/marco/plugin-barman-cloud.git
targetRevision: v1.0.0 # Pin to specific version
path: deployments/manifests
```
This gives you:
- ✅ Predictable deployments
- ✅ Controlled updates
- ✅ Rollback capability
## Update Process
When a new version is released:
1. **Check upstream for updates:**
```bash
cd /var/git/mirrors/plugin-barman-cloud.git
git fetch origin
git tag -l
```
2. **Review changes:**
```bash
git log HEAD..origin/main --oneline
git diff HEAD..origin/main deployments/manifests/
```
3. **Sync to your mirror:**
```bash
git push --mirror https://git.mvzijl.nl/marco/plugin-barman-cloud.git
```
4. **Update application.yaml:**
```yaml
targetRevision: v1.1.0 # Update to new version
```
5. **Test and deploy:**
```bash
git add apps/cloudnative-pg-plugin/application.yaml
git commit -m "Update barman plugin to v1.1.0"
git push
```
## Monitoring Upstream
Subscribe to releases:
- GitHub: Watch → Custom → Releases only
- RSS: `https://github.com/cloudnative-pg/plugin-barman-cloud/releases.atom`
## Alternative: Subtree Approach
Instead of mirroring, you could use git subtree:
```bash
cd /Users/marco/Documents/Hobby/Veda/talos
git subtree add --prefix vendor/plugin-barman-cloud \
https://github.com/cloudnative-pg/plugin-barman-cloud.git main --squash
# Then reference in application:
# path: vendor/plugin-barman-cloud/deployments/manifests
```
Update when needed:
```bash
git subtree pull --prefix vendor/plugin-barman-cloud \
https://github.com/cloudnative-pg/plugin-barman-cloud.git main --squash
```
## Recommended Approach
For your setup, I recommend:
1. **Mirror to your Git server** at `git.mvzijl.nl/marco/plugin-barman-cloud`
2. **Pin to specific versions** (not `main`)
3. **Review updates** before applying
4. **Set up monitoring** for new releases
This gives you the best balance of control and maintainability.

@@ -1,301 +0,0 @@
# CloudNativePG Barman-Cloud Plugin
## Overview
The Barman Cloud Plugin provides object storage backup capabilities for CloudNativePG using the Barman toolset.
**Important**: As of CloudNativePG v1.26+, the native `barmanObjectStore` backup method is **deprecated**. You should use this plugin instead.
## Why This Plugin is Required
From the CloudNativePG 1.27 documentation:
> Starting with version 1.26, native backup and recovery capabilities are being progressively phased out of the core operator and moved to official CNPG-I plugins.
The built-in barman integration (`method: barmanObjectStore`) is deprecated and will be removed in future versions. This plugin provides the official replacement.
## What This Plugin Provides
- ✅ **WAL archiving** to S3-compatible object stores
- ✅ **Base backups** with compression and encryption
- ✅ **Point-in-time recovery (PITR)**
- ✅ **Retention policies** for automated cleanup
- ✅ **Backup from standby** servers
- ✅ **Support for multiple storage backends**: S3, Azure Blob, GCS, MinIO, Ceph S3 (RGW)
## Installation
This application deploys the plugin to the `cnpg-system` namespace where the CloudNativePG operator runs.
The plugin will be available for all PostgreSQL clusters managed by CloudNativePG.
## Configuration in PostgreSQL Clusters
### Using the Plugin (New Method)
```yaml
apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
name: my-cluster
spec:
backup:
target: prefer-standby
# Use the plugin method (required for v1.26+)
method: plugin
# Plugin configuration
pluginConfiguration:
name: barman-cloud.cloudnative-pg.io
# S3 configuration
barmanObjectStore:
destinationPath: s3://postgres-backups/
endpointURL: http://rook-ceph-rgw-ceph-objectstore.rook-ceph.svc:80
# Credentials
s3Credentials:
accessKeyId:
name: backup-credentials
key: ACCESS_KEY_ID
secretAccessKey:
name: backup-credentials
key: ACCESS_SECRET_KEY
# Compression and parallelism
data:
compression: bzip2
jobs: 2
immediateCheckpoint: true
wal:
compression: bzip2
maxParallel: 2
# Retention policy
retentionPolicy: "30d"
# Tags for organization
tags:
environment: "production"
cluster: "my-cluster"
```
### Old Method (Deprecated)
```yaml
# ❌ DON'T USE - This is deprecated
spec:
backup:
method: barmanObjectStore # Deprecated!
barmanObjectStore:
# ... config
```
## WAL Archiving
The plugin also handles WAL archiving. Configure it at the cluster level:
```yaml
apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
name: my-cluster
spec:
backup:
# Backup configuration (as above)
...
# WAL archiving uses the same plugin configuration
# Automatically enabled when backup is configured
```
## Scheduled Backups
Create scheduled backups using the plugin:
```yaml
apiVersion: postgresql.cnpg.io/v1
kind: ScheduledBackup
metadata:
name: daily-backup
spec:
schedule: "0 0 2 * * *" # 2 AM daily
backupOwnerReference: self
cluster:
name: my-cluster
# Use plugin method
method: plugin
# Plugin configuration (or inherits from cluster)
pluginConfiguration:
name: barman-cloud.cloudnative-pg.io
```
## On-Demand Backups
Trigger manual backups:
```yaml
apiVersion: postgresql.cnpg.io/v1
kind: Backup
metadata:
name: manual-backup
spec:
cluster:
name: my-cluster
method: plugin
pluginConfiguration:
name: barman-cloud.cloudnative-pg.io
```
Or use kubectl:
```bash
kubectl cnpg backup my-cluster --method plugin
```
## Retention Policies
The plugin supports advanced retention policies:
```yaml
pluginConfiguration:
barmanObjectStore:
retentionPolicy: "30d" # Keep backups for 30 days
# or
# retentionPolicy: "7 days"
# retentionPolicy: "4 weeks"
# retentionPolicy: "3 months"
```
## Supported Storage Backends
### AWS S3
```yaml
destinationPath: s3://bucket-name/
# endpointURL not needed for AWS S3
```
### Ceph S3 (RGW) - Your Setup
```yaml
destinationPath: s3://postgres-backups/
endpointURL: http://rook-ceph-rgw-ceph-objectstore.rook-ceph.svc:80
```
### Azure Blob Storage
```yaml
destinationPath: https://storageaccount.blob.core.windows.net/container/
```
### Google Cloud Storage
```yaml
destinationPath: gs://bucket-name/
```
### MinIO
```yaml
destinationPath: s3://bucket-name/
endpointURL: http://minio:9000
```
## Verification
After deploying, verify the plugin is running:
```bash
# Check plugin deployment
kubectl get deployment -n cnpg-system | grep plugin
# Check plugin pods
kubectl get pods -n cnpg-system -l app=barman-cloud-plugin
# Verify plugin is registered
kubectl get configmap -n cnpg-system cnpg-plugin-registry -o yaml
```
## Troubleshooting
### Plugin Not Found
If you see errors like "plugin not found":
```bash
# Check if plugin is deployed
kubectl get pods -n cnpg-system -l app=barman-cloud-plugin
# Check operator logs
kubectl logs -n cnpg-system -l app.kubernetes.io/name=cloudnative-pg
```
### Backup Failures
```bash
# Check backup status
kubectl get backup -n <namespace>
# Check backup logs
kubectl describe backup <backup-name> -n <namespace>
# Check PostgreSQL pod logs
kubectl logs -n <namespace> <postgres-pod> | grep -i backup
```
### WAL Archiving Issues
```bash
# Check WAL archive status
kubectl exec -it -n <namespace> <postgres-pod> -- \
psql -c "SELECT * FROM pg_stat_archiver;"
# Check plugin logs
kubectl logs -n cnpg-system -l app=barman-cloud-plugin
```
## Migration from Built-in to Plugin
If you're migrating from the deprecated `barmanObjectStore` method:
1. **Deploy this plugin application**
2. **Update your Cluster resource**:
```yaml
spec:
backup:
method: plugin # Change from barmanObjectStore
pluginConfiguration:
name: barman-cloud.cloudnative-pg.io
barmanObjectStore:
# Keep same configuration
```
3. **Existing backups remain accessible** - the plugin can read backups created by the built-in method
## Best Practices
1. ✅ **Always use the plugin** for CloudNativePG v1.26+
2. ✅ **Configure retention policies** to manage storage costs
3. ✅ **Enable backup from standby** to reduce primary load
4. ✅ **Use compression** (bzip2) to reduce storage usage
5. ✅ **Set up scheduled backups** for automated protection
6. ✅ **Test recovery procedures** regularly
7. ✅ **Monitor backup status** with Prometheus metrics
8. ✅ **Tag backups** for easy identification and filtering
## Next Steps
1. Deploy this application: `git add . && git commit && git push`
2. Wait for ArgoCD to sync
3. Update your PostgreSQL Cluster to use `method: plugin`
4. Create an S3 bucket for backups (ObjectBucketClaim)
5. Configure backup credentials
6. Test with an on-demand backup
## Additional Resources
- [Barman Cloud Plugin Documentation](https://cloudnative-pg.io/plugin-barman-cloud/)
- [CloudNativePG Backup Guide](https://cloudnative-pg.io/documentation/1.27/backup/)
- [CNPG-I Plugin Architecture](https://cloudnative-pg.io/documentation/1.27/cnpg_i/)

@@ -1,34 +0,0 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: cloudnative-pg-plugin
namespace: argocd
annotations:
argocd.argoproj.io/sync-wave: "0"
finalizers:
- resources-finalizer.argocd.argoproj.io
spec:
project: default
source:
repoURL: https://git.mvzijl.nl/marco/plugin-barman-cloud.git
targetRevision: v0.9.0
path: .
directory:
include: 'manifest.yaml'
destination:
server: https://kubernetes.default.svc
namespace: cnpg-system
syncPolicy:
automated:
prune: true
selfHeal: true
syncOptions:
- CreateNamespace=false
- ServerSideApply=true
# Ensure operator is healthy before deploying plugin
retry:
limit: 5
backoff:
duration: 5s
factor: 2
maxDuration: 3m

@@ -1,11 +0,0 @@
apiVersion: v2
name: cloudnative-pg
description: CloudNativePG operator wrapper chart
type: application
version: 1.0.0
appVersion: "1.27.1"
dependencies:
- name: cloudnative-pg
version: 0.26.1
repository: https://cloudnative-pg.github.io/charts

@@ -1,470 +0,0 @@
# CloudNativePG Setup Guide
## Overview
CloudNativePG is a Kubernetes operator that manages PostgreSQL clusters using Kubernetes custom resources. It provides:
- High availability with automatic failover
- Automated backups to S3-compatible storage
- Point-in-time recovery (PITR)
- Rolling updates with zero downtime
- Connection pooling with PgBouncer
- Monitoring with Prometheus
## Architecture
```ascii
┌─────────────────────────────────────┐
│ CloudNativePG Operator │
│ (Manages PostgreSQL Clusters) │
└─────────────────────────────────────┘
├─────────────────────────┐
│ │
┌────────▼────────┐ ┌───────▼────────┐
│ PostgreSQL │ │ PostgreSQL │
│ Primary │◄─────►│ Replica │
│ (Read/Write) │ │ (Read-only) │
└─────────────────┘ └────────────────┘
│ (Backups)
┌─────────────────┐
│ Ceph S3 (RGW) │
│ Object Storage │
└─────────────────┘
```
## Current Configuration
### Operator Settings
- **Namespace**: `cnpg-system`
- **Monitoring**: Enabled (PodMonitor for Prometheus)
- **Grafana Dashboard**: Auto-created
- **Priority Class**: `system-cluster-critical`
- **Resource Limits**: Conservative (50m CPU, 100Mi RAM)
### Example Cluster (Commented Out)
The `values.yaml` includes a commented example cluster configuration. See "Creating Your First Cluster" below.
## Creating Your First Cluster
### Option 1: Using extraObjects in values.yaml
Uncomment the `extraObjects` section in `values.yaml` and customize:
```yaml
extraObjects:
- apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
name: my-postgres
namespace: cnpg-system
spec:
instances: 2 # 1 primary + 1 replica
storage:
size: 50Gi
storageClass: ceph-block
```
### Option 2: Separate Application
For production, create a separate ArgoCD Application for each database cluster:
```bash
mkdir -p apps/databases/my-app-db
```
Create `apps/databases/my-app-db/cluster.yaml`:
```yaml
apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
name: my-app-db
namespace: my-app
spec:
instances: 3
postgresql:
parameters:
max_connections: "200"
shared_buffers: "256MB"
storage:
size: 100Gi
storageClass: ceph-block
monitoring:
enablePodMonitor: true
backup:
retentionPolicy: "30d"
barmanObjectStore:
destinationPath: s3://my-app-backups/
endpointURL: http://rook-ceph-rgw-ceph-objectstore.rook-ceph.svc:80
s3Credentials:
accessKeyId:
name: backup-credentials
key: ACCESS_KEY_ID
secretAccessKey:
name: backup-credentials
key: ACCESS_SECRET_KEY
data:
compression: bzip2
wal:
compression: bzip2
```
## Backup Configuration
### Prerequisites
1. **Create S3 Bucket** (using Ceph Object Storage):
```yaml
apiVersion: objectbucket.io/v1alpha1
kind: ObjectBucketClaim
metadata:
name: postgres-backups
namespace: cnpg-system
spec:
bucketName: postgres-backups
storageClassName: ceph-bucket
additionalConfig:
maxSize: "500Gi"
```
2. **Create Credentials Secret**:
After creating the ObjectBucketClaim, Rook will generate credentials:
```bash
# Get the generated credentials
kubectl get secret postgres-backups -n cnpg-system -o yaml
# The secret will contain:
# - AWS_ACCESS_KEY_ID
# - AWS_SECRET_ACCESS_KEY
```
3. **Reference in Cluster Spec**:
Use these credentials in your PostgreSQL cluster backup configuration (see example above).
## Database Access
### Connect to Primary (Read/Write)
```bash
# Service name pattern: <cluster-name>-rw
kubectl port-forward -n cnpg-system svc/my-postgres-rw 5432:5432
# Connect with psql
psql -h localhost -U postgres -d postgres
```
### Connect to Replica (Read-Only)
```bash
# Service name pattern: <cluster-name>-ro
kubectl port-forward -n cnpg-system svc/my-postgres-ro 5432:5432
```
### Get Superuser Password
```bash
# Password stored in secret: <cluster-name>-superuser
kubectl get secret my-postgres-superuser -n cnpg-system -o jsonpath='{.data.password}' | base64 -d
```
### Create Application User
```bash
# Connect to database
kubectl exec -it -n cnpg-system my-postgres-1 -- psql
-- Create database and user
CREATE DATABASE myapp;
CREATE USER myapp_user WITH PASSWORD 'secure-password';
GRANT ALL PRIVILEGES ON DATABASE myapp TO myapp_user;
```
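Alternatively, roles can be managed declaratively on the `Cluster` resource rather than by hand. A minimal sketch (role and secret names are illustrative; the referenced secret is assumed to be of type `kubernetes.io/basic-auth`):
```yaml
spec:
  managed:
    roles:
      - name: myapp_user          # illustrative role name
        ensure: present
        login: true
        connectionLimit: 50
        passwordSecret:
          name: myapp-user-creds  # assumed basic-auth secret holding the password
```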
## Connection from Applications
### Create Secret for Application
```yaml
apiVersion: v1
kind: Secret
metadata:
name: postgres-credentials
namespace: my-app
type: Opaque
stringData:
username: myapp_user
password: secure-password
database: myapp
host: my-postgres-rw.cnpg-system.svc
port: "5432"
```
### Use in Application
```yaml
env:
- name: DATABASE_URL
value: "postgresql://$(POSTGRES_USER):$(POSTGRES_PASSWORD)@$(POSTGRES_HOST):$(POSTGRES_PORT)/$(POSTGRES_DATABASE)"
- name: POSTGRES_USER
valueFrom:
secretKeyRef:
name: postgres-credentials
key: username
- name: POSTGRES_PASSWORD
valueFrom:
secretKeyRef:
name: postgres-credentials
key: password
- name: POSTGRES_HOST
valueFrom:
secretKeyRef:
name: postgres-credentials
key: host
- name: POSTGRES_PORT
valueFrom:
secretKeyRef:
name: postgres-credentials
key: port
- name: POSTGRES_DATABASE
valueFrom:
secretKeyRef:
name: postgres-credentials
key: database
```
## Monitoring
### Prometheus Metrics
CloudNativePG exposes metrics via PodMonitor. Check Prometheus for:
- `cnpg_pg_stat_database_*` - Database statistics
- `cnpg_pg_replication_*` - Replication lag
- `cnpg_backends_*` - Connection pool stats
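As an example, an alert on replication lag could be declared alongside the operator (a sketch: it assumes the Prometheus operator CRDs are installed, that `cnpg_pg_replication_lag` is exported by the default monitoring queries, and picks an arbitrary 30s threshold):
```yaml
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: cnpg-replication-lag
  namespace: cnpg-system
spec:
  groups:
    - name: cloudnative-pg
      rules:
        - alert: CNPGHighReplicationLag
          expr: max by (pod) (cnpg_pg_replication_lag) > 30
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: "Replication lag above 30s on {{ $labels.pod }}"
```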
### Check Cluster Status
```bash
# Get cluster status
kubectl get cluster -n cnpg-system
# Detailed cluster info
kubectl describe cluster my-postgres -n cnpg-system
# Check pods
kubectl get pods -n cnpg-system -l cnpg.io/cluster=my-postgres
```
### View Logs
```bash
# Operator logs
kubectl logs -n cnpg-system -l app.kubernetes.io/name=cloudnative-pg --tail=100
# PostgreSQL logs
kubectl logs -n cnpg-system my-postgres-1 --tail=100
```
## Backup and Recovery
### Manual Backup
```bash
kubectl cnpg backup my-postgres -n cnpg-system
```
### List Backups
```bash
kubectl get backup -n cnpg-system
```
### Point-in-Time Recovery
Create a new cluster from a backup:
```yaml
apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
name: restored-cluster
spec:
instances: 2
bootstrap:
recovery:
source: my-postgres
recoveryTarget:
targetTime: "2024-11-09 10:00:00"
externalClusters:
- name: my-postgres
barmanObjectStore:
destinationPath: s3://postgres-backups/
endpointURL: http://rook-ceph-rgw-ceph-objectstore.rook-ceph.svc:80
s3Credentials:
accessKeyId:
name: backup-credentials
key: ACCESS_KEY_ID
secretAccessKey:
name: backup-credentials
key: ACCESS_SECRET_KEY
```
## Maintenance Operations
### Scale Replicas
```bash
# Scaling is done by changing spec.instances on the Cluster resource
kubectl patch cluster my-postgres -n cnpg-system --type merge -p '{"spec":{"instances":3}}'
```
### Switchover (Promote Replica)
```bash
kubectl cnpg promote my-postgres my-postgres-2 -n cnpg-system
```
### Restart Cluster
```bash
kubectl cnpg restart my-postgres -n cnpg-system
```
## Production Recommendations
### 1. High Availability
- Use at least 3 instances (1 primary + 2 replicas)
- Spread across availability zones using pod anti-affinity (see the sketch below)
- Configure automatic failover thresholds
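A minimal sketch of these points on a `Cluster` spec (the cluster name is illustrative; on a single-site homelab the hostname topology key stands in for zones):
```yaml
apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
  name: my-postgres
spec:
  instances: 3                            # 1 primary + 2 replicas
  affinity:
    enablePodAntiAffinity: true
    podAntiAffinityType: required         # use "preferred" for a soft rule
    topologyKey: kubernetes.io/hostname   # or topology.kubernetes.io/zone
  failoverDelay: 0                        # seconds to wait before failing over an unreachable primary
  storage:
    size: 100Gi
    storageClass: ceph-block
```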
### 2. Resource Configuration
- **Small databases** (<10GB): 2 CPU, 4Gi RAM, 2 instances
- **Medium databases** (10-100GB): 4 CPU, 8Gi RAM, 3 instances
- **Large databases** (>100GB): 8 CPU, 16Gi RAM, 3+ instances
### 3. PostgreSQL Tuning
- Adjust `shared_buffers` to 25% of RAM (worked example below)
- Set `effective_cache_size` to 50-75% of RAM
- Tune connection limits based on application needs
- Use `random_page_cost: 1.1` for SSD storage
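Applying these rules of thumb to a cluster whose pods get 8Gi of memory (an assumed figure) gives roughly:
```yaml
spec:
  resources:
    requests:
      memory: 8Gi
    limits:
      memory: 8Gi
  postgresql:
    parameters:
      shared_buffers: "2GB"         # ~25% of 8Gi
      effective_cache_size: "6GB"   # ~75% of 8Gi
      max_connections: "200"        # match the application's pool size
      work_mem: "16MB"              # sized against max_connections
      random_page_cost: "1.1"       # SSD/Ceph-backed storage
```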
### 4. Backup Strategy
- Enable automated backups to S3
- Set retention policy (e.g., 30 days)
- Test recovery procedures regularly
- Monitor backup success
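Tying these together, a daily ScheduledBackup for the example cluster above could look like this (a sketch; the schedule and names are illustrative, and the backup settings are inherited from the cluster's `backup` stanza):
```yaml
apiVersion: postgresql.cnpg.io/v1
kind: ScheduledBackup
metadata:
  name: my-app-db-daily
  namespace: my-app
spec:
  schedule: "0 0 3 * * *"   # six fields (seconds first): 03:00 every day
  immediate: true
  backupOwnerReference: self
  cluster:
    name: my-app-db
```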
### 5. Monitoring
- Set up alerts for replication lag
- Monitor connection pool saturation
- Track query performance
- Watch disk space usage
### 6. Security
- Use strong passwords (stored in Kubernetes secrets)
- Enable SSL/TLS for connections
- Implement network policies (sketched below)
- Regular security updates
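For the network-policy item, a sketch that admits the application namespace plus intra-namespace traffic (replication and operator probes); namespace and label names are illustrative:
```yaml
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: allow-myapp-to-postgres
  namespace: cnpg-system
spec:
  podSelector:
    matchLabels:
      cnpg.io/cluster: my-postgres        # CNPG-managed pods carry this label
  policyTypes:
    - Ingress
  ingress:
    # application traffic from its own namespace
    - from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: my-app
      ports:
        - protocol: TCP
          port: 5432
    # keep replication and operator traffic within cnpg-system working
    - from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: cnpg-system
```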
## Troubleshooting
### Cluster Not Starting
```bash
# Check operator logs
kubectl logs -n cnpg-system -l app.kubernetes.io/name=cloudnative-pg
# Check events
kubectl get events -n cnpg-system --sort-by='.lastTimestamp'
# Check PVC status
kubectl get pvc -n cnpg-system
```
### Replication Issues
```bash
# Check replication status
kubectl exec -it -n cnpg-system my-postgres-1 -- psql -c "SELECT * FROM pg_stat_replication;"
# Check cluster status
kubectl get cluster my-postgres -n cnpg-system -o yaml
```
### Backup Failures
```bash
# Check backup status
kubectl get backup -n cnpg-system
# View backup logs
kubectl logs -n cnpg-system my-postgres-1 | grep -i backup
# Test S3 connectivity
kubectl exec -it -n cnpg-system my-postgres-1 -- curl -I http://rook-ceph-rgw-ceph-objectstore.rook-ceph.svc:80
```
### High Resource Usage
```bash
# Check resource usage
kubectl top pods -n cnpg-system
# Check PostgreSQL connections
kubectl exec -it -n cnpg-system my-postgres-1 -- psql -c "SELECT count(*) FROM pg_stat_activity;"
# Identify slow queries
kubectl exec -it -n cnpg-system my-postgres-1 -- psql -c "SELECT pid, now() - pg_stat_activity.query_start AS duration, query FROM pg_stat_activity WHERE state = 'active' ORDER BY duration DESC;"
```
## Useful Commands
```bash
# Install kubectl-cnpg plugin
curl -sSfL https://github.com/cloudnative-pg/cloudnative-pg/raw/main/hack/install-cnpg-plugin.sh | sh -s -- -b /usr/local/bin
# List all clusters
kubectl get cluster -n cnpg-system
# Get cluster details
kubectl cnpg status my-postgres -n cnpg-system
# Promote a replica
kubectl cnpg promote my-postgres my-postgres-2 -n cnpg-system
# Create backup
kubectl cnpg backup my-postgres -n cnpg-system
# Reload configuration
kubectl cnpg reload my-postgres -n cnpg-system
# Get connection info
kubectl get secret my-postgres-superuser -n cnpg-system -o jsonpath='{.data.password}' | base64 -d && echo
```
## Next Steps
1. Deploy the operator: `git add . && git commit -m "Add CloudNativePG" && git push`
2. Wait for ArgoCD to sync
3. Create your first cluster (uncomment extraObjects or create separate app)
4. Set up S3 backups with Ceph Object Storage
5. Test backup and recovery
6. Configure monitoring and alerts
## Additional Resources
- [CloudNativePG Documentation](https://cloudnative-pg.io/documentation/)
- [API Reference](https://cloudnative-pg.io/documentation/current/api_reference/)
- [Best Practices](https://cloudnative-pg.io/documentation/current/guidelines/)
- [Monitoring Guide](https://cloudnative-pg.io/documentation/current/monitoring/)

@@ -1,29 +0,0 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: cloudnative-pg
namespace: argocd
annotations:
argocd.argoproj.io/sync-wave: "-1"
finalizers:
- resources-finalizer.argocd.argoproj.io
spec:
project: default
source:
repoURL: https://git.mvzijl.nl/marco/veda.git
targetRevision: applicationset-rewrite
path: apps/cloudnative-pg
helm:
releaseName: cloudnative-pg
valueFiles:
- values.yaml
destination:
server: https://kubernetes.default.svc
namespace: cnpg-system
syncPolicy:
automated:
prune: true
selfHeal: true
syncOptions:
- CreateNamespace=true
- ServerSideApply=true

@@ -1,62 +0,0 @@
cloudnative-pg:
monitoring:
podMonitorEnabled: true
grafanaDashboard:
create: true
resources:
requests:
cpu: 50m
memory: 100Mi
limits:
memory: 256Mi
priorityClassName: system-cluster-critical
# Example PostgreSQL cluster configuration
# Uncomment and customize to create a test cluster
# extraObjects:
# - apiVersion: postgresql.cnpg.io/v1
# kind: Cluster
# metadata:
# name: postgres-example
# namespace: cnpg-system
# spec:
# instances: 2
# resources:
# requests:
# memory: 128Mi
# cpu: 100m
# limits:
# memory: 1Gi
# cpu: '1'
# postgresql:
# parameters:
# max_connections: "200"
# shared_buffers: "128MB"
# effective_cache_size: "256MB"
# maintenance_work_mem: "16MB"
# random_page_cost: "1.1"
# effective_io_concurrency: "300"
# monitoring:
# enablePodMonitor: true
# storage:
# size: 50Gi
# storageClass: ceph-block
# backup:
# retentionPolicy: "30d"
# barmanObjectStore:
# destinationPath: s3://postgres-backups/
# endpointURL: http://rook-ceph-rgw-ceph-objectstore.rook-ceph.svc:80
# s3Credentials:
# accessKeyId:
# name: postgres-backup-credentials
# key: ACCESS_KEY_ID
# secretAccessKey:
# name: postgres-backup-credentials
# key: ACCESS_SECRET_KEY
# data:
# compression: bzip2
# wal:
# compression: bzip2

@@ -1,7 +0,0 @@
apiVersion: v2
name: harbor
version: 0.1.0
dependencies:
- name: harbor
version: 1.18.0
repository: https://helm.goharbor.io

@@ -1,41 +0,0 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: harbor
namespace: argocd
annotations:
argocd.argoproj.io/sync-wave: "1"
finalizers:
- resources-finalizer.argocd.argoproj.io
spec:
project: default
source:
repoURL: https://git.mvzijl.nl/marco/veda.git
targetRevision: applicationset-rewrite
path: apps/harbor
helm:
releaseName: harbor
valueFiles:
- values.yaml
destination:
server: https://kubernetes.default.svc
namespace: harbor
ignoreDifferences:
- group: gateway.networking.k8s.io
kind: HTTPRoute
jsonPointers:
- /spec/parentRefs/0/group
- /spec/parentRefs/0/kind
- /spec/rules/0/backendRefs/0/group
- /spec/rules/0/backendRefs/0/kind
- /spec/rules/0/backendRefs/0/weight
- /spec/rules/1/backendRefs/0/group
- /spec/rules/1/backendRefs/0/kind
- /spec/rules/1/backendRefs/0/weight
syncPolicy:
automated:
prune: true
selfHeal: true
syncOptions:
- CreateNamespace=true
- ServerSideApply=true

@@ -1,96 +0,0 @@
harbor:
expose:
type: route
tls:
enabled: false
route:
# Attach to your Traefik Gateway + HTTPS listener
parentRefs:
- name: traefik-gateway # your Gateway name
namespace: traefik # Gateway namespace
sectionName: websecure # listener name on the Gateway
hosts:
- harbor.noxxos.nl # external hostname for Harbor
# What Harbor advertises to clients (docker login/push)
externalURL: https://harbor.noxxos.nl
# Single-writer PVCs: roll pods with Recreate to avoid multi-writer needs
updateStrategy:
type: Recreate
# Persistence via PVCs
persistence:
enabled: true
imageChartStorage:
type: filesystem
persistentVolumeClaim:
registry:
storageClass: ceph-block
accessMode: ReadWriteOnce
size: 100Gi
database:
storageClass: ceph-block
accessMode: ReadWriteOnce
size: 10Gi
redis:
storageClass: ceph-block
accessMode: ReadWriteOnce
size: 2Gi
jobservice:
jobLog:
storageClass: ceph-block
accessMode: ReadWriteOnce
size: 2Gi
trivy:
storageClass: ceph-block
accessMode: ReadWriteOnce
size: 10Gi
# Keep replicas at 1 for components that write to RWO PVCs
core:
replicas: 1
resources:
requests:
cpu: 100m
memory: 256Mi
limits:
memory: 512Mi
registry:
replicas: 1
resources:
requests:
cpu: 100m
memory: 256Mi
limits:
memory: 512Mi
portal:
replicas: 1
resources:
requests:
cpu: 100m
memory: 128Mi
limits:
memory: 256Mi
jobservice:
replicas: 1
resources:
requests:
cpu: 100m
memory: 256Mi
limits:
memory: 512Mi
trivy:
replicas: 1
resources:
requests:
cpu: 200m
memory: 512Mi
limits:
memory: 1Gi
harborAdminPassword: "harboradmin"

@@ -1,413 +0,0 @@
# Local Path Provisioner
## Overview
The Local Path Provisioner provides local storage using hostPath volumes. It's useful for:
- Testing and development
- Stateful workloads that don't require high availability
- Single-node scenarios
- Temporary storage needs
**Important**: This is NOT recommended for production workloads that require:
- High availability
- Data replication
- Cross-node pod mobility
- Disaster recovery
For production, use Ceph block storage (`ceph-block` StorageClass) instead.
## Current Configuration
- **Namespace**: `local-path-storage`
- **Storage Path**: `/var/mnt/local-path-provisioner` (Talos-compatible path)
- **Default StorageClass**: `false` (Ceph is the default)
- **Pod Security**: `privileged` (required for hostPath access)
- **Sync Wave**: `-2` (deploys before storage operators)
## When to Use Local Path vs Ceph
### Use Local Path For:
✅ **Development/Testing**
- Quick pod restarts on the same node
- No network overhead
- Fast I/O for local development
✅ **Node-Specific Data**
- Logs collection
- Monitoring agents data
- Cache that can be rebuilt
✅ **Temporary Storage**
- Build artifacts
- Scratch space
- Non-critical data
### Use Ceph Block Storage For:
✅ **Production Databases**
- PostgreSQL, MySQL, MongoDB
- Requires replication and HA
✅ **Stateful Applications**
- When pods need to move between nodes
- Data must survive node failures
✅ **Critical Data**
- Persistent volumes that need backups
- Data requiring disaster recovery
✅ **Multi-Replica Apps**
- ReadWriteOnce volumes that may reschedule
## Architecture & Limitations
```
┌─────────────────────────────────────────┐
│ Node 1 │
│ ┌─────────────────────────────────┐ │
│ │ /var/mnt/local-path-provisioner │ │
│ │ └── pvc-abc123/ │ │
│ │ └── data │ │
│ └─────────────────────────────────┘ │
│ ▲ │
│ │ hostPath mount │
│ ┌──────┴──────┐ │
│ │ Pod (fixed) │ │
│ └─────────────┘ │
└─────────────────────────────────────────┘
⚠️ If pod moves to Node 2, data is NOT accessible!
```
### Key Limitations:
1. **Node Affinity**: Pods are pinned to the node where the PVC was created (see the sketch below)
2. **No Replication**: Data exists only on one node
3. **No HA**: If node fails, data is inaccessible until node recovers
4. **No Migration**: Cannot move volumes between nodes
5. **Disk Space**: Limited by node's local disk capacity
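To make limitation 1 concrete, a PV created by the provisioner pins itself to one node with a clause along these lines (a sketch; node and volume names are illustrative):
```yaml
apiVersion: v1
kind: PersistentVolume
metadata:
  name: pvc-abc123
spec:
  capacity:
    storage: 10Gi
  accessModes:
    - ReadWriteOnce
  storageClassName: local-path
  hostPath:
    path: /var/mnt/local-path-provisioner/pvc-abc123_default_local-test-pvc
    type: DirectoryOrCreate
  nodeAffinity:
    required:
      nodeSelectorTerms:
        - matchExpressions:
            - key: kubernetes.io/hostname
              operator: In
              values:
                - worker1   # pods using this PV can only schedule here
```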
## Storage Classes Comparison
| Feature | local-path | ceph-block | ceph-filesystem |
|---------|-----------|------------|-----------------|
| **HA** | ❌ No | ✅ Yes | ✅ Yes |
| **Replication** | ❌ No | ✅ Yes (2x) | ✅ Yes (2x) |
| **Multi-node** | ❌ No | ✅ Yes (RWO) | ✅ Yes (RWX) |
| **Performance** | ⚡ Fast | 📊 Medium | 📊 Medium |
| **Snapshots** | ❌ No | ✅ Yes | ✅ Yes |
| **Resize** | ⚠️ Manual | ✅ Auto | ✅ Auto |
| **Backup** | ❌ Difficult | ✅ Velero | ✅ Velero |
| **Use Case** | Dev/Test | Production | Shared Data |
## Usage Examples
### Basic PVC
```yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: local-test-pvc
namespace: default
spec:
storageClassName: local-path
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 10Gi
```
### StatefulSet with Local Path
```yaml
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: test-app
spec:
serviceName: test-app
replicas: 1 # ⚠️ Keep at 1 for local-path
selector:
matchLabels:
app: test-app
volumeClaimTemplates:
- metadata:
name: data
spec:
storageClassName: local-path
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 5Gi
template:
metadata:
labels:
app: test-app
spec:
containers:
- name: app
image: nginx
volumeMounts:
- name: data
mountPath: /data
```
### Pod with Explicit Node Affinity
```yaml
apiVersion: v1
kind: Pod
metadata:
name: local-path-test
spec:
# Pin to specific node where PVC exists
nodeSelector:
kubernetes.io/hostname: worker1
volumes:
- name: data
persistentVolumeClaim:
claimName: local-test-pvc
containers:
- name: test
image: busybox
command: ["sh", "-c", "sleep 3600"]
volumeMounts:
- name: data
mountPath: /data
```
## Recommendations
### 1. Storage Path Configuration ✅
Current path `/var/mnt/local-path-provisioner` is correct for Talos Linux:
- Talos only allows persistent storage in `/var`
- This path persists across reboots
- Properly configured in your setup
### 2. Not Default StorageClass ✅
Good decision to keep `ceph-block` as default:
```yaml
storageclass.kubernetes.io/is-default-class: "false"
```
This ensures:
- PVCs without explicit `storageClassName` use Ceph
- Production workloads default to HA storage
- Local path is opt-in only
### 3. Use Cases for Your Cluster
**Good Uses:**
```yaml
# Note: Kubernetes has no per-namespace default StorageClass, so dev/test
# PVCs opt in to local-path explicitly via storageClassName (see below)
# Prometheus node exporter temp storage
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: node-exporter-data
spec:
storageClassName: local-path
accessModes: [ReadWriteOnce]
resources:
requests:
storage: 5Gi
```
**Bad Uses (Use Ceph Instead):**
```yaml
# ❌ Don't do this - use ceph-block
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: postgres-data # Database needs HA!
spec:
storageClassName: local-path # ❌ Wrong choice
# ✅ Do this instead
spec:
storageClassName: ceph-block # ✅ Right choice
```
### 4. Monitoring Storage Usage
```bash
# Check PVs and their node
kubectl get pv -o custom-columns=NAME:.metadata.name,STORAGECLASS:.spec.storageClassName,NODE:.spec.nodeAffinity.required.nodeSelectorTerms[0].matchExpressions[0].values[0],SIZE:.spec.capacity.storage
# Check disk usage on nodes
talosctl -n <node-ip> df | grep local-path-provisioner
# List all local-path PVCs
kubectl get pvc -A | grep local-path
```
### 5. Migration Strategy
If you need to migrate from local-path to Ceph:
```bash
# 1. Create new PVC with ceph-block
kubectl apply -f - <<EOF
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: data-ceph
namespace: default
spec:
storageClassName: ceph-block
accessModes: [ReadWriteOnce]
resources:
requests:
storage: 10Gi
EOF
# 2. Use a temporary pod to copy data
kubectl apply -f - <<EOF
apiVersion: v1
kind: Pod
metadata:
name: data-migration
spec:
volumes:
- name: source
persistentVolumeClaim:
claimName: data-local-path
- name: dest
persistentVolumeClaim:
claimName: data-ceph
containers:
- name: migrate
image: ubuntu
command: ["sh", "-c", "cp -av /source/* /dest/ && echo 'Done'"]
volumeMounts:
- name: source
mountPath: /source
- name: dest
mountPath: /dest
EOF
# 3. Verify data, then update your app to use new PVC
```
### 6. Cleanup Old PVCs
Local path PVs can accumulate. Regular cleanup:
```bash
# List all PVCs, then check which are no longer referenced by any pod
kubectl get pvc -A
# Delete unused PVCs (will also remove hostPath data)
kubectl delete pvc <pvc-name> -n <namespace>
# Cleanup orphaned directories on nodes (if needed)
talosctl -n <node-ip> ls /var/mnt/local-path-provisioner
```
### 7. Resource Limits
Consider node disk capacity:
```bash
# Check available space on each node
for node in $(kubectl get nodes -o name | cut -d/ -f2); do
echo "=== $node ==="
talosctl -n $node df | grep "/$"
done
# Set PVC size limits based on node capacity
# Example: If node has 100GB free, don't create PVCs > 50GB
```
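One way to cap local-path usage per namespace is a StorageClass-scoped ResourceQuota (a sketch; the namespace and sizes are illustrative):
```yaml
apiVersion: v1
kind: ResourceQuota
metadata:
  name: local-path-quota
  namespace: dev-test
spec:
  hard:
    # total capacity that local-path PVCs in this namespace may request
    local-path.storageclass.storage.k8s.io/requests.storage: 50Gi
    # number of local-path PVCs allowed in this namespace
    local-path.storageclass.storage.k8s.io/persistentvolumeclaims: "5"
```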
## Troubleshooting
### PVC Stuck in Pending
```bash
# Check events
kubectl describe pvc <pvc-name> -n <namespace>
# Common causes:
# 1. No nodes have enough disk space
# 2. Provisioner pod not running
kubectl get pods -n local-path-storage
```
### Pod Can't Mount Volume
```bash
# Check if pod is on same node as PV
kubectl get pod <pod-name> -o jsonpath='{.spec.nodeName}'
kubectl get pv <pv-name> -o jsonpath='{.spec.nodeAffinity}'
# If different nodes, delete and recreate pod
# (pod will reschedule to correct node)
```
### Disk Space Issues
```bash
# Check node disk usage
kubectl get nodes -o custom-columns=NAME:.metadata.name,STORAGE:.status.allocatable.ephemeral-storage
# Free up space by deleting old PVCs
kubectl get pvc -A --sort-by=.metadata.creationTimestamp
```
## Best Practices
1. ✅ **Use for ephemeral workloads only** - Anything that can afford data loss
2. ✅ **Set PVC size limits** - Prevent filling up node disks
3. ✅ **Monitor disk usage** - Set up alerts for node disk space
4. ✅ **Document dependencies** - Note which apps use local-path and why
5. ✅ **Plan for migration** - Have a strategy to move to Ceph if needed
6. ❌ **Don't use for databases** - Use ceph-block instead
7. ❌ **Don't use for multi-replica apps** - Pods will be pinned to one node
8. ❌ **Don't assume HA** - Data is lost if node fails
## Decision Tree
```
Need persistent storage?
├─ Is data critical? ──→ YES ──→ Use ceph-block
├─ Need HA/replication? ──→ YES ──→ Use ceph-block
├─ Pod needs to move between nodes? ──→ YES ──→ Use ceph-block
├─ Multiple replicas need same data? ──→ YES ──→ Use ceph-filesystem
├─ Development/testing only? ──→ YES ──→ Consider local-path
└─ Temporary/cache data? ──→ YES ──→ Consider local-path or emptyDir
```
## Summary
Local path provisioner is included in your cluster but should be used sparingly:
**When to use**: Dev/test, non-critical data, node-specific storage
**When NOT to use**: Production databases, HA apps, critical data
Your configuration is correct:
- Talos-compatible path ✅
- Not default StorageClass ✅
- Privileged pod security ✅
- Automated prune enabled ✅
For most production workloads, stick with `ceph-block` (your default).

@@ -1,59 +0,0 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: local-path-provisioner
namespace: argocd
annotations:
argocd.argoproj.io/sync-wave: "-5"
finalizers:
- resources-finalizer.argocd.argoproj.io
spec:
project: default
source:
repoURL: https://github.com/rancher/local-path-provisioner.git
targetRevision: v0.0.32
path: deploy
kustomize:
patches:
# Configure storage path for Talos
- target:
kind: ConfigMap
name: local-path-config
patch: |-
- op: replace
path: /data/config.json
value: |-
{
"nodePathMap":[
{
"node":"DEFAULT_PATH_FOR_NON_LISTED_NODES",
"paths":["/var/mnt/local-path-provisioner"]
}
]
}
# Don't set as default StorageClass (Ceph is default)
- target:
kind: StorageClass
name: local-path
patch: |-
- op: replace
path: /metadata/annotations/storageclass.kubernetes.io~1is-default-class
value: "false"
destination:
server: https://kubernetes.default.svc
namespace: local-path-storage
syncPolicy:
automated:
prune: true
selfHeal: true
syncOptions:
- CreateNamespace=true
- ServerSideApply=true
managedNamespaceMetadata:
labels:
pod-security.kubernetes.io/enforce: privileged
pod-security.kubernetes.io/audit: privileged
pod-security.kubernetes.io/warn: privileged

@@ -1,11 +0,0 @@
apiVersion: v2
name: loki
description: Grafana Loki logging stack wrapper chart
type: application
version: 1.0.0
appVersion: "3.5.7"
dependencies:
- name: loki
version: 6.46.0
repository: https://grafana.github.io/helm-charts

@@ -1,35 +0,0 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: loki
namespace: argocd
annotations:
argocd.argoproj.io/sync-wave: "1"
finalizers:
- resources-finalizer.argocd.argoproj.io
spec:
project: default
source:
repoURL: https://git.mvzijl.nl/marco/veda.git
targetRevision: applicationset-rewrite
path: apps/logging/loki
helm:
releaseName: loki
valueFiles:
- values.yaml
destination:
server: https://kubernetes.default.svc
namespace: logging
syncPolicy:
automated:
prune: true
selfHeal: true
syncOptions:
- CreateNamespace=true
- ServerSideApply=true
- SkipDryRunOnMissingResource=true
managedNamespaceMetadata:
labels:
pod-security.kubernetes.io/enforce: privileged
pod-security.kubernetes.io/audit: privileged
pod-security.kubernetes.io/warn: privileged

@@ -1,4 +0,0 @@
{{- range .Values.extraObjects }}
---
{{ toYaml . }}
{{- end }}

@@ -1,160 +0,0 @@
loki:
# Single binary deployment mode
deploymentMode: SingleBinary
# Disable other deployment modes
backend:
replicas: 0
read:
replicas: 0
write:
replicas: 0
# Disable memberlist for single binary
memberlist:
enable_ipv6: false
# Disable caching services for single binary
chunksCache:
enabled: false
resultsCache:
enabled: false
loki:
# Authentication
auth_enabled: false
# Server configuration
server:
log_level: info
log_format: json
# Common configuration
commonConfig:
replication_factor: 1
# Storage configuration
schemaConfig:
configs:
- from: "2024-01-01"
store: tsdb
object_store: s3
schema: v13
index:
prefix: loki_index_
period: 24h
# Storage backend configuration
storage:
type: s3
bucketNames:
chunks: loki-logs
ruler: loki-logs
admin: loki-logs
s3:
endpoint: rook-ceph-rgw-ceph-objectstore.rook-ceph.svc:80
region: us-east-1
insecure: true
s3ForcePathStyle: true
accessKeyId: ${AWS_ACCESS_KEY_ID}
secretAccessKey: ${AWS_SECRET_ACCESS_KEY}
# Limits and retention
limits_config:
retention_period: 90d
ingestion_rate_mb: 10
ingestion_burst_size_mb: 20
max_query_series: 10000
max_query_parallelism: 32
reject_old_samples: true
reject_old_samples_max_age: 168h
# Compactor configuration for retention
compactor:
working_directory: /var/loki/compactor
compaction_interval: 10m
retention_enabled: true
retention_delete_delay: 2h
retention_delete_worker_count: 150
delete_request_store: s3
# Storage config
storage_config:
tsdb_shipper:
active_index_directory: /var/loki/tsdb-index
cache_location: /var/loki/tsdb-cache
# Hedging requests
hedging:
at: 250ms
max_per_second: 20
up_to: 3
# Query configuration
query_scheduler:
max_outstanding_requests_per_tenant: 2048
# Frontend configuration
frontend:
max_outstanding_per_tenant: 2048
# Single binary configuration
singleBinary:
replicas: 1
extraArgs:
- '-config.expand-env=true'
persistence:
enabled: true
storageClass: ceph-block
size: 10Gi
resources:
requests:
cpu: 100m
memory: 256Mi
limits:
memory: 1Gi
extraEnvFrom:
- secretRef:
name: loki-logs
# Gateway
gateway:
enabled: true
replicas: 1
resources:
requests:
cpu: 50m
memory: 128Mi
limits:
memory: 256Mi
# Monitoring
monitoring:
selfMonitoring:
enabled: false
grafanaAgent:
installOperator: false
serviceMonitor:
enabled: false
# Service configuration
service:
type: ClusterIP
# S3 Bucket and credentials provisioning
extraObjects:
# ObjectBucketClaim for Loki logs
- apiVersion: objectbucket.io/v1alpha1
kind: ObjectBucketClaim
metadata:
name: loki-logs
namespace: logging
spec:
bucketName: loki-logs
storageClassName: ceph-bucket
additionalConfig:
maxSize: "200Gi"

@@ -1,11 +0,0 @@
apiVersion: v2
name: promtail
description: Promtail log collection agent wrapper chart
type: application
version: 1.0.0
appVersion: "3.3.2"
dependencies:
- name: promtail
version: 6.17.1
repository: https://grafana.github.io/helm-charts

@@ -1,29 +0,0 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: promtail
namespace: argocd
annotations:
argocd.argoproj.io/sync-wave: "3"
finalizers:
- resources-finalizer.argocd.argoproj.io
spec:
project: default
source:
repoURL: https://git.mvzijl.nl/marco/veda.git
targetRevision: applicationset-rewrite
path: apps/logging/promtail
helm:
releaseName: promtail
valueFiles:
- values.yaml
destination:
server: https://kubernetes.default.svc
namespace: logging
syncPolicy:
automated:
prune: true
selfHeal: true
syncOptions:
- CreateNamespace=false
- ServerSideApply=true

View File

@ -1,156 +0,0 @@
promtail:
# DaemonSet configuration
daemonset:
enabled: true
# Resources
resources:
requests:
cpu: 50m
memory: 128Mi
limits:
memory: 256Mi
# Configuration
config:
# Loki endpoint
clients:
- url: http://loki-gateway.logging.svc.cluster.local/loki/api/v1/push
tenant_id: ""
batchwait: 1s
batchsize: 1048576
timeout: 10s
# Positions file (persisted)
positions:
filename: /run/promtail/positions.yaml
# Server config
server:
log_level: info
log_format: json
http_listen_port: 3101
# Scrape configs
scrape_configs:
# Kubernetes pods
- job_name: kubernetes-pods
pipeline_stages:
# Extract log level
- regex:
expression: '(?i)(?P<level>trace|debug|info|warn|warning|error|err|fatal|critical|panic)'
# Parse JSON logs
- json:
expressions:
level: level
timestamp: timestamp
message: message
# Drop high-cardinality labels
- labeldrop:
- pod_uid
- container_id
- image_id
- stream
# Add log level as label (only keep certain levels)
- labels:
level:
kubernetes_sd_configs:
- role: pod
relabel_configs:
# Only scrape running pods
- source_labels: [__meta_kubernetes_pod_phase]
action: keep
regex: Running
# Keep essential labels
- source_labels: [__meta_kubernetes_namespace]
target_label: namespace
- source_labels: [__meta_kubernetes_pod_name]
target_label: pod
- source_labels: [__meta_kubernetes_pod_label_app]
target_label: app
- source_labels: [__meta_kubernetes_pod_container_name]
target_label: container
- source_labels: [__meta_kubernetes_pod_node_name]
target_label: node
# Add cluster label
- replacement: homelab
target_label: cluster
# Drop pods in kube-system namespace (optional)
# - source_labels: [__meta_kubernetes_namespace]
# action: drop
# regex: kube-system
# Container log path
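# The joined '<pod_uid>/<container>' value becomes $1, so the glob resolves to /var/log/pods/<ns>_<pod>_<uid>/<container>/*.log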
- source_labels: [__meta_kubernetes_pod_uid, __meta_kubernetes_pod_container_name]
target_label: __path__
separator: /
replacement: /var/log/pods/*$1/*.log
# Journald logs (systemd)
- job_name: systemd-journal
journal:
path: /var/log/journal
max_age: 12h
labels:
job: systemd-journal
cluster: homelab
pipeline_stages:
# Parse priority to log level
- match:
selector: '{job="systemd-journal"}'
stages:
- template:
source: level
template: '{{ if eq .PRIORITY "0" }}fatal{{ else if eq .PRIORITY "1" }}alert{{ else if eq .PRIORITY "2" }}crit{{ else if eq .PRIORITY "3" }}error{{ else if eq .PRIORITY "4" }}warning{{ else if eq .PRIORITY "5" }}notice{{ else if eq .PRIORITY "6" }}info{{ else }}debug{{ end }}'
- labels:
level:
relabel_configs:
- source_labels: [__journal__systemd_unit]
target_label: unit
- source_labels: [__journal__hostname]
target_label: node
- source_labels: [__journal_syslog_identifier]
target_label: syslog_identifier
# Volumes
extraVolumes:
- name: journal
hostPath:
path: /var/log/journal
extraVolumeMounts:
- name: journal
mountPath: /var/log/journal
readOnly: true
# Tolerations to run on all nodes
tolerations:
- effect: NoSchedule
operator: Exists
# Service Monitor
serviceMonitor:
enabled: true
# Update strategy
updateStrategy:
type: RollingUpdate
rollingUpdate:
maxUnavailable: 1

View File

@ -1,11 +0,0 @@
apiVersion: v2
name: grafana
description: Grafana visualization platform wrapper chart
type: application
version: 1.0.0
appVersion: "12.2.1"
dependencies:
- name: grafana
version: 10.1.4
repository: https://grafana.github.io/helm-charts

View File

@ -1,38 +0,0 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: grafana
namespace: argocd
annotations:
argocd.argoproj.io/sync-wave: "1"
finalizers:
- resources-finalizer.argocd.argoproj.io
spec:
project: default
source:
repoURL: https://git.mvzijl.nl/marco/veda.git
targetRevision: applicationset-rewrite
path: apps/monitoring/grafana
helm:
releaseName: grafana
valueFiles:
- values.yaml
destination:
server: https://kubernetes.default.svc
namespace: monitoring
syncPolicy:
automated:
prune: true
selfHeal: true
syncOptions:
- CreateNamespace=false
- ServerSideApply=true
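# The Gateway API controller defaults these fields after admission; ignoring them prevents a permanent OutOfSync status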
ignoreDifferences:
- group: gateway.networking.k8s.io
kind: HTTPRoute
jsonPointers:
- /spec/parentRefs/0/group
- /spec/parentRefs/0/kind
- /spec/rules/0/backendRefs/0/group
- /spec/rules/0/backendRefs/0/kind
- /spec/rules/0/backendRefs/0/weight

View File

@ -1,232 +0,0 @@
grafana:
persistence:
enabled: false
resources:
requests:
cpu: 100m
memory: 256Mi
limits:
memory: 512Mi
extraSecretMounts:
- name: db-secret
secretName: grafana-pg-cluster-app
mountPath: /secrets/my-db
readOnly: true
datasources:
datasources.yaml:
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
access: proxy
url: http://prometheus-kube-prometheus-prometheus.monitoring.svc.cluster.local:9090
isDefault: true
editable: false
jsonData:
timeInterval: 30s
queryTimeout: 60s
- name: Loki
type: loki
access: proxy
url: http://loki-gateway.logging.svc.cluster.local
editable: false
jsonData:
maxLines: 1000
derivedFields:
- datasourceUid: Prometheus
matcherRegex: "traceID=(\\w+)"
name: TraceID
url: "$${__value.raw}"
dashboardProviders:
dashboardproviders.yaml:
apiVersion: 1
providers:
- name: 'default'
orgId: 1
folder: ''
type: file
disableDeletion: false
editable: true
options:
path: /var/lib/grafana/dashboards/default
- name: 'kubernetes'
orgId: 1
folder: 'Kubernetes'
type: file
disableDeletion: false
editable: true
options:
path: /var/lib/grafana/dashboards/kubernetes
dashboards:
default:
node-exporter:
gnetId: 1860
revision: 37
datasource: Prometheus
k8s-cluster:
gnetId: 7249
revision: 1
datasource: Prometheus
kubernetes:
k8s-pods:
gnetId: 6417
revision: 1
datasource: Prometheus
loki-logs:
gnetId: 13639
revision: 2
datasource: Loki
grafana.ini:
server:
root_url: https://grafana.noxxos.nl
serve_from_sub_path: false
database:
type: postgres
host: "$__file{/secrets/my-db/host}:$__file{/secrets/my-db/port}"
name: "$__file{/secrets/my-db/dbname}"
user: "$__file{/secrets/my-db/user}"
password: "$__file{/secrets/my-db/password}"
auth.generic_oauth:
enabled: true
name: Authentik
#client_id:
#client_secret:
scopes: openid profile email groups
auth_url: https://sso.noxxos.nl/application/o/authorize/
token_url: https://sso.noxxos.nl/application/o/token/
api_url: https://sso.noxxos.nl/application/o/userinfo/
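# JMESPath mapping: members of the Authentik 'Admin' group become Grafana server admins, everyone else gets Viewer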
role_attribute_path: contains(groups[*], 'Admin') && 'GrafanaAdmin' || 'Viewer'
allow_sign_up: true
allow_assign_grafana_admin: true
analytics:
reporting_enabled: false
check_for_updates: false
log:
mode: console
level: info
console:
format: json
users:
auto_assign_org: true
auto_assign_org_role: Viewer
serviceMonitor:
enabled: false
plugins:
- grafana-piechart-panel
- grafana-clock-panel
route:
main:
enabled: true
hostnames:
- grafana.noxxos.nl
parentRefs:
- name: traefik-gateway
namespace: traefik
sectionName: websecure
extraObjects:
- apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
name: grafana-pg-cluster
namespace: monitoring
spec:
instances: 2
postgresql:
parameters:
max_connections: "20"
shared_buffers: "25MB"
effective_cache_size: "75MB"
maintenance_work_mem: "6400kB"
checkpoint_completion_target: "0.9"
wal_buffers: "768kB"
default_statistics_target: "100"
random_page_cost: "1.1"
effective_io_concurrency: "300"
work_mem: "640kB"
huge_pages: "off"
max_wal_size: "128MB"
bootstrap:
initdb:
database: grafana
owner: grafana
storage:
size: 15Gi
storageClass: local-path
resources:
requests:
cpu: 100m
memory: 100Mi
limits:
memory: 512Mi
plugins:
- enabled: true
name: barman-cloud.cloudnative-pg.io
isWALArchiver: true
parameters:
barmanObjectName: grafana-backup-store
- apiVersion: barmancloud.cnpg.io/v1
kind: ObjectStore
metadata:
name: grafana-backup-store
namespace: monitoring
spec:
retentionPolicy: "30d"
configuration:
destinationPath: s3://cnpg-backup-grafana/
endpointURL: http://rook-ceph-rgw-ceph-objectstore.rook-ceph.svc:80
s3Credentials:
accessKeyId:
name: grafana-pg-backups
key: AWS_ACCESS_KEY_ID
secretAccessKey:
name: grafana-pg-backups
key: AWS_SECRET_ACCESS_KEY
wal:
compression: bzip2
data:
compression: bzip2
immediateCheckpoint: true
- apiVersion: postgresql.cnpg.io/v1
kind: ScheduledBackup
metadata:
name: grafana-pg-backup
namespace: monitoring
spec:
method: plugin
immediate: true
schedule: "0 30 2 * * *" # 2:30 AM daily
backupOwnerReference: self
cluster:
name: grafana-pg-cluster
pluginConfiguration:
name: barman-cloud.cloudnative-pg.io
- apiVersion: objectbucket.io/v1alpha1
kind: ObjectBucketClaim
metadata:
name: grafana-pg-backups
namespace: monitoring
spec:
generateBucketName: cnpg-backup-grafana
storageClassName: ceph-bucket
additionalConfig:
maxSize: "50Gi"

View File

@ -1,11 +0,0 @@
apiVersion: v2
name: prometheus
description: Prometheus monitoring stack with Thanos sidecar wrapper chart
type: application
version: 1.0.0
appVersion: "0.86.2"
dependencies:
- name: kube-prometheus-stack
version: 79.4.1
repository: oci://ghcr.io/prometheus-community/charts

View File

@ -1,41 +0,0 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: prometheus
namespace: argocd
annotations:
argocd.argoproj.io/sync-wave: "-2"
finalizers:
- resources-finalizer.argocd.argoproj.io
spec:
project: default
source:
repoURL: https://git.mvzijl.nl/marco/veda.git
targetRevision: applicationset-rewrite
path: apps/monitoring/prometheus
helm:
releaseName: prometheus
valueFiles:
- values.yaml
destination:
server: https://kubernetes.default.svc
namespace: monitoring
syncPolicy:
automated:
prune: true
selfHeal: true
syncOptions:
- CreateNamespace=true
- ServerSideApply=true
- SkipDryRunOnMissingResource=true
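# Privileged Pod Security labels: node-exporter needs hostNetwork/hostPath access and will not schedule under the restricted profile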
managedNamespaceMetadata:
labels:
pod-security.kubernetes.io/enforce: privileged
pod-security.kubernetes.io/audit: privileged
pod-security.kubernetes.io/warn: privileged
ignoreDifferences:
- group: gateway.networking.k8s.io
kind: HTTPRoute
jqPathExpressions:
- .spec.parentRefs[] | .group, .kind
- .spec.rules[].backendRefs[] | .group, .kind, .weight

View File

@ -1,124 +0,0 @@
kube-prometheus-stack:
crds:
enabled: true
defaultRules:
create: false
alertmanager:
enabled: false
grafana:
enabled: false
kubeProxy:
enabled: false
kubeControllerManager:
enabled: false
kubeEtcd:
enabled: false
prometheusOperator:
enabled: true
logLevel: info
logFormat: json
resources:
requests:
cpu: 50m
memory: 128Mi
limits:
memory: 256Mi
networkPolicy:
enabled: true
flavor: Cilium
prometheus:
enabled: true
networkPolicy:
enabled: true
flavor: Cilium
cilium: {}
# Disable Thanos integration
thanosService:
enabled: false
thanosServiceMonitor:
enabled: false
thanosServiceExternal:
enabled: false
thanosIngress:
enabled: false
route:
main:
enabled: true
hostnames:
- prometheus.noxxos.nl
parentRefs:
- name: traefik-gateway
namespace: traefik
sectionName: websecure
serviceMonitor:
selfMonitor: false
prometheusSpec:
# Enable compaction (was disabled for Thanos)
disableCompaction: false
scrapeInterval: 30s
# Log format
logLevel: info
logFormat: json
# 3 months retention (~90 days)
retention: 90d
retentionSize: 100GB
replicas: 1
resources:
requests:
cpu: 100m
memory: 400Mi
limits:
memory: 2Gi
# Increased storage for 3 month retention
storageSpec:
volumeClaimTemplate:
spec:
storageClassName: ceph-block
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 150Gi
# Discover ServiceMonitors, PodMonitors and rules cluster-wide, not only those labeled for this Helm release
scrapeConfigSelectorNilUsesHelmValues: false
serviceMonitorSelectorNilUsesHelmValues: false
podMonitorSelectorNilUsesHelmValues: false
ruleSelectorNilUsesHelmValues: false
# Additional scrape configs
additionalScrapeConfigs: []
# Node Exporter
nodeExporter:
enabled: true
resources:
requests:
cpu: 50m
memory: 64Mi
limits:
memory: 128Mi
# Kube State Metrics
kubeStateMetrics:
enabled: true
resources:
requests:
cpu: 50m
memory: 128Mi
limits:
memory: 256Mi

View File

@ -1,10 +0,0 @@
apiVersion: v2
name: openproject
type: application
version: 1.0.0
appVersion: "16.6.1"
dependencies:
- name: openproject
version: 11.4.2
repository: https://charts.openproject.org

View File

@ -1,35 +0,0 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: openproject
namespace: argocd
annotations:
argocd.argoproj.io/sync-wave: "5"
finalizers:
- resources-finalizer.argocd.argoproj.io
spec:
project: default
source:
repoURL: https://git.mvzijl.nl/marco/veda.git
targetRevision: applicationset-rewrite
path: apps/openproject
helm:
valueFiles:
- values.yaml
destination:
server: https://kubernetes.default.svc
namespace: openproject
syncPolicy:
automated:
prune: true
selfHeal: true
syncOptions:
- CreateNamespace=true
- ServerSideApply=true
- SkipDryRunOnMissingResource=true
ignoreDifferences:
- group: gateway.networking.k8s.io
kind: HTTPRoute
jqPathExpressions:
- .spec.parentRefs[] | .group, .kind
- .spec.rules[].backendRefs[] | .group, .kind, .weight

View File

@ -1,4 +0,0 @@
{{- range .Values.extraObjects }}
---
{{ toYaml . }}
{{- end }}

View File

@ -1,582 +0,0 @@
openproject:
ingress:
enabled: false
persistence:
enabled: false
s3:
enabled: true
auth:
existingSecret: "openproject-s3-mapped"
bucketName: "openproject"
endpoint: "http://rook-ceph-rgw-ceph-objectstore.rook-ceph.svc:80"
pathStyle: true
region: "us-east-1"
postgresql:
bundled: false
connection:
host: openproject-pg-cluster-rw.openproject.svc.cluster.local
port: 5432
auth:
existingSecret: openproject-pg-cluster-app
secretKeys:
userPasswordKey: password
options:
sslmode: require
replicaCount: 1
metrics:
enabled: false
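# Mount a stubbed enterprise_token.rb over the upstream model so every Enterprise feature reports as active (see the ConfigMap in extraObjects)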
extraVolumes:
- name: enterprise-token
configMap:
name: openproject-enterprise-token
defaultMode: 0644
extraVolumeMounts:
- name: enterprise-token
mountPath: /app/app/models/enterprise_token.rb
subPath: enterprise_token.rb
readOnly: true
appInit:
resources:
requests:
memory: "512Mi"
limits:
memory: "1Gi"
extraObjects:
- apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
name: openproject
namespace: openproject
spec:
parentRefs:
- name: traefik-gateway
namespace: traefik
sectionName: websecure
hostnames:
- openproject.noxxos.nl
rules:
- matches:
- path:
type: PathPrefix
value: /
backendRefs:
- name: openproject
port: 8080
- apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
name: openproject-pg-cluster
namespace: openproject
spec:
instances: 2
postgresql:
parameters:
max_connections: "100"
shared_buffers: "25MB"
effective_cache_size: "75MB"
maintenance_work_mem: "6400kB"
checkpoint_completion_target: "0.9"
wal_buffers: "768kB"
default_statistics_target: "100"
random_page_cost: "1.1"
effective_io_concurrency: "300"
work_mem: "640kB"
huge_pages: "off"
max_wal_size: "128MB"
bootstrap:
initdb:
database: openproject
owner: openproject
storage:
size: 20Gi
storageClass: local-path
resources:
requests:
cpu: 100m
memory: 100Mi
limits:
memory: 512Mi
plugins:
- enabled: true
name: barman-cloud.cloudnative-pg.io
isWALArchiver: true
parameters:
barmanObjectName: backup-store
- apiVersion: barmancloud.cnpg.io/v1
kind: ObjectStore
metadata:
name: backup-store
namespace: openproject
spec:
retentionPolicy: "30d"
configuration:
destinationPath: s3://cnpg-backup-openproject/
endpointURL: http://rook-ceph-rgw-ceph-objectstore.rook-ceph.svc:80
s3Credentials:
accessKeyId:
name: cnpg-backup
key: AWS_ACCESS_KEY_ID
secretAccessKey:
name: cnpg-backup
key: AWS_SECRET_ACCESS_KEY
wal:
compression: bzip2
data:
compression: bzip2
immediateCheckpoint: true
- apiVersion: postgresql.cnpg.io/v1
kind: ScheduledBackup
metadata:
name: cnpg-backup
namespace: openproject
spec:
method: plugin
immediate: true
schedule: "0 45 2 * * *" # 2:45 AM daily
backupOwnerReference: self
cluster:
name: openproject-pg-cluster
pluginConfiguration:
name: barman-cloud.cloudnative-pg.io
- apiVersion: objectbucket.io/v1alpha1
kind: ObjectBucketClaim
metadata:
name: cnpg-backup
namespace: openproject
spec:
bucketName: cnpg-backup-openproject
storageClassName: ceph-bucket
additionalConfig:
maxSize: "50Gi"
- apiVersion: objectbucket.io/v1alpha1
kind: ObjectBucketClaim
metadata:
name: openproject
namespace: openproject
spec:
bucketName: openproject
storageClassName: ceph-bucket
additionalConfig:
maxSize: "200Gi"
- apiVersion: v1
kind: ServiceAccount
metadata:
name: openproject-s3-sync
namespace: openproject
- apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: openproject-s3-sync
namespace: openproject
rules:
- apiGroups: [""]
resources: ["secrets"]
verbs: ["get", "list", "create", "update", "patch"]
- apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: openproject-s3-sync
namespace: openproject
subjects:
- kind: ServiceAccount
name: openproject-s3-sync
namespace: openproject
roleRef:
kind: Role
name: openproject-s3-sync
apiGroup: rbac.authorization.k8s.io
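# One-shot Job that copies the OBC-generated S3 credentials into the env-var names OpenProject's fog storage expects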
- apiVersion: batch/v1
kind: Job
metadata:
name: openproject-s3-sync
namespace: openproject
spec:
template:
spec:
serviceAccountName: openproject-s3-sync
containers:
- name: sync
image: bitnami/kubectl:latest
command:
- /bin/sh
- -c
- |
echo "Waiting for secret openproject..."
until kubectl get secret openproject -n openproject; do sleep 5; done
ACCESS_KEY=$(kubectl get secret openproject -n openproject -o jsonpath='{.data.AWS_ACCESS_KEY_ID}' | base64 -d)
SECRET_KEY=$(kubectl get secret openproject -n openproject -o jsonpath='{.data.AWS_SECRET_ACCESS_KEY}' | base64 -d)
kubectl create secret generic openproject-s3-mapped -n openproject \
--from-literal=OPENPROJECT_FOG_CREDENTIALS_AWS__ACCESS__KEY__ID="$ACCESS_KEY" \
--from-literal=OPENPROJECT_FOG_CREDENTIALS_AWS__SECRET__ACCESS__KEY="$SECRET_KEY" \
--dry-run=client -o yaml | kubectl apply -f -
restartPolicy: OnFailure
- apiVersion: v1
kind: ConfigMap
metadata:
name: openproject-enterprise-token
namespace: openproject
data:
enterprise_token.rb: |
# OpenProject is an open source project management software.
# Copyright (C) the OpenProject GmbH
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License version 3.
#
# OpenProject is a fork of ChiliProject, which is a fork of Redmine. The copyright follows:
# Copyright (C) 2006-2013 Jean-Philippe Lang
# Copyright (C) 2010-2013 the ChiliProject Team
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# See COPYRIGHT and LICENSE files for more details.
#++
class EnterpriseToken < ApplicationRecord
class << self
# On the backend, features are checked only using `allows_to?`, which we can hardcode to return `true`.
# On the frontend, however, it instead checks whether particular strings are included in `available_features`.
# Unfortunately there is no canonical variable listing all the features, so we have to hardcode them.
# Use `rg --pcre2 -INo "(?<=allows_to\?[^:*]:|allowsTo\(')[a-z_]*" | sort -u` to generate this list:
TRUE_FEATURES = %i[
allowed_action
baseline_comparison
board_view
calculated_values
conditional_highlighting
custom_actions
custom_field_hierarchies
customize_life_cycle
date_alerts
define_custom_style
edit_attribute_groups
forbidden_action
gantt_pdf_export
internal_comments
ldap_groups
nextcloud_sso
one_drive_sharepoint_file_storage
placeholder_users
readonly_work_packages
scim_api
sso_auth_providers
team_planner_view
time_entry_time_restrictions
virus_scanning
work_package_query_relation_columns
work_package_sharing
work_package_subject_generation
].freeze
# Not all the methods here are ever actually called outside the enterprise_token.rb file itself
# in upstream openproject, but I'll include all of them that can be reasonably implemented here,
# just in case openproject changes in the future to start using the extra methods.
def current
self.new
end
def all_tokens
[self.new]
end
def active_tokens
[self.new]
end
def active_non_trial_tokens
[self.new]
end
def active_trial_tokens
[]
end
def active_trial_token
nil
end
def allows_to?(feature)
true
end
def active?
true
end
def trial_only?
false
end
def available_features
TRUE_FEATURES
end
def non_trialling_features
TRUE_FEATURES
end
def trialling_features
[]
end
def trialling?(feature)
false
end
def hide_banners?
true
end
def show_banners?
false
end
def user_limit
nil
end
def non_trial_user_limit
nil
end
def trial_user_limit
nil
end
def banner_type_for(feature:)
nil
end
def get_user_limit_of(tokens)
nil
end
end
FAR_FUTURE_DATE = Date.new(9999, 1, 1)
def token_object
Class.new do
def id
"lmao"
end
def has_feature?(feature)
true
end
def will_expire?
false
end
def mail
"admin@example.com"
end
def subscriber
"markasoftware-free-enterprise-mode"
end
def company
"markasoftware"
end
def domain
"markasoftware.com"
end
def issued_at
Time.zone.today - 1
end
def starts_at
Time.zone.today - 1
end
def expires_at
Time.zone.today + 1
end
def reprieve_days
nil
end
def reprieve_days_left
69
end
def restrictions
nil
end
def available_features
EnterpriseToken.TRUE_FEATURES
end
def plan
"markasoftware_free_enterprise_mode"
end
def features
EnterpriseToken.TRUE_FEATURES
end
def version
69
end
def started?
true
end
def trial?
false
end
def active?
true
end
end.new
end
def id
"lmao"
end
def encoded_token
"oaml"
end
def will_expire?
false
end
def mail
"admin@example.com"
end
def subscriber
"markasoftware-free-enterprise-mode"
end
def company
"markasoftware"
end
def domain
"markasoftware.com"
end
def issued_at
Time.zone.today - 1
end
def starts_at
Time.zone.today - 1
end
def expires_at
Time.zone.today + 1
end
def reprieve_days
nil
end
def reprieve_days_left
69
end
def restrictions
nil
end
def available_features
EnterpriseToken.TRUE_FEATURES
end
def plan
"markasoftware_free_enterprise_mode"
end
def features
EnterpriseToken.TRUE_FEATURES
end
def version
69
end
def started?
true
end
def trial?
false
end
def active?
true
end
def allows_to?(action)
true
end
def expiring_soon?
false
end
def in_grace_period?
false
end
def expired?(reprieve: true)
false
end
def statuses
[]
end
def invalid_domain?
false
end
def unlimited_users?
true
end
def max_active_users
nil
end
def sort_key
[FAR_FUTURE_DATE, FAR_FUTURE_DATE]
end
def days_left
69
end
end

View File

@ -1,7 +0,0 @@
apiVersion: v2
name: traefik
version: 1.0.0
dependencies:
- name: traefik
version: 37.2.0
repository: https://traefik.github.io/charts

View File

@ -1,36 +0,0 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: traefik
namespace: argocd
annotations:
argocd.argoproj.io/sync-wave: "1"
finalizers:
- resources-finalizer.argocd.argoproj.io
spec:
project: default
source:
repoURL: https://git.mvzijl.nl/marco/veda.git
targetRevision: applicationset-rewrite
path: apps/traefik
helm:
releaseName: traefik
valueFiles:
- values.yaml
destination:
server: https://kubernetes.default.svc
namespace: traefik
syncPolicy:
automated:
prune: true
selfHeal: true
syncOptions:
- CreateNamespace=true
- PruneLast=true
- PrunePropagationPolicy=foreground
retry:
limit: 5
backoff:
duration: 5s
factor: 2
maxDuration: 3m

View File

@ -1,79 +0,0 @@
traefik:
global:
checkNewVersion: false
installCRDs: true
additionalArguments:
- "--entrypoints.web.forwardedHeaders.trustedIPs=192.168.1.2/32"
api:
dashboard: true
insecure: true
service:
type: LoadBalancer
annotations:
io.cilium/lb-ipam-ips: "192.168.0.2"
providers:
kubernetesCRD:
enabled: true
allowCrossNamespace: true
kubernetesIngress:
enabled: false
kubernetesGateway:
enabled: true
resources:
requests:
cpu: "100m"
memory: "128Mi"
limits:
cpu: "500m"
memory: "512Mi"
deployment:
replicas: 2
metrics:
prometheus:
enabled: true
gateway:
listeners:
web:
namespacePolicy:
from: All
websecure:
port: 8443
protocol: HTTPS
namespacePolicy:
from: All
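# TLS for the websecure listener terminates at the Gateway; the wildcard-noxxos-nl-tls Secret is expected in the Gateway's (traefik) namespace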
certificateRefs:
- name: wildcard-noxxos-nl-tls
kind: Secret
extraObjects:
- apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
name: traefik-dashboard
namespace: traefik
spec:
parentRefs:
- name: traefik-gateway
sectionName: websecure
hostnames:
- "traefik.noxxos.nl"
rules:
- matches:
- path: { type: PathPrefix, value: /dashboard/ }
- path: { type: PathPrefix, value: /api }
backendRefs:
- group: traefik.io
kind: TraefikService
name: api@internal

View File

@ -1,281 +0,0 @@
#!/bin/bash
# Kubernetes/Helm Configuration Validator
# Validates all applications without deploying them
# Deliberately no `set -e`: individual validations may fail and are tallied for the summary instead of aborting the run
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Counters
TOTAL=0
PASSED=0
FAILED=0
echo -e "${BLUE}=== Kubernetes Configuration Validator ===${NC}\n"
# Function to validate a Helm chart
validate_helm_chart() {
local app_path=$1
local app_name=$(basename "$app_path")
local namespace=$2
TOTAL=$((TOTAL + 1))
echo -e "${YELLOW}[$TOTAL] Validating: $app_name (namespace: $namespace)${NC}"
# Check if Chart.yaml exists
if [ ! -f "$app_path/Chart.yaml" ]; then
echo -e "${YELLOW} → Not a Helm chart - skipping Helm validation${NC}\n"
TOTAL=$((TOTAL - 1))
return 0
fi
# Check if dependencies are built (build to temp location if not)
local temp_dir=""
if [ -f "$app_path/Chart.yaml" ] && grep -q "dependencies:" "$app_path/Chart.yaml"; then
if [ ! -d "$app_path/charts" ]; then
echo " → Dependencies not built - building to temporary location..."
# Create temp directory
temp_dir=$(mktemp -d)
# Copy chart to temp location (remove trailing slash if present)
local clean_path="${app_path%/}"
cp -r "$clean_path" "$temp_dir/"
local temp_chart="$temp_dir/$(basename "$clean_path")"
# Build dependencies in temp location
if ! (cd "$temp_chart" && helm dependency build > /dev/null 2>&1); then
echo -e "${RED} ✗ Failed to build dependencies${NC}\n"
rm -rf "$temp_dir"
FAILED=$((FAILED + 1))
return 1
fi
# Use temp location for validation
app_path="$temp_chart"
fi
fi
# Lint the chart
echo " → Running Helm lint..."
if ! (cd "$app_path" && helm lint . 2>&1 | grep -q "0 chart(s) failed"); then
echo -e "${RED} ✗ Helm lint failed${NC}"
(cd "$app_path" && helm lint .)
echo ""
FAILED=$((FAILED + 1))
return 1
fi
# Template the chart
echo " → Rendering Helm templates..."
# Try rendering with validation first (redirect to temp file to avoid hanging on large output)
local temp_output=$(mktemp)
if (cd "$app_path" && helm template "$app_name" . --namespace "$namespace" --validate > "$temp_output" 2>&1); then
template_exit=0
else
template_exit=$?
fi
if [ $template_exit -ne 0 ]; then
# Check if it's just CRD validation warnings
if grep -Eqi "(no matches for kind|ensure CRDs are installed)" "$temp_output"; then
echo -e "${YELLOW} ⚠ Template validation skipped - requires CRDs to be installed${NC}"
# Still try to render without validation
if (cd "$app_path" && helm template "$app_name" . --namespace "$namespace" > /dev/null 2>&1); then
# Rendering works without validation, this is acceptable
rm -f "$temp_output"
# Continue with other checks...
else
echo -e "${RED} ✗ Helm template rendering failed${NC}"
head -20 "$temp_output"
echo ""
rm -f "$temp_output"
FAILED=$((FAILED + 1))
return 1
fi
elif grep -qi "exists and cannot be imported into the current release" "$temp_output"; then
echo -e "${YELLOW} ⚠ Resource ownership validation skipped - resources may already exist in cluster${NC}"
# This is expected when resources already exist, try without validation
if (cd "$app_path" && helm template "$app_name" . --namespace "$namespace" > /dev/null 2>&1); then
rm -f "$temp_output"
# Continue with other checks...
else
echo -e "${RED} ✗ Helm template rendering failed${NC}"
head -20 "$temp_output"
echo ""
rm -f "$temp_output"
FAILED=$((FAILED + 1))
return 1
fi
else
echo -e "${RED} ✗ Helm template failed${NC}"
head -20 "$temp_output"
echo ""
rm -f "$temp_output"
FAILED=$((FAILED + 1))
return 1
fi
fi
rm -f "$temp_output"
# Validate with kubeval (if installed)
if command -v kubeval &> /dev/null; then
echo " → Validating manifests with kubeval..."
if ! (cd "$app_path" && helm template "$app_name" . --namespace "$namespace" | kubeval --ignore-missing-schemas > /dev/null 2>&1); then
echo -e "${YELLOW} ⚠ Kubeval warnings (may be acceptable)${NC}"
fi
fi
# Check for common issues
echo " → Checking for common issues..."
local rendered=$(cd "$app_path" && helm template "$app_name" . --namespace "$namespace" 2>&1)
# Check for placeholder secrets
if echo "$rendered" | grep -qi "changeme\|placeholder\|CHANGE_ME\|TODO"; then
echo -e "${YELLOW} ⚠ Warning: Found placeholder values (changeme/placeholder/TODO)${NC}"
fi
# Check for resource requests/limits
if ! echo "$rendered" | grep -q "resources:"; then
echo -e "${YELLOW} ⚠ Warning: No resource requests/limits found${NC}"
fi
# Cleanup temp directory if created
if [ -n "$temp_dir" ] && [ -d "$temp_dir" ]; then
rm -rf "$temp_dir"
fi
echo -e "${GREEN} ✓ Validation passed${NC}\n"
PASSED=$((PASSED + 1))
return 0
}
# Function to validate an ArgoCD Application manifest
validate_argocd_app() {
local app_file=$1
local app_name=$(basename "$(dirname "$app_file")")
TOTAL=$((TOTAL + 1))
echo -e "${YELLOW}[$TOTAL] Validating ArgoCD Application: $app_name${NC}"
# Check YAML syntax using yq or basic validation
if command -v yq &> /dev/null; then
if ! yq eval '.' "$app_file" > /dev/null 2>&1; then
echo -e "${RED} ✗ Invalid YAML syntax${NC}\n"
FAILED=$((FAILED + 1))
return 1
fi
elif ! grep -q "^apiVersion:" "$app_file"; then
echo -e "${RED} ✗ Invalid YAML - missing apiVersion${NC}\n"
FAILED=$((FAILED + 1))
return 1
fi
# Check for required fields
local missing_fields=()
grep -q "kind: Application" "$app_file" || missing_fields+=("kind: Application")
grep -q "metadata:" "$app_file" || missing_fields+=("metadata")
grep -q "spec:" "$app_file" || missing_fields+=("spec")
grep -q "source:" "$app_file" || missing_fields+=("source")
grep -q "destination:" "$app_file" || missing_fields+=("destination")
if [ ${#missing_fields[@]} -gt 0 ]; then
echo -e "${RED} ✗ Missing required fields: ${missing_fields[*]}${NC}\n"
FAILED=$((FAILED + 1))
return 1
fi
echo -e "${GREEN} ✓ Validation passed${NC}\n"
PASSED=$((PASSED + 1))
return 0
}
# Main validation flow
echo -e "${BLUE}Validating Monitoring Stack...${NC}\n"
# Thanos
if [ -d "monitoring/thanos" ]; then
validate_helm_chart "monitoring/thanos" "monitoring"
validate_argocd_app "monitoring/thanos/application.yaml"
fi
# Prometheus
if [ -d "monitoring/prometheus" ]; then
validate_helm_chart "monitoring/prometheus" "monitoring"
validate_argocd_app "monitoring/prometheus/application.yaml"
fi
# Grafana
if [ -d "monitoring/grafana" ]; then
validate_helm_chart "monitoring/grafana" "monitoring"
validate_argocd_app "monitoring/grafana/application.yaml"
fi
echo -e "${BLUE}Validating Logging Stack...${NC}\n"
# Loki
if [ -d "logging/loki" ]; then
validate_helm_chart "logging/loki" "logging"
validate_argocd_app "logging/loki/application.yaml"
fi
# Promtail
if [ -d "logging/promtail" ]; then
validate_helm_chart "logging/promtail" "logging"
validate_argocd_app "logging/promtail/application.yaml"
fi
# Additional apps (if they exist)
echo -e "${BLUE}Validating Other Applications...${NC}\n"
for app_dir in */; do
# Skip special directories
if [[ "$app_dir" == "monitoring/" ]] || [[ "$app_dir" == "logging/" ]]; then
continue
fi
# Check if it's a Helm chart
if [ -f "$app_dir/Chart.yaml" ] && [ -f "$app_dir/application.yaml" ]; then
app_name=$(basename "$app_dir")
# Try to extract namespace from application.yaml
namespace=$(grep -A 10 "destination:" "$app_dir/application.yaml" | grep "namespace:" | head -1 | awk '{print $2}')
[ -z "$namespace" ] && namespace="default"
validate_helm_chart "$app_dir" "$namespace"
validate_argocd_app "$app_dir/application.yaml"
fi
# Check for nested charts (like ceph/operator, ceph/cluster)
for nested_dir in "$app_dir"*/; do
if [ -f "$nested_dir/Chart.yaml" ] && [ -f "$nested_dir/application.yaml" ]; then
nested_name=$(basename "$nested_dir")
# Try to extract namespace from application.yaml
namespace=$(grep -A 10 "destination:" "$nested_dir/application.yaml" | grep "namespace:" | head -1 | awk '{print $2}')
[ -z "$namespace" ] && namespace="default"
validate_helm_chart "$nested_dir" "$namespace"
validate_argocd_app "$nested_dir/application.yaml"
fi
done
done
# Summary
echo -e "${BLUE}=== Validation Summary ===${NC}"
echo -e "Total checks: $TOTAL"
echo -e "${GREEN}Passed: $PASSED${NC}"
echo -e "${RED}Failed: $FAILED${NC}\n"
if [ $FAILED -eq 0 ]; then
echo -e "${GREEN}✓ All validations passed!${NC}"
exit 0
else
echo -e "${RED}✗ Some validations failed. Please review the errors above.${NC}"
exit 1
fi

19
argocd-ingress.yaml Normal file
View File

@ -0,0 +1,19 @@
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: argocd-ingress
namespace: argocd
annotations:
traefik.ingress.kubernetes.io/router.entrypoints: web
spec:
rules:
- host: argocd.noxxos.nl
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: argocd-server
port:
name: http

View File

@ -15,7 +15,7 @@ metadata:
namespace: kube-system
spec:
blocks:
- cidr: "192.168.0.2/32"
- cidr: "192.168.0.1/32"
serviceSelector:
matchLabels:
io.kubernetes.service.namespace: "traefik"

View File

@ -1,57 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail
echo "Starting Cilium installation..."
# Add Cilium Helm repository
echo "Adding Cilium Helm repository..."
helm repo add cilium https://helm.cilium.io/
helm repo update
# Install Cilium
echo "Installing Cilium..."
helm upgrade --install \
cilium \
cilium/cilium \
--version 1.18.3 \
--namespace kube-system \
--create-namespace \
--values "$(dirname "$0")/values.yaml" \
--wait
# Wait for Cilium to be ready
echo "Waiting for Cilium DaemonSet to be ready..."
kubectl rollout status daemonset/cilium -n kube-system --timeout=300s
# Wait for Hubble components if enabled
echo "Waiting for Hubble components..."
kubectl rollout status deployment/hubble-relay -n kube-system --timeout=300s
kubectl rollout status deployment/hubble-ui -n kube-system --timeout=300s
# Apply post-install configurations if any exist
if [ -d "$(dirname "$0")/post-install" ]; then
echo "Applying post-install configurations..."
kubectl apply --recursive -f "$(dirname "$0")/post-install/"
fi
echo "Checking Cilium status..."
if command -v cilium &> /dev/null; then
cilium status
else
echo "Cilium CLI not found. To install:"
echo "brew install cilium-cli"
fi
echo
echo "Installation complete!"
echo
echo "To access Hubble UI:"
echo "1. Run port-forward:"
echo " kubectl port-forward -n kube-system svc/hubble-ui 12000:80"
echo "2. Visit: http://localhost:12000"
echo
echo "To verify installation:"
echo "1. Check pod status: kubectl get pods -n kube-system -l k8s-app=cilium"
echo "2. Check Hubble UI: kubectl get deployment -n kube-system hubble-ui"
echo "3. Install Cilium CLI: brew install cilium-cli"

View File

@ -1,75 +0,0 @@
# Cilium Component
## Overview
Cilium is our CNI (Container Network Interface) solution that provides networking, security, and observability for Kubernetes using eBPF.
## Configuration
The following configurations are available:
- Version: 1.18.3
- IPAM Mode: kubernetes
- Hubble UI: Enabled
- L2 Announcements: Enabled
- kube-proxy Replacement: Enabled
## Features
- **Hubble UI**: Web interface for network observability
- **L2 Announcements**: For LoadBalancer service type support
- **Enhanced Security**: Using eBPF for network policy enforcement
- **Kube-proxy Replacement**: Native handling of service load-balancing
## Post-Install
After installation:
1. Cilium core components will be installed
2. Hubble UI and Relay will be deployed
3. LoadBalancer IP pools will be configured
4. Initial access to Hubble UI is available through port-forward:
```bash
kubectl port-forward -n kube-system svc/hubble-ui 12000:80
```
Then visit: `http://localhost:12000`
## Dependencies
- Kubernetes cluster
- Helm v3+
- Linux kernel >= 4.9.17
## Troubleshooting
1. Check if Cilium pods are running:
```bash
kubectl get pods -n kube-system -l k8s-app=cilium
```
2. Check Cilium status (requires Cilium CLI):
```bash
cilium status
```
3. Check Hubble UI deployment:
```bash
kubectl get deployment -n kube-system hubble-ui
```
4. View Cilium logs:
```bash
kubectl logs -n kube-system -l k8s-app=cilium
```
To install Cilium CLI:
```bash
brew install cilium-cli
```
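To sanity-check LoadBalancer IPAM and L2 announcements after install, the commands below can help (they assume the Traefik Service defined elsewhere in this repo; adjust the namespace/name for other setups):
```bash
# EXTERNAL-IP should show an address from the Cilium LB IPAM pool
kubectl get svc -n traefik traefik
# Inspect the configured IP pools and L2 announcement policies
kubectl get ciliumloadbalancerippools.cilium.io
kubectl get ciliuml2announcementpolicies.cilium.io
```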

View File

@ -1,45 +0,0 @@
ipam:
mode: kubernetes
hubble:
relay:
enabled: true
ui:
enabled: true
ingress:
enabled: true
className: traefik
hosts:
- hubble.noxxos.nl
l2announcements:
enabled: true
kubeProxyReplacement: true
securityContext:
capabilities:
ciliumAgent:
- CHOWN
- KILL
- NET_ADMIN
- NET_RAW
- IPC_LOCK
- SYS_ADMIN
- SYS_RESOURCE
- DAC_OVERRIDE
- FOWNER
- SETGID
- SETUID
cleanCiliumState:
- NET_ADMIN
- SYS_ADMIN
- SYS_RESOURCE
cgroup:
autoMount:
enabled: false
hostRoot: /sys/fs/cgroup
k8sServiceHost: localhost
k8sServicePort: 7445

View File

@ -1,51 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail
echo "Starting ArgoCD installation..."
# Add Argo CD Helm repository
echo "Adding Argo CD Helm repository..."
helm repo add argo https://argoproj.github.io/argo-helm
helm repo update
# Install Argo CD
echo "Installing Argo CD..."
helm upgrade --install \
argocd \
argo/argo-cd \
--namespace argocd \
--create-namespace \
--version 9.1.0 \
--values "$(dirname "$0")/values.yaml" \
--wait
# Wait for the Argo CD server to be ready
echo "Waiting for Argo CD server to be ready..."
kubectl wait --for=condition=available --timeout=300s deployment/argocd-server -n argocd
# Apply post-install configurations if they exist
if [ -n "$(find "$(dirname "$0")/post-install" -type f \( -name '*.yaml' -o -name '*.yml' -o -name '*.json' \) 2>/dev/null)" ]; then
echo "Applying post-install configurations..."
kubectl apply --recursive -f "$(dirname "$0")/post-install/"
fi
# Get the initial admin password
echo
echo "Initial admin password:"
kubectl -n argocd get secret argocd-initial-admin-secret -o jsonpath="{.data.password}" | base64 -d
echo
echo
echo "ArgoCD installation complete!"
echo
echo "Temporary access (until ingress is ready):"
echo "1. Run: kubectl port-forward svc/argocd-server -n argocd 8080:443"
echo "2. Open: https://localhost:8080"
echo
echo "Credentials:"
echo " Username: admin"
echo " Password: (shown above)"
echo
echo "Once Traefik ingress is running, access ArgoCD at:"
echo " https://argocd.noxxos.nl"

View File

@ -1,33 +0,0 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: root
namespace: argocd
finalizers:
- resources-finalizer.argocd.argoproj.io
spec:
project: default
source:
repoURL: https://git.mvzijl.nl/marco/veda.git
targetRevision: applicationset-rewrite
path: apps
directory:
recurse: true
include: '{*/application.yaml,*/application.yml}' # Only Application manifests
destination:
server: https://kubernetes.default.svc
namespace: argocd
syncPolicy:
automated:
prune: true
selfHeal: true
syncOptions:
- CreateNamespace=true
- PruneLast=true
- PrunePropagationPolicy=foreground
retry:
limit: 5
backoff:
duration: 5s
factor: 2
maxDuration: 3m

View File

@ -1,49 +0,0 @@
# ArgoCD Component
## Overview
ArgoCD is our GitOps continuous delivery tool for Kubernetes.
## Configuration
The following configurations are available:
- Domain: argocd.noxxos.nl
- Ingress: Enabled with Traefik
- Version: 9.1.0
## Post-Install
After installation:
1. The admin password will be displayed
2. Initial access is available through port-forward:
```bash
kubectl port-forward svc/argocd-server -n argocd 8080:443
```
Then visit: `https://localhost:8080`
3. Once Traefik is running, access through ingress will be available
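As a sketch of scripting that first login with the `argocd` CLI (assumes the CLI is installed locally and the port-forward from step 2 is running):
```bash
# Read the initial admin password and log in through the port-forward
ARGOCD_PW=$(kubectl -n argocd get secret argocd-initial-admin-secret -o jsonpath='{.data.password}' | base64 -d)
argocd login localhost:8080 --username admin --password "$ARGOCD_PW" --insecure
argocd app list
```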
## Dependencies
- Kubernetes cluster
- Helm v3+
- Traefik (for ingress)
## Troubleshooting
If you can't access ArgoCD:
1. Check if the pods are running:
```bash
kubectl get pods -n argocd
```
2. Check ingress status:
```bash
kubectl get ingress -n argocd
```

View File

@ -1,6 +0,0 @@
global:
domain: argocd.noxxos.nl
server:
ingress:
enabled: false

View File

@ -1,62 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail
# Get the directory where the script is located
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
COMPONENTS_DIR="$(cd "${SCRIPT_DIR}/../components" && pwd)"
# Function to check prerequisites
check_prerequisites() {
echo "Checking prerequisites..."
command -v kubectl >/dev/null 2>&1 || { echo "kubectl is required but not installed"; exit 1; }
command -v helm >/dev/null 2>&1 || { echo "helm is required but not installed"; exit 1; }
# Check if we can connect to the cluster
kubectl cluster-info >/dev/null 2>&1 || { echo "Cannot connect to Kubernetes cluster"; exit 1; }
}
# Function to install a component
install_component() {
local component_dir=$1
local component_name=$(basename "${component_dir}")
echo
echo "================================================================"
echo "Installing component: ${component_name}"
echo "================================================================"
if [[ -f "${component_dir}/install.sh" ]]; then
bash "${component_dir}/install.sh"
else
echo "No install.sh found for ${component_name}, skipping..."
fi
}
# Main installation process
main() {
echo "Starting platform installation..."
echo
# Check prerequisites
check_prerequisites
# Get all component directories in order
components=($(find "${COMPONENTS_DIR}" -maxdepth 1 -mindepth 1 -type d | sort))
# Install each component
for component in "${components[@]}"; do
install_component "${component}"
done
echo
echo "================================================================"
echo "Platform installation complete!"
echo "================================================================"
echo
echo "To validate the installation, run:"
echo " ./validate.sh"
}
# Run main function
main "$@"

View File

@ -1,116 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail
# Get the directory where the script is located
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
COMPONENTS_DIR="$(cd "${SCRIPT_DIR}/../components" && pwd)"
# Colors for output
GREEN='\033[0;32m'
RED='\033[0;31m'
NC='\033[0m' # No Color
# Function to check if a deployment is ready
check_deployment() {
local namespace=$1
local deployment=$2
local description=$3
echo -n "Checking ${description}... "
if kubectl get deployment -n "${namespace}" "${deployment}" >/dev/null 2>&1; then
if kubectl wait --for=condition=available --timeout=5s deployment/"${deployment}" -n "${namespace}" >/dev/null 2>&1; then
echo -e "${GREEN}OK${NC}"
return 0
else
echo -e "${RED}Not Ready${NC}"
return 1
fi
else
echo -e "${RED}Not Found${NC}"
return 1
fi
}
# Function to check post-install manifests
check_post_install() {
local component_dir=$1
local description=$2
echo -n "Checking ${description} post-install configurations... "
if [ -n "$(find "${component_dir}/post-install" -type f \( -name '*.yaml' -o -name '*.yml' -o -name '*.json' \) 2>/dev/null)" ]; then
if kubectl diff -f "${component_dir}/post-install/" >/dev/null 2>&1; then
echo -e "${GREEN}OK${NC}"
return 0
else
echo -e "${RED}Out of sync${NC}"
return 1
fi
else
echo -e "${GREEN}No post-install configs${NC}"
return 0
fi
}
# Function to check if a daemon set is ready
check_daemonset() {
local namespace=$1
local daemonset=$2
local description=$3
echo -n "Checking ${description}... "
if kubectl get daemonset -n "${namespace}" "${daemonset}" >/dev/null 2>&1; then
if kubectl rollout status daemonset/"${daemonset}" -n "${namespace}" --timeout=5s >/dev/null 2>&1; then
echo -e "${GREEN}OK${NC}"
return 0
else
echo -e "${RED}Not Ready${NC}"
return 1
fi
else
echo -e "${RED}Not Found${NC}"
return 1
fi
}
# Main validation process
main() {
local errors=0
echo "Validating platform components..."
echo
# Validate Cilium
echo "Checking Cilium components:"
# errors=$((errors + 1)) rather than ((errors++)): the latter returns non-zero when errors is 0 and would trip `set -e`
check_daemonset kube-system cilium "Cilium CNI" || errors=$((errors + 1))
check_deployment kube-system hubble-relay "Hubble Relay" || errors=$((errors + 1))
check_deployment kube-system hubble-ui "Hubble UI" || errors=$((errors + 1))
check_post_install "${COMPONENTS_DIR}/01-cilium" "Cilium" || errors=$((errors + 1))
echo
# Validate ArgoCD
echo "Checking ArgoCD components:"
check_deployment argocd argocd-server "ArgoCD Server" || errors=$((errors + 1))
check_deployment argocd argocd-repo-server "ArgoCD Repo Server" || errors=$((errors + 1))
check_deployment argocd argocd-applicationset-controller "ArgoCD ApplicationSet Controller" || errors=$((errors + 1))
check_post_install "${COMPONENTS_DIR}/02-argocd" "ArgoCD" || errors=$((errors + 1))
echo
# Summary
echo "================================================================"
if [ "${errors}" -eq 0 ]; then
echo -e "${GREEN}All components are running correctly!${NC}"
exit 0
else
echo -e "${RED}Found ${errors} component(s) with issues${NC}"
echo "Check the component logs for more details:"
echo " kubectl logs -n <namespace> deployment/<deployment-name>"
exit 1
fi
}
# Run main function
main "$@"

View File

@ -82,10 +82,11 @@ talosctl gen config \
--output-types controlplane \
--with-secrets secrets.yaml \
--config-patch @nodes/master1.yaml \
--config-patch @patches/network.yaml \
--config-patch @patches/argocd.yaml \
--config-patch @patches/cilium.yaml \
--config-patch @patches/scheduling.yaml \
--config-patch @patches/discovery.yaml \
--config-patch @patches/diskselector.yaml \
--config-patch @patches/disk.yaml \
--config-patch @patches/vip.yaml \
--config-patch @patches/metrics.yaml \
--config-patch @patches/hostpath.yaml \
@ -101,7 +102,8 @@ talosctl gen config \
--output-types worker \
--with-secrets secrets.yaml \
--config-patch @nodes/worker1.yaml \
--config-patch @patches/network.yaml \
--config-patch @patches/argocd.yaml \
--config-patch @patches/cilium.yaml \
--config-patch @patches/scheduling.yaml \
--config-patch @patches/discovery.yaml \
--config-patch @patches/diskselector.yaml \
@ -139,24 +141,12 @@ Finally, retrieve the kubeconfig, it will merge with `~/.kube/config`, if it exi
talosctl -n 192.168.0.10 kubeconfig
```
Check nodes, note the NotReady status, since the Cilium CNI is not running yet:
Check nodes:
```bash
kubectl get nodes
```
Install the Gateway API:
```bash
kubectl apply --server-side -f https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.4.0/standard-install.yaml
```
Install Cilium:
```bash
bash scripts/cilium.sh
```
## TODO
- Remove secrets from config

26678
talos/patches/argocd.yaml Normal file

File diff suppressed because it is too large

37
talos/patches/cilium.sh Normal file
View File

@ -0,0 +1,37 @@
#!/usr/bin/env bash
cat > template.yaml << 'EOF'
cluster:
network:
cni:
name: none
proxy:
disabled: true
inlineManifests:
- name: cilium
contents: |
__CILIUM_MANIFEST__
EOF
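# Render the Cilium chart with Helm, indent it, and splice it into the __CILIUM_MANIFEST__ placeholder in the template above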
helm repo add cilium https://helm.cilium.io/
helm template \
cilium \
cilium/cilium \
--version 1.17.3 \
--namespace kube-system \
--set ipam.mode=kubernetes \
--set hubble.relay.enabled=true \
--set hubble.ui.enabled=true \
--set l2announcements.enabled=true \
--set kubeProxyReplacement=true \
--set securityContext.capabilities.ciliumAgent="{CHOWN,KILL,NET_ADMIN,NET_RAW,IPC_LOCK,SYS_ADMIN,SYS_RESOURCE,DAC_OVERRIDE,FOWNER,SETGID,SETUID}" \
--set securityContext.capabilities.cleanCiliumState="{NET_ADMIN,SYS_ADMIN,SYS_RESOURCE}" \
--set cgroup.autoMount.enabled=false \
--set cgroup.hostRoot=/sys/fs/cgroup \
--set k8sServiceHost=localhost \
--set k8sServicePort=7445 | sed 's/^/ /' > manifest.tmp
sed -e '/__CILIUM_MANIFEST__/r manifest.tmp' -e '/__CILIUM_MANIFEST__/d' template.yaml > cilium.yaml
rm manifest.tmp
rm template.yaml

2003
talos/patches/cilium.yaml Normal file

File diff suppressed because one or more lines are too long

View File

@ -3,21 +3,18 @@ machine:
mirrors:
docker.io:
endpoints:
- https://harbor.noxxos.nl/v2/proxy-docker.io
- http://harbor.noxxos.nl/v2/proxy-docker.io
- https://registry-1.docker.io
overridePath: true
ghcr.io:
endpoints:
- https://harbor.noxxos.nl/v2/proxy-ghcr.io
- https://ghcr.io
- http://harbor.noxxos.nl/v2/proxy-ghcr.io
overridePath: true
gcr.io:
endpoints:
- https://harbor.noxxos.nl/v2/proxy-gcr.io
- https://gcr.io
- http://harbor.noxxos.nl/v2/proxy-gcr.io
overridePath: true
registry.k8s.io:
endpoints:
- https://harbor.noxxos.nl/v2/proxy-registry.k8s.io
- https://registry.k8s.io
- http://harbor.noxxos.nl/v2/proxy-registry.k8s.io
overridePath: true

View File

@ -1,6 +0,0 @@
cluster:
network:
cni:
name: none
proxy:
disabled: true