From 9d626b45d13e256bbfdaff2725f3f3c4311323d4 Mon Sep 17 00:00:00 2001
From: Marco van Zijl
Date: Sun, 9 Nov 2025 08:43:49 +0100
Subject: [PATCH] Add application for Local Path Provisioner

---
 apps/local-path-provisioner/README.md        | 413 +++++++++++++++++++
 apps/local-path-provisioner/application.yaml |  59 +++
 2 files changed, 472 insertions(+)
 create mode 100644 apps/local-path-provisioner/README.md
 create mode 100644 apps/local-path-provisioner/application.yaml

diff --git a/apps/local-path-provisioner/README.md b/apps/local-path-provisioner/README.md
new file mode 100644
index 0000000..4a60eac
--- /dev/null
+++ b/apps/local-path-provisioner/README.md
@@ -0,0 +1,413 @@
+# Local Path Provisioner

## Overview

The Local Path Provisioner provides local storage using hostPath volumes. It's useful for:
- Testing and development
- Stateful workloads that don't require high availability
- Single-node scenarios
- Temporary storage needs

**Important**: This is NOT recommended for production workloads that require:
- High availability
- Data replication
- Cross-node pod mobility
- Disaster recovery

For production, use Ceph block storage (`ceph-block` StorageClass) instead.

## Current Configuration

- **Namespace**: `local-path-storage`
- **Storage Path**: `/var/mnt/local-path-provisioner` (Talos-compatible path)
- **Default StorageClass**: `false` (Ceph is the default)
- **Pod Security**: `privileged` (required for hostPath access)
- **Sync Wave**: `-2` (deploys before storage operators)

## When to Use Local Path vs Ceph

### Use Local Path For:
✅ **Development/Testing**
- Quick pod restarts on the same node
- No network overhead
- Fast I/O for local development

✅ **Node-Specific Data**
- Log collection
- Monitoring agent data
- Cache that can be rebuilt

✅ **Temporary Storage**
- Build artifacts
- Scratch space
- Non-critical data

### Use Ceph Block Storage For:
✅ **Production Databases**
- PostgreSQL, MySQL, MongoDB
- Require replication and HA

✅ **Stateful Applications**
- When pods need to move between nodes
- Data must survive node failures

✅ **Critical Data**
- Persistent volumes that need backups
- Data requiring disaster recovery

✅ **Multi-Replica Apps**
- ReadWriteOnce volumes that may reschedule

## Architecture & Limitations

```
┌───────────────────────────────────────────┐
│ Node 1                                    │
│  ┌─────────────────────────────────┐      │
│  │ /var/mnt/local-path-provisioner │      │
│  │  └── pvc-abc123/                │      │
│  │       └── data                  │      │
│  └─────────────────────────────────┘      │
│                   ▲                       │
│                   │ hostPath mount        │
│            ┌──────┴──────┐                │
│            │ Pod (fixed) │                │
│            └─────────────┘                │
└───────────────────────────────────────────┘

⚠️ If pod moves to Node 2, data is NOT accessible!
```

### Key Limitations:

1. **Node Affinity**: Pods are pinned to the node where the PVC was created (see the example PV right after this list)
2. **No Replication**: Data exists only on one node
3. **No HA**: If the node fails, data is inaccessible until the node recovers
4. **No Migration**: Cannot move volumes between nodes
5. **Disk Space**: Limited by the node's local disk capacity
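To make limitation 1 concrete, this is roughly the shape of the PersistentVolume the provisioner creates for a `local-path` claim. The PV name, node name, claim path and size below are illustrative placeholders, not values taken from this cluster:

```yaml
apiVersion: v1
kind: PersistentVolume
metadata:
  name: pvc-abc123   # generated name, matches the data directory on the node
spec:
  capacity:
    storage: 10Gi
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Delete
  storageClassName: local-path
  hostPath:
    path: /var/mnt/local-path-provisioner/pvc-abc123_default_local-test-pvc
    type: DirectoryOrCreate
  nodeAffinity:      # this is what pins consuming pods to a single node
    required:
      nodeSelectorTerms:
        - matchExpressions:
            - key: kubernetes.io/hostname
              operator: In
              values:
                - worker1
```

The `kubectl get pv` custom-columns command in the Monitoring section below reads exactly this `nodeAffinity` field to show where each volume lives.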
## Storage Classes Comparison

| Feature | local-path | ceph-block | ceph-filesystem |
|---------|------------|------------|-----------------|
| **HA** | ❌ No | ✅ Yes | ✅ Yes |
| **Replication** | ❌ No | ✅ Yes (2x) | ✅ Yes (2x) |
| **Multi-node** | ❌ No | ✅ Yes (RWO) | ✅ Yes (RWX) |
| **Performance** | ⚡ Fast | 📊 Medium | 📊 Medium |
| **Snapshots** | ❌ No | ✅ Yes | ✅ Yes |
| **Resize** | ⚠️ Manual | ✅ Auto | ✅ Auto |
| **Backup** | ❌ Difficult | ✅ Velero | ✅ Velero |
| **Use Case** | Dev/Test | Production | Shared Data |

## Usage Examples

### Basic PVC

```yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: local-test-pvc
  namespace: default
spec:
  storageClassName: local-path
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 10Gi
```

### StatefulSet with Local Path

```yaml
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: test-app
spec:
  serviceName: test-app
  replicas: 1  # ⚠️ Keep at 1 for local-path
  selector:
    matchLabels:
      app: test-app
  volumeClaimTemplates:
    - metadata:
        name: data
      spec:
        storageClassName: local-path
        accessModes: ["ReadWriteOnce"]
        resources:
          requests:
            storage: 5Gi
  template:
    metadata:
      labels:
        app: test-app
    spec:
      containers:
        - name: app
          image: nginx
          volumeMounts:
            - name: data
              mountPath: /data
```

### Pod with Explicit Node Affinity

```yaml
apiVersion: v1
kind: Pod
metadata:
  name: local-path-test
spec:
  # Pin to the specific node where the PVC's data lives
  nodeSelector:
    kubernetes.io/hostname: worker1

  volumes:
    - name: data
      persistentVolumeClaim:
        claimName: local-test-pvc

  containers:
    - name: test
      image: busybox
      command: ["sh", "-c", "sleep 3600"]
      volumeMounts:
        - name: data
          mountPath: /data
```

## Recommendations

### 1. Storage Path Configuration ✅

The current path `/var/mnt/local-path-provisioner` is correct for Talos Linux:
- Talos only allows persistent storage under `/var`
- This path persists across reboots
- Properly configured in your setup

### 2. Not Default StorageClass ✅

Good decision to keep `ceph-block` as the default:
```yaml
storageclass.kubernetes.io/is-default-class: "false"
```

This ensures:
- PVCs without an explicit `storageClassName` use Ceph
- Production workloads default to HA storage
- Local path is opt-in only

### 3. Use Cases for Your Cluster

**Good Uses:**
```yaml
# Development/test namespace where local-path is acceptable.
# Note: Kubernetes has no per-namespace default StorageClass,
# so PVCs here must still set storageClassName: local-path explicitly.
apiVersion: v1
kind: Namespace
metadata:
  name: dev-test

---
# Prometheus node exporter temp storage
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: node-exporter-data
  namespace: dev-test
spec:
  storageClassName: local-path
  accessModes: [ReadWriteOnce]
  resources:
    requests:
      storage: 5Gi
```

**Bad Uses (Use Ceph Instead):**
```yaml
# ❌ Don't do this - use ceph-block
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: postgres-data  # Database needs HA!
spec:
  storageClassName: local-path  # ❌ Wrong choice

# ✅ Do this instead
spec:
  storageClassName: ceph-block  # ✅ Right choice
```
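Since Kubernetes has no per-namespace default StorageClass, one way to keep local-path both opt-in and bounded is a storage-class-scoped ResourceQuota. This is a minimal sketch assuming the `dev-test` namespace above; the limits are arbitrary and should be sized to your nodes:

```yaml
apiVersion: v1
kind: ResourceQuota
metadata:
  name: local-path-quota
  namespace: dev-test
spec:
  hard:
    # Total capacity that all local-path PVCs in this namespace may request
    local-path.storageclass.storage.k8s.io/requests.storage: 20Gi
    # Maximum number of local-path PVCs in this namespace
    local-path.storageclass.storage.k8s.io/persistentvolumeclaims: "5"
```

This complements the Resource Limits advice below: the quota caps what a namespace can claim, while actual node disk capacity still has to be monitored separately.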
### 4. Monitoring Storage Usage

```bash
# Check PVs and the node they are pinned to
kubectl get pv -o custom-columns=NAME:.metadata.name,STORAGECLASS:.spec.storageClassName,NODE:.spec.nodeAffinity.required.nodeSelectorTerms[0].matchExpressions[0].values[0],SIZE:.spec.capacity.storage

# Check free space on the /var partition (where local-path data lives)
talosctl -n <node-ip> df | grep /var

# List all local-path PVCs
kubectl get pvc -A | grep local-path
```

### 5. Migration Strategy

If you need to migrate from local-path to Ceph (a copy-job sketch is included in the appendix at the end of this README):

```bash
# 1. Create a new PVC with storageClassName: ceph-block
kubectl apply -f <new-ceph-pvc.yaml>

# 2. Copy the data into the new volume, then delete the old PVC
kubectl delete pvc <old-pvc-name> -n <namespace>
```

### 6. Cleanup

```bash
# Clean up orphaned directories on nodes (if needed)
talosctl -n <node-ip> ls /var/mnt/local-path-provisioner
```

### 7. Resource Limits

Consider node disk capacity:

```bash
# Check available space on each node
for node in $(kubectl get nodes -o name | cut -d/ -f2); do
  echo "=== $node ==="
  talosctl -n $node df | grep "/var$"
done

# Set PVC size limits based on node capacity
# Example: If a node has 100GB free, don't create PVCs > 50GB
```

## Troubleshooting

### PVC Stuck in Pending

```bash
# Check events
kubectl describe pvc <pvc-name> -n <namespace>

# Common causes:
# 1. The StorageClass uses WaitForFirstConsumer - the PVC stays Pending until a pod references it
# 2. No node has enough disk space
# 3. Provisioner pod not running
kubectl get pods -n local-path-storage
```

### Pod Can't Mount Volume

```bash
# Check if the pod is on the same node as the PV
kubectl get pod <pod-name> -o jsonpath='{.spec.nodeName}'
kubectl get pv <pv-name> -o jsonpath='{.spec.nodeAffinity}'

# If they differ, delete and recreate the pod
# (the pod will reschedule to the correct node)
```

### Disk Space Issues

```bash
# Check node disk usage
kubectl get nodes -o custom-columns=NAME:.metadata.name,STORAGE:.status.allocatable.ephemeral-storage

# Free up space by deleting old PVCs
kubectl get pvc -A --sort-by=.metadata.creationTimestamp
```

## Best Practices

1. ✅ **Use for ephemeral workloads only** - Anything that can afford data loss
2. ✅ **Set PVC size limits** - Prevent filling up node disks
3. ✅ **Monitor disk usage** - Set up alerts for node disk space
4. ✅ **Document dependencies** - Note which apps use local-path and why
5. ✅ **Plan for migration** - Have a strategy to move to Ceph if needed
6. ❌ **Don't use for databases** - Use ceph-block instead
7. ❌ **Don't use for multi-replica apps** - Pods will be pinned to one node
8. ❌ **Don't assume HA** - Data is unavailable while a node is down, and lost if its disk fails

## Decision Tree

```
Need persistent storage?
  │
  ├─ Is data critical? ──→ YES ──→ Use ceph-block
  │
  ├─ Need HA/replication? ──→ YES ──→ Use ceph-block
  │
  ├─ Pod needs to move between nodes? ──→ YES ──→ Use ceph-block
  │
  ├─ Multiple replicas need same data? ──→ YES ──→ Use ceph-filesystem
  │
  ├─ Development/testing only? ──→ YES ──→ Consider local-path
  │
  └─ Temporary/cache data? ──→ YES ──→ Consider local-path or emptyDir
```

## Summary

The Local Path Provisioner is included in your cluster but should be used sparingly:

✅ **When to use**: Dev/test, non-critical data, node-specific storage
❌ **When NOT to use**: Production databases, HA apps, critical data

Your configuration is correct:
- Talos-compatible path ✅
- Not default StorageClass ✅
- Privileged pod security ✅
- Automated prune enabled ✅

For most production workloads, stick with `ceph-block` (your default).
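## Appendix: Data Copy Sketch for Migration

The Migration Strategy section above references a copy step between the old and new volume. The pod below is a minimal sketch of that step: it assumes the old `local-path` PVC and the new `ceph-block` PVC already exist in the same namespace, and the claim names (`old-local-path-data`, `new-ceph-data`) are placeholders. Scale the owning workload down to zero before copying.

```yaml
apiVersion: v1
kind: Pod
metadata:
  name: pvc-copy
  namespace: default
spec:
  restartPolicy: Never
  containers:
    - name: copy
      image: busybox
      # Copy everything, preserving permissions and timestamps
      command: ["sh", "-c", "cp -a /old/. /new/ && echo done"]
      volumeMounts:
        - name: old
          mountPath: /old
        - name: new
          mountPath: /new
  volumes:
    - name: old
      persistentVolumeClaim:
        claimName: old-local-path-data   # placeholder: existing local-path PVC
    - name: new
      persistentVolumeClaim:
        claimName: new-ceph-data         # placeholder: new ceph-block PVC
```

Because the local-path PV carries node affinity, the scheduler places this pod on the node that holds the data, and the ceph-block volume can attach there as well. Once the copy finishes, point the workload at the new PVC and delete the old one as described in the Migration Strategy section.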
diff --git a/apps/local-path-provisioner/application.yaml b/apps/local-path-provisioner/application.yaml
new file mode 100644
index 0000000..024f865
--- /dev/null
+++ b/apps/local-path-provisioner/application.yaml
@@ -0,0 +1,59 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: local-path-provisioner
+  namespace: argocd
+  annotations:
+    argocd.argoproj.io/sync-wave: "-2"
+  finalizers:
+    - resources-finalizer.argocd.argoproj.io
+spec:
+  project: default
+  source:
+    repoURL: https://github.com/rancher/local-path-provisioner.git
+    targetRevision: v0.0.32
+    path: deploy
+    kustomize:
+      patches:
+        # Configure storage path for Talos
+        - target:
+            kind: ConfigMap
+            name: local-path-config
+          patch: |-
+            - op: replace
+              path: /data/config.json
+              value: |-
+                {
+                  "nodePathMap":[
+                    {
+                      "node":"DEFAULT_PATH_FOR_NON_LISTED_NODES",
+                      "paths":["/var/mnt/local-path-provisioner"]
+                    }
+                  ]
+                }
+
+        # Don't set as default StorageClass (Ceph is default)
+        - target:
+            kind: StorageClass
+            name: local-path
+          patch: |-
+            - op: replace
+              path: /metadata/annotations/storageclass.kubernetes.io~1is-default-class
+              value: "false"
+
+  destination:
+    server: https://kubernetes.default.svc
+    namespace: local-path-storage
+
+  syncPolicy:
+    automated:
+      prune: true
+      selfHeal: true
+    syncOptions:
+      - CreateNamespace=true
+      - ServerSideApply=true
+    managedNamespaceMetadata:
+      labels:
+        pod-security.kubernetes.io/enforce: privileged
+        pod-security.kubernetes.io/audit: privileged
+        pod-security.kubernetes.io/warn: privileged
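Once the Application above has synced, a quick way to sanity-check that the kustomize patches took effect is to run the commands below; they are plain kubectl reads and safe to repeat:

```bash
# Provisioner pod should be Running in the managed namespace
kubectl -n local-path-storage get pods

# local-path must NOT show "(default)"; ceph-block stays the default
kubectl get storageclass

# The ConfigMap should point at the Talos-compatible path
kubectl -n local-path-storage get configmap local-path-config -o jsonpath='{.data.config\.json}'
```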