diff --git a/api/v1/objectstore_types.go b/api/v1/objectstore_types.go index 0db706a..171eb61 100644 --- a/api/v1/objectstore_types.go +++ b/api/v1/objectstore_types.go @@ -22,6 +22,35 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) +// ProbeConfig holds configuration for probe timing and thresholds +// This is a subset of the corev1.Probe type, with only the fields that we want to expose as configuration. +type ProbeConfig struct { + // InitialDelaySeconds is the number of seconds after the container has started before startup probes are initiated. + // +kubebuilder:default:=0 + // +optional + InitialDelaySeconds int32 `json:"initialDelaySeconds,omitempty"` + + // TimeoutSeconds is the number of seconds after which the probe times out. + // +kubebuilder:default:=10 + // +optional + TimeoutSeconds int32 `json:"timeoutSeconds,omitempty"` + + // PeriodSeconds is how often (in seconds) to perform the probe. + // +kubebuilder:default:=10 + // +optional + PeriodSeconds int32 `json:"periodSeconds,omitempty"` + + // SuccessThreshold is the minimum consecutive successes for the probe to be considered successful. + // +kubebuilder:default:=1 + // +optional + SuccessThreshold int32 `json:"successThreshold,omitempty"` + + // FailureThreshold is the minimum consecutive failures for the probe to be considered failed. + // +kubebuilder:default:=10 + // +optional + FailureThreshold int32 `json:"failureThreshold,omitempty"` +} + // InstanceSidecarConfiguration defines the configuration for the sidecar that runs in the instance pods. type InstanceSidecarConfiguration struct { // The environment to be explicitly passed to the sidecar @@ -37,6 +66,10 @@ type InstanceSidecarConfiguration struct { // Resources define cpu/memory requests and limits for the sidecar that runs in the instance pods. // +optional Resources corev1.ResourceRequirements `json:"resources,omitempty"` + + // StartupProbe defines the configuration for the startup probe of the sidecar container. + // +optional + StartupProbe *ProbeConfig `json:"startupProbe,omitempty"` } // ObjectStoreSpec defines the desired state of ObjectStore. diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 1f92d88..031dcbb 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -36,6 +36,11 @@ func (in *InstanceSidecarConfiguration) DeepCopyInto(out *InstanceSidecarConfigu } } in.Resources.DeepCopyInto(&out.Resources) + if in.StartupProbe != nil { + in, out := &in.StartupProbe, &out.StartupProbe + *out = new(ProbeConfig) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InstanceSidecarConfiguration. @@ -146,6 +151,21 @@ func (in *ObjectStoreStatus) DeepCopy() *ObjectStoreStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ProbeConfig) DeepCopyInto(out *ProbeConfig) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ProbeConfig. +func (in *ProbeConfig) DeepCopy() *ProbeConfig { + if in == nil { + return nil + } + out := new(ProbeConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *RecoveryWindow) DeepCopyInto(out *RecoveryWindow) { *out = *in diff --git a/config/crd/bases/barmancloud.cnpg.io_objectstores.yaml b/config/crd/bases/barmancloud.cnpg.io_objectstores.yaml index be1348d..c4a9c1f 100644 --- a/config/crd/bases/barmancloud.cnpg.io_objectstores.yaml +++ b/config/crd/bases/barmancloud.cnpg.io_objectstores.yaml @@ -577,6 +577,42 @@ spec: The retentionCheckInterval defines the frequency at which the system checks and enforces retention policies. type: integer + startupProbe: + description: StartupProbe defines the configuration for the startup + probe of the sidecar container. + properties: + failureThreshold: + default: 10 + description: FailureThreshold is the minimum consecutive failures + for the probe to be considered failed. + format: int32 + type: integer + initialDelaySeconds: + default: 0 + description: InitialDelaySeconds is the number of seconds + after the container has started before startup probes are + initiated. + format: int32 + type: integer + periodSeconds: + default: 10 + description: PeriodSeconds is how often (in seconds) to perform + the probe. + format: int32 + type: integer + successThreshold: + default: 1 + description: SuccessThreshold is the minimum consecutive successes + for the probe to be considered successful. + format: int32 + type: integer + timeoutSeconds: + default: 10 + description: TimeoutSeconds is the number of seconds after + which the probe times out. + format: int32 + type: integer + type: object type: object retentionPolicy: description: |- diff --git a/hack/examples/minio-store.yaml b/hack/examples/minio-store.yaml index de47ea7..b36fb24 100644 --- a/hack/examples/minio-store.yaml +++ b/hack/examples/minio-store.yaml @@ -13,6 +13,12 @@ spec: limits: memory: "512Mi" cpu: "500m" + startupProbe: + initialDelaySeconds: 1 + timeoutSeconds: 10 + periodSeconds: 1 + failureThreshold: 10 + successThreshold: 1 configuration: endpointCA: name: minio-server-tls diff --git a/internal/cnpgi/operator/config/config.go b/internal/cnpgi/operator/config/config.go index 53a8e67..f040aee 100644 --- a/internal/cnpgi/operator/config/config.go +++ b/internal/cnpgi/operator/config/config.go @@ -1,7 +1,6 @@ package config import ( - "strconv" "strings" cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" @@ -58,29 +57,6 @@ type PluginConfiguration struct { ReplicaSourceBarmanObjectName string ReplicaSourceServerName string - - // Probe configuration - StartupProbeConfig *ProbeConfig -} - -// ProbeConfig holds configuration for Kubernetes probes -type ProbeConfig struct { - InitialDelaySeconds int32 - TimeoutSeconds int32 - PeriodSeconds int32 - FailureThreshold int32 - SuccessThreshold int32 -} - -// DefaultProbeConfig returns the default probe configuration -func DefaultProbeConfig() *ProbeConfig { - return &ProbeConfig{ - InitialDelaySeconds: 0, - TimeoutSeconds: 10, - PeriodSeconds: 10, - FailureThreshold: 10, - SuccessThreshold: 1, - } } // GetBarmanObjectKey gets the namespaced name of the barman object @@ -190,50 +166,11 @@ func NewFromCluster(cluster *cnpgv1.Cluster) *PluginConfiguration { // used for wal_restore in the designed primary of a replica cluster ReplicaSourceServerName: replicaSourceServerName, ReplicaSourceBarmanObjectName: replicaSourceBarmanObjectName, - // probe configuration - StartupProbeConfig: parseProbeConfig(helper.Parameters), } return result } -// parseProbeConfig parses probe configuration from plugin parameters -func parseProbeConfig(parameters map[string]string) *ProbeConfig { - config := DefaultProbeConfig() - - if val, ok := parameters["startupProbe.initialDelaySeconds"]; ok { - if parsed, err := strconv.ParseInt(val, 10, 32); err == nil { - config.InitialDelaySeconds = int32(parsed) - } - } - - if val, ok := parameters["startupProbe.timeoutSeconds"]; ok { - if parsed, err := strconv.ParseInt(val, 10, 32); err == nil { - config.TimeoutSeconds = int32(parsed) - } - } - - if val, ok := parameters["startupProbe.periodSeconds"]; ok { - if parsed, err := strconv.ParseInt(val, 10, 32); err == nil { - config.PeriodSeconds = int32(parsed) - } - } - - if val, ok := parameters["startupProbe.failureThreshold"]; ok { - if parsed, err := strconv.ParseInt(val, 10, 32); err == nil { - config.FailureThreshold = int32(parsed) - } - } - - if val, ok := parameters["startupProbe.successThreshold"]; ok { - if parsed, err := strconv.ParseInt(val, 10, 32); err == nil { - config.SuccessThreshold = int32(parsed) - } - } - - return config -} - func getRecoveryParameters(cluster *cnpgv1.Cluster) map[string]string { recoveryPluginConfiguration := getRecoverySourcePlugin(cluster) if recoveryPluginConfiguration == nil { diff --git a/internal/cnpgi/operator/lifecycle.go b/internal/cnpgi/operator/lifecycle.go index 143d746..3e5299b 100644 --- a/internal/cnpgi/operator/lifecycle.go +++ b/internal/cnpgi/operator/lifecycle.go @@ -17,6 +17,7 @@ import ( "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" + barmancloudv1 "github.com/cloudnative-pg/plugin-barman-cloud/api/v1" "github.com/cloudnative-pg/plugin-barman-cloud/internal/cnpgi/metadata" "github.com/cloudnative-pg/plugin-barman-cloud/internal/cnpgi/operator/config" ) @@ -125,11 +126,16 @@ func (impl LifecycleImplementation) reconcileJob( return nil, err } + startupProbe, err := impl.collectSidecarStartupProbeForRecoveryJob(ctx, pluginConfiguration) + if err != nil { + return nil, err + } + return reconcileJob(ctx, cluster, request, sidecarConfiguration{ env: env, certificates: certificates, resources: resources, - probeConfig: pluginConfiguration.StartupProbeConfig, + startupProbe: startupProbe, }) } @@ -137,7 +143,7 @@ type sidecarConfiguration struct { env []corev1.EnvVar certificates []corev1.VolumeProjection resources corev1.ResourceRequirements - probeConfig *config.ProbeConfig + startupProbe *barmancloudv1.ProbeConfig } func reconcileJob( @@ -219,11 +225,16 @@ func (impl LifecycleImplementation) reconcilePod( return nil, err } + startupProbe, err := impl.collectSidecarStartupProbeForInstancePod(ctx, pluginConfiguration) + if err != nil { + return nil, err + } + return reconcilePod(ctx, cluster, request, pluginConfiguration, sidecarConfiguration{ env: env, certificates: certificates, resources: resources, - probeConfig: pluginConfiguration.StartupProbeConfig, + startupProbe: startupProbe, }) } @@ -315,12 +326,13 @@ func reconcilePodSpec( } // Apply configurable probe settings if available - if config.probeConfig != nil { - baseProbe.InitialDelaySeconds = config.probeConfig.InitialDelaySeconds - baseProbe.TimeoutSeconds = config.probeConfig.TimeoutSeconds - baseProbe.PeriodSeconds = config.probeConfig.PeriodSeconds - baseProbe.FailureThreshold = config.probeConfig.FailureThreshold - baseProbe.SuccessThreshold = config.probeConfig.SuccessThreshold + if config.startupProbe != nil { + // Copy timing and threshold settings from user configuration + baseProbe.InitialDelaySeconds = config.startupProbe.InitialDelaySeconds + baseProbe.TimeoutSeconds = config.startupProbe.TimeoutSeconds + baseProbe.PeriodSeconds = config.startupProbe.PeriodSeconds + baseProbe.FailureThreshold = config.startupProbe.FailureThreshold + baseProbe.SuccessThreshold = config.startupProbe.SuccessThreshold } else { // Fallback to default values baseProbe.FailureThreshold = 10 diff --git a/internal/cnpgi/operator/lifecycle_probes.go b/internal/cnpgi/operator/lifecycle_probes.go new file mode 100644 index 0000000..4bee0df --- /dev/null +++ b/internal/cnpgi/operator/lifecycle_probes.go @@ -0,0 +1,59 @@ +package operator + +import ( + "context" + + barmancloudv1 "github.com/cloudnative-pg/plugin-barman-cloud/api/v1" + "github.com/cloudnative-pg/plugin-barman-cloud/internal/cnpgi/operator/config" +) + +func (impl LifecycleImplementation) collectSidecarStartupProbeForRecoveryJob( + ctx context.Context, + configuration *config.PluginConfiguration, +) (*barmancloudv1.ProbeConfig, error) { + if len(configuration.RecoveryBarmanObjectName) > 0 { + var barmanObjectStore barmancloudv1.ObjectStore + if err := impl.Client.Get(ctx, configuration.GetRecoveryBarmanObjectKey(), &barmanObjectStore); err != nil { + return nil, err + } + + return barmanObjectStore.Spec.InstanceSidecarConfiguration.StartupProbe, nil + } + + return nil, nil +} + +func (impl LifecycleImplementation) collectSidecarStartupProbeForInstancePod( + ctx context.Context, + configuration *config.PluginConfiguration, +) (*barmancloudv1.ProbeConfig, error) { + if len(configuration.BarmanObjectName) > 0 { + // On a replica cluster that also archives, the designated primary + // will use both the replica source object store and the object store + // of the cluster. + // In this case, we use the cluster object store for configuring + // the startup probe of the sidecar container. + + var barmanObjectStore barmancloudv1.ObjectStore + if err := impl.Client.Get(ctx, configuration.GetBarmanObjectKey(), &barmanObjectStore); err != nil { + return nil, err + } + + return barmanObjectStore.Spec.InstanceSidecarConfiguration.StartupProbe, nil + } + + if len(configuration.RecoveryBarmanObjectName) > 0 { + // On a replica cluster that doesn't archive, the designated primary + // uses only the replica source object store. + // In this case, we use the replica source object store for configuring + // the startup probe of the sidecar container. + var barmanObjectStore barmancloudv1.ObjectStore + if err := impl.Client.Get(ctx, configuration.GetRecoveryBarmanObjectKey(), &barmanObjectStore); err != nil { + return nil, err + } + + return barmanObjectStore.Spec.InstanceSidecarConfiguration.StartupProbe, nil + } + + return nil, nil +} diff --git a/internal/cnpgi/operator/lifecycle_test.go b/internal/cnpgi/operator/lifecycle_test.go index 24139b7..3653201 100644 --- a/internal/cnpgi/operator/lifecycle_test.go +++ b/internal/cnpgi/operator/lifecycle_test.go @@ -10,6 +10,7 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + barmancloudv1 "github.com/cloudnative-pg/plugin-barman-cloud/api/v1" "github.com/cloudnative-pg/plugin-barman-cloud/internal/cnpgi/operator/config" . "github.com/onsi/ginkgo/v2" @@ -173,8 +174,8 @@ var _ = Describe("LifecycleImplementation", func() { Describe("reconcilePod", func() { It("returns a patch for a valid pod with probe configuration", func(ctx SpecContext) { - // Configure plugin with custom probe settings - pluginConfiguration.StartupProbeConfig = &config.ProbeConfig{ + // Configure sidecar with custom probe settings + startupProbeConfig := &barmancloudv1.ProbeConfig{ InitialDelaySeconds: 1, TimeoutSeconds: 15, PeriodSeconds: 2, @@ -204,7 +205,7 @@ var _ = Describe("LifecycleImplementation", func() { } response, err := reconcilePod(ctx, cluster, request, pluginConfiguration, sidecarConfiguration{ - probeConfig: pluginConfiguration.StartupProbeConfig, + startupProbe: startupProbeConfig, }) Expect(err).NotTo(HaveOccurred()) Expect(response).NotTo(BeNil())