Also make the liveness and readiness probes configurable

Signed-off-by: Tudor Golubenco <tudor@xata.io>
This commit is contained in:
Tudor Golubenco 2025-09-14 10:22:33 -07:00
parent 8c2e72a7a6
commit ce7d8e9fbe
8 changed files with 442 additions and 34 deletions

View File

@ -70,6 +70,14 @@ type InstanceSidecarConfiguration struct {
// StartupProbe defines the configuration for the startup probe of the sidecar container.
// +optional
StartupProbe *ProbeConfig `json:"startupProbe,omitempty"`
// LivenessProbe defines the configuration for the liveness probe of the sidecar container.
// +optional
LivenessProbe *ProbeConfig `json:"livenessProbe,omitempty"`
// ReadinessProbe defines the configuration for the readiness probe of the sidecar container.
// +optional
ReadinessProbe *ProbeConfig `json:"readinessProbe,omitempty"`
}
// ObjectStoreSpec defines the desired state of ObjectStore.

View File

@ -41,6 +41,16 @@ func (in *InstanceSidecarConfiguration) DeepCopyInto(out *InstanceSidecarConfigu
*out = new(ProbeConfig)
**out = **in
}
if in.LivenessProbe != nil {
in, out := &in.LivenessProbe, &out.LivenessProbe
*out = new(ProbeConfig)
**out = **in
}
if in.ReadinessProbe != nil {
in, out := &in.ReadinessProbe, &out.ReadinessProbe
*out = new(ProbeConfig)
**out = **in
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InstanceSidecarConfiguration.

View File

@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.18.0
controller-gen.kubebuilder.io/version: v0.16.1
name: objectstores.barmancloud.cnpg.io
spec:
group: barmancloud.cnpg.io
@ -511,6 +511,78 @@ spec:
- name
type: object
type: array
livenessProbe:
description: LivenessProbe defines the configuration for the liveness
probe of the sidecar container.
properties:
failureThreshold:
default: 10
description: FailureThreshold is the minimum consecutive failures
for the probe to be considered failed.
format: int32
type: integer
initialDelaySeconds:
default: 0
description: InitialDelaySeconds is the number of seconds
after the container has started before startup probes are
initiated.
format: int32
type: integer
periodSeconds:
default: 10
description: PeriodSeconds is how often (in seconds) to perform
the probe.
format: int32
type: integer
successThreshold:
default: 1
description: SuccessThreshold is the minimum consecutive successes
for the probe to be considered successful.
format: int32
type: integer
timeoutSeconds:
default: 10
description: TimeoutSeconds is the number of seconds after
which the probe times out.
format: int32
type: integer
type: object
readinessProbe:
description: ReadinessProbe defines the configuration for the
readiness probe of the sidecar container.
properties:
failureThreshold:
default: 10
description: FailureThreshold is the minimum consecutive failures
for the probe to be considered failed.
format: int32
type: integer
initialDelaySeconds:
default: 0
description: InitialDelaySeconds is the number of seconds
after the container has started before startup probes are
initiated.
format: int32
type: integer
periodSeconds:
default: 10
description: PeriodSeconds is how often (in seconds) to perform
the probe.
format: int32
type: integer
successThreshold:
default: 1
description: SuccessThreshold is the minimum consecutive successes
for the probe to be considered successful.
format: int32
type: integer
timeoutSeconds:
default: 10
description: TimeoutSeconds is the number of seconds after
which the probe times out.
format: int32
type: integer
type: object
resources:
description: Resources define cpu/memory requests and limits for
the sidecar that runs in the instance pods.

View File

@ -19,6 +19,18 @@ spec:
periodSeconds: 1
failureThreshold: 10
successThreshold: 1
livenessProbe:
initialDelaySeconds: 30
timeoutSeconds: 5
periodSeconds: 10
failureThreshold: 3
successThreshold: 1
readinessProbe:
initialDelaySeconds: 5
timeoutSeconds: 5
periodSeconds: 5
failureThreshold: 3
successThreshold: 1
configuration:
endpointCA:
name: minio-server-tls

View File

@ -131,19 +131,33 @@ func (impl LifecycleImplementation) reconcileJob(
return nil, err
}
livenessProbe, err := impl.collectSidecarLivenessProbeForRecoveryJob(ctx, pluginConfiguration)
if err != nil {
return nil, err
}
readinessProbe, err := impl.collectSidecarReadinessProbeForRecoveryJob(ctx, pluginConfiguration)
if err != nil {
return nil, err
}
return reconcileJob(ctx, cluster, request, sidecarConfiguration{
env: env,
certificates: certificates,
resources: resources,
startupProbe: startupProbe,
env: env,
certificates: certificates,
resources: resources,
startupProbe: startupProbe,
livenessProbe: livenessProbe,
readinessProbe: readinessProbe,
})
}
type sidecarConfiguration struct {
env []corev1.EnvVar
certificates []corev1.VolumeProjection
resources corev1.ResourceRequirements
startupProbe *barmancloudv1.ProbeConfig
env []corev1.EnvVar
certificates []corev1.VolumeProjection
resources corev1.ResourceRequirements
startupProbe *barmancloudv1.ProbeConfig
livenessProbe *barmancloudv1.ProbeConfig
readinessProbe *barmancloudv1.ProbeConfig
}
func reconcileJob(
@ -230,11 +244,23 @@ func (impl LifecycleImplementation) reconcilePod(
return nil, err
}
livenessProbe, err := impl.collectSidecarLivenessProbeForInstancePod(ctx, pluginConfiguration)
if err != nil {
return nil, err
}
readinessProbe, err := impl.collectSidecarReadinessProbeForInstancePod(ctx, pluginConfiguration)
if err != nil {
return nil, err
}
return reconcilePod(ctx, cluster, request, pluginConfiguration, sidecarConfiguration{
env: env,
certificates: certificates,
resources: resources,
startupProbe: startupProbe,
env: env,
certificates: certificates,
resources: resources,
startupProbe: startupProbe,
livenessProbe: livenessProbe,
readinessProbe: readinessProbe,
})
}
@ -325,25 +351,37 @@ func reconcilePodSpec(
},
}
// Apply configurable probe settings if available
if config.startupProbe != nil {
// Copy timing and threshold settings from user configuration
baseProbe.InitialDelaySeconds = config.startupProbe.InitialDelaySeconds
baseProbe.TimeoutSeconds = config.startupProbe.TimeoutSeconds
baseProbe.PeriodSeconds = config.startupProbe.PeriodSeconds
baseProbe.FailureThreshold = config.startupProbe.FailureThreshold
baseProbe.SuccessThreshold = config.startupProbe.SuccessThreshold
} else {
// Fallback to default values
baseProbe.FailureThreshold = 10
baseProbe.TimeoutSeconds = 10
}
startupProbe := createProbe(baseProbe, config.startupProbe, &barmancloudv1.ProbeConfig{
FailureThreshold: 10,
TimeoutSeconds: 10,
InitialDelaySeconds: 0,
SuccessThreshold: 1,
PeriodSeconds: 10,
})
livenessProbe := createProbe(baseProbe, config.livenessProbe, &barmancloudv1.ProbeConfig{
FailureThreshold: 3,
TimeoutSeconds: 10,
InitialDelaySeconds: 0,
SuccessThreshold: 1,
PeriodSeconds: 10,
})
readinessProbe := createProbe(baseProbe, config.readinessProbe, &barmancloudv1.ProbeConfig{
FailureThreshold: 3,
TimeoutSeconds: 10,
InitialDelaySeconds: 0,
SuccessThreshold: 1,
PeriodSeconds: 10,
})
// fixed values
sidecarTemplate.Name = "plugin-barman-cloud"
sidecarTemplate.Image = viper.GetString("sidecar-image")
sidecarTemplate.ImagePullPolicy = cluster.Spec.ImagePullPolicy
sidecarTemplate.StartupProbe = baseProbe.DeepCopy()
sidecarTemplate.StartupProbe = startupProbe
sidecarTemplate.LivenessProbe = livenessProbe
sidecarTemplate.ReadinessProbe = readinessProbe
sidecarTemplate.SecurityContext = &corev1.SecurityContext{
AllowPrivilegeEscalation: ptr.To(false),
RunAsNonRoot: ptr.To(true),
@ -567,3 +605,33 @@ func getCNPGJobRole(job *batchv1.Job) string {
return ""
}
// createProbe builds a probe from a deep copy of baseProbe and fills in its
// timing and threshold settings.
//
// Each setting is taken from config when config is non-nil and the field is
// non-zero; otherwise the corresponding value from defaults is used. A zero
// value in config is therefore indistinguishable from "not set".
// baseProbe itself is never modified.
func createProbe(baseProbe *corev1.Probe, config *barmancloudv1.ProbeConfig, defaults *barmancloudv1.ProbeConfig) *corev1.Probe {
	// A nil config behaves exactly like a config with every field left at zero.
	var overrides barmancloudv1.ProbeConfig
	if config != nil {
		overrides = *config
	}

	// choose returns the override unless it is zero, in which case the
	// fallback wins.
	choose := func(override, fallback int32) int32 {
		if override != 0 {
			return override
		}
		return fallback
	}

	result := baseProbe.DeepCopy()
	result.FailureThreshold = choose(overrides.FailureThreshold, defaults.FailureThreshold)
	result.TimeoutSeconds = choose(overrides.TimeoutSeconds, defaults.TimeoutSeconds)
	result.InitialDelaySeconds = choose(overrides.InitialDelaySeconds, defaults.InitialDelaySeconds)
	result.SuccessThreshold = choose(overrides.SuccessThreshold, defaults.SuccessThreshold)
	result.PeriodSeconds = choose(overrides.PeriodSeconds, defaults.PeriodSeconds)

	return result
}

View File

@ -7,9 +7,14 @@ import (
"github.com/cloudnative-pg/plugin-barman-cloud/internal/cnpgi/operator/config"
)
func (impl LifecycleImplementation) collectSidecarStartupProbeForRecoveryJob(
// probeAccessor is a function type that extracts a specific probe configuration from an ObjectStore
type probeAccessor func(*barmancloudv1.ObjectStore) *barmancloudv1.ProbeConfig
// collectSidecarProbeForRecoveryJob is a generic function to collect probe configurations for recovery jobs
func (impl LifecycleImplementation) collectSidecarProbeForRecoveryJob(
ctx context.Context,
configuration *config.PluginConfiguration,
accessor probeAccessor,
) (*barmancloudv1.ProbeConfig, error) {
if len(configuration.RecoveryBarmanObjectName) > 0 {
var barmanObjectStore barmancloudv1.ObjectStore
@ -17,43 +22,102 @@ func (impl LifecycleImplementation) collectSidecarStartupProbeForRecoveryJob(
return nil, err
}
return barmanObjectStore.Spec.InstanceSidecarConfiguration.StartupProbe, nil
return accessor(&barmanObjectStore), nil
}
return nil, nil
}
func (impl LifecycleImplementation) collectSidecarStartupProbeForInstancePod(
// collectSidecarProbeForInstancePod is a generic function to collect probe configurations for instance pods
func (impl LifecycleImplementation) collectSidecarProbeForInstancePod(
ctx context.Context,
configuration *config.PluginConfiguration,
accessor probeAccessor,
probeType string,
) (*barmancloudv1.ProbeConfig, error) {
if len(configuration.BarmanObjectName) > 0 {
// On a replica cluster that also archives, the designated primary
// will use both the replica source object store and the object store
// of the cluster.
// In this case, we use the cluster object store for configuring
// the startup probe of the sidecar container.
// the probe of the sidecar container.
var barmanObjectStore barmancloudv1.ObjectStore
if err := impl.Client.Get(ctx, configuration.GetBarmanObjectKey(), &barmanObjectStore); err != nil {
return nil, err
}
return barmanObjectStore.Spec.InstanceSidecarConfiguration.StartupProbe, nil
return accessor(&barmanObjectStore), nil
}
if len(configuration.RecoveryBarmanObjectName) > 0 {
// On a replica cluster that doesn't archive, the designated primary
// uses only the replica source object store.
// In this case, we use the replica source object store for configuring
// the startup probe of the sidecar container.
// the probe of the sidecar container.
var barmanObjectStore barmancloudv1.ObjectStore
if err := impl.Client.Get(ctx, configuration.GetRecoveryBarmanObjectKey(), &barmanObjectStore); err != nil {
return nil, err
}
return barmanObjectStore.Spec.InstanceSidecarConfiguration.StartupProbe, nil
return accessor(&barmanObjectStore), nil
}
return nil, nil
}
// Specific probe collection methods that use the generic functions
// collectSidecarStartupProbeForRecoveryJob delegates to
// collectSidecarProbeForRecoveryJob, selecting the StartupProbe field of the
// object store's instance sidecar configuration.
func (impl LifecycleImplementation) collectSidecarStartupProbeForRecoveryJob(
	ctx context.Context,
	configuration *config.PluginConfiguration,
) (*barmancloudv1.ProbeConfig, error) {
	startupProbeOf := func(objectStore *barmancloudv1.ObjectStore) *barmancloudv1.ProbeConfig {
		return objectStore.Spec.InstanceSidecarConfiguration.StartupProbe
	}

	return impl.collectSidecarProbeForRecoveryJob(ctx, configuration, startupProbeOf)
}
// collectSidecarStartupProbeForInstancePod delegates to
// collectSidecarProbeForInstancePod, selecting the StartupProbe field of the
// object store's instance sidecar configuration and labelling the request
// with the "startup" probe type.
func (impl LifecycleImplementation) collectSidecarStartupProbeForInstancePod(
	ctx context.Context,
	configuration *config.PluginConfiguration,
) (*barmancloudv1.ProbeConfig, error) {
	startupProbeOf := func(objectStore *barmancloudv1.ObjectStore) *barmancloudv1.ProbeConfig {
		return objectStore.Spec.InstanceSidecarConfiguration.StartupProbe
	}

	return impl.collectSidecarProbeForInstancePod(ctx, configuration, startupProbeOf, "startup")
}
// collectSidecarLivenessProbeForRecoveryJob delegates to
// collectSidecarProbeForRecoveryJob, selecting the LivenessProbe field of the
// object store's instance sidecar configuration.
func (impl LifecycleImplementation) collectSidecarLivenessProbeForRecoveryJob(
	ctx context.Context,
	configuration *config.PluginConfiguration,
) (*barmancloudv1.ProbeConfig, error) {
	livenessProbeOf := func(objectStore *barmancloudv1.ObjectStore) *barmancloudv1.ProbeConfig {
		return objectStore.Spec.InstanceSidecarConfiguration.LivenessProbe
	}

	return impl.collectSidecarProbeForRecoveryJob(ctx, configuration, livenessProbeOf)
}
// collectSidecarLivenessProbeForInstancePod delegates to
// collectSidecarProbeForInstancePod, selecting the LivenessProbe field of the
// object store's instance sidecar configuration and labelling the request
// with the "liveness" probe type.
func (impl LifecycleImplementation) collectSidecarLivenessProbeForInstancePod(
	ctx context.Context,
	configuration *config.PluginConfiguration,
) (*barmancloudv1.ProbeConfig, error) {
	livenessProbeOf := func(objectStore *barmancloudv1.ObjectStore) *barmancloudv1.ProbeConfig {
		return objectStore.Spec.InstanceSidecarConfiguration.LivenessProbe
	}

	return impl.collectSidecarProbeForInstancePod(ctx, configuration, livenessProbeOf, "liveness")
}
// collectSidecarReadinessProbeForRecoveryJob delegates to
// collectSidecarProbeForRecoveryJob, selecting the ReadinessProbe field of
// the object store's instance sidecar configuration.
func (impl LifecycleImplementation) collectSidecarReadinessProbeForRecoveryJob(
	ctx context.Context,
	configuration *config.PluginConfiguration,
) (*barmancloudv1.ProbeConfig, error) {
	readinessProbeOf := func(objectStore *barmancloudv1.ObjectStore) *barmancloudv1.ProbeConfig {
		return objectStore.Spec.InstanceSidecarConfiguration.ReadinessProbe
	}

	return impl.collectSidecarProbeForRecoveryJob(ctx, configuration, readinessProbeOf)
}
// collectSidecarReadinessProbeForInstancePod delegates to
// collectSidecarProbeForInstancePod, selecting the ReadinessProbe field of
// the object store's instance sidecar configuration and labelling the request
// with the "readiness" probe type.
func (impl LifecycleImplementation) collectSidecarReadinessProbeForInstancePod(
	ctx context.Context,
	configuration *config.PluginConfiguration,
) (*barmancloudv1.ProbeConfig, error) {
	readinessProbeOf := func(objectStore *barmancloudv1.ObjectStore) *barmancloudv1.ProbeConfig {
		return objectStore.Spec.InstanceSidecarConfiguration.ReadinessProbe
	}

	return impl.collectSidecarProbeForInstancePod(ctx, configuration, readinessProbeOf, "readiness")
}

View File

@ -220,6 +220,72 @@ var _ = Describe("LifecycleImplementation", func() {
Expect(string(response.JsonPatch)).To(ContainSubstring("\"successThreshold\":1"))
})
// Checks that a custom startupProbe configuration is reflected in the patch
// while livenessProbe and readinessProbe, which are left nil in the sidecar
// configuration, still appear with their own default values.
It("decouples probe configurations - startupProbe doesn't affect other probes", func(ctx SpecContext) {
// Configure only startupProbe with custom settings
startupProbeConfig := &barmancloudv1.ProbeConfig{
InitialDelaySeconds: 5,
TimeoutSeconds: 20,
PeriodSeconds: 3,
FailureThreshold: 8,
SuccessThreshold: 2,
}
// Minimal pod with a single "postgres" container, serialized below as the
// object definition of the lifecycle request.
pod := &corev1.Pod{
TypeMeta: podTypeMeta,
ObjectMeta: metav1.ObjectMeta{
Name: "test-pod",
},
Spec: corev1.PodSpec{
Containers: []corev1.Container{
{
Name: "postgres",
},
},
},
}
podJSON, err := json.Marshal(pod)
Expect(err).NotTo(HaveOccurred())
request := &lifecycle.OperatorLifecycleRequest{
ObjectDefinition: podJSON,
}
response, err := reconcilePod(ctx, cluster, request, pluginConfiguration, sidecarConfiguration{
startupProbe: startupProbeConfig,
// livenessProbe and readinessProbe are nil - should use defaults
})
Expect(err).NotTo(HaveOccurred())
Expect(response).NotTo(BeNil())
Expect(response.JsonPatch).NotTo(BeEmpty())
patchStr := string(response.JsonPatch)
// NOTE(review): the ContainSubstring assertions below match anywhere in the
// whole patch string, so on their own they do not establish which probe
// carries a given value.
// Verify startupProbe has custom settings
Expect(patchStr).To(ContainSubstring("startupProbe"))
Expect(patchStr).To(ContainSubstring("\"initialDelaySeconds\":5"))
Expect(patchStr).To(ContainSubstring("\"timeoutSeconds\":20"))
Expect(patchStr).To(ContainSubstring("\"periodSeconds\":3"))
Expect(patchStr).To(ContainSubstring("\"failureThreshold\":8"))
Expect(patchStr).To(ContainSubstring("\"successThreshold\":2"))
// Verify livenessProbe has default settings (not affected by startupProbe)
Expect(patchStr).To(ContainSubstring("livenessProbe"))
Expect(patchStr).To(ContainSubstring("\"failureThreshold\":3")) // default for liveness
Expect(patchStr).To(ContainSubstring("\"timeoutSeconds\":10")) // default for liveness
// initialDelaySeconds: 0 is omitted from JSON when it's the zero value
// Verify readinessProbe has default settings (not affected by startupProbe)
// NOTE(review): these two assertions are textually identical to the liveness
// ones above, so they cannot tell the two probes apart.
Expect(patchStr).To(ContainSubstring("readinessProbe"))
Expect(patchStr).To(ContainSubstring("\"failureThreshold\":3")) // default for readiness
Expect(patchStr).To(ContainSubstring("\"timeoutSeconds\":10")) // default for readiness
// initialDelaySeconds: 0 is omitted from JSON when it's the zero value
// Verify that livenessProbe and readinessProbe don't have startupProbe values
// NOTE(review): [^}]* stops at the first closing brace after the probe key,
// so only the text up to the end of the first nested object is inspected -
// confirm that this span covers the probe's timing fields.
Expect(patchStr).NotTo(MatchRegexp(`"livenessProbe"[^}]*"initialDelaySeconds":5`))
Expect(patchStr).NotTo(MatchRegexp(`"readinessProbe"[^}]*"initialDelaySeconds":5`))
})
It("returns a patch for a valid pod", func(ctx SpecContext) {
pod := &corev1.Pod{
TypeMeta: podTypeMeta,

View File

@ -510,6 +510,78 @@ spec:
- name
type: object
type: array
livenessProbe:
description: LivenessProbe defines the configuration for the liveness
probe of the sidecar container.
properties:
failureThreshold:
default: 10
description: FailureThreshold is the minimum consecutive failures
for the probe to be considered failed.
format: int32
type: integer
initialDelaySeconds:
default: 0
description: InitialDelaySeconds is the number of seconds
after the container has started before startup probes are
initiated.
format: int32
type: integer
periodSeconds:
default: 10
description: PeriodSeconds is how often (in seconds) to perform
the probe.
format: int32
type: integer
successThreshold:
default: 1
description: SuccessThreshold is the minimum consecutive successes
for the probe to be considered successful.
format: int32
type: integer
timeoutSeconds:
default: 10
description: TimeoutSeconds is the number of seconds after
which the probe times out.
format: int32
type: integer
type: object
readinessProbe:
description: ReadinessProbe defines the configuration for the
readiness probe of the sidecar container.
properties:
failureThreshold:
default: 10
description: FailureThreshold is the minimum consecutive failures
for the probe to be considered failed.
format: int32
type: integer
initialDelaySeconds:
default: 0
description: InitialDelaySeconds is the number of seconds
after the container has started before startup probes are
initiated.
format: int32
type: integer
periodSeconds:
default: 10
description: PeriodSeconds is how often (in seconds) to perform
the probe.
format: int32
type: integer
successThreshold:
default: 1
description: SuccessThreshold is the minimum consecutive successes
for the probe to be considered successful.
format: int32
type: integer
timeoutSeconds:
default: 10
description: TimeoutSeconds is the number of seconds after
which the probe times out.
format: int32
type: integer
type: object
resources:
description: Resources define cpu/memory requests and limits for
the sidecar that runs in the instance pods.
@ -576,6 +648,42 @@ spec:
The retentionCheckInterval defines the frequency at which the
system checks and enforces retention policies.
type: integer
startupProbe:
description: StartupProbe defines the configuration for the startup
probe of the sidecar container.
properties:
failureThreshold:
default: 10
description: FailureThreshold is the minimum consecutive failures
for the probe to be considered failed.
format: int32
type: integer
initialDelaySeconds:
default: 0
description: InitialDelaySeconds is the number of seconds
after the container has started before startup probes are
initiated.
format: int32
type: integer
periodSeconds:
default: 10
description: PeriodSeconds is how often (in seconds) to perform
the probe.
format: int32
type: integer
successThreshold:
default: 1
description: SuccessThreshold is the minimum consecutive successes
for the probe to be considered successful.
format: int32
type: integer
timeoutSeconds:
default: 10
description: TimeoutSeconds is the number of seconds after
which the probe times out.
format: int32
type: integer
type: object
type: object
retentionPolicy:
description: |-