Move the startup probe configuration for the sidecar into the ObjectStore config

Signed-off-by: Tudor Golubenco <tudor@xata.io>
This commit is contained in:
Tudor Golubenco 2025-09-14 09:56:10 -07:00
parent ab6b7a684c
commit 8c2e72a7a6
8 changed files with 179 additions and 75 deletions

View File

@ -22,6 +22,35 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
// ProbeConfig holds configuration for probe timing and thresholds
// This is a subset of the corev1.Probe type, with only the fields that we want to expose as configuration.
// The type carries no probe handler: only the five numeric settings below
// can be configured.
//
// Every field is optional, declares a kubebuilder default marker and is
// tagged `omitempty`, so a zero value is left out when the struct is
// serialized to JSON.
// NOTE(review): the Kubernetes documentation states that successThreshold
// must be 1 for startup probes — confirm whether other values should be
// rejected by validation.
type ProbeConfig struct {
// InitialDelaySeconds is the number of seconds after the container has started before startup probes are initiated.
// +kubebuilder:default:=0
// +optional
InitialDelaySeconds int32 `json:"initialDelaySeconds,omitempty"`
// TimeoutSeconds is the number of seconds after which the probe times out.
// +kubebuilder:default:=10
// +optional
TimeoutSeconds int32 `json:"timeoutSeconds,omitempty"`
// PeriodSeconds is how often (in seconds) to perform the probe.
// +kubebuilder:default:=10
// +optional
PeriodSeconds int32 `json:"periodSeconds,omitempty"`
// SuccessThreshold is the minimum consecutive successes for the probe to be considered successful.
// +kubebuilder:default:=1
// +optional
SuccessThreshold int32 `json:"successThreshold,omitempty"`
// FailureThreshold is the minimum consecutive failures for the probe to be considered failed.
// +kubebuilder:default:=10
// +optional
FailureThreshold int32 `json:"failureThreshold,omitempty"`
}
// InstanceSidecarConfiguration defines the configuration for the sidecar that runs in the instance pods.
type InstanceSidecarConfiguration struct {
// The environment to be explicitly passed to the sidecar
@ -37,6 +66,10 @@ type InstanceSidecarConfiguration struct {
// Resources define cpu/memory requests and limits for the sidecar that runs in the instance pods.
// +optional
Resources corev1.ResourceRequirements `json:"resources,omitempty"`
// StartupProbe defines the configuration for the startup probe of the sidecar container.
// +optional
StartupProbe *ProbeConfig `json:"startupProbe,omitempty"`
}
// ObjectStoreSpec defines the desired state of ObjectStore.

View File

@ -36,6 +36,11 @@ func (in *InstanceSidecarConfiguration) DeepCopyInto(out *InstanceSidecarConfigu
}
}
in.Resources.DeepCopyInto(&out.Resources)
if in.StartupProbe != nil {
in, out := &in.StartupProbe, &out.StartupProbe
*out = new(ProbeConfig)
**out = **in
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InstanceSidecarConfiguration.
@ -146,6 +151,21 @@ func (in *ObjectStoreStatus) DeepCopy() *ObjectStoreStatus {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
// The copy is a single struct assignment, so every field of in overwrites
// the corresponding field of out.
func (in *ProbeConfig) DeepCopyInto(out *ProbeConfig) {
*out = *in
}
// DeepCopy is an autogenerated deepcopy function. It allocates a new
// ProbeConfig, fills it from the receiver through DeepCopyInto and returns
// it. A nil receiver yields nil.
func (in *ProbeConfig) DeepCopy() *ProbeConfig {
	if in != nil {
		clone := new(ProbeConfig)
		in.DeepCopyInto(clone)
		return clone
	}
	return nil
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *RecoveryWindow) DeepCopyInto(out *RecoveryWindow) {
*out = *in

View File

@ -577,6 +577,42 @@ spec:
The retentionCheckInterval defines the frequency at which the
system checks and enforces retention policies.
type: integer
startupProbe:
description: StartupProbe defines the configuration for the startup
probe of the sidecar container.
properties:
failureThreshold:
default: 10
description: FailureThreshold is the minimum consecutive failures
for the probe to be considered failed.
format: int32
type: integer
initialDelaySeconds:
default: 0
description: InitialDelaySeconds is the number of seconds
after the container has started before startup probes are
initiated.
format: int32
type: integer
periodSeconds:
default: 10
description: PeriodSeconds is how often (in seconds) to perform
the probe.
format: int32
type: integer
successThreshold:
default: 1
description: SuccessThreshold is the minimum consecutive successes
for the probe to be considered successful.
format: int32
type: integer
timeoutSeconds:
default: 10
description: TimeoutSeconds is the number of seconds after
which the probe times out.
format: int32
type: integer
type: object
type: object
retentionPolicy:
description: |-

View File

@ -13,6 +13,12 @@ spec:
limits:
memory: "512Mi"
cpu: "500m"
startupProbe:
initialDelaySeconds: 1
timeoutSeconds: 10
periodSeconds: 1
failureThreshold: 10
successThreshold: 1
configuration:
endpointCA:
name: minio-server-tls

View File

@ -1,7 +1,6 @@
package config
import (
"strconv"
"strings"
cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1"
@ -58,29 +57,6 @@ type PluginConfiguration struct {
ReplicaSourceBarmanObjectName string
ReplicaSourceServerName string
// Probe configuration
StartupProbeConfig *ProbeConfig
}
// ProbeConfig holds configuration for Kubernetes probes.
// It carries only timing and threshold values; DefaultProbeConfig returns
// the values used when nothing overrides them.
type ProbeConfig struct {
// InitialDelaySeconds is the delay, in seconds, before the probe is first run.
InitialDelaySeconds int32
// TimeoutSeconds is the number of seconds after which a probe attempt times out.
TimeoutSeconds int32
// PeriodSeconds is the interval, in seconds, between probe attempts.
PeriodSeconds int32
// FailureThreshold is the number of consecutive failures after which the probe is considered failed.
FailureThreshold int32
// SuccessThreshold is the number of consecutive successes after which the probe is considered successful.
SuccessThreshold int32
}
// DefaultProbeConfig returns a freshly allocated ProbeConfig populated with
// the built-in defaults: no initial delay, a 10 second timeout, a 10 second
// period, a failure threshold of 10 and a success threshold of 1.
func DefaultProbeConfig() *ProbeConfig {
	defaults := new(ProbeConfig)
	defaults.InitialDelaySeconds = 0
	defaults.TimeoutSeconds = 10
	defaults.PeriodSeconds = 10
	defaults.FailureThreshold = 10
	defaults.SuccessThreshold = 1
	return defaults
}
// GetBarmanObjectKey gets the namespaced name of the barman object
@ -190,50 +166,11 @@ func NewFromCluster(cluster *cnpgv1.Cluster) *PluginConfiguration {
// used for wal_restore in the designed primary of a replica cluster
ReplicaSourceServerName: replicaSourceServerName,
ReplicaSourceBarmanObjectName: replicaSourceBarmanObjectName,
// probe configuration
StartupProbeConfig: parseProbeConfig(helper.Parameters),
}
return result
}
// parseProbeConfig builds the startup probe configuration from the plugin
// parameters.
//
// It starts from DefaultProbeConfig and overrides a field only when its
// "startupProbe.<field>" key is present and its value parses as a base-10
// integer that fits in 32 bits. Absent keys and unparsable values leave the
// default in place; no error is reported.
func parseProbeConfig(parameters map[string]string) *ProbeConfig {
	probe := DefaultProbeConfig()

	// Each entry ties a parameter key to the field it overrides.
	overrides := []struct {
		key    string
		target *int32
	}{
		{"startupProbe.initialDelaySeconds", &probe.InitialDelaySeconds},
		{"startupProbe.timeoutSeconds", &probe.TimeoutSeconds},
		{"startupProbe.periodSeconds", &probe.PeriodSeconds},
		{"startupProbe.failureThreshold", &probe.FailureThreshold},
		{"startupProbe.successThreshold", &probe.SuccessThreshold},
	}

	for _, override := range overrides {
		raw, found := parameters[override.key]
		if !found {
			continue
		}
		number, err := strconv.ParseInt(raw, 10, 32)
		if err != nil {
			// Invalid values are ignored and the default is kept.
			continue
		}
		*override.target = int32(number)
	}

	return probe
}
func getRecoveryParameters(cluster *cnpgv1.Cluster) map[string]string {
recoveryPluginConfiguration := getRecoverySourcePlugin(cluster)
if recoveryPluginConfiguration == nil {

View File

@ -17,6 +17,7 @@ import (
"k8s.io/utils/ptr"
"sigs.k8s.io/controller-runtime/pkg/client"
barmancloudv1 "github.com/cloudnative-pg/plugin-barman-cloud/api/v1"
"github.com/cloudnative-pg/plugin-barman-cloud/internal/cnpgi/metadata"
"github.com/cloudnative-pg/plugin-barman-cloud/internal/cnpgi/operator/config"
)
@ -125,11 +126,16 @@ func (impl LifecycleImplementation) reconcileJob(
return nil, err
}
startupProbe, err := impl.collectSidecarStartupProbeForRecoveryJob(ctx, pluginConfiguration)
if err != nil {
return nil, err
}
return reconcileJob(ctx, cluster, request, sidecarConfiguration{
env: env,
certificates: certificates,
resources: resources,
probeConfig: pluginConfiguration.StartupProbeConfig,
startupProbe: startupProbe,
})
}
@ -137,7 +143,7 @@ type sidecarConfiguration struct {
env []corev1.EnvVar
certificates []corev1.VolumeProjection
resources corev1.ResourceRequirements
probeConfig *config.ProbeConfig
startupProbe *barmancloudv1.ProbeConfig
}
func reconcileJob(
@ -219,11 +225,16 @@ func (impl LifecycleImplementation) reconcilePod(
return nil, err
}
startupProbe, err := impl.collectSidecarStartupProbeForInstancePod(ctx, pluginConfiguration)
if err != nil {
return nil, err
}
return reconcilePod(ctx, cluster, request, pluginConfiguration, sidecarConfiguration{
env: env,
certificates: certificates,
resources: resources,
probeConfig: pluginConfiguration.StartupProbeConfig,
startupProbe: startupProbe,
})
}
@ -315,12 +326,13 @@ func reconcilePodSpec(
}
// Apply configurable probe settings if available
if config.probeConfig != nil {
baseProbe.InitialDelaySeconds = config.probeConfig.InitialDelaySeconds
baseProbe.TimeoutSeconds = config.probeConfig.TimeoutSeconds
baseProbe.PeriodSeconds = config.probeConfig.PeriodSeconds
baseProbe.FailureThreshold = config.probeConfig.FailureThreshold
baseProbe.SuccessThreshold = config.probeConfig.SuccessThreshold
if config.startupProbe != nil {
// Copy timing and threshold settings from user configuration
baseProbe.InitialDelaySeconds = config.startupProbe.InitialDelaySeconds
baseProbe.TimeoutSeconds = config.startupProbe.TimeoutSeconds
baseProbe.PeriodSeconds = config.startupProbe.PeriodSeconds
baseProbe.FailureThreshold = config.startupProbe.FailureThreshold
baseProbe.SuccessThreshold = config.startupProbe.SuccessThreshold
} else {
// Fallback to default values
baseProbe.FailureThreshold = 10

View File

@ -0,0 +1,59 @@
package operator
import (
"context"
barmancloudv1 "github.com/cloudnative-pg/plugin-barman-cloud/api/v1"
"github.com/cloudnative-pg/plugin-barman-cloud/internal/cnpgi/operator/config"
)
// collectSidecarStartupProbeForRecoveryJob returns the startup probe settings
// to apply to the sidecar of a recovery job.
//
// When the plugin configuration names a recovery object store, that
// ObjectStore is fetched and the StartupProbe of its instance sidecar
// configuration is returned; since that field is an optional pointer, the
// result may be nil. When no recovery object store is named, it returns
// (nil, nil). An error from fetching the ObjectStore is returned unchanged.
func (impl LifecycleImplementation) collectSidecarStartupProbeForRecoveryJob(
	ctx context.Context,
	configuration *config.PluginConfiguration,
) (*barmancloudv1.ProbeConfig, error) {
	// Nothing to look up when no recovery object store is configured.
	if len(configuration.RecoveryBarmanObjectName) == 0 {
		return nil, nil
	}

	recoveryStore := &barmancloudv1.ObjectStore{}
	err := impl.Client.Get(ctx, configuration.GetRecoveryBarmanObjectKey(), recoveryStore)
	if err != nil {
		return nil, err
	}

	return recoveryStore.Spec.InstanceSidecarConfiguration.StartupProbe, nil
}
// collectSidecarStartupProbeForInstancePod returns the startup probe settings
// to apply to the sidecar container of an instance pod.
//
// The cluster object store (BarmanObjectName) takes precedence; the recovery
// object store (RecoveryBarmanObjectName) is consulted only when the former
// is not set. When neither is set, it returns (nil, nil). The returned
// pointer is the StartupProbe of the selected ObjectStore and may be nil.
// An error from fetching the ObjectStore is returned unchanged.
func (impl LifecycleImplementation) collectSidecarStartupProbeForInstancePod(
	ctx context.Context,
	configuration *config.PluginConfiguration,
) (*barmancloudv1.ProbeConfig, error) {
	store := &barmancloudv1.ObjectStore{}

	switch {
	case len(configuration.BarmanObjectName) > 0:
		// A replica cluster that also archives has its designated primary
		// use both the replica source object store and the cluster's own
		// object store. The cluster object store decides the sidecar's
		// startup probe in that situation.
		if err := impl.Client.Get(ctx, configuration.GetBarmanObjectKey(), store); err != nil {
			return nil, err
		}
		return store.Spec.InstanceSidecarConfiguration.StartupProbe, nil

	case len(configuration.RecoveryBarmanObjectName) > 0:
		// A replica cluster that doesn't archive has its designated primary
		// use only the replica source object store, so that store decides
		// the sidecar's startup probe.
		// NOTE(review): the lookup uses the recovery object key — confirm it
		// is the intended "replica source" object store.
		if err := impl.Client.Get(ctx, configuration.GetRecoveryBarmanObjectKey(), store); err != nil {
			return nil, err
		}
		return store.Spec.InstanceSidecarConfiguration.StartupProbe, nil

	default:
		// No object store is referenced, so there is nothing to configure.
		return nil, nil
	}
}

View File

@ -10,6 +10,7 @@ import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
barmancloudv1 "github.com/cloudnative-pg/plugin-barman-cloud/api/v1"
"github.com/cloudnative-pg/plugin-barman-cloud/internal/cnpgi/operator/config"
. "github.com/onsi/ginkgo/v2"
@ -173,8 +174,8 @@ var _ = Describe("LifecycleImplementation", func() {
Describe("reconcilePod", func() {
It("returns a patch for a valid pod with probe configuration", func(ctx SpecContext) {
// Configure plugin with custom probe settings
pluginConfiguration.StartupProbeConfig = &config.ProbeConfig{
// Configure sidecar with custom probe settings
startupProbeConfig := &barmancloudv1.ProbeConfig{
InitialDelaySeconds: 1,
TimeoutSeconds: 15,
PeriodSeconds: 2,
@ -204,7 +205,7 @@ var _ = Describe("LifecycleImplementation", func() {
}
response, err := reconcilePod(ctx, cluster, request, pluginConfiguration, sidecarConfiguration{
probeConfig: pluginConfiguration.StartupProbeConfig,
startupProbe: startupProbeConfig,
})
Expect(err).NotTo(HaveOccurred())
Expect(response).NotTo(BeNil())