diff --git a/.wordlist.txt b/.wordlist.txt index a634c07..f520ed0 100644 --- a/.wordlist.txt +++ b/.wordlist.txt @@ -33,6 +33,7 @@ README RPO RTO RecoveryWindow +ResourceRequirements RetentionPolicy SAS SFO @@ -64,6 +65,7 @@ cmctl cnpg codebase containerPort +cpu creds csi customresourcedefinition @@ -102,6 +104,7 @@ repos retentionCheckInterval retentionPolicy rolebinding +rollout sc secretKeyRef selfsigned diff --git a/api/v1/objectstore_types.go b/api/v1/objectstore_types.go index 0b15d71..80c4742 100644 --- a/api/v1/objectstore_types.go +++ b/api/v1/objectstore_types.go @@ -33,6 +33,10 @@ type InstanceSidecarConfiguration struct { // +kubebuilder:default:=1800 // +optional RetentionPolicyIntervalSeconds int `json:"retentionPolicyIntervalSeconds,omitempty"` + + // Resources define cpu/memory requests and limits for the sidecar that runs in the instance pods. + // +optional + Resources corev1.ResourceRequirements `json:"resources,omitempty"` } // ObjectStoreSpec defines the desired state of ObjectStore. diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index c70da27..11fb2ae 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -35,6 +35,7 @@ func (in *InstanceSidecarConfiguration) DeepCopyInto(out *InstanceSidecarConfigu (*in)[i].DeepCopyInto(&(*out)[i]) } } + in.Resources.DeepCopyInto(&out.Resources) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InstanceSidecarConfiguration. diff --git a/config/crd/bases/barmancloud.cnpg.io_objectstores.yaml b/config/crd/bases/barmancloud.cnpg.io_objectstores.yaml index f55fda4..6fb87b5 100644 --- a/config/crd/bases/barmancloud.cnpg.io_objectstores.yaml +++ b/config/crd/bases/barmancloud.cnpg.io_objectstores.yaml @@ -511,6 +511,66 @@ spec: - name type: object type: array + resources: + description: Resources define cpu/memory requests and limits for + the sidecar that runs in the instance pods. + properties: + claims: + description: |- + Claims lists the names of resources, defined in spec.resourceClaims, + that are used by this container. + + This is an alpha field and requires enabling the + DynamicResourceAllocation feature gate. + + This field is immutable. It can only be set for containers. + items: + description: ResourceClaim references one entry in PodSpec.ResourceClaims. + properties: + name: + description: |- + Name must match the name of one entry in pod.spec.resourceClaims of + the Pod where this field is used. It makes that resource available + inside a container. + type: string + request: + description: |- + Request is the name chosen for a request in the referenced claim. + If empty, everything from the claim is made available, otherwise + only the result of this request. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. 
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object retentionPolicyIntervalSeconds: default: 1800 description: |- diff --git a/hack/examples/minio-store.yaml b/hack/examples/minio-store.yaml index 38b116f..de47ea7 100644 --- a/hack/examples/minio-store.yaml +++ b/hack/examples/minio-store.yaml @@ -5,7 +5,14 @@ metadata: spec: retentionPolicy: "1m" instanceSidecarConfiguration: - retentionPolicyIntervalSeconds: 30 + retentionPolicyIntervalSeconds: 1800 + resources: + requests: + memory: "64Mi" + cpu: "250m" + limits: + memory: "512Mi" + cpu: "500m" configuration: endpointCA: name: minio-server-tls @@ -27,4 +34,3 @@ spec: - "--min-chunk-size=5MB" - "--read-timeout=60" - "-vv" - diff --git a/internal/cnpgi/operator/lifecycle.go b/internal/cnpgi/operator/lifecycle.go index 2862da7..308c9e7 100644 --- a/internal/cnpgi/operator/lifecycle.go +++ b/internal/cnpgi/operator/lifecycle.go @@ -123,15 +123,29 @@ func (impl LifecycleImplementation) reconcileJob( return nil, err } - return reconcileJob(ctx, cluster, request, env, certificates) + resources, err := impl.collectSidecarResourcesForRecoveryJob(ctx, pluginConfiguration) + if err != nil { + return nil, err + } + + return reconcileJob(ctx, cluster, request, sidecarConfiguration{ + env: env, + certificates: certificates, + resources: resources, + }) +} + +type sidecarConfiguration struct { + env []corev1.EnvVar + certificates []corev1.VolumeProjection + resources corev1.ResourceRequirements } func reconcileJob( ctx context.Context, cluster *cnpgv1.Cluster, request *lifecycle.OperatorLifecycleRequest, - env []corev1.EnvVar, - certificates []corev1.VolumeProjection, + config sidecarConfiguration, ) (*lifecycle.OperatorLifecycleResponse, error) { contextLogger := log.FromContext(ctx).WithName("lifecycle") if pluginConfig := cluster.GetRecoverySourcePlugin(); pluginConfig == nil || pluginConfig.Name != metadata.PluginName { @@ -169,8 +183,7 @@ func reconcileJob( corev1.Container{ Args: []string{"restore"}, }, - env, - certificates, + config, ); err != nil { return nil, fmt.Errorf("while reconciling pod spec for job: %w", err) } @@ -202,7 +215,16 @@ func (impl LifecycleImplementation) reconcilePod( return nil, err } - return reconcilePod(ctx, cluster, request, pluginConfiguration, env, certificates) + resources, err := impl.collectSidecarResourcesForPod(ctx, pluginConfiguration) + if err != nil { + return nil, err + } + + return reconcilePod(ctx, cluster, request, pluginConfiguration, sidecarConfiguration{ + env: env, + certificates: certificates, + resources: resources, + }) } func reconcilePod( @@ -210,8 +232,7 @@ func reconcilePod( cluster *cnpgv1.Cluster, request *lifecycle.OperatorLifecycleRequest, pluginConfiguration *config.PluginConfiguration, - env []corev1.EnvVar, - certificates []corev1.VolumeProjection, + config sidecarConfiguration, ) 
(*lifecycle.OperatorLifecycleResponse, error) { pod, err := decoder.DecodePodJSON(request.GetObjectDefinition()) if err != nil { @@ -232,8 +253,7 @@ func reconcilePod( corev1.Container{ Args: []string{"instance"}, }, - env, - certificates, + config, ); err != nil { return nil, fmt.Errorf("while reconciling pod spec for pod: %w", err) } @@ -256,9 +276,8 @@ func reconcilePodSpec( cluster *cnpgv1.Cluster, spec *corev1.PodSpec, mainContainerName string, - sidecarConfig corev1.Container, - additionalEnvs []corev1.EnvVar, - certificates []corev1.VolumeProjection, + sidecarTemplate corev1.Container, + config sidecarConfiguration, ) error { envs := []corev1.EnvVar{ { @@ -285,7 +304,7 @@ func reconcilePodSpec( }, } - envs = append(envs, additionalEnvs...) + envs = append(envs, config.env...) baseProbe := &corev1.Probe{ FailureThreshold: 10, @@ -298,11 +317,11 @@ func reconcilePodSpec( } // fixed values - sidecarConfig.Name = "plugin-barman-cloud" - sidecarConfig.Image = viper.GetString("sidecar-image") - sidecarConfig.ImagePullPolicy = cluster.Spec.ImagePullPolicy - sidecarConfig.StartupProbe = baseProbe.DeepCopy() - sidecarConfig.SecurityContext = &corev1.SecurityContext{ + sidecarTemplate.Name = "plugin-barman-cloud" + sidecarTemplate.Image = viper.GetString("sidecar-image") + sidecarTemplate.ImagePullPolicy = cluster.Spec.ImagePullPolicy + sidecarTemplate.StartupProbe = baseProbe.DeepCopy() + sidecarTemplate.SecurityContext = &corev1.SecurityContext{ AllowPrivilegeEscalation: ptr.To(false), RunAsNonRoot: ptr.To(true), Privileged: ptr.To(false), @@ -314,20 +333,21 @@ func reconcilePodSpec( Drop: []corev1.Capability{"ALL"}, }, } + sidecarTemplate.Resources = config.resources // merge the main container envs if they aren't already set for _, container := range spec.Containers { if container.Name == mainContainerName { for _, env := range container.Env { found := false - for _, existingEnv := range sidecarConfig.Env { + for _, existingEnv := range sidecarTemplate.Env { if existingEnv.Name == env.Name { found = true break } } if !found { - sidecarConfig.Env = append(sidecarConfig.Env, env) + sidecarTemplate.Env = append(sidecarTemplate.Env, env) } } break @@ -337,18 +357,18 @@ func reconcilePodSpec( // merge the default envs if they aren't already set for _, env := range envs { found := false - for _, existingEnv := range sidecarConfig.Env { + for _, existingEnv := range sidecarTemplate.Env { if existingEnv.Name == env.Name { found = true break } } if !found { - sidecarConfig.Env = append(sidecarConfig.Env, env) + sidecarTemplate.Env = append(sidecarTemplate.Env, env) } } - if err := injectPluginSidecarPodSpec(spec, &sidecarConfig, mainContainerName); err != nil { + if err := injectPluginSidecarPodSpec(spec, &sidecarTemplate, mainContainerName); err != nil { return err } @@ -358,7 +378,7 @@ func reconcilePodSpec( Name: barmanCertificatesVolumeName, VolumeSource: corev1.VolumeSource{ Projected: &corev1.ProjectedVolumeSource{ - Sources: certificates, + Sources: config.certificates, }, }, }) diff --git a/internal/cnpgi/operator/lifecycle_envs.go b/internal/cnpgi/operator/lifecycle_envs.go index bfb5b22..e51b5c8 100644 --- a/internal/cnpgi/operator/lifecycle_envs.go +++ b/internal/cnpgi/operator/lifecycle_envs.go @@ -17,6 +17,9 @@ func (impl LifecycleImplementation) collectAdditionalEnvs( ) ([]corev1.EnvVar, error) { var result []corev1.EnvVar + // TODO: check if the environment variables are clashing and in + // that case raise an error + if len(pluginConfiguration.BarmanObjectName) > 0 { envs, err := 
impl.collectObjectStoreEnvs( ctx, @@ -45,6 +48,20 @@ func (impl LifecycleImplementation) collectAdditionalEnvs( result = append(result, envs...) } + if len(pluginConfiguration.ReplicaSourceBarmanObjectName) > 0 { + envs, err := impl.collectObjectStoreEnvs( + ctx, + types.NamespacedName{ + Name: pluginConfiguration.ReplicaSourceBarmanObjectName, + Namespace: namespace, + }, + ) + if err != nil { + return nil, err + } + result = append(result, envs...) + } + return result, nil } diff --git a/internal/cnpgi/operator/lifecycle_resources.go b/internal/cnpgi/operator/lifecycle_resources.go new file mode 100644 index 0000000..a68339b --- /dev/null +++ b/internal/cnpgi/operator/lifecycle_resources.go @@ -0,0 +1,61 @@ +package operator + +import ( + "context" + + corev1 "k8s.io/api/core/v1" + + barmancloudv1 "github.com/cloudnative-pg/plugin-barman-cloud/api/v1" + "github.com/cloudnative-pg/plugin-barman-cloud/internal/cnpgi/operator/config" +) + +func (impl LifecycleImplementation) collectSidecarResourcesForRecoveryJob( + ctx context.Context, + configuration *config.PluginConfiguration, +) (corev1.ResourceRequirements, error) { + if len(configuration.RecoveryBarmanObjectName) > 0 { + var barmanObjectStore barmancloudv1.ObjectStore + if err := impl.Client.Get(ctx, configuration.GetRecoveryBarmanObjectKey(), &barmanObjectStore); err != nil { + return corev1.ResourceRequirements{}, err + } + + return barmanObjectStore.Spec.InstanceSidecarConfiguration.Resources, nil + } + + return corev1.ResourceRequirements{}, nil +} + +func (impl LifecycleImplementation) collectSidecarResourcesForPod( + ctx context.Context, + configuration *config.PluginConfiguration, +) (corev1.ResourceRequirements, error) { + if len(configuration.BarmanObjectName) > 0 { + // On a replica cluster that also archives, the designated primary + // will use both the replica source object store and the object store + // of the cluster. + // In this case, we use the cluster object store for configuring + // the resources of the sidecar container. + + var barmanObjectStore barmancloudv1.ObjectStore + if err := impl.Client.Get(ctx, configuration.GetBarmanObjectKey(), &barmanObjectStore); err != nil { + return corev1.ResourceRequirements{}, err + } + + return barmanObjectStore.Spec.InstanceSidecarConfiguration.Resources, nil + } + + if len(configuration.RecoveryBarmanObjectName) > 0 { + // On a replica cluster that doesn't archive, the designated primary + // uses only the replica source object store. + // In this case, we use the replica source object store for configuring + // the resources of the sidecar container. 
+ var barmanObjectStore barmancloudv1.ObjectStore + if err := impl.Client.Get(ctx, configuration.GetRecoveryBarmanObjectKey(), &barmanObjectStore); err != nil { + return corev1.ResourceRequirements{}, err + } + + return barmanObjectStore.Spec.InstanceSidecarConfiguration.Resources, nil + } + + return corev1.ResourceRequirements{}, nil +} diff --git a/internal/cnpgi/operator/lifecycle_test.go b/internal/cnpgi/operator/lifecycle_test.go index 1e63b6f..3d94a3b 100644 --- a/internal/cnpgi/operator/lifecycle_test.go +++ b/internal/cnpgi/operator/lifecycle_test.go @@ -107,7 +107,7 @@ var _ = Describe("LifecycleImplementation", func() { ObjectDefinition: jobJSON, } - response, err := reconcileJob(ctx, cluster, request, nil, nil) + response, err := reconcileJob(ctx, cluster, request, sidecarConfiguration{}) Expect(err).NotTo(HaveOccurred()) Expect(response).NotTo(BeNil()) Expect(response.JsonPatch).NotTo(BeEmpty()) @@ -128,7 +128,7 @@ var _ = Describe("LifecycleImplementation", func() { ObjectDefinition: jobJSON, } - response, err := reconcileJob(ctx, cluster, request, nil, nil) + response, err := reconcileJob(ctx, cluster, request, sidecarConfiguration{}) Expect(err).NotTo(HaveOccurred()) Expect(response).To(BeNil()) }) @@ -138,7 +138,7 @@ var _ = Describe("LifecycleImplementation", func() { ObjectDefinition: []byte("invalid-json"), } - response, err := reconcileJob(ctx, cluster, request, nil, nil) + response, err := reconcileJob(ctx, cluster, request, sidecarConfiguration{}) Expect(err).To(HaveOccurred()) Expect(response).To(BeNil()) }) @@ -165,7 +165,7 @@ var _ = Describe("LifecycleImplementation", func() { ObjectDefinition: jobJSON, } - response, err := reconcileJob(ctx, cluster, request, nil, nil) + response, err := reconcileJob(ctx, cluster, request, sidecarConfiguration{}) Expect(err).NotTo(HaveOccurred()) Expect(response).To(BeNil()) }) @@ -185,7 +185,7 @@ var _ = Describe("LifecycleImplementation", func() { ObjectDefinition: podJSON, } - response, err := reconcilePod(ctx, cluster, request, pluginConfiguration, nil, nil) + response, err := reconcilePod(ctx, cluster, request, pluginConfiguration, sidecarConfiguration{}) Expect(err).NotTo(HaveOccurred()) Expect(response).NotTo(BeNil()) Expect(response.JsonPatch).NotTo(BeEmpty()) @@ -203,7 +203,7 @@ var _ = Describe("LifecycleImplementation", func() { ObjectDefinition: []byte("invalid-json"), } - response, err := reconcilePod(ctx, cluster, request, pluginConfiguration, nil, nil) + response, err := reconcilePod(ctx, cluster, request, pluginConfiguration, sidecarConfiguration{}) Expect(err).To(HaveOccurred()) Expect(response).To(BeNil()) }) diff --git a/manifest.yaml b/manifest.yaml index e19a59d..c43ef4b 100644 --- a/manifest.yaml +++ b/manifest.yaml @@ -510,6 +510,66 @@ spec: - name type: object type: array + resources: + description: Resources define cpu/memory requests and limits for + the sidecar that runs in the instance pods. + properties: + claims: + description: |- + Claims lists the names of resources, defined in spec.resourceClaims, + that are used by this container. + + This is an alpha field and requires enabling the + DynamicResourceAllocation feature gate. + + This field is immutable. It can only be set for containers. + items: + description: ResourceClaim references one entry in PodSpec.ResourceClaims. + properties: + name: + description: |- + Name must match the name of one entry in pod.spec.resourceClaims of + the Pod where this field is used. It makes that resource available + inside a container. 
+ type: string + request: + description: |- + Request is the name chosen for a request in the referenced claim. + If empty, everything from the claim is made available, otherwise + only the result of this request. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object retentionPolicyIntervalSeconds: default: 1800 description: |- diff --git a/web/docs/plugin-barman-cloud.v1.md b/web/docs/plugin-barman-cloud.v1.md index 197c1ed..552dc27 100644 --- a/web/docs/plugin-barman-cloud.v1.md +++ b/web/docs/plugin-barman-cloud.v1.md @@ -28,6 +28,7 @@ _Appears in:_ | --- | --- | --- | --- | --- | | `env` _[EnvVar](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#envvar-v1-core) array_ | The environment to be explicitly passed to the sidecar | | | | | `retentionPolicyIntervalSeconds` _integer_ | The retentionCheckInterval defines the frequency at which the
system checks and enforces retention policies. | | 1800 | |
+| `resources` _[ResourceRequirements](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#resourcerequirements-v1-core)_ | Resources define cpu/memory requests and limits for the sidecar that runs in the instance pods. | | | |


#### ObjectStore

diff --git a/web/docs/usage.md b/web/docs/usage.md
index 1c464e8..869dfe2 100644
--- a/web/docs/usage.md
+++ b/web/docs/usage.md
@@ -210,3 +210,49 @@ spec:
     parameters:
       barmanObjectName: minio-store-b
 ```
+
+## Configuring the plugin instance sidecar
+
+The Barman Cloud Plugin runs as a sidecar container next to each PostgreSQL
+instance pod. It manages backup, WAL archiving, and restore processes.
+
+Configuration comes from multiple `ObjectStore` resources:
+
+1. The one referenced in the `.spec.plugins` section of the `Cluster`. This
+   is the object store used for WAL archiving and base backups.
+2. The one referenced in the external cluster used in the
+   `.spec.replica.source` section of the `Cluster`. This is used by the
+   log-shipping designated primary to fetch WAL files.
+3. The one referenced in the `.spec.bootstrap.recovery.source` section of
+   the `Cluster`. This is used by the initial recovery job to create the
+   cluster from an existing backup.
+
+You can fine-tune sidecar behavior in the `.spec.instanceSidecarConfiguration`
+of your `ObjectStore`. These settings apply to all PostgreSQL instances that
+use this object store. Updates take effect at the next `Cluster`
+reconciliation and may trigger a rollout of the `Cluster`.
+
+```yaml
+apiVersion: barmancloud.cnpg.io/v1
+kind: ObjectStore
+metadata:
+  name: minio-store
+spec:
+  configuration:
+    # [...]
+  instanceSidecarConfiguration:
+    retentionPolicyIntervalSeconds: 1800
+    resources:
+      requests:
+        memory: "64Mi"
+        cpu: "250m"
+      limits:
+        memory: "512Mi"
+        cpu: "500m"
+```
+
+:::note
+If more than one `ObjectStore` applies, the `instanceSidecarConfiguration` of
+the one set in `.spec.plugins` takes priority.
+:::
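+
+For example, consider a replica cluster whose designated primary both
+archives to its own object store and fetches WAL files from the replica
+source. A minimal sketch follows; the names `cluster-replica`, `origin`,
+and `minio-store-origin` are illustrative, and unrelated fields such as
+`storage` and `bootstrap` are omitted. Here the sidecar resources come
+from `minio-store`, the object store referenced in `.spec.plugins`:
+
+```yaml
+apiVersion: postgresql.cnpg.io/v1
+kind: Cluster
+metadata:
+  name: cluster-replica
+spec:
+  instances: 3
+  # [...]
+  plugins:
+    # Object store used for WAL archiving; its
+    # instanceSidecarConfiguration takes priority.
+    - name: barman-cloud.cloudnative-pg.io
+      isWALArchiver: true
+      parameters:
+        barmanObjectName: minio-store
+  replica:
+    enabled: true
+    source: origin
+  externalClusters:
+    # Replica source: its object store configures the sidecar only
+    # when the cluster does not archive to a store of its own.
+    - name: origin
+      plugin:
+        name: barman-cloud.cloudnative-pg.io
+        parameters:
+          barmanObjectName: minio-store-origin
+```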