mirror of
https://github.com/cloudnative-pg/plugin-barman-cloud.git
synced 2026-01-11 13:23:09 +01:00
Enable the LeaderElectionReleaseOnCancel option in the controller manager to fix a deadlock issue during RollingUpdate deployments with leader election enabled. Without this setting, the old pod holds the leader lease during shutdown, preventing the new pod from becoming ready. This creates a deadlock where Kubernetes won't terminate the old pod because the new pod isn't ready, and the new pod can't become ready because it can't acquire the lease. With LeaderElectionReleaseOnCancel enabled, the old pod voluntarily releases the lease when it receives a shutdown signal, allowing the new pod to acquire leadership immediately and become ready, enabling smooth rolling updates. Closes #419 Signed-off-by: Armando Ruocco <armando.ruocco@enterprisedb.com>
166 lines
6.3 KiB
Go
166 lines
6.3 KiB
Go
/*
|
|
Copyright © contributors to CloudNativePG, established as
|
|
CloudNativePG a Series of LF Projects, LLC.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
|
|
SPDX-License-Identifier: Apache-2.0
|
|
*/
|
|
|
|
package operator
|
|
|
|
import (
|
|
"context"
|
|
"crypto/tls"
|
|
|
|
// +kubebuilder:scaffold:imports
|
|
cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1"
|
|
"github.com/cloudnative-pg/machinery/pkg/log"
|
|
"github.com/spf13/viper"
|
|
"k8s.io/apimachinery/pkg/runtime"
|
|
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
|
|
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
|
|
ctrl "sigs.k8s.io/controller-runtime"
|
|
"sigs.k8s.io/controller-runtime/pkg/healthz"
|
|
"sigs.k8s.io/controller-runtime/pkg/metrics/filters"
|
|
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
|
|
"sigs.k8s.io/controller-runtime/pkg/webhook"
|
|
|
|
barmancloudv1 "github.com/cloudnative-pg/plugin-barman-cloud/api/v1"
|
|
"github.com/cloudnative-pg/plugin-barman-cloud/internal/controller"
|
|
|
|
// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
|
|
// to ensure that exec-entrypoint and run can make use of them.
|
|
_ "k8s.io/client-go/plugin/pkg/client/auth"
|
|
)
|
|
|
|
var scheme = runtime.NewScheme()
|
|
|
|
func init() {
|
|
utilruntime.Must(clientgoscheme.AddToScheme(scheme))
|
|
utilruntime.Must(barmancloudv1.AddToScheme(scheme))
|
|
utilruntime.Must(cnpgv1.AddToScheme(scheme))
|
|
// +kubebuilder:scaffold:scheme
|
|
}
|
|
|
|
// Start starts the manager
|
|
func Start(ctx context.Context) error {
|
|
setupLog := log.FromContext(ctx)
|
|
|
|
var tlsOpts []func(*tls.Config)
|
|
|
|
// if the enable-http2 flag is false (the default), http/2 should be disabled
|
|
// due to its vulnerabilities. More specifically, disabling http/2 will
|
|
// prevent from being vulnerable to the HTTP/2 Stream Cancellation and
|
|
// Rapid Reset CVEs. For more information see:
|
|
// - https://github.com/advisories/GHSA-qppj-fm5r-hxr3
|
|
// - https://github.com/advisories/GHSA-4374-p667-p6c8
|
|
disableHTTP2 := func(c *tls.Config) {
|
|
setupLog.Info("disabling http/2")
|
|
c.NextProtos = []string{"http/1.1"}
|
|
}
|
|
|
|
if !viper.GetBool("enable-http2") {
|
|
tlsOpts = append(tlsOpts, disableHTTP2)
|
|
}
|
|
|
|
webhookServer := webhook.NewServer(webhook.Options{
|
|
TLSOpts: tlsOpts,
|
|
})
|
|
|
|
// Metrics endpoint is enabled in 'config/default/kustomization.yaml'. The Metrics options configure the server.
|
|
// More info:
|
|
// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.19.0/pkg/metrics/server
|
|
// - https://book.kubebuilder.io/reference/metrics.html
|
|
metricsServerOptions := metricsserver.Options{
|
|
BindAddress: viper.GetString("metrics-bind-address"),
|
|
SecureServing: viper.GetBool("metrics-secure"),
|
|
// TODO(user): TLSOpts is used to allow configuring the TLS config used for the server. If certificates are
|
|
// not provided, self-signed certificates will be generated by default. This option is not recommended for
|
|
// production environments as self-signed certificates do not offer the same level of trust and security
|
|
// as certificates issued by a trusted Certificate Authority (CA). The primary risk is potentially allowing
|
|
// unauthorized access to sensitive metrics data. Consider replacing with CertDir, CertName, and KeyName
|
|
// to provide certificates, ensuring the server communicates using trusted and secure certificates.
|
|
TLSOpts: tlsOpts,
|
|
}
|
|
|
|
if viper.GetBool("metrics-secure") {
|
|
// FilterProvider is used to protect the metrics endpoint with authn/authz.
|
|
// These configurations ensure that only authorized users and service accounts
|
|
// can access the metrics endpoint. The RBAC are configured in 'config/rbac/kustomization.yaml'. More info:
|
|
// https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.19.0/pkg/metrics/filters#WithAuthenticationAndAuthorization
|
|
metricsServerOptions.FilterProvider = filters.WithAuthenticationAndAuthorization
|
|
}
|
|
|
|
mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
|
|
Scheme: scheme,
|
|
Metrics: metricsServerOptions,
|
|
WebhookServer: webhookServer,
|
|
HealthProbeBindAddress: viper.GetString("health-probe-bind-address"),
|
|
LeaderElection: viper.GetBool("leader-elect"),
|
|
LeaderElectionID: "822e3f5c.cnpg.io",
|
|
// LeaderElectionReleaseOnCancel defines if the leader should step down voluntarily
|
|
// when the Manager ends. This requires the binary to immediately end when the
|
|
// Manager is stopped, otherwise, this setting is unsafe. Setting this significantly
|
|
// speeds up voluntary leader transitions as the new leader don't have to wait
|
|
// LeaseDuration time first.
|
|
//
|
|
// In the default scaffold provided, the program ends immediately after
|
|
// the manager stops, so would be fine to enable this option. However,
|
|
// if you are doing or is intended to do any operation such as perform cleanups
|
|
// after the manager stops then its usage might be unsafe.
|
|
LeaderElectionReleaseOnCancel: true,
|
|
})
|
|
if err != nil {
|
|
setupLog.Error(err, "unable to start manager")
|
|
return err
|
|
}
|
|
|
|
if err = (&controller.ObjectStoreReconciler{
|
|
Client: mgr.GetClient(),
|
|
Scheme: mgr.GetScheme(),
|
|
}).SetupWithManager(mgr); err != nil {
|
|
setupLog.Error(err, "unable to create controller", "controller", "ObjectStore")
|
|
return err
|
|
}
|
|
// +kubebuilder:scaffold:builder
|
|
|
|
if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
|
|
setupLog.Error(err, "unable to set up health check")
|
|
return err
|
|
}
|
|
if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
|
|
setupLog.Error(err, "unable to set up ready check")
|
|
return err
|
|
}
|
|
|
|
if err := mgr.Add(&CNPGI{
|
|
Client: mgr.GetClient(),
|
|
PluginPath: viper.GetString("plugin-path"),
|
|
ServerCertPath: viper.GetString("server-cert"),
|
|
ServerKeyPath: viper.GetString("server-key"),
|
|
ClientCertPath: viper.GetString("client-cert"),
|
|
ServerAddress: viper.GetString("server-address"),
|
|
}); err != nil {
|
|
setupLog.Error(err, "unable to create CNPGI runnable")
|
|
return err
|
|
}
|
|
|
|
setupLog.Info("starting manager")
|
|
if err := mgr.Start(ctx); err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|