plugin-barman-cloud/internal/cnpgi/instance/recovery_window.go
Gabriel Jose Mouallem Rodrigues eefb45ca5a fix: add retry logic to updateRecoveryWindow for concurrent status updates
When backup completion and retention policy enforcement run concurrently,
both call updateRecoveryWindow to update the ObjectStore status. This can
cause "object has been modified" errors due to Kubernetes optimistic
concurrency control.

This change wraps the status update in retry.RetryOnConflict, matching
the pattern already used in setLastFailedBackupTime in the same file.
The retry logic fetches a fresh copy of the ObjectStore before each
update attempt, ensuring the resourceVersion is current.

Fixes #758

Signed-off-by: Gabriel Mouallem <gabriel@latitude.sh>
2026-02-03 15:01:30 -03:00

100 lines
3.2 KiB
Go

/*
Copyright © contributors to CloudNativePG, established as
CloudNativePG a Series of LF Projects, LLC.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
SPDX-License-Identifier: Apache-2.0
*/
package instance
import (
"context"
"time"
"github.com/cloudnative-pg/barman-cloud/pkg/catalog"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/util/retry"
"k8s.io/utils/ptr"
"sigs.k8s.io/controller-runtime/pkg/client"
barmancloudv1 "github.com/cloudnative-pg/plugin-barman-cloud/api/v1"
)
// updateRecoveryWindow writes the first recoverability point and the last
// successful backup time reported by backupList into the recovery window
// of serverName, inside the status subresource of the object store.
//
// Only the namespace/name of objectStore is used: on every attempt the
// object is read again through c, and the update is applied to that fresh
// copy. Recovery windows of other servers, and the remaining fields of the
// window being updated, are left as found. The whole read-modify-write
// cycle runs under retry.RetryOnConflict with retry.DefaultBackoff, to
// cope with concurrent status updates coming from backup completion and
// retention policy enforcement.
func updateRecoveryWindow(
	ctx context.Context,
	c client.Client,
	backupList *catalog.Catalog,
	objectStore *barmancloudv1.ObjectStore,
	serverName string,
) error {
	// toMetaTime maps an optional time.Time onto an optional metav1.Time,
	// keeping nil as nil.
	toMetaTime := func(src *time.Time) *metav1.Time {
		if src != nil {
			return ptr.To(metav1.NewTime(*src))
		}
		return nil
	}

	key := client.ObjectKeyFromObject(objectStore)

	attempt := func() error {
		var current barmancloudv1.ObjectStore
		if err := c.Get(ctx, key, &current); err != nil {
			return err
		}

		// Indexing a nil map yields the zero value, so the lookup is safe
		// even before the map has been initialised below.
		window := current.Status.ServerRecoveryWindow[serverName]
		window.FirstRecoverabilityPoint = toMetaTime(backupList.GetFirstRecoverabilityPoint())
		window.LastSuccessfulBackupTime = toMetaTime(backupList.GetLastSuccessfulBackupTime())

		if current.Status.ServerRecoveryWindow == nil {
			current.Status.ServerRecoveryWindow = make(map[string]barmancloudv1.RecoveryWindow)
		}
		current.Status.ServerRecoveryWindow[serverName] = window

		return c.Status().Update(ctx, &current)
	}

	return retry.RetryOnConflict(retry.DefaultBackoff, attempt)
}
// setLastFailedBackupTime stores lastFailedBackupTime as the last failed
// backup time of serverName, inside the status subresource of the object
// store identified by objectStoreKey.
//
// On every attempt the object store is read through c and the update is
// applied to that copy; the other fields of the server's recovery window
// and the windows of other servers are left as found. The read-modify-write
// cycle runs under retry.RetryOnConflict with retry.DefaultBackoff.
func setLastFailedBackupTime(
	ctx context.Context,
	c client.Client,
	objectStoreKey client.ObjectKey,
	serverName string,
	lastFailedBackupTime time.Time,
) error {
	persist := func() error {
		current := barmancloudv1.ObjectStore{}
		err := c.Get(ctx, objectStoreKey, &current)
		if err != nil {
			return err
		}

		// Make sure the map exists before writing the entry back.
		if current.Status.ServerRecoveryWindow == nil {
			current.Status.ServerRecoveryWindow = make(map[string]barmancloudv1.RecoveryWindow)
		}

		window := current.Status.ServerRecoveryWindow[serverName]
		window.LastFailedBackupTime = ptr.To(metav1.NewTime(lastFailedBackupTime))
		current.Status.ServerRecoveryWindow[serverName] = window

		return c.Status().Update(ctx, &current)
	}

	return retry.RetryOnConflict(retry.DefaultBackoff, persist)
}