mirror of
https://github.com/cloudnative-pg/plugin-barman-cloud.git
synced 2026-01-11 13:23:09 +01:00
feat: last failed backup status field and metric (#467)
Signed-off-by: Leonardo Cecchi <leonardo.cecchi@enterprisedb.com> Signed-off-by: Gabriele Bartolini <gabriele.bartolini@enterprisedb.com> Co-authored-by: Gabriele Bartolini <gabriele.bartolini@enterprisedb.com>
This commit is contained in:
parent
32a5539c18
commit
551a3cde09
@ -75,6 +75,9 @@ type RecoveryWindow struct {
|
||||
|
||||
// The last successful backup time
|
||||
LastSuccessfulBackupTime *metav1.Time `json:"lastSuccussfulBackupTime,omitempty"`
|
||||
|
||||
// The last failed backup time
|
||||
LastFailedBackupTime *metav1.Time `json:"lastFailedBackupTime,omitempty"`
|
||||
}
|
||||
|
||||
// +kubebuilder:object:root=true
|
||||
|
||||
@ -157,6 +157,10 @@ func (in *RecoveryWindow) DeepCopyInto(out *RecoveryWindow) {
|
||||
in, out := &in.LastSuccessfulBackupTime, &out.LastSuccessfulBackupTime
|
||||
*out = (*in).DeepCopy()
|
||||
}
|
||||
if in.LastFailedBackupTime != nil {
|
||||
in, out := &in.LastFailedBackupTime, &out.LastFailedBackupTime
|
||||
*out = (*in).DeepCopy()
|
||||
}
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RecoveryWindow.
|
||||
|
||||
@ -609,6 +609,10 @@ spec:
|
||||
restored.
|
||||
format: date-time
|
||||
type: string
|
||||
lastFailedBackupTime:
|
||||
description: The last failed backup time
|
||||
format: date-time
|
||||
type: string
|
||||
lastSuccussfulBackupTime:
|
||||
description: The last successful backup time
|
||||
format: date-time
|
||||
|
||||
@ -15,6 +15,7 @@ import (
|
||||
"github.com/cloudnative-pg/machinery/pkg/log"
|
||||
pgTime "github.com/cloudnative-pg/machinery/pkg/postgres/time"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/client-go/util/retry"
|
||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||
|
||||
barmancloudv1 "github.com/cloudnative-pg/plugin-barman-cloud/api/v1"
|
||||
@ -101,6 +102,13 @@ func (b BackupServiceImplementation) Backup(
|
||||
postgres.BackupTemporaryDirectory,
|
||||
); err != nil {
|
||||
contextLogger.Error(err, "while taking backup")
|
||||
|
||||
if failureHandlerError := b.handleBackupError(ctx, configuration); failureHandlerError != nil {
|
||||
contextLogger.Error(
|
||||
failureHandlerError,
|
||||
"Error while handling backup failure, skipping. "+
|
||||
"BarmanObjectStore object may be not up to date.")
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -166,3 +174,18 @@ func (b BackupServiceImplementation) Backup(
|
||||
Metadata: newBackupResultMetadata(configuration.Cluster.ObjectMeta.UID, executedBackupInfo.TimeLine).toMap(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (b BackupServiceImplementation) handleBackupError(ctx context.Context, cfg *config.PluginConfiguration) error {
|
||||
return retry.RetryOnConflict(
|
||||
retry.DefaultBackoff,
|
||||
func() error {
|
||||
return setLastFailedBackupTime(
|
||||
ctx,
|
||||
b.Client,
|
||||
cfg.GetBarmanObjectKey(),
|
||||
cfg.ServerName,
|
||||
time.Now(),
|
||||
)
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
@ -31,6 +31,7 @@ func buildFqName(name string) string {
|
||||
var (
|
||||
firstRecoverabilityPointMetricName = buildFqName("first_recoverability_point")
|
||||
lastAvailableBackupTimestampMetricName = buildFqName("last_available_backup_timestamp")
|
||||
lastFailedBackupTimestampMetricName = buildFqName("last_failed_backup_timestamp")
|
||||
)
|
||||
|
||||
func (m metricsImpl) GetCapabilities(
|
||||
@ -72,6 +73,11 @@ func (m metricsImpl) Define(
|
||||
Help: "The last available backup as a unix timestamp",
|
||||
ValueType: &metrics.MetricType{Type: metrics.MetricType_TYPE_GAUGE},
|
||||
},
|
||||
{
|
||||
FqName: lastFailedBackupTimestampMetricName,
|
||||
Help: "The last failed backup as a unix timestamp",
|
||||
ValueType: &metrics.MetricType{Type: metrics.MetricType_TYPE_GAUGE},
|
||||
},
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
@ -107,18 +113,26 @@ func (m metricsImpl) Collect(
|
||||
FqName: lastAvailableBackupTimestampMetricName,
|
||||
Value: 0,
|
||||
},
|
||||
{
|
||||
FqName: lastFailedBackupTimestampMetricName,
|
||||
Value: 0,
|
||||
},
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
var firstRecoverabilityPoint float64
|
||||
var lastAvailableBackup float64
|
||||
var lastFailedBackup float64
|
||||
if x.FirstRecoverabilityPoint != nil {
|
||||
firstRecoverabilityPoint = float64(x.FirstRecoverabilityPoint.Unix())
|
||||
}
|
||||
if x.LastSuccessfulBackupTime != nil {
|
||||
lastAvailableBackup = float64(x.LastSuccessfulBackupTime.Unix())
|
||||
}
|
||||
if x.LastFailedBackupTime != nil {
|
||||
lastFailedBackup = float64(x.LastFailedBackupTime.Unix())
|
||||
}
|
||||
|
||||
return &metrics.CollectMetricsResult{
|
||||
Metrics: []*metrics.CollectMetric{
|
||||
@ -130,6 +144,10 @@ func (m metricsImpl) Collect(
|
||||
FqName: lastAvailableBackupTimestampMetricName,
|
||||
Value: lastAvailableBackup,
|
||||
},
|
||||
{
|
||||
FqName: lastFailedBackupTimestampMetricName,
|
||||
Value: lastFailedBackup,
|
||||
},
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
@ -6,6 +6,7 @@ import (
|
||||
|
||||
"github.com/cloudnative-pg/barman-cloud/pkg/catalog"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/client-go/util/retry"
|
||||
"k8s.io/utils/ptr"
|
||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||
|
||||
@ -29,10 +30,9 @@ func updateRecoveryWindow(
|
||||
return ptr.To(metav1.NewTime(*t))
|
||||
}
|
||||
|
||||
recoveryWindow := barmancloudv1.RecoveryWindow{
|
||||
FirstRecoverabilityPoint: convertTime(backupList.GetFirstRecoverabilityPoint()),
|
||||
LastSuccessfulBackupTime: convertTime(backupList.GetLastSuccessfulBackupTime()),
|
||||
}
|
||||
recoveryWindow := objectStore.Status.ServerRecoveryWindow[serverName]
|
||||
recoveryWindow.FirstRecoverabilityPoint = convertTime(backupList.GetFirstRecoverabilityPoint())
|
||||
recoveryWindow.LastSuccessfulBackupTime = convertTime(backupList.GetLastSuccessfulBackupTime())
|
||||
|
||||
if objectStore.Status.ServerRecoveryWindow == nil {
|
||||
objectStore.Status.ServerRecoveryWindow = make(map[string]barmancloudv1.RecoveryWindow)
|
||||
@ -41,3 +41,25 @@ func updateRecoveryWindow(
|
||||
|
||||
return c.Status().Update(ctx, objectStore)
|
||||
}
|
||||
|
||||
// setLastFailedBackupTime sets the last failed backup time in the
|
||||
// passed object store, for the passed server name.
|
||||
func setLastFailedBackupTime(
|
||||
ctx context.Context,
|
||||
c client.Client,
|
||||
objectStoreKey client.ObjectKey,
|
||||
serverName string,
|
||||
lastFailedBackupTime time.Time,
|
||||
) error {
|
||||
return retry.RetryOnConflict(retry.DefaultBackoff, func() error {
|
||||
var objectStore barmancloudv1.ObjectStore
|
||||
|
||||
if err := c.Get(ctx, objectStoreKey, &objectStore); err != nil {
|
||||
return err
|
||||
}
|
||||
recoveryWindow := objectStore.Status.ServerRecoveryWindow[serverName]
|
||||
recoveryWindow.LastFailedBackupTime = ptr.To(metav1.NewTime(lastFailedBackupTime))
|
||||
objectStore.Status.ServerRecoveryWindow[serverName] = recoveryWindow
|
||||
return c.Status().Update(ctx, &objectStore)
|
||||
})
|
||||
}
|
||||
|
||||
@ -101,5 +101,6 @@ _Appears in:_
|
||||
| --- | --- | --- | --- | --- |
|
||||
| `firstRecoverabilityPoint` _[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#time-v1-meta)_ | The first recoverability point in a PostgreSQL server refers to<br />the earliest point in time to which the database can be<br />restored. | True | | |
|
||||
| `lastSuccussfulBackupTime` _[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#time-v1-meta)_ | The last successful backup time | True | | |
|
||||
| `lastFailedBackupTime` _[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#time-v1-meta)_ | The last failed backup time | True | | |
|
||||
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user