mirror of
https://github.com/cloudnative-pg/plugin-barman-cloud.git
synced 2026-01-11 21:23:12 +01:00
feat: last failed backup status field and metric (#467)
Signed-off-by: Leonardo Cecchi <leonardo.cecchi@enterprisedb.com> Signed-off-by: Gabriele Bartolini <gabriele.bartolini@enterprisedb.com> Co-authored-by: Gabriele Bartolini <gabriele.bartolini@enterprisedb.com>
This commit is contained in:
parent
32a5539c18
commit
551a3cde09
@ -75,6 +75,9 @@ type RecoveryWindow struct {
|
|||||||
|
|
||||||
// The last successful backup time
|
// The last successful backup time
|
||||||
LastSuccessfulBackupTime *metav1.Time `json:"lastSuccussfulBackupTime,omitempty"`
|
LastSuccessfulBackupTime *metav1.Time `json:"lastSuccussfulBackupTime,omitempty"`
|
||||||
|
|
||||||
|
// The last failed backup time
|
||||||
|
LastFailedBackupTime *metav1.Time `json:"lastFailedBackupTime,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// +kubebuilder:object:root=true
|
// +kubebuilder:object:root=true
|
||||||
|
|||||||
@ -157,6 +157,10 @@ func (in *RecoveryWindow) DeepCopyInto(out *RecoveryWindow) {
|
|||||||
in, out := &in.LastSuccessfulBackupTime, &out.LastSuccessfulBackupTime
|
in, out := &in.LastSuccessfulBackupTime, &out.LastSuccessfulBackupTime
|
||||||
*out = (*in).DeepCopy()
|
*out = (*in).DeepCopy()
|
||||||
}
|
}
|
||||||
|
if in.LastFailedBackupTime != nil {
|
||||||
|
in, out := &in.LastFailedBackupTime, &out.LastFailedBackupTime
|
||||||
|
*out = (*in).DeepCopy()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RecoveryWindow.
|
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RecoveryWindow.
|
||||||
|
|||||||
@ -609,6 +609,10 @@ spec:
|
|||||||
restored.
|
restored.
|
||||||
format: date-time
|
format: date-time
|
||||||
type: string
|
type: string
|
||||||
|
lastFailedBackupTime:
|
||||||
|
description: The last failed backup time
|
||||||
|
format: date-time
|
||||||
|
type: string
|
||||||
lastSuccussfulBackupTime:
|
lastSuccussfulBackupTime:
|
||||||
description: The last successful backup time
|
description: The last successful backup time
|
||||||
format: date-time
|
format: date-time
|
||||||
|
|||||||
@ -15,6 +15,7 @@ import (
|
|||||||
"github.com/cloudnative-pg/machinery/pkg/log"
|
"github.com/cloudnative-pg/machinery/pkg/log"
|
||||||
pgTime "github.com/cloudnative-pg/machinery/pkg/postgres/time"
|
pgTime "github.com/cloudnative-pg/machinery/pkg/postgres/time"
|
||||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
|
"k8s.io/client-go/util/retry"
|
||||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||||
|
|
||||||
barmancloudv1 "github.com/cloudnative-pg/plugin-barman-cloud/api/v1"
|
barmancloudv1 "github.com/cloudnative-pg/plugin-barman-cloud/api/v1"
|
||||||
@ -101,6 +102,13 @@ func (b BackupServiceImplementation) Backup(
|
|||||||
postgres.BackupTemporaryDirectory,
|
postgres.BackupTemporaryDirectory,
|
||||||
); err != nil {
|
); err != nil {
|
||||||
contextLogger.Error(err, "while taking backup")
|
contextLogger.Error(err, "while taking backup")
|
||||||
|
|
||||||
|
if failureHandlerError := b.handleBackupError(ctx, configuration); failureHandlerError != nil {
|
||||||
|
contextLogger.Error(
|
||||||
|
failureHandlerError,
|
||||||
|
"Error while handling backup failure, skipping. "+
|
||||||
|
"BarmanObjectStore object may be not up to date.")
|
||||||
|
}
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -166,3 +174,18 @@ func (b BackupServiceImplementation) Backup(
|
|||||||
Metadata: newBackupResultMetadata(configuration.Cluster.ObjectMeta.UID, executedBackupInfo.TimeLine).toMap(),
|
Metadata: newBackupResultMetadata(configuration.Cluster.ObjectMeta.UID, executedBackupInfo.TimeLine).toMap(),
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (b BackupServiceImplementation) handleBackupError(ctx context.Context, cfg *config.PluginConfiguration) error {
|
||||||
|
return retry.RetryOnConflict(
|
||||||
|
retry.DefaultBackoff,
|
||||||
|
func() error {
|
||||||
|
return setLastFailedBackupTime(
|
||||||
|
ctx,
|
||||||
|
b.Client,
|
||||||
|
cfg.GetBarmanObjectKey(),
|
||||||
|
cfg.ServerName,
|
||||||
|
time.Now(),
|
||||||
|
)
|
||||||
|
},
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|||||||
@ -31,6 +31,7 @@ func buildFqName(name string) string {
|
|||||||
var (
|
var (
|
||||||
firstRecoverabilityPointMetricName = buildFqName("first_recoverability_point")
|
firstRecoverabilityPointMetricName = buildFqName("first_recoverability_point")
|
||||||
lastAvailableBackupTimestampMetricName = buildFqName("last_available_backup_timestamp")
|
lastAvailableBackupTimestampMetricName = buildFqName("last_available_backup_timestamp")
|
||||||
|
lastFailedBackupTimestampMetricName = buildFqName("last_failed_backup_timestamp")
|
||||||
)
|
)
|
||||||
|
|
||||||
func (m metricsImpl) GetCapabilities(
|
func (m metricsImpl) GetCapabilities(
|
||||||
@ -72,6 +73,11 @@ func (m metricsImpl) Define(
|
|||||||
Help: "The last available backup as a unix timestamp",
|
Help: "The last available backup as a unix timestamp",
|
||||||
ValueType: &metrics.MetricType{Type: metrics.MetricType_TYPE_GAUGE},
|
ValueType: &metrics.MetricType{Type: metrics.MetricType_TYPE_GAUGE},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
FqName: lastFailedBackupTimestampMetricName,
|
||||||
|
Help: "The last failed backup as a unix timestamp",
|
||||||
|
ValueType: &metrics.MetricType{Type: metrics.MetricType_TYPE_GAUGE},
|
||||||
|
},
|
||||||
},
|
},
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
@ -107,18 +113,26 @@ func (m metricsImpl) Collect(
|
|||||||
FqName: lastAvailableBackupTimestampMetricName,
|
FqName: lastAvailableBackupTimestampMetricName,
|
||||||
Value: 0,
|
Value: 0,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
FqName: lastFailedBackupTimestampMetricName,
|
||||||
|
Value: 0,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
var firstRecoverabilityPoint float64
|
var firstRecoverabilityPoint float64
|
||||||
var lastAvailableBackup float64
|
var lastAvailableBackup float64
|
||||||
|
var lastFailedBackup float64
|
||||||
if x.FirstRecoverabilityPoint != nil {
|
if x.FirstRecoverabilityPoint != nil {
|
||||||
firstRecoverabilityPoint = float64(x.FirstRecoverabilityPoint.Unix())
|
firstRecoverabilityPoint = float64(x.FirstRecoverabilityPoint.Unix())
|
||||||
}
|
}
|
||||||
if x.LastSuccessfulBackupTime != nil {
|
if x.LastSuccessfulBackupTime != nil {
|
||||||
lastAvailableBackup = float64(x.LastSuccessfulBackupTime.Unix())
|
lastAvailableBackup = float64(x.LastSuccessfulBackupTime.Unix())
|
||||||
}
|
}
|
||||||
|
if x.LastFailedBackupTime != nil {
|
||||||
|
lastFailedBackup = float64(x.LastFailedBackupTime.Unix())
|
||||||
|
}
|
||||||
|
|
||||||
return &metrics.CollectMetricsResult{
|
return &metrics.CollectMetricsResult{
|
||||||
Metrics: []*metrics.CollectMetric{
|
Metrics: []*metrics.CollectMetric{
|
||||||
@ -130,6 +144,10 @@ func (m metricsImpl) Collect(
|
|||||||
FqName: lastAvailableBackupTimestampMetricName,
|
FqName: lastAvailableBackupTimestampMetricName,
|
||||||
Value: lastAvailableBackup,
|
Value: lastAvailableBackup,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
FqName: lastFailedBackupTimestampMetricName,
|
||||||
|
Value: lastFailedBackup,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|||||||
@ -6,6 +6,7 @@ import (
|
|||||||
|
|
||||||
"github.com/cloudnative-pg/barman-cloud/pkg/catalog"
|
"github.com/cloudnative-pg/barman-cloud/pkg/catalog"
|
||||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
|
"k8s.io/client-go/util/retry"
|
||||||
"k8s.io/utils/ptr"
|
"k8s.io/utils/ptr"
|
||||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||||
|
|
||||||
@ -29,10 +30,9 @@ func updateRecoveryWindow(
|
|||||||
return ptr.To(metav1.NewTime(*t))
|
return ptr.To(metav1.NewTime(*t))
|
||||||
}
|
}
|
||||||
|
|
||||||
recoveryWindow := barmancloudv1.RecoveryWindow{
|
recoveryWindow := objectStore.Status.ServerRecoveryWindow[serverName]
|
||||||
FirstRecoverabilityPoint: convertTime(backupList.GetFirstRecoverabilityPoint()),
|
recoveryWindow.FirstRecoverabilityPoint = convertTime(backupList.GetFirstRecoverabilityPoint())
|
||||||
LastSuccessfulBackupTime: convertTime(backupList.GetLastSuccessfulBackupTime()),
|
recoveryWindow.LastSuccessfulBackupTime = convertTime(backupList.GetLastSuccessfulBackupTime())
|
||||||
}
|
|
||||||
|
|
||||||
if objectStore.Status.ServerRecoveryWindow == nil {
|
if objectStore.Status.ServerRecoveryWindow == nil {
|
||||||
objectStore.Status.ServerRecoveryWindow = make(map[string]barmancloudv1.RecoveryWindow)
|
objectStore.Status.ServerRecoveryWindow = make(map[string]barmancloudv1.RecoveryWindow)
|
||||||
@ -41,3 +41,25 @@ func updateRecoveryWindow(
|
|||||||
|
|
||||||
return c.Status().Update(ctx, objectStore)
|
return c.Status().Update(ctx, objectStore)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// setLastFailedBackupTime sets the last failed backup time in the
|
||||||
|
// passed object store, for the passed server name.
|
||||||
|
func setLastFailedBackupTime(
|
||||||
|
ctx context.Context,
|
||||||
|
c client.Client,
|
||||||
|
objectStoreKey client.ObjectKey,
|
||||||
|
serverName string,
|
||||||
|
lastFailedBackupTime time.Time,
|
||||||
|
) error {
|
||||||
|
return retry.RetryOnConflict(retry.DefaultBackoff, func() error {
|
||||||
|
var objectStore barmancloudv1.ObjectStore
|
||||||
|
|
||||||
|
if err := c.Get(ctx, objectStoreKey, &objectStore); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
recoveryWindow := objectStore.Status.ServerRecoveryWindow[serverName]
|
||||||
|
recoveryWindow.LastFailedBackupTime = ptr.To(metav1.NewTime(lastFailedBackupTime))
|
||||||
|
objectStore.Status.ServerRecoveryWindow[serverName] = recoveryWindow
|
||||||
|
return c.Status().Update(ctx, &objectStore)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|||||||
@ -101,5 +101,6 @@ _Appears in:_
|
|||||||
| --- | --- | --- | --- | --- |
|
| --- | --- | --- | --- | --- |
|
||||||
| `firstRecoverabilityPoint` _[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#time-v1-meta)_ | The first recoverability point in a PostgreSQL server refers to<br />the earliest point in time to which the database can be<br />restored. | True | | |
|
| `firstRecoverabilityPoint` _[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#time-v1-meta)_ | The first recoverability point in a PostgreSQL server refers to<br />the earliest point in time to which the database can be<br />restored. | True | | |
|
||||||
| `lastSuccussfulBackupTime` _[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#time-v1-meta)_ | The last successful backup time | True | | |
|
| `lastSuccussfulBackupTime` _[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#time-v1-meta)_ | The last successful backup time | True | | |
|
||||||
|
| `lastFailedBackupTime` _[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#time-v1-meta)_ | The last failed backup time | True | | |
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user