plugin-barman-cloud/internal/cnpgi/restore/restore.go
/*
Copyright © contributors to CloudNativePG, established as
CloudNativePG a Series of LF Projects, LLC.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

SPDX-License-Identifier: Apache-2.0
*/

package restore

import (
"context"
"errors"
"fmt"
"os"
"os/exec"
"path"
"time"

"github.com/cloudnative-pg/barman-cloud/pkg/api"
barmanArchiver "github.com/cloudnative-pg/barman-cloud/pkg/archiver"
barmanCatalog "github.com/cloudnative-pg/barman-cloud/pkg/catalog"
barmanCommand "github.com/cloudnative-pg/barman-cloud/pkg/command"
barmanCredentials "github.com/cloudnative-pg/barman-cloud/pkg/credentials"
barmanRestorer "github.com/cloudnative-pg/barman-cloud/pkg/restorer"
barmanUtils "github.com/cloudnative-pg/barman-cloud/pkg/utils"
cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1"
"github.com/cloudnative-pg/cloudnative-pg/pkg/postgres"
"github.com/cloudnative-pg/cloudnative-pg/pkg/utils"
restore "github.com/cloudnative-pg/cnpg-i/pkg/restore/job"
"github.com/cloudnative-pg/machinery/pkg/execlog"
"github.com/cloudnative-pg/machinery/pkg/fileutils"
"github.com/cloudnative-pg/machinery/pkg/log"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"

barmancloudv1 "github.com/cloudnative-pg/plugin-barman-cloud/api/v1"
"github.com/cloudnative-pg/plugin-barman-cloud/internal/cnpgi/common"
"github.com/cloudnative-pg/plugin-barman-cloud/internal/cnpgi/metadata"
"github.com/cloudnative-pg/plugin-barman-cloud/internal/cnpgi/operator/config"
)
const (
// ScratchDataDirectory is the directory to be used for scratch data
ScratchDataDirectory = "/controller"
// RecoveryTemporaryDirectory provides a path to store temporary files
// needed in the recovery process
RecoveryTemporaryDirectory = ScratchDataDirectory + "/recovery"
)
// JobHookImpl is the implementation of the restore job hooks
type JobHookImpl struct {
restore.UnimplementedRestoreJobHooksServer
Client client.Client
SpoolDirectory string
PgDataPath string
PgWalFolderToSymlink string
}
// GetCapabilities returns the capabilities of the restore job hooks
func (impl JobHookImpl) GetCapabilities(
_ context.Context,
_ *restore.RestoreJobHooksCapabilitiesRequest,
) (*restore.RestoreJobHooksCapabilitiesResult, error) {
return &restore.RestoreJobHooksCapabilitiesResult{
Capabilities: []*restore.RestoreJobHooksCapability{
{
Kind: restore.RestoreJobHooksCapability_KIND_RESTORE,
},
},
}, nil
}
// Restore restores the cluster from a backup
func (impl JobHookImpl) Restore(
ctx context.Context,
req *restore.RestoreRequest,
) (*restore.RestoreResponse, error) {
contextLogger := log.FromContext(ctx)
configuration, err := config.NewFromClusterJSON(req.ClusterDefinition)
if err != nil {
return nil, err
}
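// Load the ObjectStore resource used for recovery; it carries the Barman Cloud
// configuration and credentials pointing at the archive to restore from.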
var recoveryObjectStore barmancloudv1.ObjectStore
if err := impl.Client.Get(ctx, configuration.GetRecoveryBarmanObjectKey(), &recoveryObjectStore); err != nil {
return nil, err
}
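// When the restored cluster is also configured to archive to its own object store,
// verify that destination is safe to use (e.g. its WAL archive is empty) before proceeding.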
if configuration.BarmanObjectName != "" {
var targetObjectStore barmancloudv1.ObjectStore
if err := impl.Client.Get(ctx, configuration.GetBarmanObjectKey(), &targetObjectStore); err != nil {
return nil, err
}
if err := impl.checkBackupDestination(
ctx,
configuration.Cluster,
&targetObjectStore.Spec.Configuration,
targetObjectStore.Name,
); err != nil {
return nil, err
}
}
// Detect the backup to recover
backup, env, err := loadBackupObjectFromExternalCluster(
ctx,
impl.Client,
configuration.Cluster,
&recoveryObjectStore.Spec.Configuration,
recoveryObjectStore.Name,
configuration.RecoveryServerName,
)
if err != nil {
return nil, err
}
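// Make sure the archive contains the first WAL file required by the backup,
// since recovery cannot start without it.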
if err := impl.ensureArchiveContainsLastCheckpointRedoWAL(
ctx,
env,
backup,
&recoveryObjectStore.Spec.Configuration,
); err != nil {
return nil, err
}
if err := impl.restoreDataDir(
ctx,
backup,
env,
&recoveryObjectStore.Spec.Configuration,
); err != nil {
return nil, err
}
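// When a separate WAL volume is declared, relocate pg_wal onto it and leave a symlink in PGDATA.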
if configuration.Cluster.Spec.WalStorage != nil {
if _, err := impl.restoreCustomWalDir(ctx); err != nil {
return nil, err
}
}
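// Build the configuration snippet (restore_command and recovery target action)
// that PostgreSQL will use to fetch WALs from the object store during recovery.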
config := getRestoreWalConfig()
contextLogger.Info("sending restore response", "config", config, "env", env)
return &restore.RestoreResponse{
RestoreConfig: config,
Envs: nil,
}, nil
}
// restoreDataDir restores PGDATA from an existing backup
func (impl JobHookImpl) restoreDataDir(
ctx context.Context,
backup *cnpgv1.Backup,
env []string,
barmanConfiguration *cnpgv1.BarmanObjectStoreConfiguration,
) error {
var options []string
options, err := barmanCommand.AppendCloudProviderOptionsFromConfiguration(ctx, options, barmanConfiguration)
if err != nil {
return err
}
if backup.Status.EndpointURL != "" {
options = append(options, "--endpoint-url", backup.Status.EndpointURL)
}
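// barman-cloud-restore takes the object store destination URL, the server name,
// the backup ID and the local target directory as positional arguments, in this order.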
options = append(options, backup.Status.DestinationPath)
options = append(options, backup.Status.ServerName)
options = append(options, backup.Status.BackupID)
options = append(options, impl.PgDataPath)
log.Info("Starting barman-cloud-restore",
"options", options)
cmd := exec.Command(barmanUtils.BarmanCloudRestore, options...) // #nosec G204
cmd.Env = env
err = execlog.RunStreaming(cmd, barmanUtils.BarmanCloudRestore)
if err != nil {
var exitError *exec.ExitError
if errors.As(err, &exitError) {
err = barmanCommand.UnmarshalBarmanCloudRestoreExitCode(exitError.ExitCode())
}
log.Error(err, "Can't restore backup")
return err
}
log.Info("Restore completed")
return nil
}
func (impl JobHookImpl) ensureArchiveContainsLastCheckpointRedoWAL(
ctx context.Context,
env []string,
backup *cnpgv1.Backup,
barmanConfiguration *cnpgv1.BarmanObjectStoreConfiguration,
) error {
// it's the full path of the file that will temporarily contain the LastCheckpointRedoWAL
const testWALPath = RecoveryTemporaryDirectory + "/test.wal"
contextLogger := log.FromContext(ctx)
defer func() {
if err := fileutils.RemoveFile(testWALPath); err != nil {
contextLogger.Error(err, "while deleting the temporary wal file")
}
}()
if err := fileutils.EnsureParentDirectoryExists(testWALPath); err != nil {
return err
}
rest, err := barmanRestorer.New(ctx, env, impl.SpoolDirectory)
if err != nil {
return err
}
opts, err := barmanCommand.CloudWalRestoreOptions(ctx, barmanConfiguration, backup.Status.ServerName)
if err != nil {
return err
}
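// Try to download the backup's first required WAL into the temporary location;
// if this fails, the archive is missing WALs needed to start recovery.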
if err := rest.Restore(backup.Status.BeginWal, testWALPath, opts); err != nil {
return fmt.Errorf("encountered an error while checking the presence of first needed WAL in the archive: %w", err)
}
return nil
}
func (impl *JobHookImpl) checkBackupDestination(
ctx context.Context,
cluster *cnpgv1.Cluster,
barmanConfiguration *cnpgv1.BarmanObjectStoreConfiguration,
objectStoreName string,
) error {
// Get environment from cache
env, err := barmanCredentials.EnvSetCloudCredentialsAndCertificates(ctx,
impl.Client,
cluster.Namespace,
barmanConfiguration,
os.Environ(),
common.BuildCertificateFilePath(objectStoreName),
)
if err != nil {
return fmt.Errorf("can't get credentials for cluster %v: %w", cluster.Name, err)
}
if len(env) == 0 {
return nil
}
// Instantiate the WALArchiver to get the proper configuration
var walArchiver *barmanArchiver.WALArchiver
walArchiver, err = barmanArchiver.New(
ctx,
env,
impl.SpoolDirectory,
impl.PgDataPath,
path.Join(impl.PgDataPath, metadata.CheckEmptyWalArchiveFile))
if err != nil {
return fmt.Errorf("while creating the archiver: %w", err)
}
// TODO: refactor this code elsewhere
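// The server name defaults to the cluster name, but can be overridden
// by the plugin's "serverName" parameter.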
serverName := cluster.Name
for _, plugin := range cluster.Spec.Plugins {
if plugin.IsEnabled() && plugin.Name == metadata.PluginName {
if pluginServerName, ok := plugin.Parameters["serverName"]; ok {
serverName = pluginServerName
}
}
}
// Check if we're ok to archive in the desired destination
if utils.IsEmptyWalArchiveCheckEnabled(&cluster.ObjectMeta) {
return common.CheckBackupDestination(ctx, barmanConfiguration, walArchiver, serverName)
}
return nil
}
// restoreCustomWalDir moves the current pg_wal data to the specified custom WAL directory and applies the symlink.
// It returns whether any changes were made and any error encountered in the process.
func (impl JobHookImpl) restoreCustomWalDir(ctx context.Context) (bool, error) {
const pgWalDirectory = "pg_wal"
contextLogger := log.FromContext(ctx)
pgDataWal := path.Join(impl.PgDataPath, pgWalDirectory)
// if the link is already present we have nothing to do.
if linkInfo, _ := os.Readlink(pgDataWal); linkInfo == impl.PgWalFolderToSymlink {
contextLogger.Info("symlink to the WAL volume already present, skipping the custom wal dir restore")
return false, nil
}
if err := fileutils.EnsureDirectoryExists(impl.PgWalFolderToSymlink); err != nil {
return false, err
}
contextLogger.Info("restoring WAL volume symlink and transferring data")
if err := fileutils.EnsureDirectoryExists(pgDataWal); err != nil {
return false, err
}
if err := fileutils.MoveDirectoryContent(pgDataWal, impl.PgWalFolderToSymlink); err != nil {
return false, err
}
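// Remove the now-empty pg_wal directory so it can be replaced by a symlink to the WAL volume.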
if err := fileutils.RemoveFile(pgDataWal); err != nil {
return false, err
}
return true, os.Symlink(impl.PgWalFolderToSymlink, pgDataWal)
}
// getRestoreWalConfig obtains the content to append to `custom.conf` allowing PostgreSQL
// to complete the WAL recovery from the object storage and then start
// as a new primary
func getRestoreWalConfig() string {
restoreCmd := fmt.Sprintf(
"/controller/manager wal-restore --log-destination %s/%s.json %%f %%p",
postgres.LogPath, postgres.LogFileName)
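// PostgreSQL expands %f with the name of the requested WAL file and %p with the
// destination path; the instance manager's wal-restore command fetches the file
// from the object store.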
recoveryFileContents := fmt.Sprintf(
"recovery_target_action = promote\n"+
"restore_command = '%s'\n",
restoreCmd)
return recoveryFileContents
}
// loadBackupObjectFromExternalCluster generates an in-memory Backup structure given a reference to
// an external cluster, loading the required information from the object store
func loadBackupObjectFromExternalCluster(
ctx context.Context,
typedClient client.Client,
cluster *cnpgv1.Cluster,
recoveryObjectStore *api.BarmanObjectStoreConfiguration,
recoveryObjectStoreName string,
serverName string,
) (*cnpgv1.Backup, []string, error) {
contextLogger := log.FromContext(ctx)
contextLogger.Info("Recovering from external cluster",
"serverName", serverName,
"objectStore", recoveryObjectStore)
env, err := barmanCredentials.EnvSetCloudCredentialsAndCertificates(
ctx,
typedClient,
cluster.Namespace,
recoveryObjectStore,
os.Environ(),
common.BuildCertificateFilePath(recoveryObjectStoreName))
if err != nil {
return nil, nil, err
}
contextLogger.Info("Downloading backup catalog")
backupCatalog, err := barmanCommand.GetBackupList(ctx, recoveryObjectStore, serverName, env)
if err != nil {
return nil, nil, err
}
contextLogger.Info("Downloaded backup catalog", "backupCatalog", shortenedCatalog(backupCatalog))
// We are now choosing the right backup to restore
var targetBackup *barmanCatalog.BarmanBackup
if cluster.Spec.Bootstrap.Recovery != nil &&
cluster.Spec.Bootstrap.Recovery.RecoveryTarget != nil {
targetBackup, err = backupCatalog.FindBackupInfo(
cluster.Spec.Bootstrap.Recovery.RecoveryTarget,
)
if err != nil {
return nil, nil, err
}
} else {
targetBackup = backupCatalog.LatestBackupInfo()
}
if targetBackup == nil {
return nil, nil, fmt.Errorf("no target backup found")
}
contextLogger.Info("Target backup found", "backup", targetBackup)
return &cnpgv1.Backup{
Spec: cnpgv1.BackupSpec{
Cluster: cnpgv1.LocalObjectReference{
Name: serverName,
},
},
Status: cnpgv1.BackupStatus{
BarmanCredentials: recoveryObjectStore.BarmanCredentials,
EndpointCA: recoveryObjectStore.EndpointCA,
EndpointURL: recoveryObjectStore.EndpointURL,
DestinationPath: recoveryObjectStore.DestinationPath,
ServerName: serverName,
BackupID: targetBackup.ID,
Phase: cnpgv1.BackupPhaseCompleted,
StartedAt: &metav1.Time{Time: targetBackup.BeginTime},
StoppedAt: &metav1.Time{Time: targetBackup.EndTime},
BeginWal: targetBackup.BeginWal,
EndWal: targetBackup.EndWal,
BeginLSN: targetBackup.BeginLSN,
EndLSN: targetBackup.EndLSN,
Error: targetBackup.Error,
CommandOutput: "",
CommandError: "",
},
}, env, nil
}
// ShortBackupCatalogEntry is used when logging the downloaded backup
// catalog and contains only the fields used for debugging
type ShortBackupCatalogEntry struct {
// BackupID is the backup ID
BackupID string `json:"backupID"`
// StartTime is the backup start time
StartTime time.Time `json:"startTime"`
// EndTime is the backup end time
EndTime time.Time `json:"endTime"`
}
// shortenedCatalog creates a structure containing the debug information
// of a backup catalog.
func shortenedCatalog(catalog *barmanCatalog.Catalog) []ShortBackupCatalogEntry {
if catalog == nil {
return nil
}
result := make([]ShortBackupCatalogEntry, len(catalog.List))
for i, v := range catalog.List {
result[i].BackupID = v.BackupName
result[i].StartTime = v.BeginTime
result[i].EndTime = v.EndTime
}
return result
}