Skip to content

Commit

Permalink
Add support for section-based backup and restore
Browse files Browse the repository at this point in the history
To give users more granular control over which parts of a database are
backed up or restored, this commit adds a new --sections flag that
allows users to choose which backup "sections" (globals, predata, data,
postdata, and/or statistics, the same sections currently specified in
the ToC files) will be included in a specific backup or restore.

This flag covers the same functionality as --data-only, --metadata-only,
--with-globals, --with-stats, and --without-globals, so those flags have
been deprecated.  They are retained for backwards compatibility, but may
not be used in conjunction with --sections.

Co-authored-by: Jamie McAtamney <jmcatamney@vmware.com>
Co-authored-by: Andrew Repp <reppa@vmware.com>
  • Loading branch information
jmcatamney and AJR-VMware committed Nov 1, 2023
1 parent c0fc98b commit 6bb94bd
Show file tree
Hide file tree
Showing 32 changed files with 711 additions and 333 deletions.
37 changes: 21 additions & 16 deletions backup/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,12 @@ func DoSetup() {

utils.CheckGpexpandRunning(utils.BackupPreventedByGpexpandMessage)
timestamp := history.CurrentTimestamp()

// Need to get sections this early to handle lock file location behavior
BackupSections = options.GetSections(cmdFlags)
ValidateBackupSections(cmdFlags)
createBackupLockFile(timestamp)

initializeConnectionPool(timestamp)
gplog.Info("Greenplum Database Version = %s", connectionPool.Version.VersionString)

Expand All @@ -69,7 +74,7 @@ func DoSetup() {
clusterConfigConn.Close()

globalFPInfo = filepath.NewFilePathInfo(globalCluster, MustGetFlagString(options.BACKUP_DIR), timestamp, "")
if MustGetFlagBool(options.METADATA_ONLY) {
if !BackupSections.Data {
_, err = globalCluster.ExecuteLocalCommand(fmt.Sprintf("mkdir -p %s", globalFPInfo.GetDirForContent(-1)))
gplog.FatalOnError(err)
} else {
Expand Down Expand Up @@ -124,12 +129,12 @@ func DoBackup() {
gplog.Info("Gathering table state information")
metadataTables, dataTables := RetrieveAndProcessTables()
dataTables, numExtOrForeignTables := GetBackupDataSet(dataTables)
if len(dataTables) == 0 && !backupReport.MetadataOnly {
if len(dataTables) == 0 && backupReport.Data {
gplog.Warn("No tables in backup set contain data. Performing metadata-only backup instead.")
backupReport.MetadataOnly = true
backupReport.Data = false
}
// This must be a full backup with --leaf-parition-data to query for incremental metadata
if !(MustGetFlagBool(options.METADATA_ONLY) || MustGetFlagBool(options.DATA_ONLY)) && MustGetFlagBool(options.LEAF_PARTITION_DATA) {
if backupReport.Data && backupReport.Predata && MustGetFlagBool(options.LEAF_PARTITION_DATA) {
backupIncrementalMetadata()
} else {
gplog.Verbose("Skipping query for incremental metadata.")
Expand All @@ -140,12 +145,12 @@ func DoBackup() {
metadataFile := utils.NewFileWithByteCountFromFile(metadataFilename)

/*
* We check this in the backup report rather than the flag because we
* We check this in the backup report rather than the flags because we
* perform a metadata only backup if the database contains no tables
* or only external tables
*/
backupSetTables := dataTables
if !backupReport.MetadataOnly {
if backupReport.Data {
targetBackupRestorePlan := make([]history.RestorePlanEntry, 0)
if targetBackupTimestamp != "" {
gplog.Info("Basing incremental backup off of backup with timestamp = %s", targetBackupTimestamp)
Expand All @@ -172,23 +177,23 @@ func DoBackup() {
}

backupSessionGUC(metadataFile)
if !MustGetFlagBool(options.DATA_ONLY) {
isFullBackup := len(MustGetFlagStringArray(options.INCLUDE_RELATION)) == 0
if isFullBackup && !MustGetFlagBool(options.WITHOUT_GLOBALS) {
backupGlobals(metadataFile)
}

isFilteredBackup := !isFullBackup
isFilteredBackup := len(MustGetFlagStringArray(options.INCLUDE_RELATION)) != 0
if backupReport.Globals && !isFilteredBackup {
backupGlobals(metadataFile)
}
if backupReport.Predata {
backupPredata(metadataFile, metadataTables, isFilteredBackup)
}
if backupReport.Postdata {
backupPostdata(metadataFile)
}

if !backupReport.MetadataOnly {
if backupReport.Data {
backupData(backupSetTables)
}

printDataBackupWarnings(numExtOrForeignTables)
if MustGetFlagBool(options.WITH_STATS) {
if backupReport.Statistics {
backupStatistics(metadataTables)
}

Expand All @@ -205,7 +210,7 @@ func DoBackup() {
if pluginConfigFlag != "" {
pluginConfig.MustBackupFile(metadataFilename)
pluginConfig.MustBackupFile(globalFPInfo.GetTOCFilePath())
if MustGetFlagBool(options.WITH_STATS) {
if backupReport.Statistics {
pluginConfig.MustBackupFile(globalFPInfo.GetStatisticsFilePath())
}
_ = utils.CopyFile(pluginConfigFlag, globalFPInfo.GetPluginConfigPath())
Expand Down
2 changes: 1 addition & 1 deletion backup/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,7 @@ func GetBackupDataSet(tables []Table) ([]Table, int64) {
var backupDataSet []Table
var numExtOrForeignTables int64

if !backupReport.MetadataOnly {
if backupReport.Data {
for _, table := range tables {
if !table.SkipDataBackup() {
backupDataSet = append(backupDataSet, table)
Expand Down
4 changes: 2 additions & 2 deletions backup/data_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ var _ = Describe("backup/data tests", func() {
config := history.BackupConfig{}
var testTable backup.Table
BeforeEach(func() {
config.MetadataOnly = false
config.Sections = options.Sections{Globals: true, Predata: true, Data: true, Postdata: true}
backup.SetReport(&report.Report{BackupConfig: config})
testTable = backup.Table{
Relation: backup.Relation{Schema: "public", Name: "testtable"},
Expand Down Expand Up @@ -229,7 +229,7 @@ var _ = Describe("backup/data tests", func() {
Expect(numExtOrForeignTables).To(Equal(int64(0)))
})
It("returns an empty set, if --metadata-only flag is set and a regular table is given", func() {
config.MetadataOnly = true
config.Data = false
backup.SetReport(&report.Report{BackupConfig: config})
tables := []backup.Table{testTable}

Expand Down
3 changes: 3 additions & 0 deletions backup/global_variables.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ var (
// Allow global access to pre-processed include and exclude table lists
IncludedRelationFqns []options.Relation
ExcludedRelationFqns []options.Relation

// sections to backup
BackupSections options.Sections
)

/*
Expand Down
59 changes: 20 additions & 39 deletions backup/incremental_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"github.com/greenplum-db/gpbackup/backup"
"github.com/greenplum-db/gpbackup/filepath"
"github.com/greenplum-db/gpbackup/history"
"github.com/greenplum-db/gpbackup/options"
"github.com/greenplum-db/gpbackup/report"
"github.com/greenplum-db/gpbackup/testutils"
"github.com/greenplum-db/gpbackup/toc"
Expand All @@ -18,6 +19,21 @@ import (
. "github.com/onsi/gomega/gbytes"
)

func basicBackupConfig(databaseName string, timestamp string, status string) history.BackupConfig {
return history.BackupConfig{
DatabaseName: databaseName,
Timestamp: timestamp,
Status: status,
ExcludeRelations: []string{},
ExcludeSchemas: []string{},
IncludeRelations: []string{},
IncludeSchemas: []string{},
RestorePlan: []history.RestorePlanEntry{},
Sections: options.Sections{},
DeprecatedMetadata: options.DeprecatedMetadata{},
}
}

var _ = Describe("backup/incremental tests", func() {
Describe("FilterTablesForIncremental", func() {
defaultEntry := toc.AOEntry{
Expand Down Expand Up @@ -83,45 +99,10 @@ var _ = Describe("backup/incremental tests", func() {
Describe("GetLatestMatchingBackupConfig", func() {
historyDBPath := "/tmp/hist.db"
contents := []history.BackupConfig{
{
DatabaseName: "test2",
Timestamp: "timestamp4",
Status: history.BackupStatusFailed,
ExcludeRelations: []string{},
ExcludeSchemas: []string{},
IncludeRelations: []string{},
IncludeSchemas: []string{},
RestorePlan: []history.RestorePlanEntry{},
},
{
DatabaseName: "test1",
Timestamp: "timestamp3",
Status: history.BackupStatusSucceed,
ExcludeRelations: []string{},
ExcludeSchemas: []string{},
IncludeRelations: []string{},
IncludeSchemas: []string{},
RestorePlan: []history.RestorePlanEntry{}},
{
DatabaseName: "test2",
Timestamp: "timestamp2",
Status: history.BackupStatusSucceed,
ExcludeRelations: []string{},
ExcludeSchemas: []string{},
IncludeRelations: []string{},
IncludeSchemas: []string{},
RestorePlan: []history.RestorePlanEntry{},
},
{
DatabaseName: "test1",
Timestamp: "timestamp1",
Status: history.BackupStatusSucceed,
ExcludeRelations: []string{},
ExcludeSchemas: []string{},
IncludeRelations: []string{},
IncludeSchemas: []string{},
RestorePlan: []history.RestorePlanEntry{},
},
basicBackupConfig("test2", "timestamp4", history.BackupStatusFailed),
basicBackupConfig("test1", "timestamp3", history.BackupStatusSucceed),
basicBackupConfig("test2", "timestamp2", history.BackupStatusSucceed),
basicBackupConfig("test1", "timestamp1", history.BackupStatusSucceed),
}
BeforeEach(func() {
os.Remove(historyDBPath)
Expand Down
2 changes: 1 addition & 1 deletion backup/queries_acl.go
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ func (dp DefaultPrivileges) GetMetadataEntry() (string, toc.MetadataEntry) {
toc.MetadataEntry{
Schema: dp.Schema,
Name: "",
ObjectType: "DEFAULT PRIVILEGES",
ObjectType: toc.OBJ_DEFAULT_PRIVILEGES,
ReferenceObject: "",
StartByte: 0,
EndByte: 0,
Expand Down
2 changes: 1 addition & 1 deletion backup/queries_externals.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ func (pi PartitionInfo) GetMetadataEntry() (string, toc.MetadataEntry) {
toc.MetadataEntry{
Schema: pi.ParentSchema,
Name: pi.ParentRelationName,
ObjectType: "EXCHANGE PARTITION",
ObjectType: toc.OBJ_EXCHANGE_PARTITION,
ReferenceObject: "",
StartByte: 0,
EndByte: 0,
Expand Down
4 changes: 2 additions & 2 deletions backup/queries_postdata.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ func (i IndexDefinition) GetMetadataEntry() (string, toc.MetadataEntry) {
toc.MetadataEntry{
Schema: i.OwningSchema,
Name: i.Name,
ObjectType: "INDEX",
ObjectType: toc.OBJ_INDEX,
ReferenceObject: tableFQN,
StartByte: 0,
EndByte: 0,
Expand Down Expand Up @@ -565,7 +565,7 @@ func (p RLSPolicy) GetMetadataEntry() (string, toc.MetadataEntry) {
toc.MetadataEntry{
Schema: p.Schema,
Name: p.Table,
ObjectType: "POLICY",
ObjectType: toc.OBJ_POLICY,
ReferenceObject: tableFQN,
StartByte: 0,
EndByte: 0,
Expand Down
2 changes: 1 addition & 1 deletion backup/queries_relations.go
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,7 @@ func GetAllViews(connectionPool *dbconn.DBConn) []View {
// locks for the target relations. This is mainly to protect the metadata
// dumping part but it also makes the main worker thread (worker 0) the
// most resilient for the later data dumping logic. Locks will still be
// taken for --data-only calls.
// taken when only backing up the data section.
func LockTables(connectionPool *dbconn.DBConn, tables []Relation) {
gplog.Info("Acquiring ACCESS SHARE locks on tables")

Expand Down
29 changes: 26 additions & 3 deletions backup/validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,13 +137,21 @@ func ValidateTablesExist(conn *dbconn.DBConn, tableList []string, excludeSet boo
}

func validateFlagCombinations(flags *pflag.FlagSet) {
options.CheckExclusiveFlags(flags, options.DEBUG, options.QUIET, options.VERBOSE)
// TODO: remove these three checks once the deprecated flags are removed
options.CheckExclusiveFlags(flags, options.JOBS, options.METADATA_ONLY, options.SINGLE_DATA_FILE)
options.CheckExclusiveFlags(flags, options.DATA_ONLY, options.METADATA_ONLY, options.INCREMENTAL)
options.CheckExclusiveFlags(flags, options.METADATA_ONLY, options.LEAF_PARTITION_DATA)

// Do not allow mixing --sections with the legacy flags
options.CheckExclusiveFlags(flags, options.SECTIONS, options.METADATA_ONLY)
options.CheckExclusiveFlags(flags, options.SECTIONS, options.DATA_ONLY)
options.CheckExclusiveFlags(flags, options.SECTIONS, options.WITHOUT_GLOBALS)
options.CheckExclusiveFlags(flags, options.SECTIONS, options.WITH_STATS)

options.CheckExclusiveFlags(flags, options.DEBUG, options.QUIET, options.VERBOSE)
options.CheckExclusiveFlags(flags, options.INCLUDE_SCHEMA, options.INCLUDE_SCHEMA_FILE, options.INCLUDE_RELATION, options.INCLUDE_RELATION_FILE)
options.CheckExclusiveFlags(flags, options.EXCLUDE_SCHEMA, options.EXCLUDE_SCHEMA_FILE, options.INCLUDE_SCHEMA, options.INCLUDE_SCHEMA_FILE)
options.CheckExclusiveFlags(flags, options.EXCLUDE_SCHEMA, options.EXCLUDE_SCHEMA_FILE, options.EXCLUDE_RELATION, options.INCLUDE_RELATION, options.EXCLUDE_RELATION_FILE, options.INCLUDE_RELATION_FILE)
options.CheckExclusiveFlags(flags, options.JOBS, options.METADATA_ONLY, options.SINGLE_DATA_FILE)
options.CheckExclusiveFlags(flags, options.METADATA_ONLY, options.LEAF_PARTITION_DATA)
options.CheckExclusiveFlags(flags, options.NO_COMPRESSION, options.COMPRESSION_TYPE)
options.CheckExclusiveFlags(flags, options.NO_COMPRESSION, options.COMPRESSION_LEVEL)
options.CheckExclusiveFlags(flags, options.PLUGIN_CONFIG, options.BACKUP_DIR)
Expand Down Expand Up @@ -192,3 +200,18 @@ func validateFromTimestamp(fromTimestamp string) {
"previous backup.", fromTimestampFPInfo.Timestamp), "")
}
}

// Validate some flag combinations here, since they're harder to check in DoValidation
// before the sections are parsed
func ValidateBackupSections(flags *pflag.FlagSet) {
if options.MustGetFlagBool(flags, options.SINGLE_DATA_FILE) && !BackupSections.Data {
gplog.Fatal(errors.Errorf("Cannot use --single-data-file without data section"), "")
}
if options.MustGetFlagBool(flags, options.LEAF_PARTITION_DATA) && !BackupSections.Data {
gplog.Fatal(errors.Errorf("Cannot use --leaf-partition-data without data section"), "")
}
if options.MustGetFlagBool(flags, options.INCREMENTAL) && (!BackupSections.Data || !BackupSections.Predata || !BackupSections.Postdata) {
gplog.Fatal(errors.Errorf("Cannot use --incremental without predata, data, and postdata sections in a backup"), "")
}

}
15 changes: 7 additions & 8 deletions backup/wrappers.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,6 @@ func NewBackupConfig(dbName string, dbVersion string, backupVersion string, plug
CompressionType: MustGetFlagString(options.COMPRESSION_TYPE),
DatabaseName: dbName,
DatabaseVersion: dbVersion,
DataOnly: MustGetFlagBool(options.DATA_ONLY),
ExcludeRelations: MustGetFlagStringArray(options.EXCLUDE_RELATION),
ExcludeSchemaFiltered: len(MustGetFlagStringArray(options.EXCLUDE_SCHEMA)) > 0,
ExcludeSchemas: MustGetFlagStringArray(options.EXCLUDE_SCHEMA),
Expand All @@ -129,13 +128,14 @@ func NewBackupConfig(dbName string, dbVersion string, backupVersion string, plug
IncludeTableFiltered: len(opts.GetOriginalIncludedTables()) > 0,
Incremental: MustGetFlagBool(options.INCREMENTAL),
LeafPartitionData: MustGetFlagBool(options.LEAF_PARTITION_DATA),
MetadataOnly: MustGetFlagBool(options.METADATA_ONLY),
Plugin: plugin,
SingleDataFile: MustGetFlagBool(options.SINGLE_DATA_FILE),
Timestamp: timestamp,
WithoutGlobals: MustGetFlagBool(options.WITHOUT_GLOBALS),
WithStatistics: MustGetFlagBool(options.WITH_STATS),
Status: history.BackupStatusInProgress,
// initializeBackupReport is called after options.GetSection is called in Setup, so
// it's safe to assume that BackupSection has been initialized here.
Sections: BackupSections,
DeprecatedMetadata: options.DeprecatedMetadata{},
}

return &backupConfig
Expand All @@ -153,7 +153,7 @@ func initializeBackupReport(opts options.Options) {
isFilteredBackup := config.IncludeTableFiltered || config.IncludeSchemaFiltered ||
config.ExcludeTableFiltered || config.ExcludeSchemaFiltered
dbSize := ""
if !MustGetFlagBool(options.METADATA_ONLY) && !isFilteredBackup {
if BackupSections.Data && !isFilteredBackup {
gplog.Verbose("Getting database size")
//Potentially expensive query
dbSize = GetDBSize(connectionPool)
Expand All @@ -171,10 +171,9 @@ func initializeBackupReport(opts options.Options) {
func createBackupLockFile(timestamp string) {
var err error
var timestampLockFile string
metadataOnly := MustGetFlagBool(options.METADATA_ONLY)
backupDir := MustGetFlagString(options.BACKUP_DIR)
noHistory := MustGetFlagBool(options.NO_HISTORY)
if metadataOnly && noHistory && backupDir != "" {
if !BackupSections.Data && noHistory && backupDir != "" {
err = os.MkdirAll(backupDir, 0777)
gplog.FatalOnError(err)
timestampLockFile = fmt.Sprintf("%s/%s.lck", backupDir, timestamp)
Expand Down Expand Up @@ -742,7 +741,7 @@ func backupRowLevelSecurityPolicies(metadataFile *utils.FileWithByteCount) {
func backupDefaultPrivileges(metadataFile *utils.FileWithByteCount) {
gplog.Verbose("Writing ALTER DEFAULT PRIVILEGES statements to metadata file")
defaultPrivileges := GetDefaultPrivileges(connectionPool)
objectCounts["DEFAULT PRIVILEGES"] = len(defaultPrivileges)
objectCounts[toc.OBJ_DEFAULT_PRIVILEGES] = len(defaultPrivileges)
PrintDefaultPrivilegesStatements(metadataFile, globalTOC, defaultPrivileges)
}

Expand Down
2 changes: 1 addition & 1 deletion ci/gpbackup-generated.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
## file (example: templates/gpbackup-tpl.yml) and regenerate the pipeline
## using appropriate tool (example: gen_pipeline.py -p gpbackup-release).
## ----------------------------------------------------------------------
## Generated by gen_pipeline.py at: 2023-10-11 14:43:07.990078
## Generated by gen_pipeline.py at: 2023-10-11 15:43:39.665765
## Template file: gpbackup-tpl.yml
## Pipeline Name: gpbackup
## Nightly Trigger: True
Expand Down
2 changes: 1 addition & 1 deletion ci/gpbackup-release-generated.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
## file (example: templates/gpbackup-tpl.yml) and regenerate the pipeline
## using appropriate tool (example: gen_pipeline.py -p gpbackup-release).
## ----------------------------------------------------------------------
## Generated by gen_pipeline.py at: 2023-10-11 14:43:08.011601
## Generated by gen_pipeline.py at: 2023-10-11 15:43:39.691227
## Template file: gpbackup-tpl.yml
## Pipeline Name: gpbackup-release
## Nightly Trigger: True
Expand Down
Loading

0 comments on commit 6bb94bd

Please sign in to comment.