Skip to content

Commit

Permalink
Move constraints back to postdata for data load performance
Browse files Browse the repository at this point in the history
Originally, constraints were dumped in predata. This caused a restore
issue with tables that used constraints that used the `NOT VALID`
clause. This is because there were scenarios where there was data
grandfathered into tables that violate the constraint that has not yet
been resolved by users, but allowed because of the `NOT VALID` clause.
Because of this issue, and the assumption that nothing can depend on
constraints, constraints were moved from predata to postdata to allow
tables with data that violate `NOT VALID` constraints to restore.
greenplum-db/gpbackup@90b3b54

It was then later discovered that views can depend on constraints. All
constraints, except ones with a `NOT VALID` clause, were moved back into
predata.
greenplum-db/gpbackup@5895076

While this fixes the dependency issues, this is a significant
performance regression because it is much more performant to restore
constraints after all data has been restored. This is because
constraints use internal indexes. If constraints are restored during
predata, data load would slow down considerably because every row insert
would trigger constraint index update.

In order to resolve both the performance regression and the original issue, we
can move constraint restore back into postdata and resolve the circular
dependency of a predata view depending on a postdata constraint using a
dummy view in predata. Now if a view that depends on a constraint is
detected, a dummy view that satisfies dependency conditions for other
relations gets dumped in predata. The dummy view will eventually get
replaced by the dump of the full view definition in postdata after
constraints are dumped.

This circular dependency of predata views depending on postdata
constraints does not exist on GPDB5. View dependency on constraints
exist because of the feature to allow incomplete GROUP BY lists that was
introduced in GPDB6 (postgres 9.1). That upstream commit (referenced below) allows the parser to
recognize functional dependency on primary keys. This allows a table's
other columns to be referenced without listing them in GROUP BY, so long
as the primary key column(s) are listed in GROUP BY.
greenplum-db/gpdb@e49ae8d

Co-authored-by: Karen Huddleston <khuddleston@vmware.com>
  • Loading branch information
kyeap-vmware and khuddlefish committed Feb 2, 2024
1 parent 3f36dd9 commit defa05d
Show file tree
Hide file tree
Showing 19 changed files with 429 additions and 94 deletions.
8 changes: 6 additions & 2 deletions backup/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -276,12 +276,16 @@ func backupPredata(metadataFile *utils.FileWithByteCount, tables []Table, tableO

retrieveViews(&objects)
sequences := retrieveAndBackupSequences(metadataFile, relationMetadata)
domainConstraints := retrieveConstraints(&objects, metadataMap)
domainConstraints, nonDomainConstraints, conMetadata := retrieveConstraints(&objects, metadataMap)

backupDependentObjects(metadataFile, tables, protocols, metadataMap, domainConstraints, objects, sequences, funcInfoMap, tableOnly)
viewsDependingOnConstraints := backupDependentObjects(metadataFile, tables, protocols, metadataMap, domainConstraints, objects, sequences, funcInfoMap, tableOnly)

backupConversions(metadataFile)

// These two are actually in postdata, but we print them here to avoid passing information around too much
backupConstraints(metadataFile, nonDomainConstraints, conMetadata)
backupViewsDependingOnConstraints(metadataFile, viewsDependingOnConstraints)

logCompletionMessage("Pre-data metadata metadata backup")
}

Expand Down
35 changes: 33 additions & 2 deletions backup/dependencies.go
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,11 @@ func PrintDependentObjectStatements(metadataFile *utils.FileWithByteCount, objTo
case ExternalProtocol:
PrintCreateExternalProtocolStatement(metadataFile, objToc, obj, funcInfoMap, objMetadata)
case View:
PrintCreateViewStatement(metadataFile, objToc, obj, objMetadata)
if obj.NeedsDummy {
PrintCreateDummyViewStatement(metadataFile, objToc, obj, objMetadata)
} else {
PrintCreateViewStatement(metadataFile, objToc, obj, objMetadata)
}
case TextSearchParser:
PrintCreateTextSearchParserStatement(metadataFile, objToc, obj, objMetadata)
case TextSearchConfiguration:
Expand All @@ -441,13 +445,40 @@ func PrintDependentObjectStatements(metadataFile *utils.FileWithByteCount, objTo
case UserMapping:
PrintCreateUserMappingStatement(metadataFile, objToc, obj)
case Constraint:
PrintConstraintStatement(metadataFile, objToc, obj, objMetadata)
// Constraints have been moved to postdata, but we need to include
// them for dependency sorting
continue
case Transform:
PrintCreateTransformStatement(metadataFile, objToc, obj, funcInfoMap, objMetadata)
}
// Remove ACLs from metadataMap for the current object since they have been processed
delete(metadataMap, object.GetUniqueID())
}

// Process ACLs for left over objects in the metadata map
printExtensionFunctionACLs(metadataFile, objToc, metadataMap, funcInfoMap)
}

// MarkViewsDependingOnConstraints scans sortableObjs for View objects that
// have at least one dependency whose ClassID is PG_CONSTRAINT_OID according to
// depMap. Each such view has NeedsDummy set to true, is written back into
// sortableObjs at the same index, and is included in the returned slice.
//
// Objects that are not Views, and views with no entry in depMap, are left
// untouched. Each matching view is returned exactly once, regardless of how
// many constraints it depends on, so that callers printing one statement per
// returned view do not emit duplicates.
func MarkViewsDependingOnConstraints(sortableObjs []Sortable, depMap DependencyMap) []View {
	var viewsDependingOnConstraints []View
	for i := range sortableObjs {
		view, ok := sortableObjs[i].(View)
		if !ok {
			continue
		}

		dependencies, ok := depMap[view.GetUniqueID()]
		if !ok {
			continue
		}

		for dependency := range dependencies {
			if dependency.ClassID != PG_CONSTRAINT_OID {
				continue
			}
			// view is a copy extracted from the interface value, so the
			// updated copy must be stored back into the slice.
			view.NeedsDummy = true
			sortableObjs[i] = view
			viewsDependingOnConstraints = append(viewsDependingOnConstraints, view)
			// A single constraint dependency is sufficient; stop here so the
			// view is not appended once per constraint it depends on.
			break
		}
	}
	return viewsDependingOnConstraints
}
37 changes: 37 additions & 0 deletions backup/dependencies_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -241,4 +241,41 @@ COMMENT ON PROTOCOL ext_protocol IS 'protocol';
`, default_parallel))
})
})
Describe("MarkViewsDependingOnConstraints", func() {
It("marks views that depend on constraints", func() {
view1 := backup.View{Schema: "public", Name: "view1", Oid: 1}
view2 := backup.View{Schema: "public", Name: "view2", Oid: 2}
view3 := backup.View{Schema: "public", Name: "view3", Oid: 3}
sortableObjs := []backup.Sortable{view1, view2, view3}

depMap[backup.UniqueID{ClassID: backup.PG_CLASS_OID, Oid: 1}] = map[backup.UniqueID]bool{{ClassID: backup.PG_CONSTRAINT_OID, Oid: 4}: true}

resultViews := backup.MarkViewsDependingOnConstraints(sortableObjs, depMap)
Expect(resultViews).To(HaveLen(1))
Expect(resultViews[0].FQN()).To(Equal("public.view1"))
})
It("marks no views if none depend on constraints", func() {
view1 := backup.View{Schema: "public", Name: "view1", Oid: 1}
view2 := backup.View{Schema: "public", Name: "view2", Oid: 2}
view3 := backup.View{Schema: "public", Name: "view3", Oid: 3}
sortableObjs := []backup.Sortable{view1, view2, view3}

resultViews := backup.MarkViewsDependingOnConstraints(sortableObjs, depMap)
Expect(resultViews).To(HaveLen(0))

})
It("does not marks any object that is not a view", func() {
view1 := backup.View{Schema: "public", Name: "view1", Oid: 1}
view2 := backup.View{Schema: "public", Name: "view2", Oid: 2}
view3 := backup.View{Schema: "public", Name: "view3", Oid: 3}
relation1 := backup.Relation{Schema: "public", Name: "relation1", Oid: 4}
sortableObjs := []backup.Sortable{view1, view2, view3, relation1}

depMap[backup.UniqueID{ClassID: backup.PG_CLASS_OID, Oid: 1}] = map[backup.UniqueID]bool{{ClassID: backup.PG_CONSTRAINT_OID, Oid: 4}: true}

resultViews := backup.MarkViewsDependingOnConstraints(sortableObjs, depMap)
Expect(resultViews).To(HaveLen(1))
Expect(resultViews[0].FQN()).To(Equal("public.view1"))
})
})
})
21 changes: 21 additions & 0 deletions backup/postdata_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -221,4 +221,25 @@ EXECUTE %s abort_any_command();`, evTrigExecReplacement), `ALTER EVENT TRIGGER t
testutils.AssertBufferContents(tocfile.PostdataEntries, buffer, `CREATE STATISTICS public.test_stat (dependencies) ON a, b FROM myschema.mytable;`)
})
})
Describe("PrintCreatePostdataViewStatements", func() {
It("prints all postdata views", func() {
view1 := backup.View{
Schema: "schema1",
Name: "view1",
Definition: sql.NullString{String: "SELECT 1", Valid: true},
}
view2 := backup.View{
Schema: "schema1",
Name: "view2",
Definition: sql.NullString{String: "SELECT 2", Valid: true},
Options: " WITH (check_option = cascaded, security_barrier)",
}
backup.PrintCreatePostdataViewStatements(backupfile, tocfile, []backup.View{view1, view2})
testutils.ExpectEntry(tocfile.PostdataEntries, 0, "schema1", "", "view1", toc.OBJ_VIEW)
testutils.ExpectEntry(tocfile.PostdataEntries, 1, "schema1", "", "view2", toc.OBJ_VIEW)
expected1 := "CREATE OR REPLACE VIEW schema1.view1 AS SELECT 1"
expected2 := "CREATE OR REPLACE VIEW schema1.view2 WITH (check_option = cascaded, security_barrier) AS SELECT 2"
testutils.AssertBufferContents(tocfile.PostdataEntries, buffer, expected1, expected2)
})
})
})
43 changes: 43 additions & 0 deletions backup/predata_relations.go
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,49 @@ func PrintCreateViewStatement(metadataFile *utils.FileWithByteCount, objToc *toc
PrintObjectMetadata(metadataFile, objToc, viewMetadata, view, "", tier)
}

// PrintCreatePostdataViewStatements writes a CREATE OR REPLACE VIEW statement
// for each of the given views and records a TOC entry for it in the
// "postdata" section, overriding whatever section the view's own metadata
// entry reports.
//
// The CREATE statement here should be kept in sync with the one in
// PrintCreateViewStatement.
func PrintCreatePostdataViewStatements(metadataFile *utils.FileWithByteCount, objToc *toc.TOC, views []View) {
	const createStmt = "\n\nCREATE OR REPLACE VIEW %s%s AS %s\n"
	for idx := range views {
		currentView := views[idx]
		// Remember where this statement begins so the TOC entry can record
		// its byte range within the metadata file.
		startByte := metadataFile.ByteCount
		metadataFile.MustPrintf(createStmt, currentView.FQN(), currentView.Options, currentView.Definition.String)

		// Only the entry is taken from the view; the section is forced to
		// postdata.
		_, tocEntry := currentView.GetMetadataEntry()
		viewTier := globalTierMap[currentView.GetUniqueID()]
		objToc.AddMetadataEntry("postdata", tocEntry, startByte, metadataFile.ByteCount, viewTier)
	}
}

// PrintCreateDummyViewStatement writes a placeholder CREATE VIEW statement
// whose select list is one "NULL::<type> [COLLATE <collation>] AS <name>"
// expression per entry in view.ColumnDefs, then records the TOC entry and
// prints the view's object metadata.
//
// The placeholder breaks the following circular dependency: views are
// restored in predata and can depend on constraints, but constraints are
// restored in postdata for performance reasons.
// This is directly based off pg_dump's createDummyViewAsClause.
func PrintCreateDummyViewStatement(metadataFile *utils.FileWithByteCount, objToc *toc.TOC, view View, viewMetadata ObjectMetadata) {
	startByte := metadataFile.ByteCount

	selectClause := "SELECT"
	for idx := range view.ColumnDefs {
		col := view.ColumnDefs[idx]

		// Build the expression for this column on its own first.
		columnExpr := fmt.Sprintf("\n\tNULL::%s", col.Type)
		if col.Collation != "" {
			columnExpr += fmt.Sprintf(" COLLATE %s", col.Collation)
		}
		columnExpr += fmt.Sprintf(" AS %s", col.Name)

		// Every column after the first is preceded by a comma.
		if idx == 0 {
			selectClause += columnExpr
		} else {
			selectClause += "," + columnExpr
		}
	}
	selectClause += ";"

	metadataFile.MustPrintf("\n\nCREATE VIEW %s AS \n%s\n", view.FQN(), selectClause)

	tocSection, tocEntry := view.GetMetadataEntry()
	viewTier := globalTierMap[view.GetUniqueID()]
	objToc.AddMetadataEntry(tocSection, tocEntry, startByte, metadataFile.ByteCount, viewTier)
	PrintObjectMetadata(metadataFile, objToc, viewMetadata, view, "", viewTier)
}

func ExpandIncludesForPartitions(conn *dbconn.DBConn, opts *options.Options, includeOids []string, flags *pflag.FlagSet) error {
if len(opts.GetIncludedTables()) == 0 {
return nil
Expand Down
63 changes: 63 additions & 0 deletions backup/predata_relations_other_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -663,4 +663,67 @@ WITH NO DATA
DISTRIBUTED BY (tablename);`)
})
})
Describe("PrintCreateDummyViewStatement", func() {
var emptyMetadata backup.ObjectMetadata
BeforeEach(func() {
emptyMetadata = backup.ObjectMetadata{}
})
It("can print a simple dummy view with 0 columns", func() {
view := backup.View{
Schema: "schema1",
Name: "view1",
ColumnDefs: []backup.ColumnDefinition{},
}
backup.PrintCreateDummyViewStatement(backupfile, tocfile, view, emptyMetadata)
testutils.ExpectEntry(tocfile.PredataEntries, 0, "schema1", "", "view1", toc.OBJ_VIEW)
Expect(string(buffer.Contents())).To(ContainSubstring("CREATE VIEW schema1.view1 AS"))
Expect(string(buffer.Contents())).To(ContainSubstring("SELECT;"))
})
It("can print a simple dummy view with 1 columns", func() {
view := backup.View{
Schema: "schema1",
Name: "view1",
ColumnDefs: []backup.ColumnDefinition{
backup.ColumnDefinition{Type: "integer", Name: "i"},
},
}
backup.PrintCreateDummyViewStatement(backupfile, tocfile, view, emptyMetadata)
testutils.ExpectEntry(tocfile.PredataEntries, 0, "schema1", "", "view1", toc.OBJ_VIEW)
Expect(string(buffer.Contents())).To(ContainSubstring("CREATE VIEW schema1.view1 AS"))
Expect(string(buffer.Contents())).To(ContainSubstring("SELECT"))
Expect(string(buffer.Contents())).To(ContainSubstring("NULL::integer AS i;"))
})
It("can print a simple dummy view with 2 columns", func() {
view := backup.View{
Schema: "schema1",
Name: "view1",
ColumnDefs: []backup.ColumnDefinition{
backup.ColumnDefinition{Type: "integer", Name: "i"},
backup.ColumnDefinition{Type: "integer", Name: "j"},
},
}
backup.PrintCreateDummyViewStatement(backupfile, tocfile, view, emptyMetadata)
testutils.ExpectEntry(tocfile.PredataEntries, 0, "schema1", "", "view1", toc.OBJ_VIEW)
Expect(string(buffer.Contents())).To(ContainSubstring("CREATE VIEW schema1.view1 AS"))
Expect(string(buffer.Contents())).To(ContainSubstring("SELECT"))
Expect(string(buffer.Contents())).To(ContainSubstring("NULL::integer AS i,"))
Expect(string(buffer.Contents())).To(ContainSubstring("NULL::integer AS j;"))
})
It("can print a simple dummy view with a collation", func() {
view := backup.View{
Schema: "schema1",
Name: "view1",
ColumnDefs: []backup.ColumnDefinition{
backup.ColumnDefinition{Type: "integer", Name: "i"},
backup.ColumnDefinition{Type: "integer", Name: "j", Collation: `pg_catalog."C"`},
},
}
backup.PrintCreateDummyViewStatement(backupfile, tocfile, view, emptyMetadata)
testutils.ExpectEntry(tocfile.PredataEntries, 0, "schema1", "", "view1", toc.OBJ_VIEW)
Expect(string(buffer.Contents())).To(ContainSubstring("CREATE VIEW schema1.view1 AS"))
Expect(string(buffer.Contents())).To(ContainSubstring("SELECT"))
Expect(string(buffer.Contents())).To(ContainSubstring("NULL::integer AS i,"))
Expect(string(buffer.Contents())).To(ContainSubstring(`NULL::integer COLLATE pg_catalog."C" AS j;`))
})
})
})
44 changes: 31 additions & 13 deletions backup/predata_shared.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,39 @@ import (
* There's no built-in function to generate constraint definitions like there is for other types of
* metadata, so this function constructs them.
*/
func PrintConstraintStatement(metadataFile *utils.FileWithByteCount, objToc *toc.TOC, constraint Constraint, conMetadata ObjectMetadata) {
alterStr := "\n\nALTER %s %s ADD CONSTRAINT %s %s;\n"
start := metadataFile.ByteCount
// ConIsLocal should always return true from GetConstraints because we filter out constraints that are inherited using the INHERITS clause, or inherited from a parent partition table. This field only accurately reflects constraints in GPDB6+ because check constraints on parent tables must propagate to children. For GPDB versions 5 or lower, this field will default to false.
objStr := "TABLE ONLY"
if constraint.IsPartitionParent || (constraint.ConType == "c" && constraint.ConIsLocal) {
// this is not strictly an object type but it shares use with them so we use the const here
objStr = toc.OBJ_TABLE
func PrintConstraintStatements(metadataFile *utils.FileWithByteCount, objToc *toc.TOC, constraints []Constraint, conMetadata MetadataMap) {
allConstraints := make([]Constraint, 0)
allFkConstraints := make([]Constraint, 0)
/*
* Because FOREIGN KEY constraints must be backed up after PRIMARY KEY
* constraints, we separate the two types then concatenate the lists,
* so FOREIGN KEY are guaranteed to be printed last.
*/
for _, constraint := range constraints {
if constraint.ConType == "f" {
allFkConstraints = append(allFkConstraints, constraint)
} else {
allConstraints = append(allConstraints, constraint)
}
}
metadataFile.MustPrintf(alterStr, objStr, constraint.OwningObject, constraint.Name, constraint.Def.String)
constraints = append(allConstraints, allFkConstraints...)

alterStr := "\n\nALTER %s %s ADD CONSTRAINT %s %s;\n"
for _, constraint := range constraints {
start := metadataFile.ByteCount

section, entry := constraint.GetMetadataEntry()
tier := globalTierMap[constraint.GetUniqueID()]
objToc.AddMetadataEntry(section, entry, start, metadataFile.ByteCount, tier)
PrintObjectMetadata(metadataFile, objToc, conMetadata, constraint, constraint.OwningObject, tier)
// ConIsLocal should always return true from GetConstraints because we filter out constraints that are inherited using the INHERITS clause, or inherited from a parent partition table. This field only accurately reflects constraints in GPDB6+ because check constraints on parent tables must propagate to children. For GPDB versions 5 or lower, this field will default to false.
objStr := "TABLE ONLY"
if constraint.IsPartitionParent || (constraint.ConType == "c" && constraint.ConIsLocal) {
objStr = toc.OBJ_TABLE
}
metadataFile.MustPrintf(alterStr, objStr, constraint.OwningObject, constraint.Name, constraint.Def.String)

section, entry := constraint.GetMetadataEntry()
tier := globalTierMap[constraint.GetUniqueID()]
objToc.AddMetadataEntry(section, entry, start, metadataFile.ByteCount, tier)
PrintObjectMetadata(metadataFile, objToc, conMetadata[constraint.GetUniqueID()], constraint, constraint.OwningObject, tier)
}
}

func PrintCreateSchemaStatements(metadataFile *utils.FileWithByteCount, objToc *toc.TOC, schemas []Schema, schemaMetadata MetadataMap) {
Expand Down
Loading

0 comments on commit defa05d

Please sign in to comment.