From 1e4b5a61d475dc5beac1592f93fff9614e05d6a1 Mon Sep 17 00:00:00 2001 From: Sebastian Widmer Date: Mon, 27 Feb 2023 14:20:14 +0100 Subject: [PATCH] Add check for facts with inconsistent fact/dimension time ranges. --- check_command.go | 31 +++- pkg/check/inconsistent.go | 52 +++++++ pkg/check/inconsistent_test.go | 138 ++++++++++++++++++ ...force_date_times_timestamp_consistency.sql | 8 + pkg/db/types.go | 75 ++++++++++ pkg/db/util.go | 3 + pkg/tenantmapping/mapping_test.go | 16 +- 7 files changed, 306 insertions(+), 17 deletions(-) create mode 100644 pkg/check/inconsistent.go create mode 100644 pkg/check/inconsistent_test.go create mode 100644 pkg/db/migrations/0014_enforce_date_times_timestamp_consistency.sql diff --git a/check_command.go b/check_command.go index 40dbe1a..5289c87 100644 --- a/check_command.go +++ b/check_command.go @@ -56,17 +56,32 @@ func (cmd *checkMissingCommand) execute(cliCtx *cli.Context) error { if err != nil { return err } + inconsistent, err := check.Inconsistent(ctx, tx) + if err != nil { + return err + } + + if len(missing) != 0 { + w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0) + defer w.Flush() + fmt.Fprint(w, "Table\tMissing Field\tID\tSource\n") + for _, m := range missing { + fmt.Fprintf(w, "%s\t%s\t%s\t%s\n", m.Table, m.MissingField, m.ID, m.Source) + } + } - if len(missing) == 0 { - return nil + if len(inconsistent) != 0 { + w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0) + defer w.Flush() + fmt.Fprint(w, "Table\tDimension ID\tFact Time\tDimension Time Range\n") + for _, i := range inconsistent { + fmt.Fprintf(w, "%s\t%s\t%s\t%s\n", i.Table, i.DimensionID, i.FactTime, i.DimensionRange) + } } - w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0) - defer w.Flush() - fmt.Fprint(w, "Table\tMissing Field\tID\tSource\n") - for _, m := range missing { - fmt.Fprintf(w, "%s\t%s\t%s\t%s\n", m.Table, m.MissingField, m.ID, m.Source) + if len(missing) == 0 && len(inconsistent) == 0 { + return cli.Exit("No missing or inconsistent data found.", 0) } - return cli.Exit(fmt.Sprintf("%d missing entries found.", len(missing)), 1) + return cli.Exit(fmt.Sprintf("%d missing, %d inconsistent entries found.", len(missing), len(inconsistent)), 1) } diff --git a/pkg/check/inconsistent.go b/pkg/check/inconsistent.go new file mode 100644 index 0000000..1a98a56 --- /dev/null +++ b/pkg/check/inconsistent.go @@ -0,0 +1,52 @@ +package check + +import ( + "context" + "fmt" + "strings" + + "github.com/jmoiron/sqlx" + "go.uber.org/multierr" + + "github.com/appuio/appuio-cloud-reporting/pkg/db" +) + +var dimensionsWithTimeranges = []db.Model{ + db.Discount{}, + db.Product{}, + db.Query{}, + db.Tenant{}, +} + +const inconsistentFactsQuery = ` +select distinct '{{table}}' as "table", {{table}}.id as DimensionID, date_times.timestamp::text as FactTime, {{table}}.during::text as DimensionRange from facts + inner join {{table}} on facts.{{foreign_key}} = {{table}}.id + inner join date_times on facts.date_time_id = date_times.id + where false = {{table}}.during @> date_times.timestamp +` + +// InconsistentField represents an inconsistent field. +type InconsistentField struct { + Table string + + DimensionID string + + FactTime string + DimensionRange string +} + +// Inconsistent checks for facts with inconsistent time ranges. +// Those are facts that reference a dimension with a time range that does not include the fact's timestamp. +func Inconsistent(ctx context.Context, tx sqlx.QueryerContext) ([]InconsistentField, error) { + var inconsistent []InconsistentField + var errors []error + for _, m := range dimensionsWithTimeranges { + var ic []InconsistentField + q := strings.NewReplacer("{{table}}", m.TableName(), "{{foreign_key}}", m.ForeignKeyName()).Replace(inconsistentFactsQuery) + err := sqlx.SelectContext(ctx, tx, &ic, fmt.Sprintf(`WITH inconsistent AS (%s) SELECT * FROM inconsistent ORDER BY "table",FactTime`, q)) + errors = append(errors, err) + inconsistent = append(inconsistent, ic...) + } + + return inconsistent, multierr.Combine(errors...) +} diff --git a/pkg/check/inconsistent_test.go b/pkg/check/inconsistent_test.go new file mode 100644 index 0000000..b8c7198 --- /dev/null +++ b/pkg/check/inconsistent_test.go @@ -0,0 +1,138 @@ +package check_test + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/jackc/pgtype" + "github.com/jmoiron/sqlx" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + + "github.com/appuio/appuio-cloud-reporting/pkg/check" + "github.com/appuio/appuio-cloud-reporting/pkg/db" + "github.com/appuio/appuio-cloud-reporting/pkg/db/dbtest" +) + +type InconsistentTestSuite struct { + dbtest.Suite +} + +func (s *InconsistentTestSuite) TestInconsistentFields() { + t := s.T() + tx := s.Begin() + defer tx.Rollback() + require.NoError(t, func() error { _, err := tx.Exec("set timezone to 'UTC';"); return err }()) + + m, err := check.Inconsistent(context.Background(), tx) + require.NoError(t, err) + require.Len(t, m, 0) + + expectedInconsistent := s.requireInconsistentTestEntries(t, tx) + + m, err = check.Inconsistent(context.Background(), tx) + require.NoError(t, err) + require.Equal(t, expectedInconsistent, m) +} + +func (s *InconsistentTestSuite) requireInconsistentTestEntries(t *testing.T, tdb *sqlx.Tx) []check.InconsistentField { + var category db.Category + require.NoError(t, + db.GetNamed(tdb, &category, + "INSERT INTO categories (source,target) VALUES (:source,:target) RETURNING *", db.Category{ + Source: "af-south-1:uroboros-research", + })) + + at := time.Date(2023, time.January, 2, 3, 0, 0, 0, time.UTC) + var dateTime db.DateTime + require.NoError(t, + db.GetNamed(tdb, &dateTime, + "INSERT INTO date_times (timestamp, year, month, day, hour) VALUES (:timestamp, :year, :month, :day, :hour) RETURNING *", + db.BuildDateTime(at), + )) + + discountOutsideRange, err := db.CreateDiscount(tdb, db.Discount{ + Source: "test_memory:us-rac-2", + During: rangeOutsideDateTimes(), + }) + require.NoError(t, err) + + var tenantOutsideRange db.Tenant + require.NoError(t, + db.GetNamed(tdb, &tenantOutsideRange, + "INSERT INTO tenants (source,during) VALUES (:source,:during) RETURNING *", db.Tenant{ + Source: "tricell", + During: rangeOutsideDateTimes(), + })) + + var tenantInsideRange db.Tenant + require.NoError(t, + db.GetNamed(tdb, &tenantInsideRange, + "INSERT INTO tenants (source,during) VALUES (:source,:during) RETURNING *", db.Tenant{ + Source: "tricell", + During: db.Timerange(db.MustTimestamp(at), db.MustTimestamp(at.Add(time.Hour))), + })) + + var productOutsideRange db.Product + require.NoError(t, + db.GetNamed(tdb, &productOutsideRange, + "INSERT INTO products (source,target,amount,unit,during) VALUES (:source,:target,:amount,:unit,:during) RETURNING *", db.Product{ + Source: "test_memory:us-rac-2", + During: rangeOutsideDateTimes(), + })) + + var queryOutsideRange db.Query + require.NoError(t, + db.GetNamed(tdb, &queryOutsideRange, + "INSERT INTO queries (name,query,unit,during) VALUES (:name,:query,:unit,:during) RETURNING *", db.Query{ + Name: "test_memory", + Query: "test_memory", + Unit: "GiB", + During: rangeOutsideDateTimes(), + })) + + testFact := db.Fact{ + DateTimeId: dateTime.Id, + QueryId: queryOutsideRange.Id, + TenantId: tenantOutsideRange.Id, + CategoryId: category.Id, + ProductId: productOutsideRange.Id, + DiscountId: discountOutsideRange.Id, + Quantity: 1, + } + createFact(t, tdb, testFact) + testFact.TenantId = tenantInsideRange.Id + createFact(t, tdb, testFact) + + formattedAt := at.Format(db.PGTimestampFormat) + formattedRange := fmt.Sprintf("[\"%s\",\"%s\")", + rangeOutsideDateTimes().Lower.Time.Format(db.PGTimestampFormat), + rangeOutsideDateTimes().Upper.Time.Format(db.PGTimestampFormat), + ) + return []check.InconsistentField{ + {Table: "discounts", DimensionID: discountOutsideRange.Id, FactTime: formattedAt, DimensionRange: formattedRange}, + {Table: "products", DimensionID: productOutsideRange.Id, FactTime: formattedAt, DimensionRange: formattedRange}, + {Table: "queries", DimensionID: queryOutsideRange.Id, FactTime: formattedAt, DimensionRange: formattedRange}, + {Table: "tenants", DimensionID: tenantOutsideRange.Id, FactTime: formattedAt, DimensionRange: formattedRange}, + } +} + +func rangeOutsideDateTimes() pgtype.Tstzrange { + return db.Timerange( + db.MustTimestamp(time.Date(2023, time.January, 2, 10, 0, 0, 0, time.UTC)), + db.MustTimestamp(time.Date(2023, time.January, 2, 11, 0, 0, 0, time.UTC)), + ) +} + +func createFact(t *testing.T, tx *sqlx.Tx, fact db.Fact) (rf db.Fact) { + require.NoError(t, + db.GetNamed(tx, &rf, + "INSERT INTO facts (date_time_id,query_id,tenant_id,category_id,product_id,discount_id,quantity) VALUES (:date_time_id,:query_id,:tenant_id,:category_id,:product_id,:discount_id,:quantity) RETURNING *", fact)) + return +} + +func TestInconsistentTestSuite(t *testing.T) { + suite.Run(t, new(InconsistentTestSuite)) +} diff --git a/pkg/db/migrations/0014_enforce_date_times_timestamp_consistency.sql b/pkg/db/migrations/0014_enforce_date_times_timestamp_consistency.sql new file mode 100644 index 0000000..d61bac3 --- /dev/null +++ b/pkg/db/migrations/0014_enforce_date_times_timestamp_consistency.sql @@ -0,0 +1,8 @@ +-- Timestamp duplicates the (year, month, day, hour) fields, but is more convenient to use. +-- I'd delete the fields but that would be a pretty breaking change. +-- So we just enforce consistency between the two fields. + +ALTER TABLE date_times + ADD CONSTRAINT date_times_timestamp_check_consistency CHECK ( + (date_times.year || '-' || date_times.month || '-' || date_times.day || ' ' || date_times.hour || ':00:00+00')::timestamptz = date_times.timestamp + ); diff --git a/pkg/db/types.go b/pkg/db/types.go index 9dea422..7eaca1d 100644 --- a/pkg/db/types.go +++ b/pkg/db/types.go @@ -8,6 +8,13 @@ import ( "github.com/jackc/pgtype" ) +type Model interface { + TableName() string + ForeignKeyName() string +} + +var _ Model = Query{} + type Query struct { Id string ParentID sql.NullString `db:"parent_id"` @@ -22,6 +29,14 @@ type Query struct { subQueries []Query } +func (q Query) TableName() string { + return "queries" +} + +func (q Query) ForeignKeyName() string { + return "query_id" +} + // CreateQuery creates the given query func CreateQuery(p NamedPreparer, in Query) (Query, error) { var query Query @@ -30,6 +45,8 @@ func CreateQuery(p NamedPreparer, in Query) (Query, error) { return query, err } +var _ Model = Tenant{} + type Tenant struct { Id string @@ -40,6 +57,16 @@ type Tenant struct { During pgtype.Tstzrange } +func (t Tenant) TableName() string { + return "tenants" +} + +func (t Tenant) ForeignKeyName() string { + return "tenant_id" +} + +var _ Model = Category{} + type Category struct { Id string @@ -48,6 +75,16 @@ type Category struct { Target sql.NullString } +func (c Category) TableName() string { + return "categories" +} + +func (c Category) ForeignKeyName() string { + return "category_id" +} + +var _ Model = Product{} + type Product struct { Id string @@ -61,6 +98,14 @@ type Product struct { During pgtype.Tstzrange } +func (p Product) TableName() string { + return "products" +} + +func (p Product) ForeignKeyName() string { + return "product_id" +} + // CreateProduct creates the given product func CreateProduct(p NamedPreparer, in Product) (Product, error) { var product Product @@ -69,6 +114,8 @@ func CreateProduct(p NamedPreparer, in Product) (Product, error) { return product, err } +var _ Model = Discount{} + type Discount struct { Id string @@ -80,6 +127,14 @@ type Discount struct { During pgtype.Tstzrange } +func (d Discount) TableName() string { + return "discounts" +} + +func (d Discount) ForeignKeyName() string { + return "discount_id" +} + // CreateDiscount creates the given discount func CreateDiscount(p NamedPreparer, in Discount) (Discount, error) { var discount Discount @@ -88,6 +143,8 @@ func CreateDiscount(p NamedPreparer, in Discount) (Discount, error) { return discount, err } +var _ Model = DateTime{} + type DateTime struct { Id string @@ -99,6 +156,16 @@ type DateTime struct { Hour int } +func (d DateTime) TableName() string { + return "date_times" +} + +func (d DateTime) ForeignKeyName() string { + return "date_time_id" +} + +var _ Model = Fact{} + type Fact struct { Id string @@ -112,6 +179,14 @@ type Fact struct { Quantity float64 } +func (f Fact) TableName() string { + return "facts" +} + +func (f Fact) ForeignKeyName() string { + return "fact_id" +} + // BuildDateTime builds a DateTime object from the given timestamp. func BuildDateTime(ts time.Time) DateTime { return DateTime{ diff --git a/pkg/db/util.go b/pkg/db/util.go index 6071ad5..ba07c45 100644 --- a/pkg/db/util.go +++ b/pkg/db/util.go @@ -8,6 +8,9 @@ import ( "github.com/jmoiron/sqlx" ) +// PGTimestampFormat is the string representation of PostgreSQL's `timestamptz`. +const PGTimestampFormat = "2006-01-02 15:04:05-07" + // NamedPreparer is an interface used by GetNamed. type NamedPreparer interface { PrepareNamed(query string) (*sqlx.NamedStmt, error) diff --git a/pkg/tenantmapping/mapping_test.go b/pkg/tenantmapping/mapping_test.go index 7b81176..df9cfc2 100644 --- a/pkg/tenantmapping/mapping_test.go +++ b/pkg/tenantmapping/mapping_test.go @@ -19,8 +19,6 @@ import ( apiv1 "github.com/prometheus/client_golang/api/prometheus/v1" ) -const pgTimestampFormat = "2006-01-02 15:04:05-07" - type MappingSuite struct { testsuite.Suite } @@ -74,17 +72,17 @@ func (s *MappingSuite) TestReport_RunReportRemapsExistingTenants() { { Source: "bar-org", Target: "666", - During: fmt.Sprintf("[\"%s\",infinity)", ts.In(time.UTC).Format(pgTimestampFormat)), + During: fmt.Sprintf("[\"%s\",infinity)", ts.In(time.UTC).Format(db.PGTimestampFormat)), }, { Source: "foo-org", Target: "555", - During: fmt.Sprintf("[\"%s\",infinity)", ts.In(time.UTC).Format(pgTimestampFormat)), + During: fmt.Sprintf("[\"%s\",infinity)", ts.In(time.UTC).Format(db.PGTimestampFormat)), }, { Source: "foo-org", Target: "be-other", - During: fmt.Sprintf("[-infinity,\"%s\")", ts.In(time.UTC).Format(pgTimestampFormat)), + During: fmt.Sprintf("[-infinity,\"%s\")", ts.In(time.UTC).Format(db.PGTimestampFormat)), }, } var tenants []comparableTenant @@ -142,22 +140,22 @@ func (s *MappingSuite) TestReport_RunReport_NewUpperBoundInfinityOrUntilNextRang { Source: "bar-org", Target: "666", - During: fmt.Sprintf("[\"%s\",infinity)", ts.In(time.UTC).Format(pgTimestampFormat)), + During: fmt.Sprintf("[\"%s\",infinity)", ts.In(time.UTC).Format(db.PGTimestampFormat)), }, { Source: "bar-org", Target: "be-other", - During: fmt.Sprintf("[-infinity,\"%s\")", pastTS.In(time.UTC).Format(pgTimestampFormat)), + During: fmt.Sprintf("[-infinity,\"%s\")", pastTS.In(time.UTC).Format(db.PGTimestampFormat)), }, { Source: "foo-org", Target: "555", - During: fmt.Sprintf("[\"%s\",\"%s\")", ts.In(time.UTC).Format(pgTimestampFormat), futureTS.In(time.UTC).Format(pgTimestampFormat)), + During: fmt.Sprintf("[\"%s\",\"%s\")", ts.In(time.UTC).Format(db.PGTimestampFormat), futureTS.In(time.UTC).Format(db.PGTimestampFormat)), }, { Source: "foo-org", Target: "be-other", - During: fmt.Sprintf("[\"%s\",infinity)", futureTS.In(time.UTC).Format(pgTimestampFormat)), + During: fmt.Sprintf("[\"%s\",infinity)", futureTS.In(time.UTC).Format(db.PGTimestampFormat)), }, } var tenants []comparableTenant