diff --git a/pkg/bench/tpcc/BUILD.bazel b/pkg/bench/tpcc/BUILD.bazel index d432f6dd6695..1609330ada65 100644 --- a/pkg/bench/tpcc/BUILD.bazel +++ b/pkg/bench/tpcc/BUILD.bazel @@ -21,6 +21,7 @@ go_test( embed = [":tpcc"], deps = [ "//pkg/base", + "//pkg/kv/kvserver/logstore", "//pkg/security/securityassets", "//pkg/security/securitytest", "//pkg/server", diff --git a/pkg/bench/tpcc/subprocess_commands_test.go b/pkg/bench/tpcc/subprocess_commands_test.go index 47629319737d..660a03e9043b 100644 --- a/pkg/bench/tpcc/subprocess_commands_test.go +++ b/pkg/bench/tpcc/subprocess_commands_test.go @@ -19,8 +19,9 @@ import ( "time" "github.com/cockroachdb/cockroach/pkg/base" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/logstore" + "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" - "github.com/cockroachdb/cockroach/pkg/testutils/testcluster" "github.com/cockroachdb/cockroach/pkg/util/envutil" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/workload" @@ -61,17 +62,16 @@ var ( require.True(t, ok) defer log.Scope(t).Close(t) - tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{ - ServerArgs: base.TestServerArgs{ - StoreSpecs: []base.StoreSpec{{Path: storeDir}}, - }, + srv, db, _ := serverutils.StartServer(t, base.TestServerArgs{ + StoreSpecs: []base.StoreSpec{{Path: storeDir}}, }) - defer tc.Stopper().Stop(ctx) + defer srv.Stopper().Stop(ctx) + + // Make the generation faster. + logstore.DisableSyncRaftLog.Override(context.Background(), &srv.SystemLayer().ClusterSettings().SV, true) - db := tc.ServerConn(0) tdb := sqlutils.MakeSQLRunner(db) tdb.Exec(t, "CREATE DATABASE "+databaseName) - tdb.Exec(t, "SET CLUSTER SETTING kv.raft_log.synchronization.disabled = true") tdb.Exec(t, "USE "+databaseName) tpcc, err := workload.Get("tpcc") require.NoError(t, err) diff --git a/pkg/bench/tpcc/tpcc_bench_options_test.go b/pkg/bench/tpcc/tpcc_bench_options_test.go index 684ab712f59d..7dc5687b6c68 100644 --- a/pkg/bench/tpcc/tpcc_bench_options_test.go +++ b/pkg/bench/tpcc/tpcc_bench_options_test.go @@ -16,6 +16,7 @@ import ( "testing" "github.com/cockroachdb/cockroach/pkg/base" + "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" ) type option interface { @@ -45,6 +46,7 @@ func (o options) apply(cfg *benchmarkConfig) { type benchmarkConfig struct { workloadFlags []string argsGenerator serverArgs + setupServer []func(b testing.TB, s serverutils.TestServerInterface) setupStmts []string } @@ -80,4 +82,20 @@ func setupStmt(stmt string) option { return setupStmtOption(stmt) } +var _ = setupStmt // silence unused linter + func (s setupStmtOption) String() string { return string(s) } + +func setupServer(fn func(tb testing.TB, s serverutils.TestServerInterface)) option { + return setupServerOption{fn} +} + +type setupServerOption struct { + fn func(tb testing.TB, s serverutils.TestServerInterface) +} + +func (s setupServerOption) apply(cfg *benchmarkConfig) { + cfg.setupServer = append(cfg.setupServer, s.fn) +} + +func (s setupServerOption) String() string { return "setup server" } diff --git a/pkg/bench/tpcc/tpcc_bench_test.go b/pkg/bench/tpcc/tpcc_bench_test.go index 0247704f030f..a17a82577e06 100644 --- a/pkg/bench/tpcc/tpcc_bench_test.go +++ b/pkg/bench/tpcc/tpcc_bench_test.go @@ -25,6 +25,7 @@ import ( "testing" "github.com/cockroachdb/cockroach/pkg/base" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/logstore" "github.com/cockroachdb/cockroach/pkg/testutils" "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" @@ -66,8 +67,9 @@ func BenchmarkTPCC(b *testing.B) { StoreSpecs: []base.StoreSpec{{Path: td}}, }, cleanup }), - setupStmt(` -SET CLUSTER SETTING kv.raft_log.synchronization.disabled = true`), + setupServer(func(tb testing.TB, s serverutils.TestServerInterface) { + logstore.DisableSyncRaftLog.Override(context.Background(), &s.SystemLayer().ClusterSettings().SV, true) + }), } for _, opts := range []options{ @@ -142,6 +144,9 @@ func (bm *benchmark) startCockroach(b testing.TB) { s.Stopper().Stop(context.Background()) }) + for _, fn := range bm.setupServer { + fn(b, s) + } for _, stmt := range bm.setupStmts { sqlutils.MakeSQLRunner(db).Exec(b, stmt) } diff --git a/pkg/cmd/allocsim/main.go b/pkg/cmd/allocsim/main.go index 9882fcfa70b8..76a5e167959e 100644 --- a/pkg/cmd/allocsim/main.go +++ b/pkg/cmd/allocsim/main.go @@ -538,13 +538,15 @@ func main() { os.Exit(exitStatus) }() + for i := 0; i < c.Cfg.NumNodes; i++ { + cfg := c.Cfg.PerNodeCfg[i] + cfg.ExtraEnv = append(cfg.ExtraEnv, "COCKROACH_DISABLE_RAFT_LOG_SYNCHRONIZATION_UNSAFE=true") + c.Cfg.PerNodeCfg[i] = cfg + } + c.Start(context.Background()) defer c.Close() c.UpdateZoneConfig(1, 1<<20) - _, err := c.Nodes[0].DB().Exec("SET CLUSTER SETTING kv.raft_log.synchronization.disabled = true") - if err != nil { - log.Fatalf(context.Background(), "%v", err) - } if len(config.Localities) != 0 { a.runWithConfig(config) } else { diff --git a/pkg/cmd/roachprod-microbench/executor.go b/pkg/cmd/roachprod-microbench/executor.go index 952326bbc7f2..c7abcf5fc487 100644 --- a/pkg/cmd/roachprod-microbench/executor.go +++ b/pkg/cmd/roachprod-microbench/executor.go @@ -439,11 +439,7 @@ func (e *executor) executeBenchmarks() error { e.log.Printf("Completed benchmarks, results located at %s", e.outputDir) if errorCount != 0 { - if e.lenient { - e.log.Printf("Ignoring errors in benchmark results (lenient flag was set)") - } else { - return errors.Newf("Found %d errors during remote execution", errorCount) - } + return errors.Newf("Found %d errors during remote execution", errorCount) } return nil } diff --git a/pkg/cmd/roachprod-microbench/main.go b/pkg/cmd/roachprod-microbench/main.go index 7f15d56ce4c5..5fd06bfeb6d2 100644 --- a/pkg/cmd/roachprod-microbench/main.go +++ b/pkg/cmd/roachprod-microbench/main.go @@ -92,7 +92,7 @@ func makeRunCommand() *cobra.Command { cmd.Flags().StringSliceVar(&config.excludeList, "exclude", []string{}, "comma-separated regex of packages and benchmarks to exclude e.g. 'pkg/util/.*:BenchmarkIntPool,pkg/sql:.*'") cmd.Flags().IntVar(&config.iterations, "iterations", config.iterations, "number of iterations to run each benchmark") cmd.Flags().BoolVar(&config.copyBinaries, "copy", config.copyBinaries, "copy and extract test binaries and libraries to the target cluster") - cmd.Flags().BoolVar(&config.lenient, "lenient", config.lenient, "tolerate errors in the benchmark results") + cmd.Flags().BoolVar(&config.lenient, "lenient", config.lenient, "tolerate errors while running benchmarks") cmd.Flags().BoolVar(&config.affinity, "affinity", config.affinity, "run benchmarks with iterations and binaries having affinity to the same node, only applies when more than one archive is specified") cmd.Flags().BoolVar(&config.quiet, "quiet", config.quiet, "suppress roachprod progress output") diff --git a/pkg/kv/kvserver/client_merge_test.go b/pkg/kv/kvserver/client_merge_test.go index 90c4ac8e190b..54a42c936bdc 100644 --- a/pkg/kv/kvserver/client_merge_test.go +++ b/pkg/kv/kvserver/client_merge_test.go @@ -3718,6 +3718,8 @@ func TestStoreRangeMergeRaftSnapshot(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) + skip.UnderMetamorphicWithIssue(t, 111624) + // We will be testing the SSTs written on store2's engine. var receivingEng, sendingEng storage.Engine // All of these variables will be populated later, after starting the cluster.. diff --git a/pkg/kv/kvserver/logstore/logstore.go b/pkg/kv/kvserver/logstore/logstore.go index d3ca200feb0c..a831a13bf6e5 100644 --- a/pkg/kv/kvserver/logstore/logstore.go +++ b/pkg/kv/kvserver/logstore/logstore.go @@ -38,7 +38,8 @@ import ( "go.etcd.io/raft/v3/raftpb" ) -var disableSyncRaftLog = settings.RegisterBoolSetting( +// DisableSyncRaftLog disables raft log synchronization and can cause data loss. +var DisableSyncRaftLog = settings.RegisterBoolSetting( settings.SystemOnly, "kv.raft_log.disable_synchronization_unsafe", "disables synchronization of Raft log writes to persistent storage. "+ @@ -46,7 +47,8 @@ var disableSyncRaftLog = settings.RegisterBoolSetting( "This not only disables fsync, but also disables flushing writes to the OS buffer. "+ "The setting is meant for internal testing only and SHOULD NOT be used in production.", envutil.EnvOrDefaultBool("COCKROACH_DISABLE_RAFT_LOG_SYNCHRONIZATION_UNSAFE", false), - settings.WithName("kv.raft_log.synchronization.disabled"), + settings.WithName("kv.raft_log.synchronization.unsafe.disabled"), + settings.WithUnsafe, ) var enableNonBlockingRaftLogSync = settings.RegisterBoolSetting( @@ -232,7 +234,7 @@ func (s *LogStore) storeEntriesAndCommitBatch( stats.PebbleBegin = timeutil.Now() stats.PebbleBytes = int64(batch.Len()) wantsSync := len(m.Responses) > 0 - willSync := wantsSync && !disableSyncRaftLog.Get(&s.Settings.SV) + willSync := wantsSync && !DisableSyncRaftLog.Get(&s.Settings.SV) // Use the non-blocking log sync path if we are performing a log sync ... nonBlockingSync := willSync && // and the cluster setting is enabled ... diff --git a/pkg/server/BUILD.bazel b/pkg/server/BUILD.bazel index 01b066c3fddc..83897cdf7cd6 100644 --- a/pkg/server/BUILD.bazel +++ b/pkg/server/BUILD.bazel @@ -140,6 +140,7 @@ go_library( "//pkg/kv/kvserver/kvstorage", "//pkg/kv/kvserver/liveness", "//pkg/kv/kvserver/liveness/livenesspb", + "//pkg/kv/kvserver/logstore", "//pkg/kv/kvserver/loqrecovery", "//pkg/kv/kvserver/loqrecovery/loqrecoverypb", "//pkg/kv/kvserver/protectedts", diff --git a/pkg/server/import_ts.go b/pkg/server/import_ts.go index 37c55151096e..e35432a67ddb 100644 --- a/pkg/server/import_ts.go +++ b/pkg/server/import_ts.go @@ -23,6 +23,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/kv" "github.com/cockroachdb/cockroach/pkg/kv/kvpb" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/logstore" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/server/status/statuspb" "github.com/cockroachdb/cockroach/pkg/sql/sessiondata" @@ -73,9 +74,11 @@ func maybeImportTS(ctx context.Context, s *topLevelServer) (returnErr error) { // is not in use in the data set to import. s.node.suppressNodeStatus.Set(true) + // Disable raft log synchronization to make the server generally faster. + logstore.DisableSyncRaftLog.Override(ctx, &s.cfg.Settings.SV, true) + // Disable writing of new timeseries, as well as roll-ups and deletion. for _, stmt := range []string{ - "SET CLUSTER SETTING kv.raft_log.synchronization.disabled = 'true';", "SET CLUSTER SETTING timeseries.storage.enabled = 'false';", "SET CLUSTER SETTING timeseries.storage.resolution_10s.ttl = '99999h';", "SET CLUSTER SETTING timeseries.storage.resolution_30m.ttl = '99999h';", diff --git a/pkg/settings/common.go b/pkg/settings/common.go index 3a040da53e29..0328c0334bd2 100644 --- a/pkg/settings/common.go +++ b/pkg/settings/common.go @@ -24,6 +24,7 @@ type common struct { name SettingName description string visibility Visibility + unsafe bool slot slotIdx nonReportable bool retired bool @@ -119,6 +120,17 @@ func (c *common) setName(name SettingName) { c.name = name } +// setUnsafe is used to override the unsafe status of the setting. +// Refer to the WithUnsafe option for details. +func (c *common) setUnsafe() { + c.unsafe = true +} + +// IsUnsafe indicates whether the setting is unsafe. +func (c *common) IsUnsafe() bool { + return c.unsafe +} + // SetOnChange installs a callback to be called when a setting's value changes. // `fn` should avoid doing long-running or blocking work as it is called on the // goroutine which handles all settings updates. diff --git a/pkg/settings/lint/lint_test.go b/pkg/settings/lint/lint_test.go index 7ecc23265254..5d457e4c8111 100644 --- a/pkg/settings/lint/lint_test.go +++ b/pkg/settings/lint/lint_test.go @@ -170,6 +170,13 @@ func TestLintClusterSettingNames(t *testing.T) { } } + if strings.Contains(settingName, "unsafe") && !setting.IsUnsafe() { + return errors.Errorf("%s: setting name contains \"unsafe\" but is not marked unsafe (hint: use option settings.WithUnsafe)", settingName) + } + if setting.IsUnsafe() && !strings.Contains(settingName, "unsafe") { + return errors.Errorf("%s: setting marked as unsafe but its name does not contain \"unsafe\"", settingName) + } + return nil }() if nameErr != nil { @@ -221,3 +228,24 @@ func TestLintClusterSettingDescriptions(t *testing.T) { } } } + +func TestLintClusterVisibility(t *testing.T) { + defer leaktest.AfterTest(t)() + + skip.UnderRace(t, "lint only test") + skip.UnderDeadlock(t, "lint only test") + skip.UnderStress(t, "lint only test") + + keys := settings.Keys(true /* forSystemTenant */) + for _, settingKey := range keys { + setting, ok := settings.LookupForLocalAccessByKey(settingKey, true /* forSystemTenant */) + if !ok { + t.Errorf("registry bug: setting with key %q not found", settingKey) + continue + } + + if setting.IsUnsafe() && setting.Visibility() == settings.Public { + t.Errorf("%s: unsafe settings must not be public", setting.Name()) + } + } +} diff --git a/pkg/settings/masked.go b/pkg/settings/masked.go index 850063be3024..7984079e6ded 100644 --- a/pkg/settings/masked.go +++ b/pkg/settings/masked.go @@ -58,6 +58,11 @@ func (s *maskedSetting) Class() Class { return s.setting.Class() } +// IsUnsafe returns whether the underlying setting is unsafe. +func (s *maskedSetting) IsUnsafe() bool { + return s.setting.IsUnsafe() +} + // TestingIsReportable is used in testing for reportability. func TestingIsReportable(s Setting) bool { if _, ok := s.(*maskedSetting); ok { diff --git a/pkg/settings/options.go b/pkg/settings/options.go index 81ae40ecae5d..824b17aec082 100644 --- a/pkg/settings/options.go +++ b/pkg/settings/options.go @@ -59,6 +59,12 @@ func WithVisibility(v Visibility) SettingOption { }} } +// WithUnsafe indicates that the setting is unsafe. +// Unsafe settings need an interlock to be updated. They also cannot be public. +var WithUnsafe SettingOption = SettingOption{commonOpt: func(c *common) { + c.setUnsafe() +}} + // WithPublic sets public visibility. var WithPublic SettingOption = WithVisibility(Public) diff --git a/pkg/settings/setting.go b/pkg/settings/setting.go index 21461b8a0ecd..22434fa319b8 100644 --- a/pkg/settings/setting.go +++ b/pkg/settings/setting.go @@ -77,6 +77,10 @@ type Setting interface { // settings are still accessible to users, but they don't get listed out when // retrieving all settings. Visibility() Visibility + + // IsUnsafe returns whether the setting is unsafe, and thus requires + // a special interlock to set. + IsUnsafe() bool } // NonMaskedSetting is the exported interface of non-masked settings. A diff --git a/pkg/sql/exec_util.go b/pkg/sql/exec_util.go index 36604268684c..4e59903fc62a 100644 --- a/pkg/sql/exec_util.go +++ b/pkg/sql/exec_util.go @@ -3649,6 +3649,10 @@ func (m *sessionDataMutator) SetSharedLockingForSerializable(val bool) { m.data.SharedLockingForSerializable = val } +func (m *sessionDataMutator) SetUnsafeSettingInterlockKey(val string) { + m.data.UnsafeSettingInterlockKey = val +} + // Utility functions related to scrubbing sensitive information on SQL Stats. // quantizeCounts ensures that the Count field in the diff --git a/pkg/sql/sessiondatapb/local_only_session_data.proto b/pkg/sql/sessiondatapb/local_only_session_data.proto index b99b3463d6fb..a472edce3be1 100644 --- a/pkg/sql/sessiondatapb/local_only_session_data.proto +++ b/pkg/sql/sessiondatapb/local_only_session_data.proto @@ -449,6 +449,10 @@ message LocalOnlySessionData { // forms of DDL inside explicit txns). bool strict_ddl_atomicity = 111 [(gogoproto.customname) = "StrictDDLAtomicity"]; + // UnsafeSettingInterlockKey needs to be set to a special string + // before SET CLUSTER SETTING is allowed on an unsafe setting. + string unsafe_setting_interlock_key = 113; + /////////////////////////////////////////////////////////////////////////// // WARNING: consider whether a session parameter you're adding needs to // // be propagated to the remote nodes. If so, that parameter should live // diff --git a/pkg/sql/set_cluster_setting.go b/pkg/sql/set_cluster_setting.go index 62973e29a7f3..b85ec7a642bb 100644 --- a/pkg/sql/set_cluster_setting.go +++ b/pkg/sql/set_cluster_setting.go @@ -14,6 +14,7 @@ import ( "bytes" "context" "encoding/base64" + "hash/fnv" "strconv" "strings" "time" @@ -30,6 +31,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql/catalog/catpb" "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" + "github.com/cockroachdb/cockroach/pkg/sql/clusterunique" "github.com/cockroachdb/cockroach/pkg/sql/colexec" "github.com/cockroachdb/cockroach/pkg/sql/isql" "github.com/cockroachdb/cockroach/pkg/sql/paramparse" @@ -261,6 +263,7 @@ func (n *setClusterSettingNode) startExec(params runParams) error { params.extendedEvalCtx.Codec.ForSystemTenant(), params.p.logEvent, params.p.descCollection.ReleaseLeases, + params.p.makeUnsafeSettingInterlockInfo(), ) if err != nil { return err @@ -356,6 +359,7 @@ func writeSettingInternal( forSystemTenant bool, logFn func(context.Context, descpb.ID, logpb.EventPayload) error, releaseLeases func(context.Context), + interlockInfo unsafeSettingInterlockInfo, ) (expectedEncodedValue string, err error) { if err := func() error { var reportedValue string @@ -367,7 +371,6 @@ func writeSettingInternal( return err } } else { - // Setting a non-DEFAULT value. value, err := eval.Expr(ctx, evalCtx, value) if err != nil { @@ -377,11 +380,18 @@ func writeSettingInternal( ctx, hook, db, setting, user, st, value, forSystemTenant, releaseLeases, + interlockInfo, ) if err != nil { return err } } + + if setting.IsUnsafe() { + // Also mention the change in the non-structured DEV log. + log.Warningf(ctx, "unsafe setting changed: %q -> %v", name, reportedValue) + } + return logFn(ctx, 0, /* no target */ &eventpb.SetClusterSetting{ @@ -423,6 +433,7 @@ func writeNonDefaultSettingValue( value tree.Datum, forSystemTenant bool, releaseLeases func(context.Context), + interlockInfo unsafeSettingInterlockInfo, ) (reportedValue string, expectedEncodedValue string, err error) { // Stringify the value set by the statement for reporting in errors, logs etc. reportedValue = tree.AsStringWithFlags(value, tree.FmtBareStrings) @@ -444,6 +455,12 @@ func writeNonDefaultSettingValue( } } else { // Modifying another setting than the version. + if setting.IsUnsafe() { + if err := unsafeSettingInterlock(ctx, st, setting, encoded, interlockInfo); err != nil { + return reportedValue, expectedEncodedValue, err + } + } + if _, err = db.Executor().ExecEx( ctx, "update-setting", nil, sessiondata.RootUserSessionDataOverride, @@ -775,3 +792,52 @@ func toSettingString( return "", errors.Errorf("unsupported setting type %T", setting) } } + +// unsafeSettingInterlockInfo contains information about the current +// session that is used by the unsafe setting interlock system. +type unsafeSettingInterlockInfo struct { + sessionID clusterunique.ID + interlockKey string +} + +func (p *planner) makeUnsafeSettingInterlockInfo() unsafeSettingInterlockInfo { + return unsafeSettingInterlockInfo{ + sessionID: p.ExtendedEvalContext().SessionID, + interlockKey: p.SessionData().UnsafeSettingInterlockKey, + } +} + +const interlockKeySessionVarName = "unsafe_setting_interlock_key" + +// unsafeSettingInterlock ensures that changes to unsafe settings are +// doubly confirmed by the operator by a special value in a session +// variable. +func unsafeSettingInterlock( + ctx context.Context, + st *cluster.Settings, + setting settings.Setting, + encodedValue string, + info unsafeSettingInterlockInfo, +) error { + // The interlock key is a combination of: + // - the session ID, so that different sessions need different keys. + // - the setting key, so that different settings need different + // interlock keys. + h := fnv.New32() + h.Write([]byte(info.sessionID.String())) + h.Write([]byte(setting.InternalKey())) + pastableKey := base64.StdEncoding.EncodeToString(h.Sum(nil)) + + if info.interlockKey != pastableKey { + return errors.WithDetailf( + pgerror.Newf(pgcode.ObjectNotInPrerequisiteState, + "changing cluster setting %q may cause cluster instability or data corruption.\n"+ + "To confirm the change, run the following command before trying again:\n\n"+ + " SET %s = '%s';\n\n", + setting.Name(), interlockKeySessionVarName, pastableKey, + ), + "key: %s", pastableKey, + ) + } + return nil +} diff --git a/pkg/sql/tests/BUILD.bazel b/pkg/sql/tests/BUILD.bazel index 2316a4bfbd9d..5167959f8fde 100644 --- a/pkg/sql/tests/BUILD.bazel +++ b/pkg/sql/tests/BUILD.bazel @@ -55,6 +55,7 @@ go_test( "rsg_test.go", "schema_changes_in_parallel_test.go", "search_path_test.go", + "set_cluster_setting_interlock_test.go", "show_commit_timestamp_test.go", "split_test.go", "system_table_test.go", @@ -96,6 +97,7 @@ go_test( "//pkg/security/securitytest", "//pkg/security/username", "//pkg/server", + "//pkg/settings", "//pkg/settings/cluster", "//pkg/spanconfig", "//pkg/sql", diff --git a/pkg/sql/tests/set_cluster_setting_interlock_test.go b/pkg/sql/tests/set_cluster_setting_interlock_test.go new file mode 100644 index 000000000000..0ad23d8c11c1 --- /dev/null +++ b/pkg/sql/tests/set_cluster_setting_interlock_test.go @@ -0,0 +1,100 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package tests + +import ( + "context" + "errors" + "fmt" + "strings" + "testing" + + "github.com/cockroachdb/cockroach/pkg/base" + "github.com/cockroachdb/cockroach/pkg/settings" + "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" + "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/lib/pq" + "github.com/stretchr/testify/require" +) + +var unsafeSetting = settings.RegisterBoolSetting( + settings.ApplicationLevel, + "my.unsafe.setting", "unused", false, + settings.WithUnsafe) +var otherUnsafeSetting = settings.RegisterBoolSetting( + settings.ApplicationLevel, + "my.other.unsafe.setting", "unused", false, + settings.WithUnsafe) + +func TestSetUnsafeClusterSettingInterlock(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + unsafeSettingName := unsafeSetting.Name() + + ctx := context.Background() + s := serverutils.StartServerOnly(t, base.TestServerArgs{}) + defer s.Stopper().Stop(ctx) + ts := s.ApplicationLayer() + + firstSession := ts.SQLConn(t, "") + + // RESET on unsafe settings never get an error. + _, err := firstSession.Exec(fmt.Sprintf("RESET CLUSTER SETTING %s", unsafeSettingName)) + require.NoError(t, err) + + // Try changing the setting. We're expecting an error. + _, err = firstSession.Exec(fmt.Sprintf("SET CLUSTER SETTING %s = true", unsafeSettingName)) + require.Error(t, err) + getKey := func(err error) string { + require.Contains(t, err.Error(), "may cause cluster instability") + var pqErr *pq.Error + ok := errors.As(err, &pqErr) // Change this if/when we change the driver for tests. + require.True(t, ok) + require.True(t, strings.HasPrefix(pqErr.Detail, "key:"), pqErr.Detail) + return strings.TrimPrefix(pqErr.Detail, "key: ") + } + key := getKey(err) + + // Now set the key and try again. We're not expecting an error any more. + _, err = firstSession.Exec("SET unsafe_setting_interlock_key = $1", key) + require.NoError(t, err) + _, err = firstSession.Exec(fmt.Sprintf("SET CLUSTER SETTING %s = true", unsafeSettingName)) + require.NoError(t, err) + + // We can reuse the key twice. + _, err = firstSession.Exec(fmt.Sprintf("SET CLUSTER SETTING %s = true", unsafeSettingName)) + require.NoError(t, err) + + otherSession := ts.SQLConn(t, "") + + // The first key produced in each session is different from other sessions. + _, err = otherSession.Exec(fmt.Sprintf("SET CLUSTER SETTING %s = true", unsafeSettingName)) + require.Error(t, err) + otherFirstKey := getKey(err) + require.NotEqual(t, key, otherFirstKey) + + // We also can't use a key from one session with another. + _, err = firstSession.Exec("SET unsafe_setting_interlock_key = $1", key) + require.NoError(t, err) + + _, err = otherSession.Exec(fmt.Sprintf("SET CLUSTER SETTING %s = true", unsafeSettingName)) + require.Error(t, err) + stillFirstKey := getKey(err) + require.Equal(t, otherFirstKey, stillFirstKey) + + // A different cluster setting generates a different key. + _, err = otherSession.Exec(fmt.Sprintf("SET CLUSTER SETTING %s = true", otherUnsafeSetting.Name())) + require.Error(t, err) + otherSettingKey := getKey(err) + require.NotEqual(t, otherSettingKey, otherFirstKey) +} diff --git a/pkg/sql/vars.go b/pkg/sql/vars.go index bcf05e2fc30e..60b977f06879 100644 --- a/pkg/sql/vars.go +++ b/pkg/sql/vars.go @@ -2935,6 +2935,18 @@ var varGen = map[string]sessionVar{ }, GlobalDefault: globalFalse, }, + + interlockKeySessionVarName: { + Hidden: true, + Set: func(_ context.Context, m sessionDataMutator, s string) error { + m.SetUnsafeSettingInterlockKey(s) + return nil + }, + Get: func(evalCtx *extendedEvalContext, _ *kv.Txn) (string, error) { + return evalCtx.SessionData().UnsafeSettingInterlockKey, nil + }, + GlobalDefault: func(_ *settings.Values) string { return "" }, + }, } func ReplicationModeFromString(s string) (sessiondatapb.ReplicationMode, error) {