diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index fb38aa5b8..ac262eb66 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -12,6 +12,7 @@ Added further pre-built maps, which removes the need for the expensive :map_assets build step. 2. Allow game to be rendered with top-left as origin instead of bottom-left. +3. Add 'mixerSeed' setting to change behaviour of all random number generators. ## release-2018-02-07 February 2018 release diff --git a/deepmind/engine/context.cc b/deepmind/engine/context.cc index 63b48ed60..bc0acc5ed 100644 --- a/deepmind/engine/context.cc +++ b/deepmind/engine/context.cc @@ -158,6 +158,11 @@ static bool get_native_app(void* userdata) { return static_cast(userdata)->NativeApp(); } +static void set_mixer_seed(void* userdata, int v) { + return static_cast(userdata)->SetMixerSeed( + static_cast(v)); +} + static void set_actions(void* userdata, double look_down_up, double look_left_right, signed char move_back_forward, signed char strafe_left_right, signed char crouch_jump, @@ -480,7 +485,7 @@ lua::NResultsOr MapMakerModule(lua_State* L) { LuaTextLevelMaker::CreateObject( L, ctx->ExecutableRunfiles(), ctx->TempDirectory(), ctx->UseLocalLevelCache(), ctx->UseGlobalLevelCache(), - ctx->LevelCacheParams()); + ctx->LevelCacheParams(), ctx->MixerSeed()); return 1; } else { return "Missing context!"; @@ -507,6 +512,7 @@ Context::Context(lua::Vm lua_vm, const char* executable_runfiles, : lua_vm_(std::move(lua_vm)), native_app_(false), actions_{}, + mixer_seed_(0), level_cache_params_{}, game_(executable_runfiles, calls, file_reader_override, temp_folder != nullptr ? 
temp_folder : ""), @@ -528,6 +534,7 @@ Context::Context(lua::Vm lua_vm, const char* executable_runfiles, hooks->run_lua_snippet = run_lua_snippet; hooks->set_native_app = set_native_app; hooks->get_native_app = get_native_app; + hooks->set_mixer_seed = set_mixer_seed; hooks->set_actions = set_actions; hooks->get_actions = get_actions; hooks->find_model = find_model; @@ -652,7 +659,8 @@ int Context::Init() { lua_vm_.AddCModuleToSearchers( "dmlab.system.tensor", tensor::LuaTensorConstructors); lua_vm_.AddCModuleToSearchers( - "dmlab.system.maze_generation", LuaMazeGeneration::Require); + "dmlab.system.maze_generation", &lua::Bind, + {reinterpret_cast(static_cast(mixer_seed_))}); lua_vm_.AddCModuleToSearchers( "dmlab.system.map_maker", &lua::Bind, {this}); lua_vm_.AddCModuleToSearchers( @@ -668,7 +676,9 @@ int Context::Init() { &lua::Bind, {MutablePickups()}); lua_vm_.AddCModuleToSearchers( - "dmlab.system.random", &lua::Bind, {UserPrbg()}); + "dmlab.system.random", &lua::Bind, + {UserPrbg(), + reinterpret_cast(static_cast(mixer_seed_))}); lua_vm_.AddCModuleToSearchers( "dmlab.system.model", &lua::Bind, {const_cast(Game().Calls())}); @@ -704,7 +714,8 @@ int Context::Init() { } int Context::Start(int episode, int seed) { - EnginePrbg()->seed(seed); + EnginePrbg()->seed(static_cast(seed) ^ + (static_cast(mixer_seed_) << 32)); MutableGame()->NextMap(); lua_State* L = lua_vm_.get(); script_table_ref_.PushMemberFunction("start"); diff --git a/deepmind/engine/context.h b/deepmind/engine/context.h index 8bba378b6..6715c8304 100644 --- a/deepmind/engine/context.h +++ b/deepmind/engine/context.h @@ -227,6 +227,15 @@ class Context { // generate new positive integers. int MakeRandomSeed(); + // Specifies a mixer value to be combined with all the seeds passed to this + // environment, before using them with the internal PRBGs. 
This is done in + // a way which guarantees that the resulting seeds span disjoint subsets of + // the integers in [0, 2^64) for each different mixer value. However, the + // sequences produced by the environment's PRBGs are not necessarily disjoint. + void SetMixerSeed(std::uint32_t s) { mixer_seed_ = s; } + + std::uint32_t MixerSeed() const { return mixer_seed_; } + std::mt19937_64* UserPrbg() { return &user_prbg_; } std::mt19937_64* EnginePrbg() { return &engine_prbg_; } @@ -480,6 +489,9 @@ class Context { // A pseudo-random-bit generator for exclusive use by users. std::mt19937_64 user_prbg_; + // Stores the mixer seed for the PRBG. + std::uint32_t mixer_seed_; + // A pseudo-random-bit generator for exclusive use of the engine. Seeded each // episode with the episode start seed. std::mt19937_64 engine_prbg_; diff --git a/deepmind/engine/lua_maze_generation.cc b/deepmind/engine/lua_maze_generation.cc index 212f95236..de83fc8b1 100644 --- a/deepmind/engine/lua_maze_generation.cc +++ b/deepmind/engine/lua_maze_generation.cc @@ -37,7 +37,8 @@ namespace lab { namespace { std::mt19937_64* GetRandomNumberGenerator(lua::TableRef* table, - std::mt19937_64* seeded_rng) { + std::mt19937_64* seeded_rng, + std::uint64_t mixer_seq) { std::mt19937_64* prng = nullptr; lua_State* L = table->LuaState(); table->LookUpToStack("random"); @@ -51,7 +52,7 @@ std::mt19937_64* GetRandomNumberGenerator(lua::TableRef* table, if (prng == nullptr) { int seed = 0; if (table->LookUp("seed", &seed)) { - seeded_rng->seed(seed); + seeded_rng->seed(static_cast(seed) ^ mixer_seq); prng = seeded_rng; } } @@ -112,11 +113,19 @@ class LuaRoom : public lua::Class { std::vector room_; }; +// Bit toggle sequence applied to the 32 MSB of the 64bit seeds fed to the maze +// generation PRBGs, with the intention of creating disjoint seed subspaces for +// each different mixer_seed value as described in python_api.md +std::uint64_t LuaMazeGeneration::mixer_seq_ = 0; + const char* 
LuaMazeGeneration::ClassName() { return "deepmind.lab.LuaMazeGeneration"; } -int LuaMazeGeneration::Require(lua_State* L) { +lua::NResultsOr LuaMazeGeneration::Require(lua_State* L) { + std::uintptr_t mixer_seed = + reinterpret_cast(lua_touserdata(L, lua_upvalueindex(1))); + mixer_seq_ = static_cast(mixer_seed) << 32; auto table = lua::TableRef::Create(L); table.Insert("mazeGeneration", &lua::Bind); table.Insert("randomMazeGeneration", @@ -162,7 +171,8 @@ lua::NResultsOr LuaMazeGeneration::CreateRandom(lua_State* L) { lua::Read(L, -1, &table); std::mt19937_64 seeded_rng; - std::mt19937_64* prng = GetRandomNumberGenerator(&table, &seeded_rng); + std::mt19937_64* prng = + GetRandomNumberGenerator(&table, &seeded_rng, mixer_seq_); if (prng == nullptr) { return "[randomMazeGeneration] - Must construct with 'random' a random " "number generator. ('seed' is deprecated.)"; @@ -544,7 +554,8 @@ lua::NResultsOr LuaMazeGeneration::VisitRandomPath(lua_State* L) { return "[visitRandomPath] - must supply table"; } std::mt19937_64 seeded_rng; - std::mt19937_64* prng = GetRandomNumberGenerator(&table, &seeded_rng); + std::mt19937_64* prng = + GetRandomNumberGenerator(&table, &seeded_rng, mixer_seq_); if (prng == nullptr) { return "[visitRandomPath] - must supply 'random' with random number " "generator. ('seed' is deprecated.)"; diff --git a/deepmind/engine/lua_maze_generation.h b/deepmind/engine/lua_maze_generation.h index 431d1917c..5e8325391 100644 --- a/deepmind/engine/lua_maze_generation.h +++ b/deepmind/engine/lua_maze_generation.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 Google Inc. +// Copyright (C) 2016-2018 Google Inc. // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -47,7 +47,7 @@ class LuaMazeGeneration : public lua::Class { // Returns table of constructors and standalone functions. 
// [0, 1, -] - static int Require(lua_State* L); + static lua::NResultsOr Require(lua_State* L); private: // Constructs a LuaMazeGeneration. @@ -197,6 +197,8 @@ class LuaMazeGeneration : public lua::Class { lua::NResultsOr CountVariations(lua_State* L); maze_generation::TextMaze text_maze_; + + static std::uint64_t mixer_seq_; }; } // namespace lab diff --git a/deepmind/engine/lua_maze_generation_test.cc b/deepmind/engine/lua_maze_generation_test.cc index 296da4d84..e15a0d704 100644 --- a/deepmind/engine/lua_maze_generation_test.cc +++ b/deepmind/engine/lua_maze_generation_test.cc @@ -42,11 +42,13 @@ class LuaMazeGenerationTest : public lua::testing::TestWithVm { protected: LuaMazeGenerationTest() { LuaMazeGeneration::Register(L); - vm()->AddCModuleToSearchers("dmlab.system.maze_generation", - LuaMazeGeneration::Require); + vm()->AddCModuleToSearchers( + "dmlab.system.maze_generation", &lua::Bind, + {reinterpret_cast(static_cast(0))}); LuaRandom::Register(L); - vm()->AddCModuleToSearchers("dmlab.system.sys_random", - &lua::Bind, {&prbg_}); + vm()->AddCModuleToSearchers( + "dmlab.system.sys_random", &lua::Bind, + {&prbg_, reinterpret_cast(static_cast(0))}); } std::mt19937_64 prbg_; diff --git a/deepmind/engine/lua_random.cc b/deepmind/engine/lua_random.cc index 515d16c02..5b6ef374f 100644 --- a/deepmind/engine/lua_random.cc +++ b/deepmind/engine/lua_random.cc @@ -60,7 +60,9 @@ bool ReadLargeNumber(lua_State* L, int idx, RbgNumType* num) { lua::NResultsOr LuaRandom::Require(lua_State* L) { if (auto* prbg = static_cast( lua_touserdata(L, lua_upvalueindex(1)))) { - LuaRandom::CreateObject(L, prbg); + std::uintptr_t mixer_seed = reinterpret_cast( + lua_touserdata(L, lua_upvalueindex(2))); + LuaRandom::CreateObject(L, prbg, mixer_seed); return 1; } else { return "Missing std::mt19937_64 pointer in up value!"; @@ -87,7 +89,7 @@ lua::NResultsOr LuaRandom::Seed(lua_State* L) { RbgNumType k; if (ReadLargeNumber(L, -1, &k)) { - prbg_->seed(k); + prbg_->seed(k ^ mixer_seq_); 
return 0; } else if (lua::Read(L, -1, &s)) { auto& err = errno; // cache TLS-lookup @@ -96,7 +98,7 @@ lua::NResultsOr LuaRandom::Seed(lua_State* L) { unsigned long long int n = std::strtoull(s.data(), &ep, 0); if (ep != s.data() && *ep == '\0' && err == 0 && n <= std::numeric_limits::max()) { - prbg_->seed(n); + prbg_->seed(n ^ mixer_seq_); return 0; } } diff --git a/deepmind/engine/lua_random.h b/deepmind/engine/lua_random.h index 385db91e9..287d4feec 100644 --- a/deepmind/engine/lua_random.h +++ b/deepmind/engine/lua_random.h @@ -43,7 +43,8 @@ class LuaRandom : public lua::Class { public: // Constructed with a non-owning view of a PRBG instance. - explicit LuaRandom(std::mt19937_64* prbg) : prbg_(prbg) {} + explicit LuaRandom(std::mt19937_64* prbg, std::uint32_t mixer_seed) + : prbg_(prbg), mixer_seq_(static_cast(mixer_seed) << 32) {} // Registers the class as well as member functions: // @@ -124,6 +125,7 @@ class LuaRandom : public lua::Class { private: std::mt19937_64* prbg_; + std::uint64_t mixer_seq_; }; } // namespace lab diff --git a/deepmind/engine/lua_random_test.cc b/deepmind/engine/lua_random_test.cc index bccc6e9f7..6cb1b0631 100644 --- a/deepmind/engine/lua_random_test.cc +++ b/deepmind/engine/lua_random_test.cc @@ -39,8 +39,9 @@ class LuaRandomTest : public lua::testing::TestWithVm { protected: LuaRandomTest() { LuaRandom::Register(L); - vm()->AddCModuleToSearchers("dmlab.system.sys_random", - &lua::Bind, {&prbg_}); + vm()->AddCModuleToSearchers( + "dmlab.system.sys_random", &lua::Bind, + {&prbg_, reinterpret_cast(static_cast(0))}); } std::mt19937_64 prbg_; diff --git a/deepmind/engine/lua_text_level_maker.cc b/deepmind/engine/lua_text_level_maker.cc index 565e8746a..27f065752 100644 --- a/deepmind/engine/lua_text_level_maker.cc +++ b/deepmind/engine/lua_text_level_maker.cc @@ -314,8 +314,11 @@ bool NoOp(std::size_t, std::size_t, char, LuaTextLevelMaker::LuaTextLevelMaker( const std::string& self, const std::string& output_folder, bool 
use_local_level_cache, bool use_global_level_cache, - DeepMindLabLevelCacheParams level_cache_params) - : prng_(0), rundir_(self), output_folder_(output_folder) { + DeepMindLabLevelCacheParams level_cache_params, std::uint32_t mixer_seed) + : prng_(0), + mixer_seed_(mixer_seed), + rundir_(self), + output_folder_(output_folder) { settings_.use_local_level_cache = use_local_level_cache; settings_.use_global_level_cache = use_global_level_cache; settings_.level_cache_params = level_cache_params; @@ -404,7 +407,7 @@ lua::NResultsOr LuaTextLevelMaker::MapFromTextLevel(lua_State* L) { lua::NResultsOr LuaTextLevelMaker::ViewRandomness(lua_State* L) { - LuaRandom::CreateObject(L, &prng_); + LuaRandom::CreateObject(L, &prng_, mixer_seed_); return 1; } diff --git a/deepmind/engine/lua_text_level_maker.h b/deepmind/engine/lua_text_level_maker.h index aaf3f0d16..8fbf6efbe 100644 --- a/deepmind/engine/lua_text_level_maker.h +++ b/deepmind/engine/lua_text_level_maker.h @@ -47,7 +47,8 @@ class LuaTextLevelMaker : public lua::Class { const std::string& output_folder, bool use_local_level_cache, bool use_global_level_cache, - DeepMindLabLevelCacheParams level_cache_params); + DeepMindLabLevelCacheParams level_cache_params, + std::uint32_t mixer_seed); // Registers MapFromTextLevel as "mapFromTextLevel". static void Register(lua_State* L); @@ -85,6 +86,7 @@ class LuaTextLevelMaker : public lua::Class { private: std::mt19937_64 prng_; + std::uint32_t mixer_seed_; MapCompileSettings settings_; const std::string rundir_; const std::string output_folder_; diff --git a/deepmind/include/deepmind_hooks.h b/deepmind/include/deepmind_hooks.h index a149fa4ee..f3e08a474 100644 --- a/deepmind/include/deepmind_hooks.h +++ b/deepmind/include/deepmind_hooks.h @@ -191,6 +191,10 @@ struct DeepmindHooks_s { // allowed to set actions. bool (*get_native_app)(void* userdata); + // This is a bit toggle sequence applied to the most significant bits of the + // seed. 
+ void (*set_mixer_seed)(void* userdata, int v); + // Sets the actions of the player. void (*set_actions)(void* userdata, // double look_down_up, // diff --git a/deepmind/tensor/lua_tensor_test.cc b/deepmind/tensor/lua_tensor_test.cc index ff030a780..c9ef43a92 100644 --- a/deepmind/tensor/lua_tensor_test.cc +++ b/deepmind/tensor/lua_tensor_test.cc @@ -47,13 +47,15 @@ class LuaTensorTest : public ::testing::Test { LuaTensorTest() : lua_vm_(lua::CreateVm()) { auto* L = lua_vm_.get(); LuaRandom::Register(L); - lua_vm_.AddCModuleToSearchers("dmlab.system.sys_random", - &lua::Bind, {&prbg_}); + lua_vm_.AddCModuleToSearchers( + "dmlab.system.sys_random", &lua::Bind, + {&prbg_, reinterpret_cast(static_cast(0))}); tensor::LuaTensorRegister(L); lua_vm_.AddCModuleToSearchers("dmlab.system.tensor", tensor::LuaTensorConstructors); } std::mt19937_64 prbg_; + uint32_t mixer_seed_; lua::Vm lua_vm_; }; diff --git a/docs/users/python_api.md b/docs/users/python_api.md index c3969a659..aaba500d1 100644 --- a/docs/users/python_api.md +++ b/docs/users/python_api.md @@ -23,14 +23,15 @@ name in the list *observations*. The `config` dict specifies additional settings as key-value string pairs. 
The following options are recognized: -| Option | Description | Default | -| ---------------- | ----------------------------------------------- | ------: | -| `width` | horizontal resolution of the observation frames | `'320'` | -| `height` | vertical resolution of the observation frames | `'240'` | -| `fps` | frames per second | `'60'` | -| `levelDirectory` | optional path to level directory (relative | `''` | -: : paths are relative to game_scripts/levels) : : -| `appendCommand` | Commands for the internal Quake console\* | `''` | +| Option | Description | Default | +| ---------------- | ---------------------------------------------------------------------------------------------- | ------: | +| `width` | horizontal resolution of the observation frames | `'320'` | +| `height` | vertical resolution of the observation frames | `'240'` | +| `fps` | frames per second | `'60'` | +| `levelDirectory` | optional path to level directory (relative | `''` | +: : paths are relative to game_scripts/levels) : : +| `appendCommand` | Commands for the internal Quake console\* | `''` | +| `mixerSeed` | value combined with each of the seeds fed to the environment to define unique subsets of seeds | `'0'` | \* See also [Lua map API](/docs/developers/reference/lua_api.md#commandlineold-commandline-string). @@ -88,6 +89,12 @@ The optional integer argument `seed` can be supplied to seed the environment's random number generator. If `seed` is omitted or `None`, a random number is used. +The optional integer argument `mixerSeed` provided with the environment is +combined with every seed passed to this function. The resulting seeds span a +unique subset of the integers in \[0, 2^64\) for each different `mixerSeed` +value. However, the sequences produced by the environment's random number +generator are not necessarily disjoint. 
+ ### `num_steps`() Number of frames since the last `reset`() call diff --git a/engine/code/deepmind/dmlab_connect.c b/engine/code/deepmind/dmlab_connect.c index c93c15bac..40e815cca 100644 --- a/engine/code/deepmind/dmlab_connect.c +++ b/engine/code/deepmind/dmlab_connect.c @@ -17,6 +17,7 @@ //////////////////////////////////////////////////////////////////////////////// #include +#include #include #include #include @@ -636,6 +637,17 @@ static int dmlab_setting(void* context, const char* key, const char* value) { } Q_strcat(gc->command_line, sizeof(gc->command_line), va(" +set name \"%s\"", value)); + } else if (strcmp(key, "mixerSeed") == 0) { + int res = parse_int(value, &v, ctx); + if (res != 0) return res; + if (v < 0 || v > UINT32_MAX) { + ctx->hooks.set_error_message(ctx->userdata, + va("Invalid mixerSeed value, must be a " + "positive integer not greater than '%" + PRIu32 "'.", UINT32_MAX)); + return 1; + } + ctx->hooks.set_mixer_seed(ctx->userdata, (uint32_t)v); } else { ctx->hooks.add_setting(ctx->userdata, key, value); } diff --git a/examples/game_main.c b/examples/game_main.c index e6c6c7632..23e52d128 100644 --- a/examples/game_main.c +++ b/examples/game_main.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -68,24 +69,26 @@ static const char kUsage[] = " -r, --random_seed: A seed value used for randomly generated content; using\n" " the same seed should result in the same content. 
Defaults\n" " to a fixed value.\n" + " -m, --mixer_seed: A XOR mask applied to the most significant bits of the seed.\n" ; static void process_commandline(int argc, char** argv, EnvCApi* env_c_api, void* context, int* num_episodes, int* seed, - bool* log_events) { + int* mixer_seed, bool* log_events) { static struct option long_options[] = { {"help", no_argument, NULL, 'h'}, {"level_script", required_argument, NULL, 'l'}, {"level_setting", required_argument, NULL, 's'}, {"num_episodes", required_argument, NULL, 'e'}, {"random_seed", required_argument, NULL, 'r'}, + {"mixer_seed", required_argument, NULL, 'm'}, {"print_events", no_argument, NULL, 'p'}, {NULL, 0, NULL, 0}}; char *key, *value; for (int c; - (c = getopt_long(argc, argv, "hl:s:e:r:p", long_options, 0)) != -1;) { + (c = getopt_long(argc, argv, "hl:s:e:r:m:p", long_options, 0)) != -1;) { switch (c) { case 'h': fputs(kUsage, stdout); @@ -119,6 +122,15 @@ static void process_commandline(int argc, char** argv, EnvCApi* env_c_api, sys_error("Failed to set random_seed to '%s'.", optarg); } break; + case 'm': + if (!parse_int(optarg, mixer_seed)) { + sys_error("Failed to set mixer_seed to '%s'.", optarg); + } + if (*mixer_seed < 0 || *mixer_seed > UINT32_MAX) { + sys_error("Invalid 'mixerSeed' setting. Must be a positive integer " + "not greater than %" PRIu32 ".", UINT32_MAX); + } + break; case 'p': *log_events = true; break; @@ -228,8 +240,15 @@ int main(int argc, char** argv) { int num_episodes = 1; int seed = 1; + int mixer_seed = 0; process_commandline(argc, argv, &env_c_api, context, &num_episodes, &seed, - &log_events); + &mixer_seed, &log_events); + static char mixer_seed_str[16]; + snprintf(mixer_seed_str, sizeof(mixer_seed_str), "%d", mixer_seed); + if (env_c_api.setting(context, "mixerSeed", mixer_seed_str) != 0) { + sys_error("Failed to apply 'mixerSeed' setting. 
Internal error: %s", + env_c_api.error_message(context)); + } if (env_c_api.init(context) != 0) { sys_error("Failed to init RL API: %s", env_c_api.error_message(context));