From d02f23eabb91fdc6f015f51ddcd5377fb5850ece Mon Sep 17 00:00:00 2001
From: David Wu
Date: Mon, 22 May 2023 15:30:01 -0400
Subject: [PATCH] Indentation second pass

---
 python/board.py                        |   16 +-
 python/data.py                         |   24 +-
 python/edit_checkpoint.py              |   12 +-
 python/elo.py                          |   86 +-
 python/export_model_pytorch.py         |   20 +-
 python/features.py                     |    8 +-
 python/forward_model.py                |   10 +-
 python/genboard_train.py               |   16 +-
 python/migrate_double_v1.py            |    1 -
 python/modelconfigs.py                 | 2462 ++++++++++++------------
 python/play.py                         |  222 +--
 python/save_model_for_export_manual.py |   18 +-
 python/shuffle.py                      |   50 +-
 python/summarize_old_selfplay_files.py |    8 +-
 python/summarize_sgfs.py               |   46 +-
 python/test.py                         |   44 +-
 python/train.py                        |  168 +-
 python/upload_model.py                 |   22 +-
 python/upload_poses.py                 |   24 +-
 19 files changed, 1628 insertions(+), 1629 deletions(-)

diff --git a/python/board.py b/python/board.py
index e385ea4c0..0a754ac05 100644
--- a/python/board.py
+++ b/python/board.py
@@ -133,10 +133,10 @@ def is_simple_eye(self,pla,loc):
 return True
 against_wall = (
- self.board[adj0] == Board.WALL or \
- self.board[adj1] == Board.WALL or \
- self.board[adj2] == Board.WALL or \
- self.board[adj3] == Board.WALL
+ self.board[adj0] == Board.WALL or \
+ self.board[adj1] == Board.WALL or \
+ self.board[adj2] == Board.WALL or \
+ self.board[adj3] == Board.WALL
 )
 if against_wall:
@@ -612,10 +612,10 @@ def countImmediateLiberties(self,loc):
 def is_group_adjacent(self,head,loc):
 return (
- self.group_head[loc+self.adj[0]] == head or \
- self.group_head[loc+self.adj[1]] == head or \
- self.group_head[loc+self.adj[2]] == head or \
- self.group_head[loc+self.adj[3]] == head
+ self.group_head[loc+self.adj[0]] == head or \
+ self.group_head[loc+self.adj[1]] == head or \
+ self.group_head[loc+self.adj[2]] == head or \
+ self.group_head[loc+self.adj[3]] == head
 )
 #Helper, merge two groups assuming they're owned by the same player and adjacent
diff --git a/python/data.py b/python/data.py
index ceeebd29c..cb530b5f7 100644
--- a/python/data.py
+++ b/python/data.py
@@ -96,21 +96,21 @@ def load_sgf_moves_exn(path):
 if rulesstr is not None:
 if rulesstr.lower() == "japanese" or rulesstr.lower() == "jp":
 rules = {
- "koRule": "KO_SIMPLE",
- "scoringRule": "SCORING_TERRITORY",
- "multiStoneSuicideLegal": False,
- "encorePhase": 0,
- "passWouldEndPhase": False,
- "whiteKomi": komi
+ "koRule": "KO_SIMPLE",
+ "scoringRule": "SCORING_TERRITORY",
+ "multiStoneSuicideLegal": False,
+ "encorePhase": 0,
+ "passWouldEndPhase": False,
+ "whiteKomi": komi
 }
 elif rulesstr.lower() == "chinese":
 rules = {
- "koRule": "KO_SIMPLE",
- "scoringRule": "SCORING_AREA",
- "multiStoneSuicideLegal": False,
- "encorePhase": 0,
- "passWouldEndPhase": False,
- "whiteKomi": komi
+ "koRule": "KO_SIMPLE",
+ "scoringRule": "SCORING_AREA",
+ "multiStoneSuicideLegal": False,
+ "encorePhase": 0,
+ "passWouldEndPhase": False,
+ "whiteKomi": komi
 }
 elif rulesstr.startswith("ko"):
 rules = {}
diff --git a/python/edit_checkpoint.py b/python/edit_checkpoint.py
index fdbc5e5cc..b5c88cb02 100644
--- a/python/edit_checkpoint.py
+++ b/python/edit_checkpoint.py
@@ -25,9 +25,9 @@
 if output_json_to is not None:
 assert output_json_to.endswith(".json")
 data_to_write = dict(
- running_metrics = data["running_metrics"],
- train_state = data["train_state"],
- config = data["config"] if "config" in data else None,
+ running_metrics = data["running_metrics"],
+ train_state = data["train_state"],
+ config = data["config"] if "config" in data else None,
 )
 with open(output_json_to,"w") as f:
json.dump(data,f,indent=2) @@ -51,8 +51,8 @@ else: data_to_write = dict( - running_metrics = data["running_metrics"], - train_state = data["train_state"], - config = data["config"] if "config" in data else None, + running_metrics = data["running_metrics"], + train_state = data["train_state"], + config = data["config"] if "config" in data else None, ) print(json.dumps(data_to_write,indent=2)) diff --git a/python/elo.py b/python/elo.py index cb8a48a10..6b51b1391 100644 --- a/python/elo.py +++ b/python/elo.py @@ -186,36 +186,36 @@ def likelihood_of_games( if not include_first_player_advantage: if p1_won_proportion > 0.0: ret.append(Likelihood( - playercombo={p1: 1.0, p2: -1.0}, - offset=0.0, - weight=p1_won_proportion*num_games, - gamecount=p1_won_proportion*num_games, - kind=Likelihood.SIGMOID_KIND + playercombo={p1: 1.0, p2: -1.0}, + offset=0.0, + weight=p1_won_proportion*num_games, + gamecount=p1_won_proportion*num_games, + kind=Likelihood.SIGMOID_KIND )) if p1_won_proportion < 1.0: ret.append(Likelihood( - playercombo={p2: 1.0, p1: -1.0}, - offset=0.0, - weight=(1.0-p1_won_proportion)*num_games, - gamecount=(1.0-p1_won_proportion)*num_games, - kind=Likelihood.SIGMOID_KIND + playercombo={p2: 1.0, p1: -1.0}, + offset=0.0, + weight=(1.0-p1_won_proportion)*num_games, + gamecount=(1.0-p1_won_proportion)*num_games, + kind=Likelihood.SIGMOID_KIND )) else: if p1_won_proportion > 0.0: ret.append(Likelihood( - playercombo={p1: 1.0, p2: -1.0, P1_ADVANTAGE_NAME: 1.0}, - offset=0.0, - weight=p1_won_proportion*num_games, - gamecount=p1_won_proportion*num_games, - kind=Likelihood.SIGMOID_KIND + playercombo={p1: 1.0, p2: -1.0, P1_ADVANTAGE_NAME: 1.0}, + offset=0.0, + weight=p1_won_proportion*num_games, + gamecount=p1_won_proportion*num_games, + kind=Likelihood.SIGMOID_KIND )) if p1_won_proportion < 1.0: ret.append(Likelihood( - playercombo={p2: 1.0, p1: -1.0, P1_ADVANTAGE_NAME: -1.0}, - offset=0.0, - weight=(1.0-p1_won_proportion)*num_games, - gamecount=(1.0-p1_won_proportion)*num_games, - kind=Likelihood.SIGMOID_KIND + playercombo={p2: 1.0, p1: -1.0, P1_ADVANTAGE_NAME: -1.0}, + offset=0.0, + weight=(1.0-p1_won_proportion)*num_games, + gamecount=(1.0-p1_won_proportion)*num_games, + kind=Likelihood.SIGMOID_KIND )) return ret @@ -238,18 +238,18 @@ def make_single_player_prior( assert np.isfinite(elo) if num_games > 0.0: ret.append(Likelihood( - playercombo={p1: 1.0}, - offset=(-elo / ELO_PER_STRENGTH), - weight=0.5*num_games, - gamecount=0.5*num_games, - kind=Likelihood.SIGMOID_KIND + playercombo={p1: 1.0}, + offset=(-elo / ELO_PER_STRENGTH), + weight=0.5*num_games, + gamecount=0.5*num_games, + kind=Likelihood.SIGMOID_KIND )) ret.append(Likelihood( - playercombo={p1: -1.0}, - offset=(elo / ELO_PER_STRENGTH), - weight=0.5*num_games, - gamecount=0.5*num_games, - kind=Likelihood.SIGMOID_KIND + playercombo={p1: -1.0}, + offset=(elo / ELO_PER_STRENGTH), + weight=0.5*num_games, + gamecount=0.5*num_games, + kind=Likelihood.SIGMOID_KIND )) return ret @@ -278,11 +278,11 @@ def make_sequential_prior( for i in range(len(players)-1): ret.extend(likelihood_of_games( - p1=players[i], - p2=players[i+1], - num_games=num_games, - p1_won_proportion=0.5, - include_first_player_advantage=False, + p1=players[i], + p2=players[i+1], + num_games=num_games, + p1_won_proportion=0.5, + include_first_player_advantage=False, )) return ret @@ -304,11 +304,11 @@ def make_center_elos_prior( assert len(set(players)) == len(players), "players must not contain any duplicates" playercombo = { player: 1.0 for player in players } 
ret.append(Likelihood( - playercombo=playercombo, - offset=-len(players) * elo / ELO_PER_STRENGTH, - weight=0.001, - gamecount=0.0, - kind=Likelihood.GAUSSIAN_KIND + playercombo=playercombo, + offset=-len(players) * elo / ELO_PER_STRENGTH, + weight=0.001, + gamecount=0.0, + kind=Likelihood.GAUSSIAN_KIND )) return ret @@ -446,9 +446,9 @@ def line_search_ascend(strengths: np.array, cur_loglikelihood: float) -> Tuple[n elo_stderr = { player: math.sqrt(1.0 / elo_precision[player_to_idx[player],player_to_idx[player]]) for player in players }, elo_covariance = { (p1,p2): elo_covariance[player_to_idx[p1],player_to_idx[p2]] for p1 in players for p2 in players }, effective_game_count = { - player: (np.square(sqrt_ess_numerator[player_to_idx[player],player_to_idx[player]]) / - ess_denominator[player_to_idx[player],player_to_idx[player]]) - for player in players + player: (np.square(sqrt_ess_numerator[player_to_idx[player],player_to_idx[player]]) / + ess_denominator[player_to_idx[player],player_to_idx[player]]) + for player in players }, ) return info diff --git a/python/export_model_pytorch.py b/python/export_model_pytorch.py index 8ba23e9ef..8ec510aca 100644 --- a/python/export_model_pytorch.py +++ b/python/export_model_pytorch.py @@ -48,12 +48,12 @@ def main(args): logging.root.handlers = [] logging.basicConfig( - level=logging.INFO, - format="%(message)s", - handlers=[ - logging.StreamHandler(stream=sys.stdout), - logging.FileHandler(export_dir + "/log.txt"), - ], + level=logging.INFO, + format="%(message)s", + handlers=[ + logging.StreamHandler(stream=sys.stdout), + logging.FileHandler(export_dir + "/log.txt"), + ], ) np.set_printoptions(linewidth=150) @@ -369,13 +369,13 @@ def write_model(model): if "running_metrics" in other_state_dict: assert sorted(list(other_state_dict["running_metrics"].keys())) == ["sums", "weights"] data["extra_stats"] = { - "sums": { key: value for (key,value) in other_state_dict["running_metrics"]["sums"].items() if "sopt" not in key and "lopt" not in key }, - "weights": { key: value for (key,value) in other_state_dict["running_metrics"]["weights"].items() if "sopt" not in key and "lopt" not in key }, + "sums": { key: value for (key,value) in other_state_dict["running_metrics"]["sums"].items() if "sopt" not in key and "lopt" not in key }, + "weights": { key: value for (key,value) in other_state_dict["running_metrics"]["weights"].items() if "sopt" not in key and "lopt" not in key }, } if "last_val_metrics" in other_state_dict and "sums" in other_state_dict["last_val_metrics"] and "weights" in other_state_dict["last_val_metrics"]: data["extra_stats"]["last_val_metrics"] = { - "sums": { key: value for (key,value) in other_state_dict["last_val_metrics"]["sums"].items() if "sopt" not in key and "lopt" not in key }, - "weights": { key: value for (key,value) in other_state_dict["last_val_metrics"]["weights"].items() if "sopt" not in key and "lopt" not in key }, + "sums": { key: value for (key,value) in other_state_dict["last_val_metrics"]["sums"].items() if "sopt" not in key and "lopt" not in key }, + "weights": { key: value for (key,value) in other_state_dict["last_val_metrics"]["weights"].items() if "sopt" not in key and "lopt" not in key }, } json.dump(data,f) diff --git a/python/features.py b/python/features.py index 397308f58..c7d223bd1 100644 --- a/python/features.py +++ b/python/features.py @@ -221,10 +221,10 @@ def addPrevPrevLadderFeature(loc,pos,workingMoves): if hasAreaFeature: board.calculateNonDameTouchingArea( - area, - keepTerritories, - keepStones, - 
rules["multiStoneSuicideLegal"] + area, + keepTerritories, + keepStones, + rules["multiStoneSuicideLegal"] ) for y in range(bsize): diff --git a/python/forward_model.py b/python/forward_model.py index 2ee3fa46d..97a46df12 100644 --- a/python/forward_model.py +++ b/python/forward_model.py @@ -53,11 +53,11 @@ def main(args): logging.root.handlers = [] logging.basicConfig( - level=logging.INFO, - format="%(message)s", - handlers=[ - logging.StreamHandler(stream=sys.stdout) - ], + level=logging.INFO, + format="%(message)s", + handlers=[ + logging.StreamHandler(stream=sys.stdout) + ], ) np.set_printoptions(linewidth=150) diff --git a/python/genboard_train.py b/python/genboard_train.py index b0f5738bf..2799d2f3b 100755 --- a/python/genboard_train.py +++ b/python/genboard_train.py @@ -146,8 +146,8 @@ def __iter__(self): num_always_known_poses = 0 else: num_always_known_poses = ( - ( min(alwaysknownxmax, metadata.size-1) - max(alwaysknownxmin, 0) + 1) * - ( min(alwaysknownymax, metadata.size-1) - max(alwaysknownymin, 0) + 1) + ( min(alwaysknownxmax, metadata.size-1) - max(alwaysknownxmin, 0) + 1) * + ( min(alwaysknownymax, metadata.size-1) - max(alwaysknownymin, 0) + 1) ) num_not_always_known_poses = metadata.size * metadata.size - num_always_known_poses inferenceidx = rand.randint(0,num_not_always_known_poses-1) @@ -452,12 +452,12 @@ def lossfunc(inputs, results, preds, aux, auxpreds): running_ewms_exgnorm += max(0.0, gnorm - grad_clip_max) if running_batch_count >= print_every_batches: trainlog("TRAIN samples: %d, batches: %d, main loss: %.5f, aux loss: %.5f, gnorm: %.2f, ewms_exgnorm: %.3g" % ( - traindata["samples_so_far"], - traindata["batches_so_far"], - running_main_loss / (running_batch_count * batch_size), - running_aux_loss / (running_batch_count * batch_size), - running_gnorm / (running_batch_count), - running_ewms_exgnorm / (running_batch_count), + traindata["samples_so_far"], + traindata["batches_so_far"], + running_main_loss / (running_batch_count * batch_size), + running_aux_loss / (running_batch_count * batch_size), + running_gnorm / (running_batch_count), + running_ewms_exgnorm / (running_batch_count), )) running_batch_count = 0 running_main_loss = 0.0 diff --git a/python/migrate_double_v1.py b/python/migrate_double_v1.py index 02121a7cf..1f6cf5a79 100644 --- a/python/migrate_double_v1.py +++ b/python/migrate_double_v1.py @@ -64,7 +64,6 @@ def expand_out_dim_for(name, scale): if any("intermediate_value_head" in key for key in data["model"].keys()): - expand_out_dim_for("intermediate_value_head.conv1.weight", scale=1.0) expand_in_dim_for("intermediate_value_head.bias1.beta", scale=1.0) expand_in_dim_for("intermediate_value_head.linear2.weight", scale=math.sqrt(0.5)) diff --git a/python/modelconfigs.py b/python/modelconfigs.py index 418a352e4..1b8ac6ad2 100644 --- a/python/modelconfigs.py +++ b/python/modelconfigs.py @@ -57,1344 +57,1344 @@ def get_num_global_input_features(config: ModelConfig): assert(False) b1c6nbt = { - "version":14, - "norm_kind":"fixup", - "bnorm_epsilon": 1e-4, - "bnorm_running_avg_momentum": 0.001, - "initial_conv_1x1": False, - "trunk_num_channels":6, - "mid_num_channels":4, - "gpool_num_channels":4, - "use_attention_pool":False, - "num_attention_pool_heads":2, - "block_kind": [ - ["rconv1","bottlenest2"], - ], - "p1_num_channels":4, - "g1_num_channels":4, - "v1_num_channels":4, - "sbv2_num_channels":4, - "num_scorebeliefs":2, - "v2_size":6, + "version":14, + "norm_kind":"fixup", + "bnorm_epsilon": 1e-4, + "bnorm_running_avg_momentum": 0.001, + 
"initial_conv_1x1": False, + "trunk_num_channels":6, + "mid_num_channels":4, + "gpool_num_channels":4, + "use_attention_pool":False, + "num_attention_pool_heads":2, + "block_kind": [ + ["rconv1","bottlenest2"], + ], + "p1_num_channels":4, + "g1_num_channels":4, + "v1_num_channels":4, + "sbv2_num_channels":4, + "num_scorebeliefs":2, + "v2_size":6, } b2c16 = { - "version":14, - "norm_kind":"fixup", - "bnorm_epsilon": 1e-4, - "bnorm_running_avg_momentum": 0.001, - "initial_conv_1x1": False, - "trunk_num_channels":16, - "mid_num_channels":16, - "gpool_num_channels":8, - "use_attention_pool":False, - "num_attention_pool_heads":2, - "block_kind": [ - ["rconv1","regular"], - ["rconv2","regulargpool"], - ], - "p1_num_channels":8, - "g1_num_channels":8, - "v1_num_channels":8, - "sbv2_num_channels":12, - "num_scorebeliefs":2, - "v2_size":12, + "version":14, + "norm_kind":"fixup", + "bnorm_epsilon": 1e-4, + "bnorm_running_avg_momentum": 0.001, + "initial_conv_1x1": False, + "trunk_num_channels":16, + "mid_num_channels":16, + "gpool_num_channels":8, + "use_attention_pool":False, + "num_attention_pool_heads":2, + "block_kind": [ + ["rconv1","regular"], + ["rconv2","regulargpool"], + ], + "p1_num_channels":8, + "g1_num_channels":8, + "v1_num_channels":8, + "sbv2_num_channels":12, + "num_scorebeliefs":2, + "v2_size":12, } b4c32 = { - "version":14, - "norm_kind":"fixup", - "bnorm_epsilon": 1e-4, - "bnorm_running_avg_momentum": 0.001, - "initial_conv_1x1": False, - "trunk_num_channels":32, - "mid_num_channels":32, - "gpool_num_channels":16, - "use_attention_pool":False, - "num_attention_pool_heads":2, - "block_kind": [ - ["rconv1","regular"], - ["rconv2","regular"], - ["rconv3","regulargpool"], - ["rconv4","regular"], - ], - "p1_num_channels":12, - "g1_num_channels":12, - "v1_num_channels":12, - "sbv2_num_channels":24, - "num_scorebeliefs":4, - "v2_size":24, + "version":14, + "norm_kind":"fixup", + "bnorm_epsilon": 1e-4, + "bnorm_running_avg_momentum": 0.001, + "initial_conv_1x1": False, + "trunk_num_channels":32, + "mid_num_channels":32, + "gpool_num_channels":16, + "use_attention_pool":False, + "num_attention_pool_heads":2, + "block_kind": [ + ["rconv1","regular"], + ["rconv2","regular"], + ["rconv3","regulargpool"], + ["rconv4","regular"], + ], + "p1_num_channels":12, + "g1_num_channels":12, + "v1_num_channels":12, + "sbv2_num_channels":24, + "num_scorebeliefs":4, + "v2_size":24, } b6c96 = { - "version":14, - "norm_kind":"fixup", - "bnorm_epsilon": 1e-4, - "bnorm_running_avg_momentum": 0.001, - "initial_conv_1x1": False, - "trunk_num_channels":96, - "mid_num_channels":96, - "gpool_num_channels":32, - "use_attention_pool":False, - "num_attention_pool_heads":4, - "block_kind": [ - ["rconv1","regular"], - ["rconv2","regular"], - ["rconv3","regulargpool"], - ["rconv4","regular"], - ["rconv5","regulargpool"], - ["rconv6","regular"], - ], - "p1_num_channels":32, - "g1_num_channels":32, - "v1_num_channels":32, - "sbv2_num_channels":48, - "num_scorebeliefs":4, - "v2_size":64, + "version":14, + "norm_kind":"fixup", + "bnorm_epsilon": 1e-4, + "bnorm_running_avg_momentum": 0.001, + "initial_conv_1x1": False, + "trunk_num_channels":96, + "mid_num_channels":96, + "gpool_num_channels":32, + "use_attention_pool":False, + "num_attention_pool_heads":4, + "block_kind": [ + ["rconv1","regular"], + ["rconv2","regular"], + ["rconv3","regulargpool"], + ["rconv4","regular"], + ["rconv5","regulargpool"], + ["rconv6","regular"], + ], + "p1_num_channels":32, + "g1_num_channels":32, + "v1_num_channels":32, + 
"sbv2_num_channels":48, + "num_scorebeliefs":4, + "v2_size":64, } b10c128 = { - "version":14, - "norm_kind":"fixup", - "bnorm_epsilon": 1e-4, - "bnorm_running_avg_momentum": 0.001, - "initial_conv_1x1": False, - "trunk_num_channels":128, - "mid_num_channels":128, - "gpool_num_channels":32, - "use_attention_pool":False, - "num_attention_pool_heads":4, - "block_kind": [ - ["rconv1","regular"], - ["rconv2","regular"], - ["rconv3","regular"], - ["rconv4","regular"], - ["rconv5","regulargpool"], - ["rconv6","regular"], - ["rconv7","regular"], - ["rconv8","regulargpool"], - ["rconv9","regular"], - ["rconv10","regular"], - ], - "p1_num_channels":32, - "g1_num_channels":32, - "v1_num_channels":32, - "sbv2_num_channels":64, - "num_scorebeliefs":6, - "v2_size":80, + "version":14, + "norm_kind":"fixup", + "bnorm_epsilon": 1e-4, + "bnorm_running_avg_momentum": 0.001, + "initial_conv_1x1": False, + "trunk_num_channels":128, + "mid_num_channels":128, + "gpool_num_channels":32, + "use_attention_pool":False, + "num_attention_pool_heads":4, + "block_kind": [ + ["rconv1","regular"], + ["rconv2","regular"], + ["rconv3","regular"], + ["rconv4","regular"], + ["rconv5","regulargpool"], + ["rconv6","regular"], + ["rconv7","regular"], + ["rconv8","regulargpool"], + ["rconv9","regular"], + ["rconv10","regular"], + ], + "p1_num_channels":32, + "g1_num_channels":32, + "v1_num_channels":32, + "sbv2_num_channels":64, + "num_scorebeliefs":6, + "v2_size":80, } b5c192nbt = { - "version":14, - "norm_kind":"fixup", - "bnorm_epsilon": 1e-4, - "bnorm_running_avg_momentum": 0.001, - "initial_conv_1x1": False, - "trunk_num_channels":192, - "mid_num_channels":96, - "gpool_num_channels":32, - "use_attention_pool":False, - "num_attention_pool_heads":4, - "block_kind": [ - ["rconv1","bottlenest2"], - ["rconv2","bottlenest2gpool"], - ["rconv3","bottlenest2"], - ["rconv4","bottlenest2gpool"], - ["rconv5","bottlenest2"], - ], - "p1_num_channels":32, - "g1_num_channels":32, - "v1_num_channels":32, - "sbv2_num_channels":64, - "num_scorebeliefs":6, - "v2_size":80, + "version":14, + "norm_kind":"fixup", + "bnorm_epsilon": 1e-4, + "bnorm_running_avg_momentum": 0.001, + "initial_conv_1x1": False, + "trunk_num_channels":192, + "mid_num_channels":96, + "gpool_num_channels":32, + "use_attention_pool":False, + "num_attention_pool_heads":4, + "block_kind": [ + ["rconv1","bottlenest2"], + ["rconv2","bottlenest2gpool"], + ["rconv3","bottlenest2"], + ["rconv4","bottlenest2gpool"], + ["rconv5","bottlenest2"], + ], + "p1_num_channels":32, + "g1_num_channels":32, + "v1_num_channels":32, + "sbv2_num_channels":64, + "num_scorebeliefs":6, + "v2_size":80, } b15c192 = { - "version":14, - "norm_kind":"fixup", - "bnorm_epsilon": 1e-4, - "bnorm_running_avg_momentum": 0.001, - "initial_conv_1x1": False, - "trunk_num_channels":192, - "mid_num_channels":192, - "gpool_num_channels":64, - "use_attention_pool":False, - "num_attention_pool_heads":4, - "block_kind": [ - ["rconv1","regular"], - ["rconv2","regular"], - ["rconv3","regular"], - ["rconv4","regular"], - ["rconv5","regular"], - ["rconv6","regular"], - ["rconv7","regulargpool"], - ["rconv8","regular"], - ["rconv9","regular"], - ["rconv10","regular"], - ["rconv11","regular"], - ["rconv12","regulargpool"], - ["rconv13","regular"], - ["rconv14","regular"], - ["rconv15","regular"], - ], - "p1_num_channels":32, - "g1_num_channels":32, - "v1_num_channels":32, - "sbv2_num_channels":80, - "num_scorebeliefs":8, - "v2_size":96, + "version":14, + "norm_kind":"fixup", + "bnorm_epsilon": 1e-4, + 
"bnorm_running_avg_momentum": 0.001, + "initial_conv_1x1": False, + "trunk_num_channels":192, + "mid_num_channels":192, + "gpool_num_channels":64, + "use_attention_pool":False, + "num_attention_pool_heads":4, + "block_kind": [ + ["rconv1","regular"], + ["rconv2","regular"], + ["rconv3","regular"], + ["rconv4","regular"], + ["rconv5","regular"], + ["rconv6","regular"], + ["rconv7","regulargpool"], + ["rconv8","regular"], + ["rconv9","regular"], + ["rconv10","regular"], + ["rconv11","regular"], + ["rconv12","regulargpool"], + ["rconv13","regular"], + ["rconv14","regular"], + ["rconv15","regular"], + ], + "p1_num_channels":32, + "g1_num_channels":32, + "v1_num_channels":32, + "sbv2_num_channels":80, + "num_scorebeliefs":8, + "v2_size":96, } b20c256 = { - "version":14, - "norm_kind":"fixup", - "bnorm_epsilon": 1e-4, - "bnorm_running_avg_momentum": 0.001, - "initial_conv_1x1": False, - "trunk_num_channels":256, - "mid_num_channels":256, - "gpool_num_channels":64, - "use_attention_pool":False, - "num_attention_pool_heads":4, - "block_kind": [ - ["rconv1","regular"], - ["rconv2","regular"], - ["rconv3","regular"], - ["rconv4","regular"], - ["rconv5","regular"], - ["rconv6","regular"], - ["rconv7","regulargpool"], - ["rconv8","regular"], - ["rconv9","regular"], - ["rconv10","regular"], - ["rconv11","regular"], - ["rconv12","regulargpool"], - ["rconv13","regular"], - ["rconv14","regular"], - ["rconv15","regular"], - ["rconv16","regular"], - ["rconv17","regulargpool"], - ["rconv18","regular"], - ["rconv19","regular"], - ["rconv20","regular"], - ], - "p1_num_channels":48, - "g1_num_channels":48, - "v1_num_channels":48, - "sbv2_num_channels":96, - "num_scorebeliefs":8, - "v2_size":112, + "version":14, + "norm_kind":"fixup", + "bnorm_epsilon": 1e-4, + "bnorm_running_avg_momentum": 0.001, + "initial_conv_1x1": False, + "trunk_num_channels":256, + "mid_num_channels":256, + "gpool_num_channels":64, + "use_attention_pool":False, + "num_attention_pool_heads":4, + "block_kind": [ + ["rconv1","regular"], + ["rconv2","regular"], + ["rconv3","regular"], + ["rconv4","regular"], + ["rconv5","regular"], + ["rconv6","regular"], + ["rconv7","regulargpool"], + ["rconv8","regular"], + ["rconv9","regular"], + ["rconv10","regular"], + ["rconv11","regular"], + ["rconv12","regulargpool"], + ["rconv13","regular"], + ["rconv14","regular"], + ["rconv15","regular"], + ["rconv16","regular"], + ["rconv17","regulargpool"], + ["rconv18","regular"], + ["rconv19","regular"], + ["rconv20","regular"], + ], + "p1_num_channels":48, + "g1_num_channels":48, + "v1_num_channels":48, + "sbv2_num_channels":96, + "num_scorebeliefs":8, + "v2_size":112, } b30c256bt = { - "version":14, - "norm_kind":"fixup", - "bnorm_epsilon": 1e-4, - "bnorm_running_avg_momentum": 0.001, - "initial_conv_1x1": False, - "trunk_num_channels":256, - "mid_num_channels":128, - "gpool_num_channels":64, - "use_attention_pool":False, - "num_attention_pool_heads":4, - "block_kind": [ - ["rconv0","bottle"], - ["rconv1","bottle"], - ["rconv2","bottle"], - ["rconv3","bottle"], - ["rconv4","bottle"], - ["rconv5","bottle"], - ["rconv6","bottlegpool"], - ["rconv7","bottle"], - ["rconv8","bottle"], - ["rconv9","bottle"], - ["rconv10","bottle"], - ["rconv11","bottle"], - ["rconv12","bottlegpool"], - ["rconv13","bottle"], - ["rconv14","bottle"], - ["rconv15","bottle"], - ["rconv16","bottle"], - ["rconv17","bottle"], - ["rconv18","bottlegpool"], - ["rconv19","bottle"], - ["rconv20","bottle"], - ["rconv21","bottle"], - ["rconv22","bottle"], - ["rconv23","bottle"], - 
["rconv24","bottlegpool"], - ["rconv25","bottle"], - ["rconv26","bottle"], - ["rconv27","bottle"], - ["rconv28","bottle"], - ["rconv29","bottle"], - ], - "p1_num_channels":48, - "g1_num_channels":48, - "v1_num_channels":48, - "sbv2_num_channels":96, - "num_scorebeliefs":8, - "v2_size":112, + "version":14, + "norm_kind":"fixup", + "bnorm_epsilon": 1e-4, + "bnorm_running_avg_momentum": 0.001, + "initial_conv_1x1": False, + "trunk_num_channels":256, + "mid_num_channels":128, + "gpool_num_channels":64, + "use_attention_pool":False, + "num_attention_pool_heads":4, + "block_kind": [ + ["rconv0","bottle"], + ["rconv1","bottle"], + ["rconv2","bottle"], + ["rconv3","bottle"], + ["rconv4","bottle"], + ["rconv5","bottle"], + ["rconv6","bottlegpool"], + ["rconv7","bottle"], + ["rconv8","bottle"], + ["rconv9","bottle"], + ["rconv10","bottle"], + ["rconv11","bottle"], + ["rconv12","bottlegpool"], + ["rconv13","bottle"], + ["rconv14","bottle"], + ["rconv15","bottle"], + ["rconv16","bottle"], + ["rconv17","bottle"], + ["rconv18","bottlegpool"], + ["rconv19","bottle"], + ["rconv20","bottle"], + ["rconv21","bottle"], + ["rconv22","bottle"], + ["rconv23","bottle"], + ["rconv24","bottlegpool"], + ["rconv25","bottle"], + ["rconv26","bottle"], + ["rconv27","bottle"], + ["rconv28","bottle"], + ["rconv29","bottle"], + ], + "p1_num_channels":48, + "g1_num_channels":48, + "v1_num_channels":48, + "sbv2_num_channels":96, + "num_scorebeliefs":8, + "v2_size":112, } b24c320bt = { - "version":14, - "norm_kind":"fixup", - "bnorm_epsilon": 1e-4, - "bnorm_running_avg_momentum": 0.001, - "initial_conv_1x1": False, - "trunk_num_channels":320, - "mid_num_channels":160, - "gpool_num_channels":64, - "use_attention_pool":False, - "num_attention_pool_heads":4, - "block_kind": [ - ["rconv1","bottle"], - ["rconv2","bottle"], - ["rconv3","bottle"], - ["rconv4","bottle"], - ["rconv5","bottle"], - ["rconv6","bottle"], - ["rconv7","bottlegpool"], - ["rconv8","bottle"], - ["rconv9","bottle"], - ["rconv10","bottle"], - ["rconv11","bottle"], - ["rconv12","bottle"], - ["rconv13","bottlegpool"], - ["rconv14","bottle"], - ["rconv15","bottle"], - ["rconv16","bottle"], - ["rconv17","bottle"], - ["rconv18","bottle"], - ["rconv19","bottlegpool"], - ["rconv20","bottle"], - ["rconv21","bottle"], - ["rconv22","bottle"], - ["rconv23","bottle"], - ["rconv24","bottle"], - ], - "p1_num_channels":48, - "g1_num_channels":48, - "v1_num_channels":48, - "sbv2_num_channels":96, - "num_scorebeliefs":8, - "v2_size":112, + "version":14, + "norm_kind":"fixup", + "bnorm_epsilon": 1e-4, + "bnorm_running_avg_momentum": 0.001, + "initial_conv_1x1": False, + "trunk_num_channels":320, + "mid_num_channels":160, + "gpool_num_channels":64, + "use_attention_pool":False, + "num_attention_pool_heads":4, + "block_kind": [ + ["rconv1","bottle"], + ["rconv2","bottle"], + ["rconv3","bottle"], + ["rconv4","bottle"], + ["rconv5","bottle"], + ["rconv6","bottle"], + ["rconv7","bottlegpool"], + ["rconv8","bottle"], + ["rconv9","bottle"], + ["rconv10","bottle"], + ["rconv11","bottle"], + ["rconv12","bottle"], + ["rconv13","bottlegpool"], + ["rconv14","bottle"], + ["rconv15","bottle"], + ["rconv16","bottle"], + ["rconv17","bottle"], + ["rconv18","bottle"], + ["rconv19","bottlegpool"], + ["rconv20","bottle"], + ["rconv21","bottle"], + ["rconv22","bottle"], + ["rconv23","bottle"], + ["rconv24","bottle"], + ], + "p1_num_channels":48, + "g1_num_channels":48, + "v1_num_channels":48, + "sbv2_num_channels":96, + "num_scorebeliefs":8, + "v2_size":112, } b20c384bt = { - "version":14, - 
"norm_kind":"fixup", - "bnorm_epsilon": 1e-4, - "bnorm_running_avg_momentum": 0.001, - "initial_conv_1x1": False, - "trunk_num_channels":384, - "mid_num_channels":192, - "gpool_num_channels":64, - "use_attention_pool":False, - "num_attention_pool_heads":4, - "block_kind": [ - ["rconv1","bottle"], - ["rconv2","bottle"], - ["rconv3","bottle"], - ["rconv4","bottle"], - ["rconv5","bottle"], - ["rconv6","bottlegpool"], - ["rconv7","bottle"], - ["rconv8","bottle"], - ["rconv9","bottle"], - ["rconv10","bottle"], - ["rconv11","bottlegpool"], - ["rconv12","bottle"], - ["rconv13","bottle"], - ["rconv14","bottle"], - ["rconv15","bottle"], - ["rconv16","bottlegpool"], - ["rconv17","bottle"], - ["rconv18","bottle"], - ["rconv19","bottle"], - ["rconv20","bottle"], - ], - "p1_num_channels":48, - "g1_num_channels":48, - "v1_num_channels":48, - "sbv2_num_channels":96, - "num_scorebeliefs":8, - "v2_size":112, + "version":14, + "norm_kind":"fixup", + "bnorm_epsilon": 1e-4, + "bnorm_running_avg_momentum": 0.001, + "initial_conv_1x1": False, + "trunk_num_channels":384, + "mid_num_channels":192, + "gpool_num_channels":64, + "use_attention_pool":False, + "num_attention_pool_heads":4, + "block_kind": [ + ["rconv1","bottle"], + ["rconv2","bottle"], + ["rconv3","bottle"], + ["rconv4","bottle"], + ["rconv5","bottle"], + ["rconv6","bottlegpool"], + ["rconv7","bottle"], + ["rconv8","bottle"], + ["rconv9","bottle"], + ["rconv10","bottle"], + ["rconv11","bottlegpool"], + ["rconv12","bottle"], + ["rconv13","bottle"], + ["rconv14","bottle"], + ["rconv15","bottle"], + ["rconv16","bottlegpool"], + ["rconv17","bottle"], + ["rconv18","bottle"], + ["rconv19","bottle"], + ["rconv20","bottle"], + ], + "p1_num_channels":48, + "g1_num_channels":48, + "v1_num_channels":48, + "sbv2_num_channels":96, + "num_scorebeliefs":8, + "v2_size":112, } b10c512lbt = { - "version":14, - "norm_kind":"fixup", - "bnorm_epsilon": 1e-4, - "bnorm_running_avg_momentum": 0.001, - "initial_conv_1x1": False, - "trunk_num_channels":512, - "mid_num_channels":256, - "gpool_num_channels":128, - "use_attention_pool":False, - "num_attention_pool_heads":4, - "block_kind": [ - ["rconv1","bottle2"], - ["rconv2","bottle2"], - ["rconv3","bottle2"], - ["rconv4","bottle2gpool"], - ["rconv5","bottle2"], - ["rconv6","bottle2"], - ["rconv7","bottle2"], - ["rconv8","bottle2gpool"], - ["rconv9","bottle2"], - ["rconv10","bottle2"], - ], - "p1_num_channels":48, - "g1_num_channels":48, - "v1_num_channels":48, - "sbv2_num_channels":96, - "num_scorebeliefs":8, - "v2_size":112, + "version":14, + "norm_kind":"fixup", + "bnorm_epsilon": 1e-4, + "bnorm_running_avg_momentum": 0.001, + "initial_conv_1x1": False, + "trunk_num_channels":512, + "mid_num_channels":256, + "gpool_num_channels":128, + "use_attention_pool":False, + "num_attention_pool_heads":4, + "block_kind": [ + ["rconv1","bottle2"], + ["rconv2","bottle2"], + ["rconv3","bottle2"], + ["rconv4","bottle2gpool"], + ["rconv5","bottle2"], + ["rconv6","bottle2"], + ["rconv7","bottle2"], + ["rconv8","bottle2gpool"], + ["rconv9","bottle2"], + ["rconv10","bottle2"], + ], + "p1_num_channels":48, + "g1_num_channels":48, + "v1_num_channels":48, + "sbv2_num_channels":96, + "num_scorebeliefs":8, + "v2_size":112, } b15c384lbt = { - "version":14, - "norm_kind":"fixup", - "bnorm_epsilon": 1e-4, - "bnorm_running_avg_momentum": 0.001, - "initial_conv_1x1": False, - "trunk_num_channels":384, - "mid_num_channels":192, - "gpool_num_channels":64, - "use_attention_pool":False, - "num_attention_pool_heads":4, - "block_kind": [ - 
["rconv1","bottle2"], - ["rconv2","bottle2"], - ["rconv3","bottle2"], - ["rconv4","bottle2gpool"], - ["rconv5","bottle2"], - ["rconv6","bottle2"], - ["rconv7","bottle2"], - ["rconv8","bottle2gpool"], - ["rconv9","bottle2"], - ["rconv10","bottle2"], - ["rconv11","bottle2"], - ["rconv12","bottle2gpool"], - ["rconv13","bottle2"], - ["rconv14","bottle2"], - ["rconv15","bottle2"], - ], - "p1_num_channels":48, - "g1_num_channels":48, - "v1_num_channels":48, - "sbv2_num_channels":96, - "num_scorebeliefs":8, - "v2_size":112, + "version":14, + "norm_kind":"fixup", + "bnorm_epsilon": 1e-4, + "bnorm_running_avg_momentum": 0.001, + "initial_conv_1x1": False, + "trunk_num_channels":384, + "mid_num_channels":192, + "gpool_num_channels":64, + "use_attention_pool":False, + "num_attention_pool_heads":4, + "block_kind": [ + ["rconv1","bottle2"], + ["rconv2","bottle2"], + ["rconv3","bottle2"], + ["rconv4","bottle2gpool"], + ["rconv5","bottle2"], + ["rconv6","bottle2"], + ["rconv7","bottle2"], + ["rconv8","bottle2gpool"], + ["rconv9","bottle2"], + ["rconv10","bottle2"], + ["rconv11","bottle2"], + ["rconv12","bottle2gpool"], + ["rconv13","bottle2"], + ["rconv14","bottle2"], + ["rconv15","bottle2"], + ], + "p1_num_channels":48, + "g1_num_channels":48, + "v1_num_channels":48, + "sbv2_num_channels":96, + "num_scorebeliefs":8, + "v2_size":112, } b18c320lbt = { - "version":14, - "norm_kind":"fixup", - "bnorm_epsilon": 1e-4, - "bnorm_running_avg_momentum": 0.001, - "initial_conv_1x1": False, - "trunk_num_channels":320, - "mid_num_channels":160, - "gpool_num_channels":64, - "use_attention_pool":False, - "num_attention_pool_heads":4, - "block_kind": [ - ["rconv1","bottle2"], - ["rconv2","bottle2"], - ["rconv3","bottle2"], - ["rconv4","bottle2"], - ["rconv5","bottle2gpool"], - ["rconv6","bottle2"], - ["rconv7","bottle2"], - ["rconv8","bottle2"], - ["rconv9","bottle2"], - ["rconv10","bottle2gpool"], - ["rconv11","bottle2"], - ["rconv12","bottle2"], - ["rconv13","bottle2"], - ["rconv14","bottle2"], - ["rconv15","bottle2gpool"], - ["rconv16","bottle2"], - ["rconv17","bottle2"], - ["rconv18","bottle2"], - ], - "p1_num_channels":48, - "g1_num_channels":48, - "v1_num_channels":48, - "sbv2_num_channels":96, - "num_scorebeliefs":8, - "v2_size":112, + "version":14, + "norm_kind":"fixup", + "bnorm_epsilon": 1e-4, + "bnorm_running_avg_momentum": 0.001, + "initial_conv_1x1": False, + "trunk_num_channels":320, + "mid_num_channels":160, + "gpool_num_channels":64, + "use_attention_pool":False, + "num_attention_pool_heads":4, + "block_kind": [ + ["rconv1","bottle2"], + ["rconv2","bottle2"], + ["rconv3","bottle2"], + ["rconv4","bottle2"], + ["rconv5","bottle2gpool"], + ["rconv6","bottle2"], + ["rconv7","bottle2"], + ["rconv8","bottle2"], + ["rconv9","bottle2"], + ["rconv10","bottle2gpool"], + ["rconv11","bottle2"], + ["rconv12","bottle2"], + ["rconv13","bottle2"], + ["rconv14","bottle2"], + ["rconv15","bottle2gpool"], + ["rconv16","bottle2"], + ["rconv17","bottle2"], + ["rconv18","bottle2"], + ], + "p1_num_channels":48, + "g1_num_channels":48, + "v1_num_channels":48, + "sbv2_num_channels":96, + "num_scorebeliefs":8, + "v2_size":112, } b23c256lbt = { - "version":14, - "norm_kind":"fixup", - "bnorm_epsilon": 1e-4, - "bnorm_running_avg_momentum": 0.001, - "initial_conv_1x1": False, - "trunk_num_channels":256, - "mid_num_channels":128, - "gpool_num_channels":64, - "use_attention_pool":False, - "num_attention_pool_heads":4, - "block_kind": [ - ["rconv1","bottle2"], - ["rconv2","bottle2"], - ["rconv3","bottle2"], - ["rconv4","bottle2"], - 
["rconv5","bottle2"], - ["rconv6","bottle2gpool"], - ["rconv7","bottle2"], - ["rconv8","bottle2"], - ["rconv9","bottle2"], - ["rconv10","bottle2"], - ["rconv11","bottle2"], - ["rconv12","bottle2gpool"], - ["rconv13","bottle2"], - ["rconv14","bottle2"], - ["rconv15","bottle2"], - ["rconv16","bottle2"], - ["rconv17","bottle2"], - ["rconv18","bottle2gpool"], - ["rconv19","bottle2"], - ["rconv20","bottle2"], - ["rconv21","bottle2"], - ["rconv22","bottle2"], - ["rconv23","bottle2"], - ], - "p1_num_channels":48, - "g1_num_channels":48, - "v1_num_channels":48, - "sbv2_num_channels":96, - "num_scorebeliefs":8, - "v2_size":112, + "version":14, + "norm_kind":"fixup", + "bnorm_epsilon": 1e-4, + "bnorm_running_avg_momentum": 0.001, + "initial_conv_1x1": False, + "trunk_num_channels":256, + "mid_num_channels":128, + "gpool_num_channels":64, + "use_attention_pool":False, + "num_attention_pool_heads":4, + "block_kind": [ + ["rconv1","bottle2"], + ["rconv2","bottle2"], + ["rconv3","bottle2"], + ["rconv4","bottle2"], + ["rconv5","bottle2"], + ["rconv6","bottle2gpool"], + ["rconv7","bottle2"], + ["rconv8","bottle2"], + ["rconv9","bottle2"], + ["rconv10","bottle2"], + ["rconv11","bottle2"], + ["rconv12","bottle2gpool"], + ["rconv13","bottle2"], + ["rconv14","bottle2"], + ["rconv15","bottle2"], + ["rconv16","bottle2"], + ["rconv17","bottle2"], + ["rconv18","bottle2gpool"], + ["rconv19","bottle2"], + ["rconv20","bottle2"], + ["rconv21","bottle2"], + ["rconv22","bottle2"], + ["rconv23","bottle2"], + ], + "p1_num_channels":48, + "g1_num_channels":48, + "v1_num_channels":48, + "sbv2_num_channels":96, + "num_scorebeliefs":8, + "v2_size":112, } b12c384llbt = { - "version":14, - "norm_kind":"fixup", - "bnorm_epsilon": 1e-4, - "bnorm_running_avg_momentum": 0.001, - "initial_conv_1x1": False, - "trunk_num_channels":384, - "mid_num_channels":192, - "gpool_num_channels":64, - "use_attention_pool":False, - "num_attention_pool_heads":4, - "block_kind": [ - ["rconv1","bottle3"], - ["rconv2","bottle3"], - ["rconv3","bottle3"], - ["rconv4","bottle3gpool"], - ["rconv5","bottle3"], - ["rconv6","bottle3"], - ["rconv7","bottle3gpool"], - ["rconv8","bottle3"], - ["rconv9","bottle3"], - ["rconv10","bottle3gpool"], - ["rconv11","bottle3"], - ["rconv12","bottle3"], - ], - "p1_num_channels":48, - "g1_num_channels":48, - "v1_num_channels":48, - "sbv2_num_channels":96, - "num_scorebeliefs":8, - "v2_size":112, + "version":14, + "norm_kind":"fixup", + "bnorm_epsilon": 1e-4, + "bnorm_running_avg_momentum": 0.001, + "initial_conv_1x1": False, + "trunk_num_channels":384, + "mid_num_channels":192, + "gpool_num_channels":64, + "use_attention_pool":False, + "num_attention_pool_heads":4, + "block_kind": [ + ["rconv1","bottle3"], + ["rconv2","bottle3"], + ["rconv3","bottle3"], + ["rconv4","bottle3gpool"], + ["rconv5","bottle3"], + ["rconv6","bottle3"], + ["rconv7","bottle3gpool"], + ["rconv8","bottle3"], + ["rconv9","bottle3"], + ["rconv10","bottle3gpool"], + ["rconv11","bottle3"], + ["rconv12","bottle3"], + ], + "p1_num_channels":48, + "g1_num_channels":48, + "v1_num_channels":48, + "sbv2_num_channels":96, + "num_scorebeliefs":8, + "v2_size":112, } b10c384nbt = { - "version":14, - "norm_kind":"fixup", - "bnorm_epsilon": 1e-4, - "bnorm_running_avg_momentum": 0.001, - "initial_conv_1x1": False, - "trunk_num_channels":384, - "mid_num_channels":192, - "gpool_num_channels":64, - "use_attention_pool":False, - "num_attention_pool_heads":4, - "block_kind": [ - ["rconv1","bottlenest2"], - ["rconv2","bottlenest2"], - ["rconv3","bottlenest2gpool"], - 
["rconv4","bottlenest2"], - ["rconv5","bottlenest2"], - ["rconv6","bottlenest2gpool"], - ["rconv7","bottlenest2"], - ["rconv8","bottlenest2"], - ["rconv9","bottlenest2gpool"], - ["rconv10","bottlenest2"], - ], - "p1_num_channels":48, - "g1_num_channels":48, - "v1_num_channels":48, - "sbv2_num_channels":96, - "num_scorebeliefs":8, - "v2_size":112, + "version":14, + "norm_kind":"fixup", + "bnorm_epsilon": 1e-4, + "bnorm_running_avg_momentum": 0.001, + "initial_conv_1x1": False, + "trunk_num_channels":384, + "mid_num_channels":192, + "gpool_num_channels":64, + "use_attention_pool":False, + "num_attention_pool_heads":4, + "block_kind": [ + ["rconv1","bottlenest2"], + ["rconv2","bottlenest2"], + ["rconv3","bottlenest2gpool"], + ["rconv4","bottlenest2"], + ["rconv5","bottlenest2"], + ["rconv6","bottlenest2gpool"], + ["rconv7","bottlenest2"], + ["rconv8","bottlenest2"], + ["rconv9","bottlenest2gpool"], + ["rconv10","bottlenest2"], + ], + "p1_num_channels":48, + "g1_num_channels":48, + "v1_num_channels":48, + "sbv2_num_channels":96, + "num_scorebeliefs":8, + "v2_size":112, } b10c480nb3t = { - "version":14, - "norm_kind":"fixup", - "bnorm_epsilon": 1e-4, - "bnorm_running_avg_momentum": 0.001, - "initial_conv_1x1": False, - "trunk_num_channels":480, - "mid_num_channels":160, - "gpool_num_channels":64, - "use_attention_pool":False, - "num_attention_pool_heads":4, - "block_kind": [ - ["rconv1","bottlenest2"], - ["rconv2","bottlenest2"], - ["rconv3","bottlenest2gpool"], - ["rconv4","bottlenest2"], - ["rconv5","bottlenest2"], - ["rconv6","bottlenest2gpool"], - ["rconv7","bottlenest2"], - ["rconv8","bottlenest2"], - ["rconv9","bottlenest2gpool"], - ["rconv10","bottlenest2"], - ], - "p1_num_channels":48, - "g1_num_channels":48, - "v1_num_channels":48, - "sbv2_num_channels":96, - "num_scorebeliefs":8, - "v2_size":112, + "version":14, + "norm_kind":"fixup", + "bnorm_epsilon": 1e-4, + "bnorm_running_avg_momentum": 0.001, + "initial_conv_1x1": False, + "trunk_num_channels":480, + "mid_num_channels":160, + "gpool_num_channels":64, + "use_attention_pool":False, + "num_attention_pool_heads":4, + "block_kind": [ + ["rconv1","bottlenest2"], + ["rconv2","bottlenest2"], + ["rconv3","bottlenest2gpool"], + ["rconv4","bottlenest2"], + ["rconv5","bottlenest2"], + ["rconv6","bottlenest2gpool"], + ["rconv7","bottlenest2"], + ["rconv8","bottlenest2"], + ["rconv9","bottlenest2gpool"], + ["rconv10","bottlenest2"], + ], + "p1_num_channels":48, + "g1_num_channels":48, + "v1_num_channels":48, + "sbv2_num_channels":96, + "num_scorebeliefs":8, + "v2_size":112, } b7c384lnbt = { - "version":14, - "norm_kind":"fixup", - "bnorm_epsilon": 1e-4, - "bnorm_running_avg_momentum": 0.001, - "initial_conv_1x1": False, - "trunk_num_channels":384, - "mid_num_channels":192, - "gpool_num_channels":64, - "use_attention_pool":False, - "num_attention_pool_heads":4, - "block_kind": [ - ["rconv1","bottlenest3"], - ["rconv3","bottlenest3gpool"], - ["rconv5","bottlenest3"], - ["rconv6","bottlenest3gpool"], - ["rconv8","bottlenest3"], - ["rconv9","bottlenest3gpool"], - ["rconv10","bottlenest3"], - ], - "p1_num_channels":48, - "g1_num_channels":48, - "v1_num_channels":48, - "sbv2_num_channels":96, - "num_scorebeliefs":8, - "v2_size":112, + "version":14, + "norm_kind":"fixup", + "bnorm_epsilon": 1e-4, + "bnorm_running_avg_momentum": 0.001, + "initial_conv_1x1": False, + "trunk_num_channels":384, + "mid_num_channels":192, + "gpool_num_channels":64, + "use_attention_pool":False, + "num_attention_pool_heads":4, + "block_kind": [ + ["rconv1","bottlenest3"], + 
["rconv3","bottlenest3gpool"], + ["rconv5","bottlenest3"], + ["rconv6","bottlenest3gpool"], + ["rconv8","bottlenest3"], + ["rconv9","bottlenest3gpool"], + ["rconv10","bottlenest3"], + ], + "p1_num_channels":48, + "g1_num_channels":48, + "v1_num_channels":48, + "sbv2_num_channels":96, + "num_scorebeliefs":8, + "v2_size":112, } b5c512nnbt = { - "version":14, - "norm_kind":"fixup", - "bnorm_epsilon": 1e-4, - "bnorm_running_avg_momentum": 0.001, - "initial_conv_1x1": True, - "trunk_num_channels":512, - "outermid_num_channels":256, - "mid_num_channels":128, - "gpool_num_channels":64, - "use_attention_pool":False, - "num_attention_pool_heads":4, - "block_kind": [ - ["rconv1","bottlenest2bottlenest2"], - ["rconv2","bottlenest2bottlenest2gpool"], - ["rconv3","bottlenest2bottlenest2"], - ["rconv4","bottlenest2bottlenest2gpool"], - ["rconv5","bottlenest2bottlenest2gpool"], - ], - "p1_num_channels":48, - "g1_num_channels":48, - "v1_num_channels":48, - "sbv2_num_channels":96, - "num_scorebeliefs":8, - "v2_size":112, + "version":14, + "norm_kind":"fixup", + "bnorm_epsilon": 1e-4, + "bnorm_running_avg_momentum": 0.001, + "initial_conv_1x1": True, + "trunk_num_channels":512, + "outermid_num_channels":256, + "mid_num_channels":128, + "gpool_num_channels":64, + "use_attention_pool":False, + "num_attention_pool_heads":4, + "block_kind": [ + ["rconv1","bottlenest2bottlenest2"], + ["rconv2","bottlenest2bottlenest2gpool"], + ["rconv3","bottlenest2bottlenest2"], + ["rconv4","bottlenest2bottlenest2gpool"], + ["rconv5","bottlenest2bottlenest2gpool"], + ], + "p1_num_channels":48, + "g1_num_channels":48, + "v1_num_channels":48, + "sbv2_num_channels":96, + "num_scorebeliefs":8, + "v2_size":112, } b20c384lbt = { - "version":14, - "norm_kind":"fixup", - "bnorm_epsilon": 1e-4, - "bnorm_running_avg_momentum": 0.001, - "initial_conv_1x1": False, - "trunk_num_channels":384, - "mid_num_channels":192, - "gpool_num_channels":64, - "use_attention_pool":False, - "num_attention_pool_heads":4, - "block_kind": [ - ["rconv1","bottle2"], - ["rconv2","bottle2"], - ["rconv3","bottle2"], - ["rconv4","bottle2"], - ["rconv5","bottle2"], - ["rconv6","bottle2gpool"], - ["rconv7","bottle2"], - ["rconv8","bottle2"], - ["rconv9","bottle2"], - ["rconv10","bottle2"], - ["rconv11","bottle2gpool"], - ["rconv12","bottle2"], - ["rconv13","bottle2"], - ["rconv14","bottle2"], - ["rconv15","bottle2"], - ["rconv16","bottle2gpool"], - ["rconv17","bottle2"], - ["rconv18","bottle2"], - ["rconv19","bottle2"], - ["rconv20","bottle2"], - ], - "p1_num_channels":48, - "g1_num_channels":48, - "v1_num_channels":48, - "sbv2_num_channels":96, - "num_scorebeliefs":8, - "v2_size":112, + "version":14, + "norm_kind":"fixup", + "bnorm_epsilon": 1e-4, + "bnorm_running_avg_momentum": 0.001, + "initial_conv_1x1": False, + "trunk_num_channels":384, + "mid_num_channels":192, + "gpool_num_channels":64, + "use_attention_pool":False, + "num_attention_pool_heads":4, + "block_kind": [ + ["rconv1","bottle2"], + ["rconv2","bottle2"], + ["rconv3","bottle2"], + ["rconv4","bottle2"], + ["rconv5","bottle2"], + ["rconv6","bottle2gpool"], + ["rconv7","bottle2"], + ["rconv8","bottle2"], + ["rconv9","bottle2"], + ["rconv10","bottle2"], + ["rconv11","bottle2gpool"], + ["rconv12","bottle2"], + ["rconv13","bottle2"], + ["rconv14","bottle2"], + ["rconv15","bottle2"], + ["rconv16","bottle2gpool"], + ["rconv17","bottle2"], + ["rconv18","bottle2"], + ["rconv19","bottle2"], + ["rconv20","bottle2"], + ], + "p1_num_channels":48, + "g1_num_channels":48, + "v1_num_channels":48, + 
"sbv2_num_channels":96, + "num_scorebeliefs":8, + "v2_size":112, } b30c320 = { - "version":14, - "norm_kind":"fixup", - "bnorm_epsilon": 1e-4, - "bnorm_running_avg_momentum": 0.001, - "initial_conv_1x1": False, - "trunk_num_channels":320, - "mid_num_channels":320, - "gpool_num_channels":96, - "use_attention_pool":False, - "num_attention_pool_heads":4, - "block_kind": [ - ["rconv1","regular"], - ["rconv2","regular"], - ["rconv3","regular"], - ["rconv4","regular"], - ["rconv5","regular"], - ["rconv6","regulargpool"], - ["rconv7","regular"], - ["rconv8","regular"], - ["rconv9","regular"], - ["rconv10","regular"], - ["rconv11","regulargpool"], - ["rconv12","regular"], - ["rconv13","regular"], - ["rconv14","regular"], - ["rconv15","regular"], - ["rconv16","regulargpool"], - ["rconv17","regular"], - ["rconv18","regular"], - ["rconv19","regular"], - ["rconv20","regular"], - ["rconv21","regulargpool"], - ["rconv22","regular"], - ["rconv23","regular"], - ["rconv24","regular"], - ["rconv25","regular"], - ["rconv26","regulargpool"], - ["rconv27","regular"], - ["rconv28","regular"], - ["rconv29","regular"], - ["rconv30","regular"], - ], - "p1_num_channels":48, - "g1_num_channels":48, - "v1_num_channels":96, - "sbv2_num_channels":112, - "num_scorebeliefs":8, - "v2_size":128, + "version":14, + "norm_kind":"fixup", + "bnorm_epsilon": 1e-4, + "bnorm_running_avg_momentum": 0.001, + "initial_conv_1x1": False, + "trunk_num_channels":320, + "mid_num_channels":320, + "gpool_num_channels":96, + "use_attention_pool":False, + "num_attention_pool_heads":4, + "block_kind": [ + ["rconv1","regular"], + ["rconv2","regular"], + ["rconv3","regular"], + ["rconv4","regular"], + ["rconv5","regular"], + ["rconv6","regulargpool"], + ["rconv7","regular"], + ["rconv8","regular"], + ["rconv9","regular"], + ["rconv10","regular"], + ["rconv11","regulargpool"], + ["rconv12","regular"], + ["rconv13","regular"], + ["rconv14","regular"], + ["rconv15","regular"], + ["rconv16","regulargpool"], + ["rconv17","regular"], + ["rconv18","regular"], + ["rconv19","regular"], + ["rconv20","regular"], + ["rconv21","regulargpool"], + ["rconv22","regular"], + ["rconv23","regular"], + ["rconv24","regular"], + ["rconv25","regular"], + ["rconv26","regulargpool"], + ["rconv27","regular"], + ["rconv28","regular"], + ["rconv29","regular"], + ["rconv30","regular"], + ], + "p1_num_channels":48, + "g1_num_channels":48, + "v1_num_channels":96, + "sbv2_num_channels":112, + "num_scorebeliefs":8, + "v2_size":128, } b40c256 = { - "version":14, - "norm_kind":"fixup", - "bnorm_epsilon": 1e-4, - "bnorm_running_avg_momentum": 0.001, - "initial_conv_1x1": False, - "trunk_num_channels":256, - "mid_num_channels":256, - "gpool_num_channels":64, - "use_attention_pool":False, - "num_attention_pool_heads":4, - "block_kind": [ - ["rconv1","regular"], - ["rconv2","regular"], - ["rconv3","regular"], - ["rconv4","regular"], - ["rconv5","regular"], - ["rconv6","regulargpool"], - ["rconv7","regular"], - ["rconv8","regular"], - ["rconv9","regular"], - ["rconv10","regular"], - ["rconv11","regulargpool"], - ["rconv12","regular"], - ["rconv13","regular"], - ["rconv14","regular"], - ["rconv15","regular"], - ["rconv16","regulargpool"], - ["rconv17","regular"], - ["rconv18","regular"], - ["rconv19","regular"], - ["rconv20","regular"], - ["rconv21","regulargpool"], - ["rconv22","regular"], - ["rconv23","regular"], - ["rconv24","regular"], - ["rconv25","regular"], - ["rconv26","regulargpool"], - ["rconv27","regular"], - ["rconv28","regular"], - ["rconv29","regular"], - 
["rconv30","regular"], - ["rconv31","regulargpool"], - ["rconv32","regular"], - ["rconv33","regular"], - ["rconv34","regular"], - ["rconv35","regular"], - ["rconv36","regulargpool"], - ["rconv37","regular"], - ["rconv38","regular"], - ["rconv39","regular"], - ["rconv40","regular"], - ], - "p1_num_channels":48, - "g1_num_channels":48, - "v1_num_channels":96, - "sbv2_num_channels":112, - "num_scorebeliefs":8, - "v2_size":128, + "version":14, + "norm_kind":"fixup", + "bnorm_epsilon": 1e-4, + "bnorm_running_avg_momentum": 0.001, + "initial_conv_1x1": False, + "trunk_num_channels":256, + "mid_num_channels":256, + "gpool_num_channels":64, + "use_attention_pool":False, + "num_attention_pool_heads":4, + "block_kind": [ + ["rconv1","regular"], + ["rconv2","regular"], + ["rconv3","regular"], + ["rconv4","regular"], + ["rconv5","regular"], + ["rconv6","regulargpool"], + ["rconv7","regular"], + ["rconv8","regular"], + ["rconv9","regular"], + ["rconv10","regular"], + ["rconv11","regulargpool"], + ["rconv12","regular"], + ["rconv13","regular"], + ["rconv14","regular"], + ["rconv15","regular"], + ["rconv16","regulargpool"], + ["rconv17","regular"], + ["rconv18","regular"], + ["rconv19","regular"], + ["rconv20","regular"], + ["rconv21","regulargpool"], + ["rconv22","regular"], + ["rconv23","regular"], + ["rconv24","regular"], + ["rconv25","regular"], + ["rconv26","regulargpool"], + ["rconv27","regular"], + ["rconv28","regular"], + ["rconv29","regular"], + ["rconv30","regular"], + ["rconv31","regulargpool"], + ["rconv32","regular"], + ["rconv33","regular"], + ["rconv34","regular"], + ["rconv35","regular"], + ["rconv36","regulargpool"], + ["rconv37","regular"], + ["rconv38","regular"], + ["rconv39","regular"], + ["rconv40","regular"], + ], + "p1_num_channels":48, + "g1_num_channels":48, + "v1_num_channels":96, + "sbv2_num_channels":112, + "num_scorebeliefs":8, + "v2_size":128, } b18c384nbt = { - "version":14, - "norm_kind":"fixup", - "bnorm_epsilon": 1e-4, - "bnorm_running_avg_momentum": 0.001, - "initial_conv_1x1": False, - "trunk_num_channels":384, - "mid_num_channels":192, - "gpool_num_channels":64, - "use_attention_pool":False, - "num_attention_pool_heads":4, - "block_kind": [ - ["rconv1","bottlenest2"], - ["rconv2","bottlenest2"], - ["rconv3","bottlenest2gpool"], - ["rconv4","bottlenest2"], - ["rconv5","bottlenest2"], - ["rconv6","bottlenest2gpool"], - ["rconv7","bottlenest2"], - ["rconv8","bottlenest2"], - ["rconv9","bottlenest2gpool"], - ["rconv10","bottlenest2"], - ["rconv11","bottlenest2"], - ["rconv12","bottlenest2gpool"], - ["rconv13","bottlenest2"], - ["rconv14","bottlenest2"], - ["rconv15","bottlenest2gpool"], - ["rconv16","bottlenest2"], - ["rconv17","bottlenest2"], - ["rconv18","bottlenest2"], - ], - "p1_num_channels":48, - "g1_num_channels":48, - "v1_num_channels":96, - "sbv2_num_channels":112, - "num_scorebeliefs":8, - "v2_size":128, + "version":14, + "norm_kind":"fixup", + "bnorm_epsilon": 1e-4, + "bnorm_running_avg_momentum": 0.001, + "initial_conv_1x1": False, + "trunk_num_channels":384, + "mid_num_channels":192, + "gpool_num_channels":64, + "use_attention_pool":False, + "num_attention_pool_heads":4, + "block_kind": [ + ["rconv1","bottlenest2"], + ["rconv2","bottlenest2"], + ["rconv3","bottlenest2gpool"], + ["rconv4","bottlenest2"], + ["rconv5","bottlenest2"], + ["rconv6","bottlenest2gpool"], + ["rconv7","bottlenest2"], + ["rconv8","bottlenest2"], + ["rconv9","bottlenest2gpool"], + ["rconv10","bottlenest2"], + ["rconv11","bottlenest2"], + ["rconv12","bottlenest2gpool"], + 
["rconv13","bottlenest2"], + ["rconv14","bottlenest2"], + ["rconv15","bottlenest2gpool"], + ["rconv16","bottlenest2"], + ["rconv17","bottlenest2"], + ["rconv18","bottlenest2"], + ], + "p1_num_channels":48, + "g1_num_channels":48, + "v1_num_channels":96, + "sbv2_num_channels":112, + "num_scorebeliefs":8, + "v2_size":128, } b14c448nbt = { - "version":14, - "norm_kind":"fixup", - "bnorm_epsilon": 1e-4, - "bnorm_running_avg_momentum": 0.001, - "initial_conv_1x1": False, - "trunk_num_channels":448, - "mid_num_channels":224, - "gpool_num_channels":64, - "use_attention_pool":False, - "num_attention_pool_heads":4, - "block_kind": [ - ["rconv1","bottlenest2"], - ["rconv2","bottlenest2"], - ["rconv3","bottlenest2gpool"], - ["rconv4","bottlenest2"], - ["rconv5","bottlenest2"], - ["rconv6","bottlenest2gpool"], - ["rconv7","bottlenest2"], - ["rconv8","bottlenest2"], - ["rconv9","bottlenest2gpool"], - ["rconv10","bottlenest2"], - ["rconv11","bottlenest2"], - ["rconv12","bottlenest2gpool"], - ["rconv13","bottlenest2"], - ["rconv14","bottlenest2"], - ], - "p1_num_channels":48, - "g1_num_channels":48, - "v1_num_channels":96, - "sbv2_num_channels":112, - "num_scorebeliefs":8, - "v2_size":128, + "version":14, + "norm_kind":"fixup", + "bnorm_epsilon": 1e-4, + "bnorm_running_avg_momentum": 0.001, + "initial_conv_1x1": False, + "trunk_num_channels":448, + "mid_num_channels":224, + "gpool_num_channels":64, + "use_attention_pool":False, + "num_attention_pool_heads":4, + "block_kind": [ + ["rconv1","bottlenest2"], + ["rconv2","bottlenest2"], + ["rconv3","bottlenest2gpool"], + ["rconv4","bottlenest2"], + ["rconv5","bottlenest2"], + ["rconv6","bottlenest2gpool"], + ["rconv7","bottlenest2"], + ["rconv8","bottlenest2"], + ["rconv9","bottlenest2gpool"], + ["rconv10","bottlenest2"], + ["rconv11","bottlenest2"], + ["rconv12","bottlenest2gpool"], + ["rconv13","bottlenest2"], + ["rconv14","bottlenest2"], + ], + "p1_num_channels":48, + "g1_num_channels":48, + "v1_num_channels":96, + "sbv2_num_channels":112, + "num_scorebeliefs":8, + "v2_size":128, } b40c384 = { - "version":14, - "norm_kind":"fixup", - "bnorm_epsilon": 1e-4, - "bnorm_running_avg_momentum": 0.001, - "initial_conv_1x1": False, - "trunk_num_channels":384, - "mid_num_channels":384, - "gpool_num_channels":128, - "use_attention_pool":False, - "num_attention_pool_heads":4, - "block_kind": [ - ["rconv1","regular"], - ["rconv2","regular"], - ["rconv3","regular"], - ["rconv4","regular"], - ["rconv5","regular"], - ["rconv6","regulargpool"], - ["rconv7","regular"], - ["rconv8","regular"], - ["rconv9","regular"], - ["rconv10","regular"], - ["rconv11","regulargpool"], - ["rconv12","regular"], - ["rconv13","regular"], - ["rconv14","regular"], - ["rconv15","regular"], - ["rconv16","regulargpool"], - ["rconv17","regular"], - ["rconv18","regular"], - ["rconv19","regular"], - ["rconv20","regular"], - ["rconv21","regulargpool"], - ["rconv22","regular"], - ["rconv23","regular"], - ["rconv24","regular"], - ["rconv25","regular"], - ["rconv26","regulargpool"], - ["rconv27","regular"], - ["rconv28","regular"], - ["rconv29","regular"], - ["rconv30","regular"], - ["rconv31","regulargpool"], - ["rconv32","regular"], - ["rconv33","regular"], - ["rconv34","regular"], - ["rconv35","regular"], - ["rconv36","regulargpool"], - ["rconv37","regular"], - ["rconv38","regular"], - ["rconv39","regular"], - ["rconv40","regular"], - ], - "p1_num_channels":64, - "g1_num_channels":64, - "v1_num_channels":96, - "sbv2_num_channels":128, - "num_scorebeliefs":8, - "v2_size":144, + "version":14, + 
"norm_kind":"fixup", + "bnorm_epsilon": 1e-4, + "bnorm_running_avg_momentum": 0.001, + "initial_conv_1x1": False, + "trunk_num_channels":384, + "mid_num_channels":384, + "gpool_num_channels":128, + "use_attention_pool":False, + "num_attention_pool_heads":4, + "block_kind": [ + ["rconv1","regular"], + ["rconv2","regular"], + ["rconv3","regular"], + ["rconv4","regular"], + ["rconv5","regular"], + ["rconv6","regulargpool"], + ["rconv7","regular"], + ["rconv8","regular"], + ["rconv9","regular"], + ["rconv10","regular"], + ["rconv11","regulargpool"], + ["rconv12","regular"], + ["rconv13","regular"], + ["rconv14","regular"], + ["rconv15","regular"], + ["rconv16","regulargpool"], + ["rconv17","regular"], + ["rconv18","regular"], + ["rconv19","regular"], + ["rconv20","regular"], + ["rconv21","regulargpool"], + ["rconv22","regular"], + ["rconv23","regular"], + ["rconv24","regular"], + ["rconv25","regular"], + ["rconv26","regulargpool"], + ["rconv27","regular"], + ["rconv28","regular"], + ["rconv29","regular"], + ["rconv30","regular"], + ["rconv31","regulargpool"], + ["rconv32","regular"], + ["rconv33","regular"], + ["rconv34","regular"], + ["rconv35","regular"], + ["rconv36","regulargpool"], + ["rconv37","regular"], + ["rconv38","regular"], + ["rconv39","regular"], + ["rconv40","regular"], + ], + "p1_num_channels":64, + "g1_num_channels":64, + "v1_num_channels":96, + "sbv2_num_channels":128, + "num_scorebeliefs":8, + "v2_size":144, } b60c320 = { - "version":14, - "norm_kind":"fixup", - "bnorm_epsilon": 1e-4, - "bnorm_running_avg_momentum": 0.001, - "initial_conv_1x1": False, - "trunk_num_channels":320, - "mid_num_channels":320, - "gpool_num_channels":96, - "use_attention_pool":False, - "num_attention_pool_heads":4, - "block_kind": [ - ["rconv1","regular"], - ["rconv2","regular"], - ["rconv3","regular"], - ["rconv4","regular"], - ["rconv5","regular"], - ["rconv6","regulargpool"], - ["rconv7","regular"], - ["rconv8","regular"], - ["rconv9","regular"], - ["rconv10","regular"], - ["rconv11","regulargpool"], - ["rconv12","regular"], - ["rconv13","regular"], - ["rconv14","regular"], - ["rconv15","regular"], - ["rconv16","regulargpool"], - ["rconv17","regular"], - ["rconv18","regular"], - ["rconv19","regular"], - ["rconv20","regular"], - ["rconv21","regulargpool"], - ["rconv22","regular"], - ["rconv23","regular"], - ["rconv24","regular"], - ["rconv25","regular"], - ["rconv26","regulargpool"], - ["rconv27","regular"], - ["rconv28","regular"], - ["rconv29","regular"], - ["rconv30","regular"], - ["rconv31","regulargpool"], - ["rconv32","regular"], - ["rconv33","regular"], - ["rconv34","regular"], - ["rconv35","regular"], - ["rconv36","regulargpool"], - ["rconv37","regular"], - ["rconv38","regular"], - ["rconv39","regular"], - ["rconv40","regular"], - ["rconv41","regulargpool"], - ["rconv42","regular"], - ["rconv43","regular"], - ["rconv44","regular"], - ["rconv45","regular"], - ["rconv46","regulargpool"], - ["rconv47","regular"], - ["rconv48","regular"], - ["rconv49","regular"], - ["rconv50","regular"], - ["rconv51","regulargpool"], - ["rconv52","regular"], - ["rconv53","regular"], - ["rconv54","regular"], - ["rconv55","regular"], - ["rconv56","regulargpool"], - ["rconv57","regular"], - ["rconv58","regular"], - ["rconv59","regular"], - ["rconv60","regular"], - ], - "p1_num_channels":64, - "g1_num_channels":64, - "v1_num_channels":96, - "sbv2_num_channels":128, - "num_scorebeliefs":8, - "v2_size":144, + "version":14, + "norm_kind":"fixup", + "bnorm_epsilon": 1e-4, + "bnorm_running_avg_momentum": 0.001, + 
"initial_conv_1x1": False, + "trunk_num_channels":320, + "mid_num_channels":320, + "gpool_num_channels":96, + "use_attention_pool":False, + "num_attention_pool_heads":4, + "block_kind": [ + ["rconv1","regular"], + ["rconv2","regular"], + ["rconv3","regular"], + ["rconv4","regular"], + ["rconv5","regular"], + ["rconv6","regulargpool"], + ["rconv7","regular"], + ["rconv8","regular"], + ["rconv9","regular"], + ["rconv10","regular"], + ["rconv11","regulargpool"], + ["rconv12","regular"], + ["rconv13","regular"], + ["rconv14","regular"], + ["rconv15","regular"], + ["rconv16","regulargpool"], + ["rconv17","regular"], + ["rconv18","regular"], + ["rconv19","regular"], + ["rconv20","regular"], + ["rconv21","regulargpool"], + ["rconv22","regular"], + ["rconv23","regular"], + ["rconv24","regular"], + ["rconv25","regular"], + ["rconv26","regulargpool"], + ["rconv27","regular"], + ["rconv28","regular"], + ["rconv29","regular"], + ["rconv30","regular"], + ["rconv31","regulargpool"], + ["rconv32","regular"], + ["rconv33","regular"], + ["rconv34","regular"], + ["rconv35","regular"], + ["rconv36","regulargpool"], + ["rconv37","regular"], + ["rconv38","regular"], + ["rconv39","regular"], + ["rconv40","regular"], + ["rconv41","regulargpool"], + ["rconv42","regular"], + ["rconv43","regular"], + ["rconv44","regular"], + ["rconv45","regular"], + ["rconv46","regulargpool"], + ["rconv47","regular"], + ["rconv48","regular"], + ["rconv49","regular"], + ["rconv50","regular"], + ["rconv51","regulargpool"], + ["rconv52","regular"], + ["rconv53","regular"], + ["rconv54","regular"], + ["rconv55","regular"], + ["rconv56","regulargpool"], + ["rconv57","regular"], + ["rconv58","regular"], + ["rconv59","regular"], + ["rconv60","regular"], + ], + "p1_num_channels":64, + "g1_num_channels":64, + "v1_num_channels":96, + "sbv2_num_channels":128, + "num_scorebeliefs":8, + "v2_size":144, } b41c384nbt = { - "version":14, - "norm_kind":"fixup", - "bnorm_epsilon": 1e-4, - "bnorm_running_avg_momentum": 0.001, - "initial_conv_1x1": False, - "trunk_num_channels":384, - "mid_num_channels":192, - "gpool_num_channels":64, - "use_attention_pool":False, - "num_attention_pool_heads":4, - "block_kind": [ - ["rconv1","bottlenest2"], - ["rconv2","bottlenest2"], - ["rconv3","bottlenest2gpool"], - ["rconv4","bottlenest2"], - ["rconv5","bottlenest2"], - ["rconv6","bottlenest2gpool"], - ["rconv7","bottlenest2"], - ["rconv8","bottlenest2"], - ["rconv9","bottlenest2gpool"], - ["rconv10","bottlenest2"], - ["rconv11","bottlenest2"], - ["rconv12","bottlenest2gpool"], - ["rconv13","bottlenest2"], - ["rconv14","bottlenest2"], - ["rconv15","bottlenest2gpool"], - ["rconv16","bottlenest2"], - ["rconv17","bottlenest2"], - ["rconv18","bottlenest2gpool"], - ["rconv19","bottlenest2"], - ["rconv20","bottlenest2"], - ["rconv21","bottlenest2gpool"], - ["rconv22","bottlenest2"], - ["rconv23","bottlenest2"], - ["rconv24","bottlenest2gpool"], - ["rconv25","bottlenest2"], - ["rconv26","bottlenest2"], - ["rconv27","bottlenest2gpool"], - ["rconv28","bottlenest2"], - ["rconv29","bottlenest2"], - ["rconv30","bottlenest2gpool"], - ["rconv31","bottlenest2"], - ["rconv32","bottlenest2"], - ["rconv33","bottlenest2gpool"], - ["rconv34","bottlenest2"], - ["rconv35","bottlenest2"], - ["rconv36","bottlenest2gpool"], - ["rconv37","bottlenest2"], - ["rconv38","bottlenest2"], - ["rconv39","bottlenest2gpool"], - ["rconv40","bottlenest2"], - ["rconv41","bottlenest2"], - ], - "p1_num_channels":64, - "g1_num_channels":64, - "v1_num_channels":96, - "sbv2_num_channels":128, - 
"num_scorebeliefs":8, - "v2_size":144, + "version":14, + "norm_kind":"fixup", + "bnorm_epsilon": 1e-4, + "bnorm_running_avg_momentum": 0.001, + "initial_conv_1x1": False, + "trunk_num_channels":384, + "mid_num_channels":192, + "gpool_num_channels":64, + "use_attention_pool":False, + "num_attention_pool_heads":4, + "block_kind": [ + ["rconv1","bottlenest2"], + ["rconv2","bottlenest2"], + ["rconv3","bottlenest2gpool"], + ["rconv4","bottlenest2"], + ["rconv5","bottlenest2"], + ["rconv6","bottlenest2gpool"], + ["rconv7","bottlenest2"], + ["rconv8","bottlenest2"], + ["rconv9","bottlenest2gpool"], + ["rconv10","bottlenest2"], + ["rconv11","bottlenest2"], + ["rconv12","bottlenest2gpool"], + ["rconv13","bottlenest2"], + ["rconv14","bottlenest2"], + ["rconv15","bottlenest2gpool"], + ["rconv16","bottlenest2"], + ["rconv17","bottlenest2"], + ["rconv18","bottlenest2gpool"], + ["rconv19","bottlenest2"], + ["rconv20","bottlenest2"], + ["rconv21","bottlenest2gpool"], + ["rconv22","bottlenest2"], + ["rconv23","bottlenest2"], + ["rconv24","bottlenest2gpool"], + ["rconv25","bottlenest2"], + ["rconv26","bottlenest2"], + ["rconv27","bottlenest2gpool"], + ["rconv28","bottlenest2"], + ["rconv29","bottlenest2"], + ["rconv30","bottlenest2gpool"], + ["rconv31","bottlenest2"], + ["rconv32","bottlenest2"], + ["rconv33","bottlenest2gpool"], + ["rconv34","bottlenest2"], + ["rconv35","bottlenest2"], + ["rconv36","bottlenest2gpool"], + ["rconv37","bottlenest2"], + ["rconv38","bottlenest2"], + ["rconv39","bottlenest2gpool"], + ["rconv40","bottlenest2"], + ["rconv41","bottlenest2"], + ], + "p1_num_channels":64, + "g1_num_channels":64, + "v1_num_channels":96, + "sbv2_num_channels":128, + "num_scorebeliefs":8, + "v2_size":144, } b32c448nbt = { - "version":14, - "norm_kind":"fixup", - "bnorm_epsilon": 1e-4, - "bnorm_running_avg_momentum": 0.001, - "initial_conv_1x1": False, - "trunk_num_channels":448, - "mid_num_channels":224, - "gpool_num_channels":64, - "use_attention_pool":False, - "num_attention_pool_heads":4, - "block_kind": [ - ["rconv1","bottlenest2"], - ["rconv2","bottlenest2"], - ["rconv3","bottlenest2gpool"], - ["rconv4","bottlenest2"], - ["rconv5","bottlenest2"], - ["rconv6","bottlenest2gpool"], - ["rconv7","bottlenest2"], - ["rconv8","bottlenest2"], - ["rconv9","bottlenest2gpool"], - ["rconv10","bottlenest2"], - ["rconv11","bottlenest2"], - ["rconv12","bottlenest2gpool"], - ["rconv13","bottlenest2"], - ["rconv14","bottlenest2"], - ["rconv15","bottlenest2gpool"], - ["rconv16","bottlenest2"], - ["rconv17","bottlenest2"], - ["rconv18","bottlenest2gpool"], - ["rconv19","bottlenest2"], - ["rconv20","bottlenest2"], - ["rconv21","bottlenest2gpool"], - ["rconv22","bottlenest2"], - ["rconv23","bottlenest2"], - ["rconv24","bottlenest2gpool"], - ["rconv25","bottlenest2"], - ["rconv26","bottlenest2"], - ["rconv27","bottlenest2gpool"], - ["rconv28","bottlenest2"], - ["rconv29","bottlenest2"], - ["rconv30","bottlenest2gpool"], - ["rconv31","bottlenest2"], - ["rconv32","bottlenest2"], - ], - "p1_num_channels":64, - "g1_num_channels":64, - "v1_num_channels":96, - "sbv2_num_channels":128, - "num_scorebeliefs":8, - "v2_size":144, + "version":14, + "norm_kind":"fixup", + "bnorm_epsilon": 1e-4, + "bnorm_running_avg_momentum": 0.001, + "initial_conv_1x1": False, + "trunk_num_channels":448, + "mid_num_channels":224, + "gpool_num_channels":64, + "use_attention_pool":False, + "num_attention_pool_heads":4, + "block_kind": [ + ["rconv1","bottlenest2"], + ["rconv2","bottlenest2"], + ["rconv3","bottlenest2gpool"], + 
["rconv4","bottlenest2"], + ["rconv5","bottlenest2"], + ["rconv6","bottlenest2gpool"], + ["rconv7","bottlenest2"], + ["rconv8","bottlenest2"], + ["rconv9","bottlenest2gpool"], + ["rconv10","bottlenest2"], + ["rconv11","bottlenest2"], + ["rconv12","bottlenest2gpool"], + ["rconv13","bottlenest2"], + ["rconv14","bottlenest2"], + ["rconv15","bottlenest2gpool"], + ["rconv16","bottlenest2"], + ["rconv17","bottlenest2"], + ["rconv18","bottlenest2gpool"], + ["rconv19","bottlenest2"], + ["rconv20","bottlenest2"], + ["rconv21","bottlenest2gpool"], + ["rconv22","bottlenest2"], + ["rconv23","bottlenest2"], + ["rconv24","bottlenest2gpool"], + ["rconv25","bottlenest2"], + ["rconv26","bottlenest2"], + ["rconv27","bottlenest2gpool"], + ["rconv28","bottlenest2"], + ["rconv29","bottlenest2"], + ["rconv30","bottlenest2gpool"], + ["rconv31","bottlenest2"], + ["rconv32","bottlenest2"], + ], + "p1_num_channels":64, + "g1_num_channels":64, + "v1_num_channels":96, + "sbv2_num_channels":128, + "num_scorebeliefs":8, + "v2_size":144, } b28c512nbt = { - "version":14, - "norm_kind":"fixup", - "bnorm_epsilon": 1e-4, - "bnorm_running_avg_momentum": 0.001, - "initial_conv_1x1": False, - "trunk_num_channels":512, - "mid_num_channels":256, - "gpool_num_channels":64, - "use_attention_pool":False, - "num_attention_pool_heads":4, - "block_kind": [ - ["rconv1","bottlenest2"], - ["rconv2","bottlenest2"], - ["rconv3","bottlenest2gpool"], - ["rconv4","bottlenest2"], - ["rconv5","bottlenest2"], - ["rconv6","bottlenest2gpool"], - ["rconv7","bottlenest2"], - ["rconv8","bottlenest2"], - ["rconv9","bottlenest2gpool"], - ["rconv10","bottlenest2"], - ["rconv11","bottlenest2"], - ["rconv12","bottlenest2gpool"], - ["rconv13","bottlenest2"], - ["rconv14","bottlenest2"], - ["rconv15","bottlenest2gpool"], - ["rconv16","bottlenest2"], - ["rconv17","bottlenest2"], - ["rconv18","bottlenest2gpool"], - ["rconv19","bottlenest2"], - ["rconv20","bottlenest2"], - ["rconv21","bottlenest2gpool"], - ["rconv22","bottlenest2"], - ["rconv23","bottlenest2"], - ["rconv24","bottlenest2gpool"], - ["rconv25","bottlenest2"], - ["rconv26","bottlenest2"], - ["rconv27","bottlenest2gpool"], - ["rconv28","bottlenest2"], - ], - "p1_num_channels":64, - "g1_num_channels":64, - "v1_num_channels":96, - "sbv2_num_channels":128, - "num_scorebeliefs":8, - "v2_size":144, + "version":14, + "norm_kind":"fixup", + "bnorm_epsilon": 1e-4, + "bnorm_running_avg_momentum": 0.001, + "initial_conv_1x1": False, + "trunk_num_channels":512, + "mid_num_channels":256, + "gpool_num_channels":64, + "use_attention_pool":False, + "num_attention_pool_heads":4, + "block_kind": [ + ["rconv1","bottlenest2"], + ["rconv2","bottlenest2"], + ["rconv3","bottlenest2gpool"], + ["rconv4","bottlenest2"], + ["rconv5","bottlenest2"], + ["rconv6","bottlenest2gpool"], + ["rconv7","bottlenest2"], + ["rconv8","bottlenest2"], + ["rconv9","bottlenest2gpool"], + ["rconv10","bottlenest2"], + ["rconv11","bottlenest2"], + ["rconv12","bottlenest2gpool"], + ["rconv13","bottlenest2"], + ["rconv14","bottlenest2"], + ["rconv15","bottlenest2gpool"], + ["rconv16","bottlenest2"], + ["rconv17","bottlenest2"], + ["rconv18","bottlenest2gpool"], + ["rconv19","bottlenest2"], + ["rconv20","bottlenest2"], + ["rconv21","bottlenest2gpool"], + ["rconv22","bottlenest2"], + ["rconv23","bottlenest2"], + ["rconv24","bottlenest2gpool"], + ["rconv25","bottlenest2"], + ["rconv26","bottlenest2"], + ["rconv27","bottlenest2gpool"], + ["rconv28","bottlenest2"], + ], + "p1_num_channels":64, + "g1_num_channels":64, + "v1_num_channels":96, + 
"sbv2_num_channels":128, + "num_scorebeliefs":8, + "v2_size":144, } b20c640nbt = { - "version":14, - "norm_kind":"fixup", - "bnorm_epsilon": 1e-4, - "bnorm_running_avg_momentum": 0.001, - "initial_conv_1x1": False, - "trunk_num_channels":640, - "mid_num_channels":320, - "gpool_num_channels":96, - "use_attention_pool":False, - "num_attention_pool_heads":4, - "block_kind": [ - ["rconv1","bottlenest2"], - ["rconv2","bottlenest2"], - ["rconv3","bottlenest2gpool"], - ["rconv4","bottlenest2"], - ["rconv5","bottlenest2"], - ["rconv6","bottlenest2gpool"], - ["rconv7","bottlenest2"], - ["rconv8","bottlenest2"], - ["rconv9","bottlenest2gpool"], - ["rconv10","bottlenest2"], - ["rconv11","bottlenest2"], - ["rconv12","bottlenest2gpool"], - ["rconv13","bottlenest2"], - ["rconv14","bottlenest2"], - ["rconv15","bottlenest2gpool"], - ["rconv16","bottlenest2"], - ["rconv17","bottlenest2"], - ["rconv18","bottlenest2gpool"], - ["rconv19","bottlenest2"], - ["rconv20","bottlenest2"], - ], - "p1_num_channels":64, - "g1_num_channels":64, - "v1_num_channels":96, - "sbv2_num_channels":128, - "num_scorebeliefs":8, - "v2_size":144, + "version":14, + "norm_kind":"fixup", + "bnorm_epsilon": 1e-4, + "bnorm_running_avg_momentum": 0.001, + "initial_conv_1x1": False, + "trunk_num_channels":640, + "mid_num_channels":320, + "gpool_num_channels":96, + "use_attention_pool":False, + "num_attention_pool_heads":4, + "block_kind": [ + ["rconv1","bottlenest2"], + ["rconv2","bottlenest2"], + ["rconv3","bottlenest2gpool"], + ["rconv4","bottlenest2"], + ["rconv5","bottlenest2"], + ["rconv6","bottlenest2gpool"], + ["rconv7","bottlenest2"], + ["rconv8","bottlenest2"], + ["rconv9","bottlenest2gpool"], + ["rconv10","bottlenest2"], + ["rconv11","bottlenest2"], + ["rconv12","bottlenest2gpool"], + ["rconv13","bottlenest2"], + ["rconv14","bottlenest2"], + ["rconv15","bottlenest2gpool"], + ["rconv16","bottlenest2"], + ["rconv17","bottlenest2"], + ["rconv18","bottlenest2gpool"], + ["rconv19","bottlenest2"], + ["rconv20","bottlenest2"], + ], + "p1_num_channels":64, + "g1_num_channels":64, + "v1_num_channels":96, + "sbv2_num_channels":128, + "num_scorebeliefs":8, + "v2_size":144, } sandbox = { - "version":14, - "norm_kind":"fixup", - "bnorm_epsilon": 1e-4, - "bnorm_running_avg_momentum": 0.001, - "initial_conv_1x1": False, - "trunk_num_channels":256, - "mid_num_channels":256, - "gpool_num_channels":64, - "use_attention_pool":False, - "num_attention_pool_heads":4, - "block_kind": [ - ["rconv1","regular"], - ["rconv2","regular"], - ["rconv3","regular"], - ["rconv4","regular"], - ["rconv5","regular"], - ["rconv6","regular"], - ["rconv7","regulargpool"], - ["rconv8","regular"], - ["rconv9","regular"], - ["rconv10","regular"], - ["rconv11","regular"], - ["rconv12","regulargpool"], - ["rconv13","regular"], - ["rconv14","regular"], - ["rconv15","regular"], - ["rconv16","regular"], - ["rconv17","regulargpool"], - ["rconv18","regular"], - ["rconv19","regular"], - ["rconv20","regular"], - ], - "p1_num_channels":48, - "g1_num_channels":48, - "v1_num_channels":96, - "sbv2_num_channels":96, - "num_scorebeliefs":8, - "v2_size":112, + "version":14, + "norm_kind":"fixup", + "bnorm_epsilon": 1e-4, + "bnorm_running_avg_momentum": 0.001, + "initial_conv_1x1": False, + "trunk_num_channels":256, + "mid_num_channels":256, + "gpool_num_channels":64, + "use_attention_pool":False, + "num_attention_pool_heads":4, + "block_kind": [ + ["rconv1","regular"], + ["rconv2","regular"], + ["rconv3","regular"], + ["rconv4","regular"], + ["rconv5","regular"], + ["rconv6","regular"], 
+ ["rconv7","regulargpool"], + ["rconv8","regular"], + ["rconv9","regular"], + ["rconv10","regular"], + ["rconv11","regular"], + ["rconv12","regulargpool"], + ["rconv13","regular"], + ["rconv14","regular"], + ["rconv15","regular"], + ["rconv16","regular"], + ["rconv17","regulargpool"], + ["rconv18","regular"], + ["rconv19","regular"], + ["rconv20","regular"], + ], + "p1_num_channels":48, + "g1_num_channels":48, + "v1_num_channels":96, + "sbv2_num_channels":96, + "num_scorebeliefs":8, + "v2_size":112, } base_config_of_name = { - # Micro-sized model configs - "b1c6nbt": b1c6nbt, - "b2c16": b2c16, - "b4c32": b4c32, - "b6c96": b6c96, - - # Small model configs, not too different in inference cost from b10c128 - "b10c128": b10c128, - "b5c192nbt": b5c192nbt, - - # Medium model configs, not too different in inference cost from b15c192 - "b15c192": b15c192, - - # Roughly AlphaZero-sized, not too different in inference cost from b20c256 - "b20c256": b20c256, - "b30c256bt": b30c256bt, - "b24c320bt": b24c320bt, - "b20c384bt": b20c384bt, - "b23c256lbt": b23c256lbt, - "b18c320lbt": b18c320lbt, - "b15c384lbt": b15c384lbt, - "b10c512lbt": b10c512lbt, - "b12c384llbt": b12c384llbt, - "b10c384nbt": b10c384nbt, # Recommended best config for this cost - "b10c480nb3t": b10c480nb3t, - "b7c384lnbt": b7c384lnbt, - "b5c512nnbt": b5c512nnbt, - "b20c384lbt": b20c384lbt, - - # Roughly AlphaGoZero-sized, not too different in inference cost from b40c256 - "b30c320": b30c320, - "b40c256": b40c256, - "b18c384nbt": b18c384nbt, # Recommended best config for this cost - "b14c448nbt": b14c448nbt, - - # Large model configs, not too different in inference cost from b60c320 - "b40c384": b40c384, - "b60c320": b60c320, - "b41c384nbt": b41c384nbt, - "b32c448nbt": b32c448nbt, - "b28c512nbt": b28c512nbt, # Recommended best config for this cost - "b20c640nbt": b20c640nbt, - - "sandbox": sandbox, + # Micro-sized model configs + "b1c6nbt": b1c6nbt, + "b2c16": b2c16, + "b4c32": b4c32, + "b6c96": b6c96, + + # Small model configs, not too different in inference cost from b10c128 + "b10c128": b10c128, + "b5c192nbt": b5c192nbt, + + # Medium model configs, not too different in inference cost from b15c192 + "b15c192": b15c192, + + # Roughly AlphaZero-sized, not too different in inference cost from b20c256 + "b20c256": b20c256, + "b30c256bt": b30c256bt, + "b24c320bt": b24c320bt, + "b20c384bt": b20c384bt, + "b23c256lbt": b23c256lbt, + "b18c320lbt": b18c320lbt, + "b15c384lbt": b15c384lbt, + "b10c512lbt": b10c512lbt, + "b12c384llbt": b12c384llbt, + "b10c384nbt": b10c384nbt, # Recommended best config for this cost + "b10c480nb3t": b10c480nb3t, + "b7c384lnbt": b7c384lnbt, + "b5c512nnbt": b5c512nnbt, + "b20c384lbt": b20c384lbt, + + # Roughly AlphaGoZero-sized, not too different in inference cost from b40c256 + "b30c320": b30c320, + "b40c256": b40c256, + "b18c384nbt": b18c384nbt, # Recommended best config for this cost + "b14c448nbt": b14c448nbt, + + # Large model configs, not too different in inference cost from b60c320 + "b40c384": b40c384, + "b60c320": b60c320, + "b41c384nbt": b41c384nbt, + "b32c448nbt": b32c448nbt, + "b28c512nbt": b28c512nbt, # Recommended best config for this cost + "b20c640nbt": b20c640nbt, + + "sandbox": sandbox, } config_of_name = {} diff --git a/python/play.py b/python/play.py index 26512c6d9..8feb04d33 100644 --- a/python/play.py +++ b/python/play.py @@ -44,11 +44,11 @@ logging.root.handlers = [] logging.basicConfig( - level=logging.INFO, - format="%(message)s", - handlers=[ - logging.StreamHandler(stream=sys.stderr), - ], + 
level=logging.INFO, + format="%(message)s", + handlers=[ + logging.StreamHandler(stream=sys.stderr), + ], ) np.set_printoptions(linewidth=150) torch.set_printoptions(precision=7,sci_mode=False,linewidth=100000,edgeitems=1000,threshold=1000000) @@ -94,26 +94,26 @@ def get_outputs(gs, rules): # model_outputs = model(apply_symmetry(batch["binaryInputNCHW"],symmetry),batch["globalInputNC"]) model_outputs = model( - torch.tensor(bin_input_data, dtype=torch.float32), - torch.tensor(global_input_data, dtype=torch.float32), + torch.tensor(bin_input_data, dtype=torch.float32), + torch.tensor(global_input_data, dtype=torch.float32), ) outputs = model.postprocess_output(model_outputs) ( - policy_logits, # N, num_policy_outputs, move - value_logits, # N, {win,loss,noresult} - td_value_logits, # N, {long, mid, short} {win,loss,noresult} - pred_td_score, # N, {long, mid, short} - ownership_pretanh, # N, 1, y, x - pred_scoring, # N, 1, y, x - futurepos_pretanh, # N, 2, y, x - seki_logits, # N, 4, y, x - pred_scoremean, # N - pred_scorestdev, # N - pred_lead, # N - pred_variance_time, # N - pred_shortterm_value_error, # N - pred_shortterm_score_error, # N - scorebelief_logits, # N, 2 * (self.pos_len*self.pos_len + EXTRA_SCORE_DISTR_RADIUS) + policy_logits, # N, num_policy_outputs, move + value_logits, # N, {win,loss,noresult} + td_value_logits, # N, {long, mid, short} {win,loss,noresult} + pred_td_score, # N, {long, mid, short} + ownership_pretanh, # N, 1, y, x + pred_scoring, # N, 1, y, x + futurepos_pretanh, # N, 2, y, x + seki_logits, # N, 4, y, x + pred_scoremean, # N + pred_scorestdev, # N + pred_lead, # N + pred_variance_time, # N + pred_shortterm_value_error, # N + pred_shortterm_score_error, # N + scorebelief_logits, # N, 2 * (self.pos_len*self.pos_len + EXTRA_SCORE_DISTR_RADIUS) ) = (x[0] for x in outputs[0]) # N = 0 policy0 = torch.nn.functional.softmax(policy_logits[0,:],dim=0).cpu().numpy() @@ -242,34 +242,34 @@ def get_outputs(gs, rules): i += 1 return { - "policy0": policy0, - "policy1": policy1, - "moves_and_probs0": moves_and_probs0, - "moves_and_probs1": moves_and_probs1, - "value": value, - "td_value": td_value, - "td_value2": td_value2, - "td_value3": td_value3, - "scoremean": scoremean, - "td_score": td_score, - "scorestdev": scorestdev, - "lead": lead, - "vtime": vtime, - "estv": estv, - "ests": ests, - "ownership": ownership, - "ownership_by_loc": ownership_by_loc, - "scoring": scoring, - "scoring_by_loc": scoring_by_loc, - "futurepos": futurepos, - "futurepos0_by_loc": futurepos0_by_loc, - "futurepos1_by_loc": futurepos1_by_loc, - "seki": seki, - "seki_by_loc": seki_by_loc, - "seki2": seki2, - "seki_by_loc2": seki_by_loc2, - "scorebelief": scorebelief, - "genmove_result": genmove_result + "policy0": policy0, + "policy1": policy1, + "moves_and_probs0": moves_and_probs0, + "moves_and_probs1": moves_and_probs1, + "value": value, + "td_value": td_value, + "td_value2": td_value2, + "td_value3": td_value3, + "scoremean": scoremean, + "td_score": td_score, + "scorestdev": scorestdev, + "lead": lead, + "vtime": vtime, + "estv": estv, + "ests": ests, + "ownership": ownership, + "ownership_by_loc": ownership_by_loc, + "scoring": scoring, + "scoring_by_loc": scoring_by_loc, + "futurepos": futurepos, + "futurepos0_by_loc": futurepos0_by_loc, + "futurepos1_by_loc": futurepos1_by_loc, + "seki": seki, + "seki_by_loc": seki_by_loc, + "seki2": seki2, + "seki_by_loc2": seki_by_loc2, + "scorebelief": scorebelief, + "genmove_result": genmove_result } def get_input_feature(gs, rules, feature_idx): @@ 
-341,17 +341,17 @@ def curve(p): value = loose_cap(value) interpoints = [ - (-1.00,(0,0,0)), - (-0.85,(15,0,50)), - (-0.60,(60,0,160)), - (-0.35,(0,0,255)), - (-0.15,(0,100,255)), - ( 0.00,(115,115,115)), - ( 0.15,(250,45,40)), - ( 0.25,(255,55,0)), - ( 0.60,(255,255,20)), - ( 0.85,(255,255,128)), - ( 1.00,(255,255,255)), + (-1.00,(0,0,0)), + (-0.85,(15,0,50)), + (-0.60,(60,0,160)), + (-0.35,(0,0,255)), + (-0.15,(0,100,255)), + ( 0.00,(115,115,115)), + ( 0.15,(250,45,40)), + ( 0.25,(255,55,0)), + ( 0.60,(255,255,20)), + ( 0.85,(255,255,128)), + ( 1.00,(255,255,255)), ] def lerp(p,y0,y1): @@ -450,11 +450,11 @@ def lerp(p,x0,x1,y0,y1): lead = value_and_score_from["lead"] vtime = value_and_score_from["vtime"] texts_value.append("wv %.2fc nr %.2f%% ws %.1f wl %.1f vt %.1f" % ( - 100*(value[0]-value[1] if board.pla == Board.WHITE else value[1] - value[0]), - 100*value[2], - (score if board.pla == Board.WHITE else -score), - (lead if board.pla == Board.WHITE else -lead), - vtime + 100*(value[0]-value[1] if board.pla == Board.WHITE else value[1] - value[0]), + 100*value[2], + (score if board.pla == Board.WHITE else -score), + (lead if board.pla == Board.WHITE else -lead), + vtime )) gfx_commands.append("TEXT " + ", ".join(texts_value + texts_rev + texts)) @@ -550,59 +550,59 @@ def str_coord(loc,board): # https://opensource.org/licenses/MIT known_commands = [ - 'boardsize', - 'clear_board', - 'showboard', - 'komi', - 'play', - 'genmove', - 'quit', - 'name', - 'version', - 'known_command', - 'list_commands', - 'protocol_version', - 'gogui-analyze_commands', - 'setrule', - 'policy', - 'policy1', - 'logpolicy', - 'ownership', - 'scoring', - 'futurepos0', - 'futurepos1', - 'seki', - 'seki2', - 'scorebelief', - 'passalive', + 'boardsize', + 'clear_board', + 'showboard', + 'komi', + 'play', + 'genmove', + 'quit', + 'name', + 'version', + 'known_command', + 'list_commands', + 'protocol_version', + 'gogui-analyze_commands', + 'setrule', + 'policy', + 'policy1', + 'logpolicy', + 'ownership', + 'scoring', + 'futurepos0', + 'futurepos1', + 'seki', + 'seki2', + 'scorebelief', + 'passalive', ] known_analyze_commands = [ - 'gfx/Policy/policy', - 'gfx/Policy1/policy1', - 'gfx/LogPolicy/logpolicy', - 'gfx/Ownership/ownership', - 'gfx/Scoring/scoring', - 'gfx/FuturePos0/futurepos0', - 'gfx/FuturePos1/futurepos1', - 'gfx/Seki/seki', - 'gfx/Seki2/seki2', - 'gfx/ScoreBelief/scorebelief', - 'gfx/PassAlive/passalive', + 'gfx/Policy/policy', + 'gfx/Policy1/policy1', + 'gfx/LogPolicy/logpolicy', + 'gfx/Ownership/ownership', + 'gfx/Scoring/scoring', + 'gfx/FuturePos0/futurepos0', + 'gfx/FuturePos1/futurepos1', + 'gfx/Seki/seki', + 'gfx/Seki2/seki2', + 'gfx/ScoreBelief/scorebelief', + 'gfx/PassAlive/passalive', ] board_size = 19 gs = GameState(board_size) rules = { - "koRule": "KO_POSITIONAL", - "scoringRule": "SCORING_AREA", - "taxRule": "TAX_NONE", - "multiStoneSuicideLegal": True, - "hasButton": False, - "encorePhase": 0, - "passWouldEndPhase": False, - "whiteKomi": 7.5, - "asymPowersOfTwo": 0.0, + "koRule": "KO_POSITIONAL", + "scoringRule": "SCORING_AREA", + "taxRule": "TAX_NONE", + "multiStoneSuicideLegal": True, + "hasButton": False, + "encorePhase": 0, + "passWouldEndPhase": False, + "whiteKomi": 7.5, + "asymPowersOfTwo": 0.0, } diff --git a/python/save_model_for_export_manual.py b/python/save_model_for_export_manual.py index fa75afe6c..b0d670a39 100644 --- a/python/save_model_for_export_manual.py +++ b/python/save_model_for_export_manual.py @@ -66,12 +66,12 @@ def main(args): logging.root.handlers = [] 
logging.basicConfig( - level=logging.INFO, - format="%(message)s", - handlers=[ - logging.FileHandler(os.path.join(traindir,f"save_model_for_export_manual.log"), mode="a"), - logging.StreamHandler() - ], + level=logging.INFO, + format="%(message)s", + handlers=[ + logging.FileHandler(os.path.join(traindir,f"save_model_for_export_manual.log"), mode="a"), + logging.StreamHandler() + ], ) np.set_printoptions(linewidth=150) @@ -140,9 +140,9 @@ def load(): # Export a model for testing, unless somehow it already exists modelname = "%s-s%d-d%d" % ( - exportprefix, - train_state["global_step_samples"], - train_state["total_num_data_rows"], + exportprefix, + train_state["global_step_samples"], + train_state["total_num_data_rows"], ) savepath = os.path.join(exportdir,modelname) savepathtmp = os.path.join(exportdir,modelname+".tmp") diff --git a/python/shuffle.py b/python/shuffle.py index f50d42b9b..377732efb 100755 --- a/python/shuffle.py +++ b/python/shuffle.py @@ -19,12 +19,12 @@ import numpy as np keys = [ - "binaryInputNCHWPacked", - "globalInputNC", - "policyTargetsNCMove", - "globalTargetsNC", - "scoreDistrN", - "valueTargetsNCHW" + "binaryInputNCHWPacked", + "globalInputNC", + "policyTargetsNCMove", + "globalTargetsNC", + "scoreDistrN", + "valueTargetsNCHW" ] def is_temp_npz_like(filename): @@ -109,7 +109,7 @@ def shardify(input_idx, input_file_group, num_out_files, out_tmp_dirs, keep_prob num_rows_to_keep = min(num_rows_to_keep,int(round(num_rows_to_keep * keep_prob))) [binaryInputNCHWPacked,globalInputNC,policyTargetsNCMove,globalTargetsNC,scoreDistrN,valueTargetsNCHW] = ( - joint_shuffle_take_first_n(num_rows_to_keep,[binaryInputNCHWPacked,globalInputNC,policyTargetsNCMove,globalTargetsNC,scoreDistrN,valueTargetsNCHW]) + joint_shuffle_take_first_n(num_rows_to_keep,[binaryInputNCHWPacked,globalInputNC,policyTargetsNCMove,globalTargetsNC,scoreDistrN,valueTargetsNCHW]) ) assert(binaryInputNCHWPacked.shape[0] == num_rows_to_keep) @@ -131,13 +131,13 @@ def shardify(input_idx, input_file_group, num_out_files, out_tmp_dirs, keep_prob start = countsums[out_idx]-counts[out_idx] stop = countsums[out_idx] np.savez_compressed( - os.path.join(out_tmp_dirs[out_idx], str(input_idx) + ".npz"), - binaryInputNCHWPacked = binaryInputNCHWPacked[start:stop], - globalInputNC = globalInputNC[start:stop], - policyTargetsNCMove = policyTargetsNCMove[start:stop], - globalTargetsNC = globalTargetsNC[start:stop], - scoreDistrN = scoreDistrN[start:stop], - valueTargetsNCHW = valueTargetsNCHW[start:stop] + os.path.join(out_tmp_dirs[out_idx], str(input_idx) + ".npz"), + binaryInputNCHWPacked = binaryInputNCHWPacked[start:stop], + globalInputNC = globalInputNC[start:stop], + policyTargetsNCMove = policyTargetsNCMove[start:stop], + globalTargetsNC = globalTargetsNC[start:stop], + scoreDistrN = scoreDistrN[start:stop], + valueTargetsNCHW = valueTargetsNCHW[start:stop] ) return num_files_not_found @@ -200,7 +200,7 @@ def merge_shards(filename, num_shards_to_merge, out_tmp_dir, batch_size, ensure_ assert(valueTargetsNCHW.shape[0] == num_rows) [binaryInputNCHWPacked,globalInputNC,policyTargetsNCMove,globalTargetsNC,scoreDistrN,valueTargetsNCHW] = ( - joint_shuffle_take_first_n(num_rows,[binaryInputNCHWPacked,globalInputNC,policyTargetsNCMove,globalTargetsNC,scoreDistrN,valueTargetsNCHW]) + joint_shuffle_take_first_n(num_rows,[binaryInputNCHWPacked,globalInputNC,policyTargetsNCMove,globalTargetsNC,scoreDistrN,valueTargetsNCHW]) ) assert(binaryInputNCHWPacked.shape[0] == num_rows) @@ -218,13 +218,13 @@ def 
merge_shards(filename, num_shards_to_merge, out_tmp_dir, batch_size, ensure_ start = 0 stop = num_batches*batch_size np.savez_compressed( - filename, - binaryInputNCHWPacked = binaryInputNCHWPacked[start:stop], - globalInputNC = globalInputNC[start:stop], - policyTargetsNCMove = policyTargetsNCMove[start:stop], - globalTargetsNC = globalTargetsNC[start:stop], - scoreDistrN = scoreDistrN[start:stop], - valueTargetsNCHW = valueTargetsNCHW[start:stop] + filename, + binaryInputNCHWPacked = binaryInputNCHWPacked[start:stop], + globalInputNC = globalInputNC[start:stop], + policyTargetsNCMove = policyTargetsNCMove[start:stop], + globalTargetsNC = globalTargetsNC[start:stop], + scoreDistrN = scoreDistrN[start:stop], + valueTargetsNCHW = valueTargetsNCHW[start:stop] ) else: assert False, "No longer supports outputting tensorflow data" @@ -670,13 +670,13 @@ def clean_tmp_dirs(): with multiprocessing.Pool(num_processes) as pool: with TimeStuff("Sharding"): shard_results = pool.starmap(shardify, [ - (input_idx, desired_input_file_groups[input_idx], num_out_files, out_tmp_dirs, keep_prob) for input_idx in range(len(desired_input_file_groups)) + (input_idx, desired_input_file_groups[input_idx], num_out_files, out_tmp_dirs, keep_prob) for input_idx in range(len(desired_input_file_groups)) ]) with TimeStuff("Merging"): num_shards_to_merge = len(desired_input_file_groups) merge_results = pool.starmap(merge_shards, [ - (out_files[idx],num_shards_to_merge,out_tmp_dirs[idx],batch_size,ensure_batch_multiple,output_npz) for idx in range(len(out_files)) + (out_files[idx],num_shards_to_merge,out_tmp_dirs[idx],batch_size,ensure_batch_multiple,output_npz) for idx in range(len(out_files)) ]) print("Number of rows by output file:",flush=True) print(list(zip(out_files,merge_results)),flush=True) @@ -685,7 +685,7 @@ def clean_tmp_dirs(): clean_tmp_dirs() dump_value = { - "range": (min_start_row, max_end_row) + "range": (min_start_row, max_end_row) } with open(out_dir + ".json", 'w') as f: diff --git a/python/summarize_old_selfplay_files.py b/python/summarize_old_selfplay_files.py index 003a443b9..7590be5d2 100644 --- a/python/summarize_old_selfplay_files.py +++ b/python/summarize_old_selfplay_files.py @@ -124,8 +124,8 @@ def __exit__(self, exception_type, exception_val, trace): if "dir_mtime" not in summary_data_by_dirpath[dirpath]: filename_mtime_num_rowss = summary_data_by_dirpath[dirpath] summary_data_by_dirpath[dirpath] = { - "dir_mtime": os.path.getmtime(dirpath), - "filename_mtime_num_rowss": filename_mtime_num_rowss, + "dir_mtime": os.path.getmtime(dirpath), + "filename_mtime_num_rowss": filename_mtime_num_rowss, } dirs_to_handle = [] @@ -175,8 +175,8 @@ def __exit__(self, exception_type, exception_val, trace): (dirpath, filename_mtime_num_rowss, num_rows_this_dir) = result num_total_rows += num_rows_this_dir summary_data_by_dirpath[os.path.abspath(dirpath)] = { - "dir_mtime": os.path.getmtime(os.path.abspath(dirpath)), - "filename_mtime_num_rowss": filename_mtime_num_rowss, + "dir_mtime": os.path.getmtime(os.path.abspath(dirpath)), + "filename_mtime_num_rowss": filename_mtime_num_rowss, } if len(dirs_to_handle) == 0 and old_summary_file_to_assume_correct is not None and os.path.exists(old_summary_file_to_assume_correct): diff --git a/python/summarize_sgfs.py b/python/summarize_sgfs.py index ee5edf5d7..53e8fca92 100644 --- a/python/summarize_sgfs.py +++ b/python/summarize_sgfs.py @@ -216,11 +216,11 @@ def _estimate_elo(self) -> elo.EloInfo: win = record.win + 0.5 * record.draw winrate = win / total 
data.extend(elo.likelihood_of_games( - pla_black, - pla_white, - total, - winrate, - include_first_player_advantage=self._estimate_first_player_advantage + pla_black, + pla_white, + total, + winrate, + include_first_player_advantage=self._estimate_first_player_advantage )) for pla in pla_names: @@ -307,28 +307,28 @@ def _print_result_matrix(self, pla_names): """ parser = argparse.ArgumentParser(description=description) parser.add_argument( - "input-files-or-dirs", - help="sgf/sgfs files or directories of them", - nargs="+", + "input-files-or-dirs", + help="sgf/sgfs files or directories of them", + nargs="+", ) parser.add_argument( - "-recursive", - help="Recursively search subdirectories of input directories", - required=False, - action="store_true", + "-recursive", + help="Recursively search subdirectories of input directories", + required=False, + action="store_true", ) parser.add_argument( - "-elo-prior-games", - help="Prior for Bayes Elo calculation, using input as the prior number of games to stabilize the results", - required=False, - type=float, - default=2, + "-elo-prior-games", + help="Prior for Bayes Elo calculation, using input as the prior number of games to stabilize the results", + required=False, + type=float, + default=2, ) parser.add_argument( - "-estimate-first-player-advantage", - help="Attempt to estimate first player advantage instead of assuming fair game", - required=False, - action="store_true", + "-estimate-first-player-advantage", + help="Attempt to estimate first player advantage instead of assuming fair game", + required=False, + action="store_true", ) args = vars(parser.parse_args()) print(args) @@ -339,8 +339,8 @@ def _print_result_matrix(self, pla_names): estimate_first_player_advantage = args["estimate_first_player_advantage"] game_result_summary = GameResultSummary( - elo_prior_games=elo_prior_games, - estimate_first_player_advantage=estimate_first_player_advantage, + elo_prior_games=elo_prior_games, + estimate_first_player_advantage=estimate_first_player_advantage, ) for input_file_or_dir in input_files_or_dirs: game_result_summary.add_games(input_file_or_dir, recursive=recursive) diff --git a/python/test.py b/python/test.py index fa256ad06..9a3caf8fc 100644 --- a/python/test.py +++ b/python/test.py @@ -65,11 +65,11 @@ def main(args): logging.root.handlers = [] logging.basicConfig( - level=logging.INFO, - format="%(message)s", - handlers=[ - logging.StreamHandler(stream=sys.stdout) - ], + level=logging.INFO, + format="%(message)s", + handlers=[ + logging.StreamHandler(stream=sys.stdout) + ], ) np.set_printoptions(linewidth=150) @@ -173,14 +173,14 @@ def log_metrics(prefix, metric_sums, metric_weights, metrics, metrics_out): total_inference_time = 0.0 is_first_batch = True for batch in data_processing_pytorch.read_npz_training_data( - val_files, - batch_size, - world_size, - rank, - pos_len, - device, - randomize_symmetries=True, - model_config=model_config, + val_files, + batch_size, + world_size, + rank, + pos_len, + device, + randomize_symmetries=True, + model_config=model_config, ): if max_batches is not None and num_batches_tested >= max_batches: break @@ -199,15 +199,15 @@ def log_metrics(prefix, metric_sums, metric_weights, metrics, metrics_out): postprocessed = model.postprocess_output(model_outputs) metrics = metrics_obj.metrics_dict_batchwise( - model, - postprocessed, - batch, - is_training=False, - soft_policy_weight_scale=soft_policy_weight_scale, - value_loss_scale=value_loss_scale, - td_value_loss_scales=td_value_loss_scales, - 
main_loss_scale=1.0, - intermediate_loss_scale=None, + model, + postprocessed, + batch, + is_training=False, + soft_policy_weight_scale=soft_policy_weight_scale, + value_loss_scale=value_loss_scale, + td_value_loss_scales=td_value_loss_scales, + main_loss_scale=1.0, + intermediate_loss_scale=None, ) metrics = detensorify_metrics(metrics) diff --git a/python/train.py b/python/train.py index 77d5c3fb6..67bb10cac 100755 --- a/python/train.py +++ b/python/train.py @@ -204,20 +204,20 @@ def main(rank: int, world_size: int, args, multi_gpu_device_ids, readpipes, writ logging.root.handlers = [] if rank == 0: logging.basicConfig( - level=logging.INFO, - format="%(message)s", - handlers=[ - logging.FileHandler(os.path.join(traindir,f"train{rank}.log"), mode="a"), - logging.StreamHandler() - ], + level=logging.INFO, + format="%(message)s", + handlers=[ + logging.FileHandler(os.path.join(traindir,f"train{rank}.log"), mode="a"), + logging.StreamHandler() + ], ) else: logging.basicConfig( - level=logging.INFO, - format="%(message)s", - handlers=[ - logging.FileHandler(os.path.join(traindir,f"train{rank}.log"), mode="a"), - ], + level=logging.INFO, + format="%(message)s", + handlers=[ + logging.FileHandler(os.path.join(traindir,f"train{rank}.log"), mode="a"), + ], ) np.set_printoptions(linewidth=150) @@ -289,10 +289,10 @@ def get_weight_decay(raw_model, lr_scale, warmup_scale, train_state, running_met else: assert False elif ( - raw_model.get_norm_kind() == "bnorm" or - raw_model.get_norm_kind() == "brenorm" or - raw_model.get_norm_kind() == "fixbrenorm" or - raw_model.get_norm_kind() == "fixscaleonenorm" + raw_model.get_norm_kind() == "bnorm" or + raw_model.get_norm_kind() == "brenorm" or + raw_model.get_norm_kind() == "fixbrenorm" or + raw_model.get_norm_kind() == "fixscaleonenorm" ): if group_name == "normal" or group_name == "normal_gamma": adaptive_scale = 1.0 @@ -332,30 +332,30 @@ def get_param_groups(raw_model,train_state,running_metrics): raw_model.add_reg_dict(reg_dict) param_groups = [] param_groups.append({ - "params": reg_dict["normal"], - "weight_decay": get_weight_decay(raw_model, lr_scale, warmup_scale=1.0, train_state=train_state, running_metrics=running_metrics, group_name="normal"), - "group_name": "normal", + "params": reg_dict["normal"], + "weight_decay": get_weight_decay(raw_model, lr_scale, warmup_scale=1.0, train_state=train_state, running_metrics=running_metrics, group_name="normal"), + "group_name": "normal", }) if len(reg_dict["normal_gamma"]) > 0: param_groups.append({ - "params": reg_dict["normal_gamma"], - "weight_decay": get_weight_decay(raw_model, lr_scale, warmup_scale=1.0, train_state=train_state, running_metrics=running_metrics, group_name="normal_gamma"), - "group_name": "normal_gamma", + "params": reg_dict["normal_gamma"], + "weight_decay": get_weight_decay(raw_model, lr_scale, warmup_scale=1.0, train_state=train_state, running_metrics=running_metrics, group_name="normal_gamma"), + "group_name": "normal_gamma", }) param_groups.append({ - "params": reg_dict["output"], - "weight_decay": get_weight_decay(raw_model, lr_scale, warmup_scale=1.0, train_state=train_state, running_metrics=running_metrics, group_name="output"), - "group_name": "output", + "params": reg_dict["output"], + "weight_decay": get_weight_decay(raw_model, lr_scale, warmup_scale=1.0, train_state=train_state, running_metrics=running_metrics, group_name="output"), + "group_name": "output", }) param_groups.append({ - "params": reg_dict["noreg"], - "weight_decay": get_weight_decay(raw_model, lr_scale, 
warmup_scale=1.0, train_state=train_state, running_metrics=running_metrics, group_name="noreg"), - "group_name": "noreg", + "params": reg_dict["noreg"], + "weight_decay": get_weight_decay(raw_model, lr_scale, warmup_scale=1.0, train_state=train_state, running_metrics=running_metrics, group_name="noreg"), + "group_name": "noreg", }) param_groups.append({ - "params": reg_dict["output_noreg"], - "weight_decay": get_weight_decay(raw_model, lr_scale, warmup_scale=1.0, train_state=train_state, running_metrics=running_metrics, group_name="output_noreg"), - "group_name": "output_noreg", + "params": reg_dict["output_noreg"], + "weight_decay": get_weight_decay(raw_model, lr_scale, warmup_scale=1.0, train_state=train_state, running_metrics=running_metrics, group_name="output_noreg"), + "group_name": "output_noreg", }) num_params = len(list(raw_model.parameters())) num_reg_dict_params = len(reg_dict["normal"]) + len(reg_dict["normal_gamma"]) + len(reg_dict["output"]) + len(reg_dict["noreg"]) + len(reg_dict["output_noreg"]) @@ -618,12 +618,12 @@ def update_and_return_lr_and_wd(): param_group["lr"] = per_sample_lr * warmup_scale * group_scale param_group["weight_decay"] = get_weight_decay( - raw_model, - lr_scale, - warmup_scale=warmup_scale, - train_state=train_state, - running_metrics=running_metrics, - group_name=group_name, + raw_model, + lr_scale, + warmup_scale=warmup_scale, + train_state=train_state, + running_metrics=running_metrics, + group_name=group_name, ) if group_name == "normal": normal_weight_decay = param_group["weight_decay"] @@ -700,7 +700,7 @@ def maybe_reload_training_data(): if train_state["total_num_data_rows"] > train_state["train_bucket_level_at_row"]: new_row_count = train_state["total_num_data_rows"] - train_state["train_bucket_level_at_row"] logging.info("Advancing trainbucket row %.0f to %.0f, %.0f new rows" % ( - train_state["train_bucket_level_at_row"], train_state["total_num_data_rows"], new_row_count + train_state["train_bucket_level_at_row"], train_state["total_num_data_rows"], new_row_count )) train_state["train_bucket_level_at_row"] = train_state["total_num_data_rows"] logging.info("Fill per data %.3f, Max bucket size %.0f" % (max_train_bucket_per_new_data, max_train_bucket_size)) @@ -766,8 +766,8 @@ def train_files_gen(): if max_train_steps_since_last_reload is not None: if train_state["train_steps_since_last_reload"] + 0.99 * samples_per_epoch/sub_epochs > max_train_steps_since_last_reload: logging.info( - "Too many train steps since last reload, waiting 5m and retrying (current %f)" % - train_state["train_steps_since_last_reload"] + "Too many train steps since last reload, waiting 5m and retrying (current %f)" % + train_state["train_steps_since_last_reload"] ) time.sleep(300) continue @@ -908,20 +908,20 @@ def log_metrics(metric_sums, metric_weights, metrics, metrics_out): if max_train_bucket_per_new_data is not None: if train_state["train_bucket_level"] > 0.99 * samples_per_epoch: logging.info("Consuming %.0f rows from train bucket (%.0f -> %.0f)" % ( - samples_per_epoch, train_state["train_bucket_level"], train_state["train_bucket_level"]-samples_per_epoch + samples_per_epoch, train_state["train_bucket_level"], train_state["train_bucket_level"]-samples_per_epoch )) train_state["train_bucket_level"] -= samples_per_epoch else: if stop_when_train_bucket_limited: logging.info( - "Exceeding train bucket, not enough new data rows, terminating (current level %f)" % - train_state["train_bucket_level"] + "Exceeding train bucket, not enough new data rows, terminating 
(current level %f)" % + train_state["train_bucket_level"] ) break else: logging.info( - "Exceeding train bucket, not enough new data rows, waiting 5m and retrying (current level %f)" % - train_state["train_bucket_level"] + "Exceeding train bucket, not enough new data rows, waiting 5m and retrying (current level %f)" % + train_state["train_bucket_level"] ) time.sleep(300) continue @@ -985,14 +985,14 @@ def log_metrics(metric_sums, metric_weights, metrics, metrics_out): logging.info("Currently up to data row " + str(train_state["total_num_data_rows"])) lookahead_counter = 0 for batch in data_processing_pytorch.read_npz_training_data( - train_files_to_use, - batch_size, - world_size, - rank, - pos_len=pos_len, - device=device, - randomize_symmetries=True, - model_config=model_config + train_files_to_use, + batch_size, + world_size, + rank, + pos_len=pos_len, + device=device, + randomize_symmetries=True, + model_config=model_config ): optimizer.zero_grad(set_to_none=True) if use_fp16: @@ -1004,15 +1004,15 @@ def log_metrics(metric_sums, metric_weights, metrics, metrics_out): postprocessed = raw_model.postprocess_output(model_outputs) metrics = metrics_obj.metrics_dict_batchwise( - raw_model, - postprocessed, - batch, - is_training=True, - soft_policy_weight_scale=soft_policy_weight_scale, - value_loss_scale=value_loss_scale, - td_value_loss_scales=td_value_loss_scales, - main_loss_scale=main_loss_scale, - intermediate_loss_scale=intermediate_loss_scale, + raw_model, + postprocessed, + batch, + is_training=True, + soft_policy_weight_scale=soft_policy_weight_scale, + value_loss_scale=value_loss_scale, + td_value_loss_scales=td_value_loss_scales, + main_loss_scale=main_loss_scale, + intermediate_loss_scale=intermediate_loss_scale, ) # DDP averages loss across instances, so to preserve LR as per-sample lr, we scale by world size. 
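As a rough illustration of the loss scaling noted in the "DDP averages loss across instances" comment above: a minimal sketch only, not taken from this patch. It assumes a standard torch DistributedDataParallel training step; train_one_batch, compute_loss, and world_size are placeholder names (the actual code builds its losses through metrics_obj.metrics_dict_batchwise), while the batch keys match those used in train.py.

def train_one_batch(ddp_model, optimizer, batch, compute_loss, world_size):
    # DDP averages gradients over the world_size ranks, so multiplying the
    # loss back up by world_size keeps the configured learning rate acting
    # as a per-sample rate, as described in the comment in the hunk above.
    optimizer.zero_grad(set_to_none=True)
    model_outputs = ddp_model(batch["binaryInputNCHW"], batch["globalInputNC"])
    loss = compute_loss(model_outputs, batch) * world_size
    loss.backward()
    optimizer.step()
    return loss.detach()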
@@ -1161,27 +1161,27 @@ def log_metrics(metric_sums, metric_weights, metrics, metrics_out): val_samples = 0 t0 = time.perf_counter() for batch in data_processing_pytorch.read_npz_training_data( - val_files, - batch_size, - world_size=1, # Only the main process validates - rank=0, # Only the main process validates - pos_len=pos_len, - device=device, - randomize_symmetries=True, - model_config=model_config + val_files, + batch_size, + world_size=1, # Only the main process validates + rank=0, # Only the main process validates + pos_len=pos_len, + device=device, + randomize_symmetries=True, + model_config=model_config ): model_outputs = ddp_model(batch["binaryInputNCHW"],batch["globalInputNC"]) postprocessed = raw_model.postprocess_output(model_outputs) metrics = metrics_obj.metrics_dict_batchwise( - raw_model, - postprocessed, - batch, - is_training=False, - soft_policy_weight_scale=soft_policy_weight_scale, - value_loss_scale=value_loss_scale, - td_value_loss_scales=td_value_loss_scales, - main_loss_scale=main_loss_scale, - intermediate_loss_scale=intermediate_loss_scale, + raw_model, + postprocessed, + batch, + is_training=False, + soft_policy_weight_scale=soft_policy_weight_scale, + value_loss_scale=value_loss_scale, + td_value_loss_scales=td_value_loss_scales, + main_loss_scale=main_loss_scale, + intermediate_loss_scale=intermediate_loss_scale, ) metrics = detensorify_metrics(metrics) accumulate_metrics(val_metric_sums, val_metric_weights, metrics, batch_size, decay=1.0, new_weight=1.0) @@ -1220,9 +1220,9 @@ def log_metrics(metric_sums, metric_weights, metrics, metrics_out): if not no_export and is_time_to_export and not skip_export_this_time and exportdir is not None and not gnorm_stats_debug: # Export a model for testing, unless somehow it already exists modelname = "%s-s%d-d%d" % ( - exportprefix, - train_state["global_step_samples"], - train_state["total_num_data_rows"], + exportprefix, + train_state["global_step_samples"], + train_state["total_num_data_rows"], ) savepath = os.path.join(exportdir,modelname) savepathtmp = os.path.join(exportdir,modelname+".tmp") @@ -1284,9 +1284,9 @@ def log_metrics(metric_sums, metric_weights, metrics, metrics_out): writepipes.append(wpipe) torch.multiprocessing.spawn( - main, - nprocs=num_gpus_used, - args=(world_size, args, multi_gpu_device_ids, readpipes, writepipes, barrier) + main, + nprocs=num_gpus_used, + args=(world_size, args, multi_gpu_device_ids, readpipes, writepipes, barrier) ) else: rank = 0 diff --git a/python/upload_model.py b/python/upload_model.py index d3ca88a41..cfd9606d7 100644 --- a/python/upload_model.py +++ b/python/upload_model.py @@ -122,17 +122,17 @@ def write_log(): with open(model_zip,"rb") as model_zip_handle: log_gamma_offset = -1.0 if network_size == "b60c320" else 0.0 data = { - "run": (None, base_server_url + "api/runs/" + run_name + "/"), - "name": (None, model_name), - "network_size": (None, network_size), - "is_random": (None, "false"), - "model_file": (model_name + model_file_extension, model_file_handle, "application/octet-stream"), - "model_file_bytes": (None, model_file_bytes), - "model_file_sha256": (None, model_file_sha256), - "training_games_enabled": (None, ("false" if (not_enabled or rating_only != 0) else "true")), - "rating_games_enabled": (None, ("false" if not_enabled else "true")), - "log_gamma_offset": (None, str(log_gamma_offset)), - "model_zip_file": (model_name + ".zip", model_zip_handle, "application/octet-stream"), + "run": (None, base_server_url + "api/runs/" + run_name + "/"), + "name": (None, 
model_name), + "network_size": (None, network_size), + "is_random": (None, "false"), + "model_file": (model_name + model_file_extension, model_file_handle, "application/octet-stream"), + "model_file_bytes": (None, model_file_bytes), + "model_file_sha256": (None, model_file_sha256), + "training_games_enabled": (None, ("false" if (not_enabled or rating_only != 0) else "true")), + "rating_games_enabled": (None, ("false" if not_enabled else "true")), + "log_gamma_offset": (None, str(log_gamma_offset)), + "model_zip_file": (model_name + ".zip", model_zip_handle, "application/octet-stream"), } if parent_network_name_without_run is not None: diff --git a/python/upload_poses.py b/python/upload_poses.py index 887473201..5e67a20c9 100644 --- a/python/upload_poses.py +++ b/python/upload_poses.py @@ -91,14 +91,14 @@ def handle_file(poses_by_key, poses_file): continue pos = json.loads(line) key = ( - str(pos["initialTurnNumber"]) + "$" + - "@".join(pos["moveLocs"]) + "$" + - "@".join(pos["movePlas"]) + "$" + - str(pos["xSize"]) + "$" + - str(pos["ySize"]) + "$" + - pos["nextPla"] + "$" + - pos["board"] + "$" + - pos["hintLoc"] + str(pos["initialTurnNumber"]) + "$" + + "@".join(pos["moveLocs"]) + "$" + + "@".join(pos["movePlas"]) + "$" + + str(pos["xSize"]) + "$" + + str(pos["ySize"]) + "$" + + pos["nextPla"] + "$" + + pos["board"] + "$" + + pos["hintLoc"] ) if len(pos["movePlas"]) > 0: @@ -184,10 +184,10 @@ def postStuff(to_post): for pos in poses: weight = pos["weight"] data = { - "run": base_server_url + "api/runs/" + run_name + "/", - "weight": weight, - "data": pos, - "notes": notes, + "run": base_server_url + "api/runs/" + run_name + "/", + "weight": weight, + "data": pos, + "notes": notes, } to_post.append(data) if len(to_post) >= 5000: