-
Notifications
You must be signed in to change notification settings - Fork 342
/
pretrain_base_0.75_400e_finetune_100e.txt
100 lines (100 loc) · 34.9 KB
/
pretrain_base_0.75_400e_finetune_100e.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
{"train_lr": 0.0003997441637352095, "train_min_lr": 9.49682763753549e-06, "train_loss": 5.592792673719873, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.9024732707025145, "test_loss": 2.250357142452038, "test_acc1": 49.00800136184692, "test_acc5": 77.24600258117675, "epoch": 0, "n_parameters": 86406376}
{"train_lr": 0.001199872081867605, "train_min_lr": 2.8505677836826516e-05, "train_loss": 4.72904204533731, "train_loss_scale": 91886.60591526779, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 2.1820284749487704, "test_loss": 1.8586196506565267, "test_acc1": 57.59000160797119, "test_acc5": 83.0080024029541, "epoch": 1, "n_parameters": 86406376}
{"train_lr": 0.0019999999999999996, "train_min_lr": 4.751452803611757e-05, "train_loss": 4.511791644002989, "train_loss_scale": 131072.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.9337663032072816, "test_loss": 1.7343718714334748, "test_acc1": 60.788001712646484, "test_acc5": 84.77000248535157, "epoch": 2, "n_parameters": 86406376}
{"train_lr": 0.002800127918132395, "train_min_lr": 6.65233782354086e-05, "train_loss": 4.412802945819499, "train_loss_scale": 165647.34772182253, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 1.6765434082710382, "test_acc1": 61.76400176208496, "test_acc5": 85.60000259338379, "epoch": 3, "n_parameters": 86406376}
{"train_lr": 0.0036002558362647923, "train_min_lr": 8.553222843469963e-05, "train_loss": 4.3487851610906025, "train_loss_scale": 131072.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.5445282463070682, "test_loss": 1.6564826540874713, "test_acc1": 62.59200166107178, "test_acc5": 86.34200263458251, "epoch": 4, "n_parameters": 86406376}
{"train_lr": 0.00399963601955247, "train_min_lr": 9.502040889264574e-05, "train_loss": 4.273726266303318, "train_loss_scale": 254390.74020783373, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.4550523775086985, "test_loss": 1.6366071384964567, "test_acc1": 63.276001866149905, "test_acc5": 86.44400270233155, "epoch": 5, "n_parameters": 86406376}
{"train_lr": 0.003997450867536572, "train_min_lr": 9.496849565928443e-05, "train_loss": 4.1888694992835385, "train_loss_scale": 262563.09512390086, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 1.5427715507420627, "test_acc1": 65.14400178527832, "test_acc5": 87.7420023727417, "epoch": 6, "n_parameters": 86406376}
{"train_lr": 0.003993082079065952, "train_min_lr": 9.486470519814895e-05, "train_loss": 4.102591318859281, "train_loss_scale": 262144.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.36034827371486, "test_loss": 1.4743010774254799, "test_acc1": 66.52800190216064, "test_acc5": 88.52400257263183, "epoch": 7, "n_parameters": 86406376}
{"train_lr": 0.003986534431346677, "train_min_lr": 9.470915100258478e-05, "train_loss": 4.053977056539697, "train_loss_scale": 264239.4756195044, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 1.4227886884049936, "test_acc1": 67.17800191864013, "test_acc5": 88.94800263793945, "epoch": 8, "n_parameters": 86406376}
{"train_lr": 0.003977815084135385, "train_min_lr": 9.450200316882093e-05, "train_loss": 4.010094777476207, "train_loss_scale": 262772.6426858513, "train_weight_decay": 0.049999999999998865, "train_grad_norm": NaN, "test_loss": 1.3707310209671657, "test_acc1": 67.85000229400634, "test_acc5": 89.35600260375976, "epoch": 9, "n_parameters": 86406376}
{"train_lr": 0.003966933571910235, "train_min_lr": 9.424348820997227e-05, "train_loss": 3.960189716540557, "train_loss_scale": 262144.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.2916216715920172, "test_loss": 1.3670287588329026, "test_acc1": 68.58200217468261, "test_acc5": 89.8220024017334, "epoch": 10, "n_parameters": 86406376}
{"train_lr": 0.003953901793445091, "train_min_lr": 9.393388880835123e-05, "train_loss": 3.9119861596446337, "train_loss_scale": 263820.38049560355, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 1.3387660237424301, "test_acc1": 69.31200221679687, "test_acc5": 90.11600278381347, "epoch": 11, "n_parameters": 86406376}
{"train_lr": 0.0039387339987983876, "train_min_lr": 9.35735435063572e-05, "train_loss": 3.8878280554029296, "train_loss_scale": 262144.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.2759411016718851, "test_loss": 1.317402897459088, "test_acc1": 69.93800215942383, "test_acc5": 90.50200250427245, "epoch": 12, "n_parameters": 86406376}
{"train_lr": 0.003921446773730832, "train_min_lr": 9.316284633628823e-05, "train_loss": 3.8405822453310163, "train_loss_scale": 262563.09512390086, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 1.2674546460762168, "test_acc1": 70.28000240081788, "test_acc5": 90.71600255645752, "epoch": 13, "n_parameters": 86406376}
{"train_lr": 0.003902059021569276, "train_min_lr": 9.270224638946935e-05, "train_loss": 3.822385010268572, "train_loss_scale": 266963.5939248601, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 1.2905417021476862, "test_acc1": 70.80800233581543, "test_acc5": 90.9200025112915, "epoch": 14, "n_parameters": 86406376}
{"train_lr": 0.0038805919425360376, "train_min_lr": 9.21922473251803e-05, "train_loss": 3.7929010247345643, "train_loss_scale": 262144.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.2474000493018367, "test_loss": 1.260368033340483, "test_acc1": 71.12000229278564, "test_acc5": 91.212002449646, "epoch": 15, "n_parameters": 86406376}
{"train_lr": 0.0038570690105668697, "train_min_lr": 9.163340681991e-05, "train_loss": 3.767744296031604, "train_loss_scale": 264868.1183053557, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 1.2191570794040507, "test_acc1": 71.44800253173828, "test_acc5": 91.17600241455078, "epoch": 16, "n_parameters": 86406376}
{"train_lr": 0.0038315159476425124, "train_min_lr": 9.102633595754594e-05, "train_loss": 3.7525674749811966, "train_loss_scale": 211119.16866506793, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 1.2244567579843781, "test_acc1": 71.92600230895997, "test_acc5": 91.44800262237548, "epoch": 17, "n_parameters": 86406376}
{"train_lr": 0.003803960695662125, "train_min_lr": 9.037169856116365e-05, "train_loss": 3.7283766990561755, "train_loss_scale": 131072.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.2429498715175809, "test_loss": 1.221701103629488, "test_acc1": 71.9420024761963, "test_acc5": 91.43000235168456, "epoch": 18, "n_parameters": 86406376}
{"train_lr": 0.003774433385889171, "train_min_lr": 8.96702104671445e-05, "train_loss": 3.7078562758142333, "train_loss_scale": 220339.2613908873, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 1.186578752416553, "test_acc1": 72.27800240905762, "test_acc5": 91.63400238006592, "epoch": 19, "n_parameters": 86406376}
{"train_lr": 0.0037429663060033736, "train_min_lr": 8.892263874242024e-05, "train_loss": 3.6861478615817216, "train_loss_scale": 131072.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.237212994782854, "test_loss": 1.1895164420658892, "test_acc1": 72.70400249084473, "test_acc5": 91.86400274139405, "epoch": 20, "n_parameters": 86406376}
{"train_lr": 0.0037095938647945864, "train_min_lr": 8.812980084569599e-05, "train_loss": 3.6667192394165493, "train_loss_scale": 198022.44604316546, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.23850567306546, "test_loss": 1.1890250189286289, "test_acc1": 72.63400233886719, "test_acc5": 91.81200269256591, "epoch": 21, "n_parameters": 86406376}
{"train_lr": 0.0036743525545373133, "train_min_lr": 8.729256373357168e-05, "train_loss": 3.6495994302294523, "train_loss_scale": 134110.43964828138, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 1.1797227387626965, "test_acc1": 72.6700023840332, "test_acc5": 91.88400264190673, "epoch": 22, "n_parameters": 86406376}
{"train_lr": 0.0036372809110869152, "train_min_lr": 8.641184291253709e-05, "train_loss": 3.6341962369082927, "train_loss_scale": 133272.2494004796, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 1.133619707190629, "test_acc1": 73.65200251922607, "test_acc5": 92.42600243255615, "epoch": 23, "n_parameters": 86406376}
{"train_lr": 0.003598419471741207, "train_min_lr": 8.548860143787963e-05, "train_loss": 3.623862175132445, "train_loss_scale": 131072.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.2397670864963608, "test_loss": 1.146392026182377, "test_acc1": 73.51600252227783, "test_acc5": 92.32200248840331, "epoch": 24, "n_parameters": 86406376}
{"train_lr": 0.0035578107309135816, "train_min_lr": 8.452384886059656e-05, "train_loss": 3.603262832029451, "train_loss_scale": 169943.07274180657, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 1.1371470603978995, "test_acc1": 73.71600233764649, "test_acc5": 92.34800261566163, "epoch": 25, "n_parameters": 86406376}
{"train_lr": 0.0035154990936659006, "train_min_lr": 8.351864012346724e-05, "train_loss": 3.587060103218237, "train_loss_scale": 131072.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.2464001103461408, "test_loss": 1.1180579838427631, "test_acc1": 74.0080023236084, "test_acc5": 92.44600270599365, "epoch": 26, "n_parameters": 86406376}
{"train_lr": 0.0034715308271522837, "train_min_lr": 8.24740744074869e-05, "train_loss": 3.5677696583892327, "train_loss_scale": 198651.08872901677, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 1.1091762039471755, "test_acc1": 74.03400264923096, "test_acc5": 92.5100022680664, "epoch": 27, "n_parameters": 86406376}
{"train_lr": 0.0034259540100266407, "train_min_lr": 8.139129392993009e-05, "train_loss": 3.559775885989626, "train_loss_scale": 131072.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.245898213794382, "test_loss": 1.1035979629466028, "test_acc1": 74.22600251861573, "test_acc5": 92.74400266723633, "epoch": 28, "n_parameters": 86406376}
{"train_lr": 0.003378818479869338, "train_min_lr": 8.027148269535177e-05, "train_loss": 3.5414186530500102, "train_loss_scale": 159256.14708233412, "train_weight_decay": 0.049999999999998865, "train_grad_norm": NaN, "test_loss": 1.130805738721833, "test_acc1": 74.23600257720948, "test_acc5": 92.69000242462158, "epoch": 29, "n_parameters": 86406376}
{"train_lr": 0.003330175778690558, "train_min_lr": 7.911586520089619e-05, "train_loss": 3.521332262183646, "train_loss_scale": 131072.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.25035829624112, "test_loss": 1.094184869843902, "test_acc1": 74.7340027456665, "test_acc5": 92.82000255218506, "epoch": 30, "n_parameters": 86406376}
{"train_lr": 0.0032800790965698953, "train_min_lr": 7.792570509732659e-05, "train_loss": 3.518297063063184, "train_loss_scale": 146054.65067945645, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 1.0666649909407804, "test_acc1": 75.09000274688721, "test_acc5": 93.040002550354, "epoch": 31, "n_parameters": 86406376}
{"train_lr": 0.003228583213493717, "train_min_lr": 7.670230380724284e-05, "train_loss": 3.503086110956663, "train_loss_scale": 107393.12549960033, "train_weight_decay": 0.049999999999998865, "train_grad_norm": NaN, "test_loss": 1.1114820763468742, "test_acc1": 74.80400245452881, "test_acc5": 93.01200267852784, "epoch": 32, "n_parameters": 86406376}
{"train_lr": 0.003175744439454117, "train_min_lr": 7.54469991019936e-05, "train_loss": 3.4825640880018116, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.2646351176009571, "test_loss": 1.0542662342389424, "test_acc1": 75.2140026184082, "test_acc5": 93.19200239074706, "epoch": 33, "n_parameters": 86406376}
{"train_lr": 0.0031216205528747424, "train_min_lr": 7.416116363884391e-05, "train_loss": 3.4745392004648843, "train_loss_scale": 115513.09352517985, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.2644246759460414, "test_loss": 1.0513502453086954, "test_acc1": 75.61600242980957, "test_acc5": 93.25600278411865, "epoch": 34, "n_parameters": 86406376}
{"train_lr": 0.0030662707374309217, "train_min_lr": 7.28462034599943e-05, "train_loss": 3.464747577345819, "train_loss_scale": 152550.62509992006, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.2681104541301347, "test_loss": 1.0377614192213074, "test_acc1": 75.47400229766846, "test_acc5": 93.42200245391845, "epoch": 35, "n_parameters": 86406376}
{"train_lr": 0.0030097555173332125, "train_min_lr": 7.150355645509409e-05, "train_loss": 3.4465882360792275, "train_loss_scale": 152760.1726618705, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 1.0413121538180294, "test_acc1": 76.0880026235962, "test_acc5": 93.50000260162354, "epoch": 36, "n_parameters": 86406376}
{"train_lr": 0.002952136691145041, "train_min_lr": 7.013469078893113e-05, "train_loss": 3.4340920159094437, "train_loss_scale": 76144.34532374101, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 1.0491064079105854, "test_acc1": 76.29600268890381, "test_acc5": 93.44600247528076, "epoch": 37, "n_parameters": 86406376}
{"train_lr": 0.0028934772642068875, "train_min_lr": 6.874110329601346e-05, "train_loss": 3.4251205486883456, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.2765998900365487, "test_loss": 1.022272811920354, "test_acc1": 75.94400242462159, "test_acc5": 93.47400256835938, "epoch": 38, "n_parameters": 86406376}
{"train_lr": 0.002833841379740885, "train_min_lr": 6.732431784380422e-05, "train_loss": 3.4053156239380367, "train_loss_scale": 55739.651478816944, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.2819314090658624, "test_loss": 1.0281780152158304, "test_acc1": 76.07800248321533, "test_acc5": 93.67800233184815, "epoch": 39, "n_parameters": 86406376}
{"train_lr": 0.0027732942487111276, "train_min_lr": 6.58858836663944e-05, "train_loss": 3.391267495308753, "train_loss_scale": 72241.52198241407, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.284594273109802, "test_loss": 1.0111178991695244, "test_acc1": 76.77200236083985, "test_acc5": 93.71800272125245, "epoch": 40, "n_parameters": 86406376}
{"train_lr": 0.002711902078516466, "train_min_lr": 6.442737367043799e-05, "train_loss": 3.3891672721559956, "train_loss_scale": 131072.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.29443743026895, "test_loss": 0.9872939745810899, "test_acc1": 77.08800261322021, "test_acc5": 93.962002182312, "epoch": 41, "n_parameters": 86406376}
{"train_lr": 0.0026497320005936377, "train_min_lr": 6.295038271520207e-05, "train_loss": 3.3599977008968613, "train_loss_scale": 69936.49880095923, "train_weight_decay": 0.049999999999998865, "train_grad_norm": NaN, "test_loss": 0.9843310804078074, "test_acc1": 76.86400251556397, "test_acc5": 93.89400253112792, "epoch": 42, "n_parameters": 86406376}
{"train_lr": 0.002586851997010088, "train_min_lr": 6.145652586861125e-05, "train_loss": 3.350256350710333, "train_loss_scale": 87433.72022382094, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.3008638261128769, "test_loss": 1.020211474461989, "test_acc1": 76.77600253479004, "test_acc5": 94.02400250640869, "epoch": 43, "n_parameters": 86406376}
{"train_lr": 0.0025233308261265472, "train_min_lr": 5.994743664119471e-05, "train_loss": 3.3359770345792685, "train_loss_scale": 131072.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.3001002219083497, "test_loss": 0.9721729861515941, "test_acc1": 77.35200261566162, "test_acc5": 94.17400259613036, "epoch": 44, "n_parameters": 86406376}
{"train_lr": 0.0024592379474107535, "train_min_lr": 5.8424765199866275e-05, "train_loss": 3.321092938859876, "train_loss_scale": 202003.8497202238, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.9848409746632432, "test_acc1": 77.5600025024414, "test_acc5": 94.12200234832764, "epoch": 45, "n_parameters": 86406376}
{"train_lr": 0.0023946434454845933, "train_min_lr": 5.689017656349148e-05, "train_loss": 3.315089634389614, "train_loss_scale": 131072.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.3133123738588475, "test_loss": 0.9504460749978368, "test_acc1": 77.61200245513916, "test_acc5": 94.41000273010253, "epoch": 46, "n_parameters": 86406376}
{"train_lr": 0.0023296179534875536, "train_min_lr": 5.5345348782213485e-05, "train_loss": 3.295001769975888, "train_loss_scale": 146159.42446043165, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.9754125220757542, "test_acc1": 77.63600252716064, "test_acc5": 94.26800239349365, "epoch": 47, "n_parameters": 86406376}
{"train_lr": 0.0022642325758404375, "train_min_lr": 5.379197110253047e-05, "train_loss": 3.283714706508471, "train_loss_scale": 131072.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.3198145790923415, "test_loss": 0.9524194603842316, "test_acc1": 77.75800264068603, "test_acc5": 94.39600237579346, "epoch": 48, "n_parameters": 86406376}
{"train_lr": 0.002198558810493715, "train_min_lr": 5.2231742120128454e-05, "train_loss": 3.2698451197452303, "train_loss_scale": 135262.9512390088, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.9604115836096533, "test_acc1": 77.91400236907958, "test_acc5": 94.43600240570068, "epoch": 49, "n_parameters": 86406376}
{"train_lr": 0.0021326684707455917, "train_min_lr": 5.0666367922492635e-05, "train_loss": 3.25602960528182, "train_loss_scale": 136729.78417266186, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.9541752418559609, "test_acc1": 78.10200234954834, "test_acc5": 94.49800243286133, "epoch": 50, "n_parameters": 86406376}
{"train_lr": 0.0020666336067151868, "train_min_lr": 4.909756022332574e-05, "train_loss": 3.2334235293402087, "train_loss_scale": 131072.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.3345246459845064, "test_loss": 0.9387856721878052, "test_acc1": 78.2120023953247, "test_acc5": 94.5380024432373, "epoch": 51, "n_parameters": 86406376}
{"train_lr": 0.002000526426556805, "train_min_lr": 4.7527034490813655e-05, "train_loss": 3.2196662790483708, "train_loss_scale": 134110.43964828138, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.9254046622099299, "test_acc1": 78.57000256988525, "test_acc5": 94.66000276489258, "epoch": 52, "n_parameters": 86406376}
{"train_lr": 0.0019344192175013665, "train_min_lr": 4.595650807178665e-05, "train_loss": 3.2003976242195407, "train_loss_scale": 131072.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.35492267864023, "test_loss": 0.9210042589993188, "test_acc1": 78.64200243713378, "test_acc5": 94.77800249664307, "epoch": 53, "n_parameters": 86406376}
{"train_lr": 0.0018683842668114042, "train_min_lr": 4.438769831382569e-05, "train_loss": 3.1964066105304387, "train_loss_scale": 83347.54276578737, "train_weight_decay": 0.049999999999998865, "train_grad_norm": NaN, "test_loss": 0.9129695732033614, "test_acc1": 78.66000256378175, "test_acc5": 94.82200239837647, "epoch": 54, "n_parameters": 86406376}
{"train_lr": 0.0018024937827359805, "train_min_lr": 4.282232068736817e-05, "train_loss": 3.1861120321267515, "train_loss_scale": 84604.82813749001, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.3640874195441925, "test_loss": 0.9064727205444466, "test_acc1": 78.83800238372802, "test_acc5": 94.83600251617432, "epoch": 55, "n_parameters": 86406376}
{"train_lr": 0.001736819815552037, "train_min_lr": 4.1262086909865866e-05, "train_loss": 3.17465291219316, "train_loss_scale": 92069.96003197442, "train_weight_decay": 0.049999999999998865, "train_grad_norm": NaN, "test_loss": 0.9148290763524446, "test_acc1": 79.15800265716553, "test_acc5": 95.03800253784179, "epoch": 56, "n_parameters": 86406376}
{"train_lr": 0.0016714341787784702, "train_min_lr": 3.970870307404734e-05, "train_loss": 3.144534255674989, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.373606550226585, "test_loss": 0.8905014463446357, "test_acc1": 79.25800266967774, "test_acc5": 95.04000248901367, "epoch": 57, "n_parameters": 86406376}
{"train_lr": 0.0016064083706491157, "train_min_lr": 3.8163867782330694e-05, "train_loss": 3.1398784050004753, "train_loss_scale": 49034.12949640288, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.3803659635577366, "test_loss": 0.8827446543357589, "test_acc1": 79.5120025479126, "test_acc5": 95.07800256408692, "epoch": 58, "n_parameters": 86406376}
{"train_lr": 0.001541813495930466, "train_min_lr": 3.6629270289426194e-05, "train_loss": 3.131142016675928, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.384632907230124, "test_loss": 0.8748288392794855, "test_acc1": 79.60600256134033, "test_acc5": 95.28400251556397, "epoch": 59, "n_parameters": 86406376}
{"train_lr": 0.001477720188169644, "train_min_lr": 3.510658865516169e-05, "train_loss": 3.1028173233417395, "train_loss_scale": 124366.47801758593, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.3892139310745313, "test_loss": 0.860443905334581, "test_acc1": 79.77000245788574, "test_acc5": 95.22800262817383, "epoch": 60, "n_parameters": 86406376}
{"train_lr": 0.0014141985324576819, "train_min_lr": 3.3597487909548427e-05, "train_loss": 3.091071269078125, "train_loss_scale": 143749.6274980016, "train_weight_decay": 0.049999999999998865, "train_grad_norm": NaN, "test_loss": 0.8695225790143013, "test_acc1": 79.92200239135742, "test_acc5": 95.23600255096436, "epoch": 61, "n_parameters": 86406376}
{"train_lr": 0.0013513179887924879, "train_min_lr": 3.210361823209538e-05, "train_loss": 3.077250486023897, "train_loss_scale": 131072.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.4087092674416986, "test_loss": 0.8605208184682962, "test_acc1": 80.09200233276367, "test_acc5": 95.32200256072998, "epoch": 62, "n_parameters": 86406376}
{"train_lr": 0.0012891473161253547, "train_min_lr": 3.062661314736197e-05, "train_loss": 3.061110954192712, "train_loss_scale": 154750.8745003997, "train_weight_decay": 0.049999999999998865, "train_grad_norm": NaN, "test_loss": 0.8431629847848054, "test_acc1": 80.08800251373292, "test_acc5": 95.41400272003173, "epoch": 63, "n_parameters": 86406376}
{"train_lr": 0.0012277544971740653, "train_min_lr": 2.9168087738723258e-05, "train_loss": 3.047550282282509, "train_loss_scale": 126147.63229416466, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8572965063380472, "test_acc1": 80.30200247467042, "test_acc5": 95.38800263458252, "epoch": 64, "n_parameters": 86406376}
{"train_lr": 0.00116720666408478, "train_min_lr": 2.7729636882299765e-05, "train_loss": 3.0341893219404654, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.425688858417203, "test_loss": 0.8496151291059725, "test_acc1": 80.46200228607178, "test_acc5": 95.4760026309204, "epoch": 65, "n_parameters": 86406376}
{"train_lr": 0.0011075700250240293, "train_min_lr": 2.6312833502983773e-05, "train_loss": 3.017358848647915, "train_loss_scale": 96758.58673061551, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.4325794039679756, "test_loss": 0.8399289361003673, "test_acc1": 80.73600263519288, "test_acc5": 95.5740025415039, "epoch": 66, "n_parameters": 86406376}
{"train_lr": 0.0010489097917810446, "train_min_lr": 2.491922685446939e-05, "train_loss": 3.006917386306085, "train_loss_scale": 131072.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.4392885264161108, "test_loss": 0.828515770873337, "test_acc1": 80.90000254302979, "test_acc5": 95.59400246246338, "epoch": 67, "n_parameters": 86406376}
{"train_lr": 0.0009912901084596437, "train_min_lr": 2.3550340825165907e-05, "train_loss": 2.988169783477684, "train_loss_scale": 139872.99760191847, "train_weight_decay": 0.049999999999998865, "train_grad_norm": NaN, "test_loss": 0.8285546066860358, "test_acc1": 80.79600262023926, "test_acc5": 95.58600239807129, "epoch": 68, "n_parameters": 86406376}
{"train_lr": 0.0009347739813375745, "train_min_lr": 2.2207672271848693e-05, "train_loss": 2.9821149582604614, "train_loss_scale": 141339.83053557156, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8265396105972204, "test_acc1": 80.84600264892578, "test_acc5": 95.64800258666992, "epoch": 69, "n_parameters": 86406376}
{"train_lr": 0.0008794232099700762, "train_min_lr": 2.0892689382867885e-05, "train_loss": 2.9535677609302633, "train_loss_scale": 78266.0143884892, "train_weight_decay": 0.049999999999998865, "train_grad_norm": NaN, "test_loss": 0.8108141973163142, "test_acc1": 80.98000256774903, "test_acc5": 95.71000276519776, "epoch": 70, "n_parameters": 86406376}
{"train_lr": 0.0008252983196129956, "train_min_lr": 1.960683007270625e-05, "train_loss": 2.939285759135878, "train_loss_scale": 79104.20463629096, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.4710120810783929, "test_loss": 0.8041500090198084, "test_acc1": 81.35200231933594, "test_acc5": 95.79800266754151, "epoch": 71, "n_parameters": 86406376}
{"train_lr": 0.0007724584950392799, "train_min_lr": 1.835150040964055e-05, "train_loss": 2.9297137927237173, "train_loss_scale": 131072.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.4739933761951924, "test_loss": 0.8124989109734694, "test_acc1": 81.53800241424561, "test_acc5": 95.86400275543213, "epoch": 72, "n_parameters": 86406376}
{"train_lr": 0.0007209615158213153, "train_min_lr": 1.7128073078226846e-05, "train_loss": 2.9062990601495398, "train_loss_scale": 143435.30615507593, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8049701558369579, "test_acc1": 81.50000243896484, "test_acc5": 95.8880024987793, "epoch": 73, "n_parameters": 86406376}
{"train_lr": 0.0006708636931498053, "train_min_lr": 1.5937885878289863e-05, "train_loss": 2.8942230482824702, "train_loss_scale": 131072.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.4952019505458864, "test_loss": 0.7943584825279135, "test_acc1": 81.61800241699218, "test_acc5": 95.94200252105713, "epoch": 74, "n_parameters": 86406376}
{"train_lr": 0.0006222198082583128, "train_min_lr": 1.4782240262058604e-05, "train_loss": 2.8828329217614983, "train_loss_scale": 130286.19664268586, "train_weight_decay": 0.049999999999998865, "train_grad_norm": NaN, "test_loss": 0.7842949566741785, "test_acc1": 81.83000252075195, "test_acc5": 96.03400264709472, "epoch": 75, "n_parameters": 86406376}
{"train_lr": 0.0005750830525207831, "train_min_lr": 1.3662399911047398e-05, "train_loss": 2.866628373138529, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.4989759313116828, "test_loss": 0.7911524822314581, "test_acc1": 81.93400256103516, "test_acc5": 95.92400254241943, "epoch": 76, "n_parameters": 86406376}
{"train_lr": 0.0005295049692875612, "train_min_lr": 1.2579589354238723e-05, "train_loss": 2.858981152315982, "train_loss_scale": 107183.57793764988, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.508202315329743, "test_loss": 0.7858635316969771, "test_acc1": 81.97600275817871, "test_acc5": 96.0340025894165, "epoch": 77, "n_parameters": 86406376}
{"train_lr": 0.0004855353975234924, "train_min_lr": 1.1534992629078736e-05, "train_loss": 2.834503419810443, "train_loss_scale": 135891.59392486012, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.5148741068790474, "test_loss": 0.7748192102845871, "test_acc1": 82.11600239501954, "test_acc5": 96.06000239318848, "epoch": 78, "n_parameters": 86406376}
{"train_lr": 0.00044322241730974525, "train_min_lr": 1.0529751986749836e-05, "train_loss": 2.8270453691935176, "train_loss_scale": 124680.79936051159, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7718186390896639, "test_acc1": 82.36600251037598, "test_acc5": 96.13600261871338, "epoch": 79, "n_parameters": 86406376}
{"train_lr": 0.0004026122972689514, "train_min_lr": 9.564966643135621e-06, "train_loss": 2.8028785404231815, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.5297076538217058, "test_loss": 0.770141156220978, "test_acc1": 82.1980024874878, "test_acc5": 96.17400264984131, "epoch": 80, "n_parameters": 86406376}
{"train_lr": 0.00036374944397114204, "train_min_lr": 8.64169157684452e-06, "train_loss": 2.7935122771562337, "train_loss_scale": 98958.83613109513, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.5345986534556229, "test_loss": 0.760394756992658, "test_acc1": 82.30000256713868, "test_acc5": 96.23800255218507, "epoch": 81, "n_parameters": 86406376}
{"train_lr": 0.00032667635337582196, "train_min_lr": 7.760936375606073e-06, "train_loss": 2.7894287793208465, "train_loss_scale": 80099.55555555556, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7546397903651902, "test_acc1": 82.45600253967285, "test_acc5": 96.25400263122559, "epoch": 82, "n_parameters": 86406376}
{"train_lr": 0.0002914335643632545, "train_min_lr": 6.92366413230176e-06, "train_loss": 2.7764033634695027, "train_loss_scale": 77270.66346922462, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.5368766144311115, "test_loss": 0.7621058657991163, "test_acc1": 82.49200276916504, "test_acc5": 96.29200251678466, "epoch": 83, "n_parameters": 86406376}
{"train_lr": 0.0002580596144057944, "train_min_lr": 6.130790391836908e-06, "train_loss": 2.767983827742932, "train_loss_scale": 90053.06474820143, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7654316950250756, "test_acc1": 82.48400293182372, "test_acc5": 96.33400266418457, "epoch": 84, "n_parameters": 86406376}
{"train_lr": 0.00022659099742773016, "train_min_lr": 5.383182150005858e-06, "train_loss": 2.753948577064023, "train_loss_scale": 67317.15427657873, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.5534542350174425, "test_loss": 0.74941034186067, "test_acc1": 82.6560022869873, "test_acc5": 96.29800259155273, "epoch": 85, "n_parameters": 86406376}
{"train_lr": 0.0001970621238997089, "train_min_lr": 4.6816569054447925e-06, "train_loss": 2.7435265696830125, "train_loss_scale": 101578.18065547562, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7539658158114462, "test_acc1": 82.74000243957519, "test_acc5": 96.39000235168457, "epoch": 86, "n_parameters": 86406376}
{"train_lr": 0.00016950528321139544, "train_min_lr": 4.026981765708946e-06, "train_loss": 2.735401282946078, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.5583705299859234, "test_loss": 0.7509403758202539, "test_acc1": 82.7800020727539, "test_acc5": 96.33400281677245, "epoch": 87, "n_parameters": 86406376}
{"train_lr": 0.00014395060836349695, "train_min_lr": 3.4198726084517775e-06, "train_loss": 2.726337208950834, "train_loss_scale": 94348.78976818545, "train_weight_decay": 0.049999999999998865, "train_grad_norm": NaN, "test_loss": 0.7506256821480665, "test_acc1": 82.76000263183593, "test_acc5": 96.38400246185303, "epoch": 88, "n_parameters": 86406376}
{"train_lr": 0.00012042604301776897, "train_min_lr": 2.8609932986232416e-06, "train_loss": 2.71985708111577, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.5624949657659737, "test_loss": 0.7572676462657524, "test_acc1": 82.70600236114502, "test_acc5": 96.33000246185303, "epoch": 89, "n_parameters": 86406376}
{"train_lr": 9.895731094103386e-05, "train_min_lr": 2.350954962543275e-06, "train_loss": 2.720098126777928, "train_loss_scale": 117451.40847322143, "train_weight_decay": 0.049999999999998865, "train_grad_norm": NaN, "test_loss": 0.7487046465729222, "test_acc1": 82.79000234710693, "test_acc5": 96.43000239959717, "epoch": 90, "n_parameters": 86406376}
{"train_lr": 7.95678878766162e-05, "train_min_lr": 1.8903153196440734e-06, "train_loss": 2.7175606226892493, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.5719138056063633, "test_loss": 0.7427014463552923, "test_acc1": 82.90000255493165, "test_acc5": 96.41400255584716, "epoch": 91, "n_parameters": 86406376}
{"train_lr": 6.227897587396463e-05, "train_min_lr": 1.47957807261209e-06, "train_loss": 2.705379573871001, "train_loss_scale": 70827.0759392486, "train_weight_decay": 0.049999999999998865, "train_grad_norm": NaN, "test_loss": 0.7494265872872237, "test_acc1": 82.91000229553222, "test_acc5": 96.39600263763428, "epoch": 92, "n_parameters": 86406376}
{"train_lr": 4.710948010452045e-05, "train_min_lr": 1.1191923565965795e-06, "train_loss": 2.702515984027029, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.577457527938983, "test_loss": 0.7469095602405794, "test_acc1": 82.98600246978759, "test_acc5": 96.43200261444092, "epoch": 93, "n_parameters": 86406376}
{"train_lr": 3.407598818919137e-05, "train_min_lr": 8.095522480868717e-07, "train_loss": 2.6927911086762837, "train_loss_scale": 102573.5315747402, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7422059704408501, "test_acc1": 82.98000233825684, "test_acc5": 96.45200250701905, "epoch": 94, "n_parameters": 86406376}
{"train_lr": 2.319275206003109e-05, "train_min_lr": 5.509963339955348e-07, "train_loss": 2.695670081080674, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.576134430799934, "test_loss": 0.7438320368528366, "test_acc1": 83.03400236755371, "test_acc5": 96.47200257537841, "epoch": 95, "n_parameters": 86406376}
{"train_lr": 1.4471672375960707e-05, "train_min_lr": 3.438073414185469e-07, "train_loss": 2.6918061588832036, "train_loss_scale": 103464.10871302958, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.5773896862277024, "test_loss": 0.7437138512279048, "test_acc1": 83.04800273712158, "test_acc5": 96.47400258514405, "epoch": 96, "n_parameters": 86406376}
{"train_lr": 7.922285509573203e-06, "train_min_lr": 1.882118284773719e-07, "train_loss": 2.6919422102012605, "train_loss_scale": 87748.0415667466, "train_weight_decay": 0.049999999999998865, "train_grad_norm": NaN, "test_loss": 0.7450656880709258, "test_acc1": 83.04200254608155, "test_acc5": 96.47800242889404, "epoch": 97, "n_parameters": 86406376}
{"train_lr": 3.551753119249797e-06, "train_min_lr": 8.437993658098134e-08, "train_loss": 2.68782733827472, "train_loss_scale": 69622.17745803358, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 1.5698409668452067, "test_loss": 0.7434264018454335, "test_acc1": 83.03800266601563, "test_acc5": 96.47000250701905, "epoch": 98, "n_parameters": 86406376}
{"train_lr": 1.3648543179918328e-06, "train_min_lr": 3.2425204378719534e-08, "train_loss": 2.6920088201308614, "train_loss_scale": 72713.00399680255, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7439092839080276, "test_acc1": 83.044002578125, "test_acc5": 96.47200254608154, "epoch": 99, "n_parameters": 86406376}