Logging to isic_outputs creating data loader... creating model and diffusion... training... --------------------------- | grad_norm | 21.1 | | loss | 1.02 | | loss_cal | 0.287 | | loss_cal_q0 | 0.228 | | loss_cal_q1 | 0.325 | | loss_cal_q3 | 0.318 | | loss_diff | 1 | | loss_diff_q0 | 1 | | loss_diff_q1 | 1 | | loss_diff_q3 | 1 | | loss_q0 | 1.04 | | loss_q1 | 1.01 | | loss_q3 | 1.01 | | param_norm | 233 | | samples | 8 | | step | 0 | | vb | 0.0222 | | vb_q0 | 0.0436 | | vb_q1 | 0.0073 | | vb_q3 | 0.0124 | --------------------------- saving model 0... saving model 0.9999... --------------------------- | grad_norm | 14.4 | | loss | 0.233 | | loss_cal | 0.199 | | loss_cal_q0 | 0.198 | | loss_cal_q1 | 0.204 | | loss_cal_q2 | 0.197 | | loss_cal_q3 | 0.197 | | loss_diff | 0.226 | | loss_diff_q0 | 0.304 | | loss_diff_q1 | 0.204 | | loss_diff_q2 | 0.201 | | loss_diff_q3 | 0.195 | | loss_q0 | 0.326 | | loss_q1 | 0.206 | | loss_q2 | 0.202 | | loss_q3 | 0.198 | | param_norm | 233 | | samples | 808 | | step | 100 | | vb | 0.00676 | | vb_q0 | 0.0214 | | vb_q1 | 0.00151 | | vb_q2 | 0.00188 | | vb_q3 | 0.00257 | --------------------------- --------------------------- | grad_norm | 11.6 | | loss | 0.0196 | | loss_cal | 0.15 | | loss_cal_q0 | 0.155 | | loss_cal_q1 | 0.151 | | loss_cal_q2 | 0.15 | | loss_cal_q3 | 0.144 | | loss_diff | 0.0168 | | loss_diff_q0 | 0.0408 | | loss_diff_q1 | 0.0106 | | loss_diff_q2 | 0.00774 | | loss_diff_q3 | 0.00697 | | loss_q0 | 0.0517 | | loss_q1 | 0.0106 | | loss_q2 | 0.00781 | | loss_q3 | 0.00706 | | param_norm | 233 | | samples | 1.61e+03 | | step | 200 | | vb | 0.00283 | | vb_q0 | 0.0108 | | vb_q1 | 7.98e-05 | | vb_q2 | 7.09e-05 | | vb_q3 | 8.92e-05 | --------------------------- --------------------------- | grad_norm | 10.2 | | loss | 0.0148 | | loss_cal | 0.128 | | loss_cal_q0 | 0.13 | | loss_cal_q1 | 0.125 | | loss_cal_q2 | 0.134 | | loss_cal_q3 | 0.121 | | loss_diff | 0.0138 | | loss_diff_q0 | 0.0359 | | loss_diff_q1 | 0.00867 | | loss_diff_q2 | 0.00614 | | loss_diff_q3 | 0.00547 | | loss_q0 | 0.0402 | | loss_q1 | 0.00874 | | loss_q2 | 0.0062 | | loss_q3 | 0.00554 | | param_norm | 233 | | samples | 2.41e+03 | | step | 300 | | vb | 0.00105 | | vb_q0 | 0.00425 | | vb_q1 | 6.52e-05 | | vb_q2 | 5.72e-05 | | vb_q3 | 7.06e-05 | --------------------------- --------------------------- | grad_norm | 9.56 | | loss | 0.0165 | | loss_cal | 0.122 | | loss_cal_q0 | 0.115 | | loss_cal_q1 | 0.128 | | loss_cal_q2 | 0.123 | | loss_cal_q3 | 0.121 | | loss_diff | 0.013 | | loss_diff_q0 | 0.0334 | | loss_diff_q1 | 0.00706 | | loss_diff_q2 | 0.00479 | | loss_diff_q3 | 0.00399 | | loss_q0 | 0.0461 | | loss_q1 | 0.00712 | | loss_q2 | 0.00483 | | loss_q3 | 0.00405 | | param_norm | 234 | | samples | 3.21e+03 | | step | 400 | | vb | 0.00347 | | vb_q0 | 0.0127 | | vb_q1 | 5.29e-05 | | vb_q2 | 4.43e-05 | | vb_q3 | 5.17e-05 | --------------------------- --------------------------- | grad_norm | 8.87 | | loss | 0.0158 | | loss_cal | 0.114 | | loss_cal_q0 | 0.106 | | loss_cal_q1 | 0.116 | | loss_cal_q2 | 0.114 | | loss_cal_q3 | 0.118 | | loss_diff | 0.00939 | | loss_diff_q0 | 0.0279 | | loss_diff_q1 | 0.00505 | | loss_diff_q2 | 0.00323 | | loss_diff_q3 | 0.00257 | | loss_q0 | 0.0546 | | loss_q1 | 0.00509 | | loss_q2 | 0.00326 | | loss_q3 | 0.0026 | | param_norm | 234 | | samples | 4.01e+03 | | step | 500 | | vb | 0.00637 | | vb_q0 | 0.0267 | | vb_q1 | 3.77e-05 | | vb_q2 | 3e-05 | | vb_q3 | 3.31e-05 | --------------------------- --------------------------- | grad_norm | 8.18 | | loss | 0.00511 | | loss_cal | 0.11 | | loss_cal_q0 | 0.109 | | loss_cal_q1 | 0.111 | | loss_cal_q2 | 0.114 | | loss_cal_q3 | 0.108 | | loss_diff | 0.00495 | | loss_diff_q0 | 0.0128 | | loss_diff_q1 | 0.00325 | | loss_diff_q2 | 0.00182 | | loss_diff_q3 | 0.00118 | | loss_q0 | 0.0133 | | loss_q1 | 0.00328 | | loss_q2 | 0.00184 | | loss_q3 | 0.0012 | | param_norm | 234 | | samples | 4.81e+03 | | step | 600 | | vb | 0.000165 | | vb_q0 | 0.000573 | | vb_q1 | 2.43e-05 | | vb_q2 | 1.68e-05 | | vb_q3 | 1.5e-05 | --------------------------- --------------------------- | grad_norm | 7.6 | | loss | 0.00379 | | loss_cal | 0.106 | | loss_cal_q0 | 0.104 | | loss_cal_q1 | 0.117 | | loss_cal_q2 | 0.106 | | loss_cal_q3 | 0.0962 | | loss_diff | 0.00367 | | loss_diff_q0 | 0.0104 | | loss_diff_q1 | 0.00257 | | loss_diff_q2 | 0.00121 | | loss_diff_q3 | 0.000746 | | loss_q0 | 0.0108 | | loss_q1 | 0.00259 | | loss_q2 | 0.00122 | | loss_q3 | 0.000756 | | param_norm | 234 | | samples | 5.61e+03 | | step | 700 | | vb | 0.000113 | | vb_q0 | 0.000424 | | vb_q1 | 1.92e-05 | | vb_q2 | 1.12e-05 | | vb_q3 | 9.52e-06 | --------------------------- --------------------------- | grad_norm | 7.7 | | loss | 0.00498 | | loss_cal | 0.106 | | loss_cal_q0 | 0.101 | | loss_cal_q1 | 0.1 | | loss_cal_q2 | 0.106 | | loss_cal_q3 | 0.117 | | loss_diff | 0.00471 | | loss_diff_q0 | 0.0131 | | loss_diff_q1 | 0.00262 | | loss_diff_q2 | 0.00132 | | loss_diff_q3 | 0.000794 | | loss_q0 | 0.014 | | loss_q1 | 0.00264 | | loss_q2 | 0.00133 | | loss_q3 | 0.000804 | | param_norm | 234 | | samples | 6.41e+03 | | step | 800 | | vb | 0.000265 | | vb_q0 | 0.000945 | | vb_q1 | 1.95e-05 | | vb_q2 | 1.2e-05 | | vb_q3 | 1.02e-05 | --------------------------- --------------------------- | grad_norm | 7.35 | | loss | 0.00536 | | loss_cal | 0.103 | | loss_cal_q0 | 0.104 | | loss_cal_q1 | 0.111 | | loss_cal_q2 | 0.0976 | | loss_cal_q3 | 0.0999 | | loss_diff | 0.00477 | | loss_diff_q0 | 0.0137 | | loss_diff_q1 | 0.00246 | | loss_diff_q2 | 0.00116 | | loss_diff_q3 | 0.000657 | | loss_q0 | 0.0158 | | loss_q1 | 0.00248 | | loss_q2 | 0.00117 | | loss_q3 | 0.000666 | | param_norm | 234 | | samples | 7.21e+03 | | step | 900 | | vb | 0.000587 | | vb_q0 | 0.00209 | | vb_q1 | 1.84e-05 | | vb_q2 | 1.06e-05 | | vb_q3 | 8.44e-06 | --------------------------- --------------------------- | grad_norm | 6.7 | | loss | 0.00333 | | loss_cal | 0.0992 | | loss_cal_q0 | 0.0915 | | loss_cal_q1 | 0.106 | | loss_cal_q2 | 0.103 | | loss_cal_q3 | 0.0952 | | loss_diff | 0.00322 | | loss_diff_q0 | 0.00892 | | loss_diff_q1 | 0.00245 | | loss_diff_q2 | 0.00112 | | loss_diff_q3 | 0.000586 | | loss_q0 | 0.00932 | | loss_q1 | 0.00247 | | loss_q2 | 0.00113 | | loss_q3 | 0.000594 | | param_norm | 234 | | samples | 8.01e+03 | | step | 1e+03 | | vb | 0.000107 | | vb_q0 | 0.000399 | | vb_q1 | 1.82e-05 | | vb_q2 | 1.03e-05 | | vb_q3 | 7.52e-06 | --------------------------- --------------------------- | grad_norm | 6.49 | | loss | 0.0107 | | loss_cal | 0.0954 | | loss_cal_q0 | 0.0957 | | loss_cal_q1 | 0.103 | | loss_cal_q2 | 0.0898 | | loss_cal_q3 | 0.0937 | | loss_diff | 0.00565 | | loss_diff_q0 | 0.0169 | | loss_diff_q1 | 0.00282 | | loss_diff_q2 | 0.00134 | | loss_diff_q3 | 0.000815 | | loss_q0 | 0.0367 | | loss_q1 | 0.00284 | | loss_q2 | 0.00135 | | loss_q3 | 0.000825 | | param_norm | 234 | | samples | 8.81e+03 | | step | 1.1e+03 | | vb | 0.00502 | | vb_q0 | 0.0197 | | vb_q1 | 2.1e-05 | | vb_q2 | 1.25e-05 | | vb_q3 | 1.05e-05 | --------------------------- --------------------------- | grad_norm | 6.27 | | loss | 0.00333 | | loss_cal | 0.0966 | | loss_cal_q0 | 0.103 | | loss_cal_q1 | 0.101 | | loss_cal_q2 | 0.0875 | | loss_cal_q3 | 0.0982 | | loss_diff | 0.00323 | | loss_diff_q0 | 0.00917 | | loss_diff_q1 | 0.00234 | | loss_diff_q2 | 0.00101 | | loss_diff_q3 | 0.000516 | | loss_q0 | 0.00954 | | loss_q1 | 0.00235 | | loss_q2 | 0.00101 | | loss_q3 | 0.000523 | | param_norm | 234 | | samples | 9.61e+03 | | step | 1.2e+03 | | vb | 0.0001 | | vb_q0 | 0.000368 | | vb_q1 | 1.73e-05 | | vb_q2 | 9.18e-06 | | vb_q3 | 6.56e-06 | --------------------------- --------------------------- | grad_norm | 6.19 | | loss | 0.0104 | | loss_cal | 0.0915 | | loss_cal_q0 | 0.0972 | | loss_cal_q1 | 0.0991 | | loss_cal_q2 | 0.0825 | | loss_cal_q3 | 0.0853 | | loss_diff | 0.00511 | | loss_diff_q0 | 0.0154 | | loss_diff_q1 | 0.0025 | | loss_diff_q2 | 0.00109 | | loss_diff_q3 | 0.000655 | | loss_q0 | 0.0363 | | loss_q1 | 0.00252 | | loss_q2 | 0.0011 | | loss_q3 | 0.000664 | | param_norm | 234 | | samples | 1.04e+04 | | step | 1.3e+03 | | vb | 0.00529 | | vb_q0 | 0.0209 | | vb_q1 | 1.86e-05 | | vb_q2 | 1.01e-05 | | vb_q3 | 8.32e-06 | --------------------------- --------------------------- | grad_norm | 6.03 | | loss | 0.00878 | | loss_cal | 0.093 | | loss_cal_q0 | 0.0926 | | loss_cal_q1 | 0.0865 | | loss_cal_q2 | 0.108 | | loss_cal_q3 | 0.0857 | | loss_diff | 0.00495 | | loss_diff_q0 | 0.014 | | loss_diff_q1 | 0.00265 | | loss_diff_q2 | 0.00121 | | loss_diff_q3 | 0.00069 | | loss_q0 | 0.0279 | | loss_q1 | 0.00267 | | loss_q2 | 0.00122 | | loss_q3 | 0.000698 | | param_norm | 234 | | samples | 1.12e+04 | | step | 1.4e+03 | | vb | 0.00382 | | vb_q0 | 0.0139 | | vb_q1 | 1.97e-05 | | vb_q2 | 1.12e-05 | | vb_q3 | 8.81e-06 | --------------------------- --------------------------- | grad_norm | 5.71 | | loss | 0.00288 | | loss_cal | 0.091 | | loss_cal_q0 | 0.096 | | loss_cal_q1 | 0.0945 | | loss_cal_q2 | 0.0852 | | loss_cal_q3 | 0.0884 | | loss_diff | 0.00276 | | loss_diff_q0 | 0.00752 | | loss_diff_q1 | 0.00215 | | loss_diff_q2 | 0.000808 | | loss_diff_q3 | 0.000399 | | loss_q0 | 0.00798 | | loss_q1 | 0.00217 | | loss_q2 | 0.000816 | | loss_q3 | 0.000404 | | param_norm | 234 | | samples | 1.2e+04 | | step | 1.5e+03 | | vb | 0.000125 | | vb_q0 | 0.00046 | | vb_q1 | 1.6e-05 | | vb_q2 | 7.42e-06 | | vb_q3 | 5.1e-06 | --------------------------- --------------------------- | grad_norm | 5.54 | | loss | 0.00612 | | loss_cal | 0.0855 | | loss_cal_q0 | 0.0896 | | loss_cal_q1 | 0.0939 | | loss_cal_q2 | 0.0825 | | loss_cal_q3 | 0.0746 | | loss_diff | 0.00326 | | loss_diff_q0 | 0.00938 | | loss_diff_q1 | 0.002 | | loss_diff_q2 | 0.000881 | | loss_diff_q3 | 0.000433 | | loss_q0 | 0.0205 | | loss_q1 | 0.00201 | | loss_q2 | 0.000889 | | loss_q3 | 0.000439 | | param_norm | 234 | | samples | 1.28e+04 | | step | 1.6e+03 | | vb | 0.00286 | | vb_q0 | 0.0111 | | vb_q1 | 1.49e-05 | | vb_q2 | 8.12e-06 | | vb_q3 | 5.53e-06 | --------------------------- --------------------------- | grad_norm | 5.44 | | loss | 0.00316 | | loss_cal | 0.0858 | | loss_cal_q0 | 0.0868 | | loss_cal_q1 | 0.089 | | loss_cal_q2 | 0.0739 | | loss_cal_q3 | 0.0947 | | loss_diff | 0.00296 | | loss_diff_q0 | 0.00811 | | loss_diff_q1 | 0.00208 | | loss_diff_q2 | 0.000798 | | loss_diff_q3 | 0.000406 | | loss_q0 | 0.00885 | | loss_q1 | 0.00209 | | loss_q2 | 0.000805 | | loss_q3 | 0.000411 | | param_norm | 234 | | samples | 1.36e+04 | | step | 1.7e+03 | | vb | 0.000204 | | vb_q0 | 0.000744 | | vb_q1 | 1.54e-05 | | vb_q2 | 7.35e-06 | | vb_q3 | 5.16e-06 | --------------------------- --------------------------- | grad_norm | 5.21 | | loss | 0.00653 | | loss_cal | 0.089 | | loss_cal_q0 | 0.0889 | | loss_cal_q1 | 0.0885 | | loss_cal_q2 | 0.092 | | loss_cal_q3 | 0.0873 | | loss_diff | 0.00335 | | loss_diff_q0 | 0.00983 | | loss_diff_q1 | 0.00205 | | loss_diff_q2 | 0.000831 | | loss_diff_q3 | 0.000417 | | loss_q0 | 0.0222 | | loss_q1 | 0.00207 | | loss_q2 | 0.000839 | | loss_q3 | 0.000422 | | param_norm | 234 | | samples | 1.44e+04 | | step | 1.8e+03 | | vb | 0.00317 | | vb_q0 | 0.0124 | | vb_q1 | 1.53e-05 | | vb_q2 | 7.74e-06 | | vb_q3 | 5.39e-06 | --------------------------- --------------------------- | grad_norm | 4.83 | | loss | 0.00715 | | loss_cal | 0.0841 | | loss_cal_q0 | 0.0898 | | loss_cal_q1 | 0.0764 | | loss_cal_q2 | 0.0929 | | loss_cal_q3 | 0.0796 | | loss_diff | 0.00338 | | loss_diff_q0 | 0.00984 | | loss_diff_q1 | 0.00215 | | loss_diff_q2 | 0.000834 | | loss_diff_q3 | 0.000407 | | loss_q0 | 0.0246 | | loss_q1 | 0.00217 | | loss_q2 | 0.000841 | | loss_q3 | 0.000413 | | param_norm | 234 | | samples | 1.52e+04 | | step | 1.9e+03 | | vb | 0.00377 | | vb_q0 | 0.0147 | | vb_q1 | 1.62e-05 | | vb_q2 | 7.7e-06 | | vb_q3 | 5.21e-06 | --------------------------- --------------------------- | grad_norm | 5.07 | | loss | 0.00564 | | loss_cal | 0.0831 | | loss_cal_q0 | 0.0812 | | loss_cal_q1 | 0.0775 | | loss_cal_q2 | 0.0854 | | loss_cal_q3 | 0.0891 | | loss_diff | 0.0037 | | loss_diff_q0 | 0.0101 | | loss_diff_q1 | 0.0024 | | loss_diff_q2 | 0.00104 | | loss_diff_q3 | 0.000549 | | loss_q0 | 0.0173 | | loss_q1 | 0.00242 | | loss_q2 | 0.00104 | | loss_q3 | 0.000556 | | param_norm | 234 | | samples | 1.6e+04 | | step | 2e+03 | | vb | 0.00194 | | vb_q0 | 0.00719 | | vb_q1 | 1.8e-05 | | vb_q2 | 9.64e-06 | | vb_q3 | 7.02e-06 | --------------------------- --------------------------- | grad_norm | 4.94 | | loss | 0.00394 | | loss_cal | 0.079 | | loss_cal_q0 | 0.0953 | | loss_cal_q1 | 0.0775 | | loss_cal_q2 | 0.0761 | | loss_cal_q3 | 0.0688 | | loss_diff | 0.00349 | | loss_diff_q0 | 0.0104 | | loss_diff_q1 | 0.00223 | | loss_diff_q2 | 0.000974 | | loss_diff_q3 | 0.000476 | | loss_q0 | 0.0122 | | loss_q1 | 0.00225 | | loss_q2 | 0.000983 | | loss_q3 | 0.000482 | | param_norm | 234 | | samples | 1.68e+04 | | step | 2.1e+03 | | vb | 0.000458 | | vb_q0 | 0.00182 | | vb_q1 | 1.65e-05 | | vb_q2 | 8.94e-06 | | vb_q3 | 6.02e-06 | --------------------------- --------------------------- | grad_norm | 5.17 | | loss | 0.00309 | | loss_cal | 0.0815 | | loss_cal_q0 | 0.0858 | | loss_cal_q1 | 0.0828 | | loss_cal_q2 | 0.078 | | loss_cal_q3 | 0.0788 | | loss_diff | 0.00293 | | loss_diff_q0 | 0.00832 | | loss_diff_q1 | 0.00195 | | loss_diff_q2 | 0.000731 | | loss_diff_q3 | 0.000384 | | loss_q0 | 0.0089 | | loss_q1 | 0.00197 | | loss_q2 | 0.000737 | | loss_q3 | 0.000388 | | param_norm | 234 | | samples | 1.76e+04 | | step | 2.2e+03 | | vb | 0.000159 | | vb_q0 | 0.00058 | | vb_q1 | 1.45e-05 | | vb_q2 | 6.7e-06 | | vb_q3 | 4.86e-06 | --------------------------- --------------------------- | grad_norm | 5.02 | | loss | 0.00259 | | loss_cal | 0.0828 | | loss_cal_q0 | 0.0879 | | loss_cal_q1 | 0.0781 | | loss_cal_q2 | 0.0876 | | loss_cal_q3 | 0.0778 | | loss_diff | 0.00248 | | loss_diff_q0 | 0.00662 | | loss_diff_q1 | 0.00186 | | loss_diff_q2 | 0.000682 | | loss_diff_q3 | 0.000299 | | loss_q0 | 0.007 | | loss_q1 | 0.00187 | | loss_q2 | 0.000688 | | loss_q3 | 0.000303 | | param_norm | 234 | | samples | 1.84e+04 | | step | 2.3e+03 | | vb | 0.000112 | | vb_q0 | 0.000383 | | vb_q1 | 1.39e-05 | | vb_q2 | 6.28e-06 | | vb_q3 | 3.81e-06 | --------------------------- --------------------------- | grad_norm | 4.75 | | loss | 0.00255 | | loss_cal | 0.0785 | | loss_cal_q0 | 0.0888 | | loss_cal_q1 | 0.0751 | | loss_cal_q2 | 0.0718 | | loss_cal_q3 | 0.0789 | | loss_diff | 0.00244 | | loss_diff_q0 | 0.00677 | | loss_diff_q1 | 0.00178 | | loss_diff_q2 | 0.000651 | | loss_diff_q3 | 0.000271 | | loss_q0 | 0.0072 | | loss_q1 | 0.0018 | | loss_q2 | 0.000657 | | loss_q3 | 0.000274 | | param_norm | 234 | | samples | 1.92e+04 | | step | 2.4e+03 | | vb | 0.000117 | | vb_q0 | 0.000435 | | vb_q1 | 1.33e-05 | | vb_q2 | 5.98e-06 | | vb_q3 | 3.5e-06 | --------------------------- --------------------------- | grad_norm | 4.7 | | loss | 0.00616 | | loss_cal | 0.0784 | | loss_cal_q0 | 0.084 | | loss_cal_q1 | 0.0818 | | loss_cal_q2 | 0.0709 | | loss_cal_q3 | 0.0761 | | loss_diff | 0.00283 | | loss_diff_q0 | 0.00822 | | loss_diff_q1 | 0.00204 | | loss_diff_q2 | 0.000731 | | loss_diff_q3 | 0.000365 | | loss_q0 | 0.0219 | | loss_q1 | 0.00205 | | loss_q2 | 0.000738 | | loss_q3 | 0.000369 | | param_norm | 234 | | samples | 2e+04 | | step | 2.5e+03 | | vb | 0.00333 | | vb_q0 | 0.0136 | | vb_q1 | 1.53e-05 | | vb_q2 | 6.73e-06 | | vb_q3 | 4.69e-06 | --------------------------- --------------------------- | grad_norm | 4.61 | | loss | 0.00298 | | loss_cal | 0.0752 | | loss_cal_q0 | 0.0748 | | loss_cal_q1 | 0.0684 | | loss_cal_q2 | 0.0772 | | loss_cal_q3 | 0.0819 | | loss_diff | 0.00255 | | loss_diff_q0 | 0.00694 | | loss_diff_q1 | 0.00182 | | loss_diff_q2 | 0.000656 | | loss_diff_q3 | 0.000341 | | loss_q0 | 0.00853 | | loss_q1 | 0.00183 | | loss_q2 | 0.000663 | | loss_q3 | 0.000345 | | param_norm | 234 | | samples | 2.08e+04 | | step | 2.6e+03 | | vb | 0.000424 | | vb_q0 | 0.00159 | | vb_q1 | 1.37e-05 | | vb_q2 | 6.12e-06 | | vb_q3 | 4.36e-06 | --------------------------- --------------------------- | grad_norm | 4.96 | | loss | 0.0024 | | loss_cal | 0.0785 | | loss_cal_q0 | 0.0747 | | loss_cal_q1 | 0.0823 | | loss_cal_q2 | 0.0837 | | loss_cal_q3 | 0.0742 | | loss_diff | 0.00226 | | loss_diff_q0 | 0.00598 | | loss_diff_q1 | 0.00182 | | loss_diff_q2 | 0.000641 | | loss_diff_q3 | 0.00025 | | loss_q0 | 0.00646 | | loss_q1 | 0.00183 | | loss_q2 | 0.000647 | | loss_q3 | 0.000253 | | param_norm | 234 | | samples | 2.16e+04 | | step | 2.7e+03 | | vb | 0.000136 | | vb_q0 | 0.000482 | | vb_q1 | 1.35e-05 | | vb_q2 | 5.91e-06 | | vb_q3 | 3.21e-06 | --------------------------- --------------------------- | grad_norm | 4.67 | | loss | 0.00436 | | loss_cal | 0.0765 | | loss_cal_q0 | 0.0776 | | loss_cal_q1 | 0.0838 | | loss_cal_q2 | 0.0736 | | loss_cal_q3 | 0.0723 | | loss_diff | 0.00238 | | loss_diff_q0 | 0.00687 | | loss_diff_q1 | 0.00191 | | loss_diff_q2 | 0.000639 | | loss_diff_q3 | 0.0003 | | loss_q0 | 0.015 | | loss_q1 | 0.00193 | | loss_q2 | 0.000645 | | loss_q3 | 0.000304 | | param_norm | 234 | | samples | 2.24e+04 | | step | 2.8e+03 | | vb | 0.00198 | | vb_q0 | 0.00818 | | vb_q1 | 1.44e-05 | | vb_q2 | 5.86e-06 | | vb_q3 | 3.81e-06 | --------------------------- --------------------------- | grad_norm | 4.5 | | loss | 0.00891 | | loss_cal | 0.0738 | | loss_cal_q0 | 0.0783 | | loss_cal_q1 | 0.0683 | | loss_cal_q2 | 0.0773 | | loss_cal_q3 | 0.0722 | | loss_diff | 0.00602 | | loss_diff_q0 | 0.0176 | | loss_diff_q1 | 0.00323 | | loss_diff_q2 | 0.00152 | | loss_diff_q3 | 0.000948 | | loss_q0 | 0.0286 | | loss_q1 | 0.00325 | | loss_q2 | 0.00153 | | loss_q3 | 0.00096 | | param_norm | 234 | | samples | 2.32e+04 | | step | 2.9e+03 | | vb | 0.00289 | | vb_q0 | 0.011 | | vb_q1 | 2.41e-05 | | vb_q2 | 1.39e-05 | | vb_q3 | 1.21e-05 | --------------------------- --------------------------- | grad_norm | 4.62 | | loss | 0.00531 | | loss_cal | 0.0787 | | loss_cal_q0 | 0.083 | | loss_cal_q1 | 0.0763 | | loss_cal_q2 | 0.0825 | | loss_cal_q3 | 0.073 | | loss_diff | 0.00332 | | loss_diff_q0 | 0.011 | | loss_diff_q1 | 0.0024 | | loss_diff_q2 | 0.00105 | | loss_diff_q3 | 0.000528 | | loss_q0 | 0.0206 | | loss_q1 | 0.00241 | | loss_q2 | 0.00106 | | loss_q3 | 0.000535 | | param_norm | 235 | | samples | 2.4e+04 | | step | 3e+03 | | vb | 0.002 | | vb_q0 | 0.00958 | | vb_q1 | 1.79e-05 | | vb_q2 | 9.56e-06 | | vb_q3 | 6.73e-06 | --------------------------- --------------------------- | grad_norm | 3.9 | | loss | 0.00528 | | loss_cal | 0.0682 | | loss_cal_q0 | 0.0727 | | loss_cal_q1 | 0.0564 | | loss_cal_q2 | 0.0667 | | loss_cal_q3 | 0.0762 | | loss_diff | 0.00368 | | loss_diff_q0 | 0.0106 | | loss_diff_q1 | 0.00232 | | loss_diff_q2 | 0.000998 | | loss_diff_q3 | 0.000445 | | loss_q0 | 0.0167 | | loss_q1 | 0.00234 | | loss_q2 | 0.00101 | | loss_q3 | 0.00045 | | param_norm | 235 | | samples | 2.48e+04 | | step | 3.1e+03 | | vb | 0.0016 | | vb_q0 | 0.00611 | | vb_q1 | 1.73e-05 | | vb_q2 | 9.18e-06 | | vb_q3 | 5.63e-06 | --------------------------- --------------------------- | grad_norm | 4.25 | | loss | 0.00226 | | loss_cal | 0.075 | | loss_cal_q0 | 0.0738 | | loss_cal_q1 | 0.0788 | | loss_cal_q2 | 0.0731 | | loss_cal_q3 | 0.0741 | | loss_diff | 0.00217 | | loss_diff_q0 | 0.00541 | | loss_diff_q1 | 0.00181 | | loss_diff_q2 | 0.000619 | | loss_diff_q3 | 0.000245 | | loss_q0 | 0.00569 | | loss_q1 | 0.00182 | | loss_q2 | 0.000625 | | loss_q3 | 0.000249 | | param_norm | 235 | | samples | 2.56e+04 | | step | 3.2e+03 | | vb | 8.5e-05 | | vb_q0 | 0.000287 | | vb_q1 | 1.35e-05 | | vb_q2 | 5.69e-06 | | vb_q3 | 3.1e-06 | --------------------------- --------------------------- | grad_norm | 4.51 | | loss | 0.00205 | | loss_cal | 0.0709 | | loss_cal_q0 | 0.0732 | | loss_cal_q1 | 0.0737 | | loss_cal_q2 | 0.0678 | | loss_cal_q3 | 0.0687 | | loss_diff | 0.00196 | | loss_diff_q0 | 0.00612 | | loss_diff_q1 | 0.00166 | | loss_diff_q2 | 0.000643 | | loss_diff_q3 | 0.000235 | | loss_q0 | 0.00651 | | loss_q1 | 0.00167 | | loss_q2 | 0.000648 | | loss_q3 | 0.000238 | | param_norm | 235 | | samples | 2.64e+04 | | step | 3.3e+03 | | vb | 9.16e-05 | | vb_q0 | 0.000397 | | vb_q1 | 1.23e-05 | | vb_q2 | 5.81e-06 | | vb_q3 | 2.99e-06 | --------------------------- --------------------------- | grad_norm | 4.41 | | loss | 0.00387 | | loss_cal | 0.0715 | | loss_cal_q0 | 0.0705 | | loss_cal_q1 | 0.0646 | | loss_cal_q2 | 0.0772 | | loss_cal_q3 | 0.073 | | loss_diff | 0.00232 | | loss_diff_q0 | 0.00658 | | loss_diff_q1 | 0.00171 | | loss_diff_q2 | 0.000637 | | loss_diff_q3 | 0.000254 | | loss_q0 | 0.0126 | | loss_q1 | 0.00172 | | loss_q2 | 0.000642 | | loss_q3 | 0.000257 | | param_norm | 235 | | samples | 2.72e+04 | | step | 3.4e+03 | | vb | 0.00155 | | vb_q0 | 0.006 | | vb_q1 | 1.27e-05 | | vb_q2 | 5.77e-06 | | vb_q3 | 3.24e-06 | --------------------------- --------------------------- | grad_norm | 4.25 | | loss | 0.00377 | | loss_cal | 0.0707 | | loss_cal_q0 | 0.0692 | | loss_cal_q1 | 0.0708 | | loss_cal_q2 | 0.0732 | | loss_cal_q3 | 0.0699 | | loss_diff | 0.00239 | | loss_diff_q0 | 0.00693 | | loss_diff_q1 | 0.00175 | | loss_diff_q2 | 0.000561 | | loss_diff_q3 | 0.000258 | | loss_q0 | 0.0125 | | loss_q1 | 0.00177 | | loss_q2 | 0.000566 | | loss_q3 | 0.000261 | | param_norm | 235 | | samples | 2.8e+04 | | step | 3.5e+03 | | vb | 0.00138 | | vb_q0 | 0.00556 | | vb_q1 | 1.3e-05 | | vb_q2 | 5.25e-06 | | vb_q3 | 3.29e-06 | --------------------------- --------------------------- | grad_norm | 4.54 | | loss | 0.00303 | | loss_cal | 0.0683 | | loss_cal_q0 | 0.0711 | | loss_cal_q1 | 0.0685 | | loss_cal_q2 | 0.0645 | | loss_cal_q3 | 0.0664 | | loss_diff | 0.00256 | | loss_diff_q0 | 0.00756 | | loss_diff_q1 | 0.00182 | | loss_diff_q2 | 0.000676 | | loss_diff_q3 | 0.000306 | | loss_q0 | 0.00946 | | loss_q1 | 0.00184 | | loss_q2 | 0.000682 | | loss_q3 | 0.00031 | | param_norm | 235 | | samples | 2.88e+04 | | step | 3.6e+03 | | vb | 0.000469 | | vb_q0 | 0.00191 | | vb_q1 | 1.36e-05 | | vb_q2 | 6.15e-06 | | vb_q3 | 3.89e-06 | --------------------------- --------------------------- | grad_norm | 3.98 | | loss | 0.00199 | | loss_cal | 0.0683 | | loss_cal_q0 | 0.0662 | | loss_cal_q1 | 0.062 | | loss_cal_q2 | 0.0685 | | loss_cal_q3 | 0.0743 | | loss_diff | 0.00191 | | loss_diff_q0 | 0.00499 | | loss_diff_q1 | 0.00166 | | loss_diff_q2 | 0.000578 | | loss_diff_q3 | 0.000232 | | loss_q0 | 0.00528 | | loss_q1 | 0.00167 | | loss_q2 | 0.000583 | | loss_q3 | 0.000235 | | param_norm | 235 | | samples | 2.96e+04 | | step | 3.7e+03 | | vb | 8e-05 | | vb_q0 | 0.000286 | | vb_q1 | 1.24e-05 | | vb_q2 | 5.21e-06 | | vb_q3 | 2.92e-06 | --------------------------- --------------------------- | grad_norm | 4.15 | | loss | 0.0055 | | loss_cal | 0.0682 | | loss_cal_q0 | 0.0656 | | loss_cal_q1 | 0.0635 | | loss_cal_q2 | 0.0783 | | loss_cal_q3 | 0.0676 | | loss_diff | 0.00264 | | loss_diff_q0 | 0.00738 | | loss_diff_q1 | 0.00179 | | loss_diff_q2 | 0.000712 | | loss_diff_q3 | 0.000337 | | loss_q0 | 0.0185 | | loss_q1 | 0.00181 | | loss_q2 | 0.000719 | | loss_q3 | 0.000342 | | param_norm | 235 | | samples | 3.04e+04 | | step | 3.8e+03 | | vb | 0.00287 | | vb_q0 | 0.0112 | | vb_q1 | 1.34e-05 | | vb_q2 | 6.49e-06 | | vb_q3 | 4.27e-06 | --------------------------- --------------------------- | grad_norm | 4.14 | | loss | 0.00607 | | loss_cal | 0.0694 | | loss_cal_q0 | 0.0733 | | loss_cal_q1 | 0.0724 | | loss_cal_q2 | 0.0652 | | loss_cal_q3 | 0.0662 | | loss_diff | 0.00266 | | loss_diff_q0 | 0.00793 | | loss_diff_q1 | 0.00189 | | loss_diff_q2 | 0.00068 | | loss_diff_q3 | 0.000324 | | loss_q0 | 0.0218 | | loss_q1 | 0.0019 | | loss_q2 | 0.000686 | | loss_q3 | 0.000328 | | param_norm | 235 | | samples | 3.12e+04 | | step | 3.9e+03 | | vb | 0.00341 | | vb_q0 | 0.0138 | | vb_q1 | 1.4e-05 | | vb_q2 | 6.22e-06 | | vb_q3 | 4.1e-06 | --------------------------- --------------------------- | grad_norm | 4.2 | | loss | 0.00627 | | loss_cal | 0.0646 | | loss_cal_q0 | 0.0575 | | loss_cal_q1 | 0.0655 | | loss_cal_q2 | 0.0591 | | loss_cal_q3 | 0.0753 | | loss_diff | 0.00342 | | loss_diff_q0 | 0.0103 | | loss_diff_q1 | 0.00204 | | loss_diff_q2 | 0.000851 | | loss_diff_q3 | 0.000446 | | loss_q0 | 0.0216 | | loss_q1 | 0.00205 | | loss_q2 | 0.000859 | | loss_q3 | 0.000452 | | param_norm | 235 | | samples | 3.2e+04 | | step | 4e+03 | | vb | 0.00284 | | vb_q0 | 0.0113 | | vb_q1 | 1.51e-05 | | vb_q2 | 7.79e-06 | | vb_q3 | 5.68e-06 | --------------------------- --------------------------- | grad_norm | 4.15 | | loss | 0.00361 | | loss_cal | 0.068 | | loss_cal_q0 | 0.0672 | | loss_cal_q1 | 0.0594 | | loss_cal_q2 | 0.0679 | | loss_cal_q3 | 0.0778 | | loss_diff | 0.00212 | | loss_diff_q0 | 0.00622 | | loss_diff_q1 | 0.0017 | | loss_diff_q2 | 0.000623 | | loss_diff_q3 | 0.000238 | | loss_q0 | 0.0127 | | loss_q1 | 0.00172 | | loss_q2 | 0.000629 | | loss_q3 | 0.000242 | | param_norm | 235 | | samples | 3.28e+04 | | step | 4.1e+03 | | vb | 0.00149 | | vb_q0 | 0.00647 | | vb_q1 | 1.27e-05 | | vb_q2 | 5.66e-06 | | vb_q3 | 3.08e-06 | --------------------------- --------------------------- | grad_norm | 3.85 | | loss | 0.00342 | | loss_cal | 0.0633 | | loss_cal_q0 | 0.0578 | | loss_cal_q1 | 0.0657 | | loss_cal_q2 | 0.0637 | | loss_cal_q3 | 0.0652 | | loss_diff | 0.00194 | | loss_diff_q0 | 0.00532 | | loss_diff_q1 | 0.00167 | | loss_diff_q2 | 0.000573 | | loss_diff_q3 | 0.000211 | | loss_q0 | 0.0114 | | loss_q1 | 0.00168 | | loss_q2 | 0.000578 | | loss_q3 | 0.000213 | | param_norm | 235 | | samples | 3.36e+04 | | step | 4.2e+03 | | vb | 0.00148 | | vb_q0 | 0.00612 | | vb_q1 | 1.24e-05 | | vb_q2 | 5.15e-06 | | vb_q3 | 2.71e-06 | --------------------------- --------------------------- | grad_norm | 3.92 | | loss | 0.00335 | | loss_cal | 0.0662 | | loss_cal_q0 | 0.0618 | | loss_cal_q1 | 0.0678 | | loss_cal_q2 | 0.0593 | | loss_cal_q3 | 0.0751 | | loss_diff | 0.0019 | | loss_diff_q0 | 0.00536 | | loss_diff_q1 | 0.00169 | | loss_diff_q2 | 0.000539 | | loss_diff_q3 | 0.000209 | | loss_q0 | 0.0113 | | loss_q1 | 0.0017 | | loss_q2 | 0.000544 | | loss_q3 | 0.000211 | | param_norm | 235 | | samples | 3.44e+04 | | step | 4.3e+03 | | vb | 0.00145 | | vb_q0 | 0.00594 | | vb_q1 | 1.26e-05 | | vb_q2 | 4.89e-06 | | vb_q3 | 2.66e-06 | --------------------------- --------------------------- | grad_norm | 4.07 | | loss | 0.00235 | | loss_cal | 0.0636 | | loss_cal_q0 | 0.0564 | | loss_cal_q1 | 0.0651 | | loss_cal_q2 | 0.0673 | | loss_cal_q3 | 0.0668 | | loss_diff | 0.002 | | loss_diff_q0 | 0.00501 | | loss_diff_q1 | 0.00172 | | loss_diff_q2 | 0.000534 | | loss_diff_q3 | 0.000209 | | loss_q0 | 0.00626 | | loss_q1 | 0.00173 | | loss_q2 | 0.000539 | | loss_q3 | 0.000211 | | param_norm | 235 | | samples | 3.52e+04 | | step | 4.4e+03 | | vb | 0.000345 | | vb_q0 | 0.00125 | | vb_q1 | 1.27e-05 | | vb_q2 | 4.85e-06 | | vb_q3 | 2.68e-06 | --------------------------- --------------------------- | grad_norm | 4.08 | | loss | 0.00224 | | loss_cal | 0.0642 | | loss_cal_q0 | 0.0663 | | loss_cal_q1 | 0.0678 | | loss_cal_q2 | 0.071 | | loss_cal_q3 | 0.0528 | | loss_diff | 0.00211 | | loss_diff_q0 | 0.00584 | | loss_diff_q1 | 0.00176 | | loss_diff_q2 | 0.000605 | | loss_diff_q3 | 0.000214 | | loss_q0 | 0.00634 | | loss_q1 | 0.00177 | | loss_q2 | 0.000611 | | loss_q3 | 0.000217 | | param_norm | 235 | | samples | 3.6e+04 | | step | 4.5e+03 | | vb | 0.000131 | | vb_q0 | 0.000502 | | vb_q1 | 1.31e-05 | | vb_q2 | 5.45e-06 | | vb_q3 | 2.74e-06 | --------------------------- --------------------------- | grad_norm | 4.04 | | loss | 0.00315 | | loss_cal | 0.064 | | loss_cal_q0 | 0.0706 | | loss_cal_q1 | 0.0598 | | loss_cal_q2 | 0.0616 | | loss_cal_q3 | 0.0618 | | loss_diff | 0.00185 | | loss_diff_q0 | 0.00529 | | loss_diff_q1 | 0.00167 | | loss_diff_q2 | 0.000535 | | loss_diff_q3 | 0.00021 | | loss_q0 | 0.0108 | | loss_q1 | 0.00169 | | loss_q2 | 0.00054 | | loss_q3 | 0.000212 | | param_norm | 235 | | samples | 3.68e+04 | | step | 4.6e+03 | | vb | 0.0013 | | vb_q0 | 0.0055 | | vb_q1 | 1.24e-05 | | vb_q2 | 4.9e-06 | | vb_q3 | 2.65e-06 | --------------------------- --------------------------- | grad_norm | 4.09 | | loss | 0.00305 | | loss_cal | 0.0655 | | loss_cal_q0 | 0.0627 | | loss_cal_q1 | 0.0764 | | loss_cal_q2 | 0.0613 | | loss_cal_q3 | 0.0618 | | loss_diff | 0.00197 | | loss_diff_q0 | 0.00526 | | loss_diff_q1 | 0.00165 | | loss_diff_q2 | 0.000518 | | loss_diff_q3 | 0.000204 | | loss_q0 | 0.00942 | | loss_q1 | 0.00166 | | loss_q2 | 0.000522 | | loss_q3 | 0.000206 | | param_norm | 235 | | samples | 3.76e+04 | | step | 4.7e+03 | | vb | 0.00108 | | vb_q0 | 0.00416 | | vb_q1 | 1.23e-05 | | vb_q2 | 4.78e-06 | | vb_q3 | 2.59e-06 | --------------------------- --------------------------- | grad_norm | 3.82 | | loss | 0.00149 | | loss_cal | 0.061 | | loss_cal_q0 | 0.0665 | | loss_cal_q1 | 0.0615 | | loss_cal_q2 | 0.061 | | loss_cal_q3 | 0.0557 | | loss_diff | 0.00145 | | loss_diff_q0 | 0.00392 | | loss_diff_q1 | 0.00146 | | loss_diff_q2 | 0.00048 | | loss_diff_q3 | 0.000158 | | loss_q0 | 0.00406 | | loss_q1 | 0.00147 | | loss_q2 | 0.000485 | | loss_q3 | 0.00016 | | param_norm | 235 | | samples | 3.84e+04 | | step | 4.8e+03 | | vb | 3.97e-05 | | vb_q0 | 0.000147 | | vb_q1 | 1.09e-05 | | vb_q2 | 4.43e-06 | | vb_q3 | 1.99e-06 | --------------------------- --------------------------- | grad_norm | 3.57 | | loss | 0.00348 | | loss_cal | 0.0613 | | loss_cal_q0 | 0.0631 | | loss_cal_q1 | 0.0537 | | loss_cal_q2 | 0.0544 | | loss_cal_q3 | 0.0749 | | loss_diff | 0.0018 | | loss_diff_q0 | 0.00453 | | loss_diff_q1 | 0.00162 | | loss_diff_q2 | 0.000486 | | loss_diff_q3 | 0.00021 | | loss_q0 | 0.0107 | | loss_q1 | 0.00163 | | loss_q2 | 0.000491 | | loss_q3 | 0.000213 | | param_norm | 235 | | samples | 3.92e+04 | | step | 4.9e+03 | | vb | 0.00167 | | vb_q0 | 0.00616 | | vb_q1 | 1.2e-05 | | vb_q2 | 4.51e-06 | | vb_q3 | 2.66e-06 | --------------------------- --------------------------- | grad_norm | 3.91 | | loss | 0.00366 | | loss_cal | 0.0597 | | loss_cal_q0 | 0.0542 | | loss_cal_q1 | 0.0603 | | loss_cal_q2 | 0.0678 | | loss_cal_q3 | 0.0578 | | loss_diff | 0.00194 | | loss_diff_q0 | 0.00535 | | loss_diff_q1 | 0.0016 | | loss_diff_q2 | 0.000523 | | loss_diff_q3 | 0.000192 | | loss_q0 | 0.0119 | | loss_q1 | 0.00162 | | loss_q2 | 0.000528 | | loss_q3 | 0.000194 | | param_norm | 235 | | samples | 4e+04 | | step | 5e+03 | | vb | 0.00172 | | vb_q0 | 0.00659 | | vb_q1 | 1.19e-05 | | vb_q2 | 4.77e-06 | | vb_q3 | 2.46e-06 | --------------------------- saving model 0... saving model 0.9999... --------------------------- | grad_norm | 4.07 | | loss | 0.00223 | | loss_cal | 0.0621 | | loss_cal_q0 | 0.0654 | | loss_cal_q1 | 0.0599 | | loss_cal_q2 | 0.0613 | | loss_cal_q3 | 0.0622 | | loss_diff | 0.00205 | | loss_diff_q0 | 0.00587 | | loss_diff_q1 | 0.00167 | | loss_diff_q2 | 0.000539 | | loss_diff_q3 | 0.000229 | | loss_q0 | 0.00658 | | loss_q1 | 0.00169 | | loss_q2 | 0.000544 | | loss_q3 | 0.000232 | | param_norm | 236 | | samples | 4.08e+04 | | step | 5.1e+03 | | vb | 0.000179 | | vb_q0 | 0.000717 | | vb_q1 | 1.25e-05 | | vb_q2 | 4.95e-06 | | vb_q3 | 2.96e-06 | --------------------------- --------------------------- | grad_norm | 3.78 | | loss | 0.00327 | | loss_cal | 0.0629 | | loss_cal_q0 | 0.063 | | loss_cal_q1 | 0.0618 | | loss_cal_q2 | 0.0632 | | loss_cal_q3 | 0.0605 | | loss_diff | 0.00159 | | loss_diff_q0 | 0.00408 | | loss_diff_q1 | 0.00157 | | loss_diff_q2 | 0.000474 | | loss_diff_q3 | 0.000165 | | loss_q0 | 0.011 | | loss_q1 | 0.00158 | | loss_q2 | 0.000478 | | loss_q3 | 0.000167 | | param_norm | 236 | | samples | 4.16e+04 | | step | 5.2e+03 | | vb | 0.00168 | | vb_q0 | 0.00687 | | vb_q1 | 1.16e-05 | | vb_q2 | 4.3e-06 | | vb_q3 | 2.09e-06 | --------------------------- --------------------------- | grad_norm | 3.65 | | loss | 0.00323 | | loss_cal | 0.0599 | | loss_cal_q0 | 0.0594 | | loss_cal_q1 | 0.0572 | | loss_cal_q2 | 0.061 | | loss_cal_q3 | 0.0621 | | loss_diff | 0.00172 | | loss_diff_q0 | 0.00452 | | loss_diff_q1 | 0.00163 | | loss_diff_q2 | 0.000571 | | loss_diff_q3 | 0.000214 | | loss_q0 | 0.0107 | | loss_q1 | 0.00164 | | loss_q2 | 0.000576 | | loss_q3 | 0.000217 | | param_norm | 236 | | samples | 4.24e+04 | | step | 5.3e+03 | | vb | 0.00151 | | vb_q0 | 0.00621 | | vb_q1 | 1.21e-05 | | vb_q2 | 5.19e-06 | | vb_q3 | 2.7e-06 | --------------------------- --------------------------- | grad_norm | 3.44 | | loss | 0.00312 | | loss_cal | 0.0593 | | loss_cal_q0 | 0.0565 | | loss_cal_q1 | 0.0521 | | loss_cal_q2 | 0.0661 | | loss_cal_q3 | 0.0633 | | loss_diff | 0.0016 | | loss_diff_q0 | 0.0044 | | loss_diff_q1 | 0.0015 | | loss_diff_q2 | 0.000458 | | loss_diff_q3 | 0.000166 | | loss_q0 | 0.0108 | | loss_q1 | 0.00151 | | loss_q2 | 0.000462 | | loss_q3 | 0.000168 | | param_norm | 236 | | samples | 4.32e+04 | | step | 5.4e+03 | | vb | 0.00152 | | vb_q0 | 0.0064 | | vb_q1 | 1.12e-05 | | vb_q2 | 4.19e-06 | | vb_q3 | 2.12e-06 | --------------------------- --------------------------- | grad_norm | 3.35 | | loss | 0.00407 | | loss_cal | 0.0587 | | loss_cal_q0 | 0.0555 | | loss_cal_q1 | 0.0529 | | loss_cal_q2 | 0.0554 | | loss_cal_q3 | 0.0665 | | loss_diff | 0.00181 | | loss_diff_q0 | 0.00495 | | loss_diff_q1 | 0.0016 | | loss_diff_q2 | 0.000508 | | loss_diff_q3 | 0.000189 | | loss_q0 | 0.0141 | | loss_q1 | 0.00161 | | loss_q2 | 0.000512 | | loss_q3 | 0.000191 | | param_norm | 236 | | samples | 4.4e+04 | | step | 5.5e+03 | | vb | 0.00226 | | vb_q0 | 0.00913 | | vb_q1 | 1.19e-05 | | vb_q2 | 4.64e-06 | | vb_q3 | 2.41e-06 | --------------------------- --------------------------- | grad_norm | 3.66 | | loss | 0.00504 | | loss_cal | 0.0611 | | loss_cal_q0 | 0.0586 | | loss_cal_q1 | 0.0629 | | loss_cal_q2 | 0.0649 | | loss_cal_q3 | 0.0577 | | loss_diff | 0.00248 | | loss_diff_q0 | 0.00699 | | loss_diff_q1 | 0.00171 | | loss_diff_q2 | 0.000628 | | loss_diff_q3 | 0.000311 | | loss_q0 | 0.0169 | | loss_q1 | 0.00173 | | loss_q2 | 0.000634 | | loss_q3 | 0.000315 | | param_norm | 236 | | samples | 4.48e+04 | | step | 5.6e+03 | | vb | 0.00255 | | vb_q0 | 0.00994 | | vb_q1 | 1.27e-05 | | vb_q2 | 5.75e-06 | | vb_q3 | 3.97e-06 | --------------------------- --------------------------- | grad_norm | 3.62 | | loss | 0.00408 | | loss_cal | 0.0549 | | loss_cal_q0 | 0.0517 | | loss_cal_q1 | 0.0586 | | loss_cal_q2 | 0.0568 | | loss_cal_q3 | 0.0518 | | loss_diff | 0.00243 | | loss_diff_q0 | 0.00663 | | loss_diff_q1 | 0.00181 | | loss_diff_q2 | 0.000604 | | loss_diff_q3 | 0.000299 | | loss_q0 | 0.0129 | | loss_q1 | 0.00182 | | loss_q2 | 0.00061 | | loss_q3 | 0.000302 | | param_norm | 236 | | samples | 4.56e+04 | | step | 5.7e+03 | | vb | 0.00166 | | vb_q0 | 0.00623 | | vb_q1 | 1.34e-05 | | vb_q2 | 5.55e-06 | | vb_q3 | 3.82e-06 | --------------------------- --------------------------- | grad_norm | 3.59 | | loss | 0.00463 | | loss_cal | 0.0584 | | loss_cal_q0 | 0.0602 | | loss_cal_q1 | 0.0561 | | loss_cal_q2 | 0.0589 | | loss_cal_q3 | 0.0579 | | loss_diff | 0.00289 | | loss_diff_q0 | 0.00865 | | loss_diff_q1 | 0.00194 | | loss_diff_q2 | 0.000824 | | loss_diff_q3 | 0.000439 | | loss_q0 | 0.0158 | | loss_q1 | 0.00196 | | loss_q2 | 0.000832 | | loss_q3 | 0.000444 | | param_norm | 236 | | samples | 4.64e+04 | | step | 5.8e+03 | | vb | 0.00174 | | vb_q0 | 0.00719 | | vb_q1 | 1.45e-05 | | vb_q2 | 7.63e-06 | | vb_q3 | 5.63e-06 | --------------------------- --------------------------- | grad_norm | 3.52 | | loss | 0.00501 | | loss_cal | 0.0562 | | loss_cal_q0 | 0.0604 | | loss_cal_q1 | 0.0543 | | loss_cal_q2 | 0.0518 | | loss_cal_q3 | 0.0587 | | loss_diff | 0.00273 | | loss_diff_q0 | 0.00789 | | loss_diff_q1 | 0.0018 | | loss_diff_q2 | 0.000697 | | loss_diff_q3 | 0.000367 | | loss_q0 | 0.0169 | | loss_q1 | 0.00182 | | loss_q2 | 0.000703 | | loss_q3 | 0.000371 | | param_norm | 236 | | samples | 4.72e+04 | | step | 5.9e+03 | | vb | 0.00228 | | vb_q0 | 0.00897 | | vb_q1 | 1.34e-05 | | vb_q2 | 6.41e-06 | | vb_q3 | 4.73e-06 | --------------------------- --------------------------- | grad_norm | 3.46 | | loss | 0.00292 | | loss_cal | 0.0569 | | loss_cal_q0 | 0.0628 | | loss_cal_q1 | 0.0578 | | loss_cal_q2 | 0.0561 | | loss_cal_q3 | 0.0518 | | loss_diff | 0.00189 | | loss_diff_q0 | 0.00552 | | loss_diff_q1 | 0.00161 | | loss_diff_q2 | 0.000507 | | loss_diff_q3 | 0.000203 | | loss_q0 | 0.00994 | | loss_q1 | 0.00162 | | loss_q2 | 0.000511 | | loss_q3 | 0.000206 | | param_norm | 236 | | samples | 4.8e+04 | | step | 6e+03 | | vb | 0.00103 | | vb_q0 | 0.00442 | | vb_q1 | 1.19e-05 | | vb_q2 | 4.62e-06 | | vb_q3 | 2.58e-06 | --------------------------- --------------------------- | grad_norm | 3.46 | | loss | 0.00164 | | loss_cal | 0.0576 | | loss_cal_q0 | 0.0609 | | loss_cal_q1 | 0.0547 | | loss_cal_q2 | 0.0576 | | loss_cal_q3 | 0.0567 | | loss_diff | 0.0016 | | loss_diff_q0 | 0.00398 | | loss_diff_q1 | 0.00155 | | loss_diff_q2 | 0.000466 | | loss_diff_q3 | 0.000149 | | loss_q0 | 0.00414 | | loss_q1 | 0.00156 | | loss_q2 | 0.00047 | | loss_q3 | 0.000151 | | param_norm | 236 | | samples | 4.88e+04 | | step | 6.1e+03 | | vb | 4.56e-05 | | vb_q0 | 0.000154 | | vb_q1 | 1.15e-05 | | vb_q2 | 4.21e-06 | | vb_q3 | 1.9e-06 | --------------------------- --------------------------- | grad_norm | 3.46 | | loss | 0.00452 | | loss_cal | 0.0564 | | loss_cal_q0 | 0.0548 | | loss_cal_q1 | 0.0556 | | loss_cal_q2 | 0.052 | | loss_cal_q3 | 0.0633 | | loss_diff | 0.00306 | | loss_diff_q0 | 0.00849 | | loss_diff_q1 | 0.00192 | | loss_diff_q2 | 0.000742 | | loss_diff_q3 | 0.000401 | | loss_q0 | 0.0137 | | loss_q1 | 0.00194 | | loss_q2 | 0.000748 | | loss_q3 | 0.000406 | | param_norm | 236 | | samples | 4.96e+04 | | step | 6.2e+03 | | vb | 0.00145 | | vb_q0 | 0.00524 | | vb_q1 | 1.43e-05 | | vb_q2 | 6.87e-06 | | vb_q3 | 5.08e-06 | --------------------------- --------------------------- | grad_norm | 3.26 | | loss | 0.0033 | | loss_cal | 0.0569 | | loss_cal_q0 | 0.0629 | | loss_cal_q1 | 0.0561 | | loss_cal_q2 | 0.057 | | loss_cal_q3 | 0.0519 | | loss_diff | 0.00175 | | loss_diff_q0 | 0.00473 | | loss_diff_q1 | 0.00152 | | loss_diff_q2 | 0.000512 | | loss_diff_q3 | 0.000177 | | loss_q0 | 0.0107 | | loss_q1 | 0.00153 | | loss_q2 | 0.000517 | | loss_q3 | 0.000179 | | param_norm | 236 | | samples | 5.04e+04 | | step | 6.3e+03 | | vb | 0.00155 | | vb_q0 | 0.00594 | | vb_q1 | 1.13e-05 | | vb_q2 | 4.67e-06 | | vb_q3 | 2.25e-06 | --------------------------- --------------------------- | grad_norm | 3.47 | | loss | 0.00158 | | loss_cal | 0.0553 | | loss_cal_q0 | 0.0569 | | loss_cal_q1 | 0.0543 | | loss_cal_q2 | 0.0572 | | loss_cal_q3 | 0.0528 | | loss_diff | 0.00152 | | loss_diff_q0 | 0.00394 | | loss_diff_q1 | 0.00143 | | loss_diff_q2 | 0.000429 | | loss_diff_q3 | 0.000139 | | loss_q0 | 0.00417 | | loss_q1 | 0.00144 | | loss_q2 | 0.000433 | | loss_q3 | 0.000141 | | param_norm | 236 | | samples | 5.12e+04 | | step | 6.4e+03 | | vb | 6.4e-05 | | vb_q0 | 0.00023 | | vb_q1 | 1.06e-05 | | vb_q2 | 3.97e-06 | | vb_q3 | 1.77e-06 | --------------------------- --------------------------- | grad_norm | 3.32 | | loss | 0.00138 | | loss_cal | 0.0568 | | loss_cal_q0 | 0.058 | | loss_cal_q1 | 0.0569 | | loss_cal_q2 | 0.0641 | | loss_cal_q3 | 0.0502 | | loss_diff | 0.00134 | | loss_diff_q0 | 0.00336 | | loss_diff_q1 | 0.00157 | | loss_diff_q2 | 0.00043 | | loss_diff_q3 | 0.000135 | | loss_q0 | 0.0035 | | loss_q1 | 0.00158 | | loss_q2 | 0.000434 | | loss_q3 | 0.000136 | | param_norm | 236 | | samples | 5.2e+04 | | step | 6.5e+03 | | vb | 3.57e-05 | | vb_q0 | 0.000136 | | vb_q1 | 1.16e-05 | | vb_q2 | 3.93e-06 | | vb_q3 | 1.75e-06 | --------------------------- --------------------------- | grad_norm | 3.1 | | loss | 0.00177 | | loss_cal | 0.0549 | | loss_cal_q0 | 0.0628 | | loss_cal_q1 | 0.0534 | | loss_cal_q2 | 0.0528 | | loss_cal_q3 | 0.0499 | | loss_diff | 0.00162 | | loss_diff_q0 | 0.00417 | | loss_diff_q1 | 0.00154 | | loss_diff_q2 | 0.000495 | | loss_diff_q3 | 0.000168 | | loss_q0 | 0.0047 | | loss_q1 | 0.00156 | | loss_q2 | 0.000499 | | loss_q3 | 0.00017 | | param_norm | 236 | | samples | 5.28e+04 | | step | 6.6e+03 | | vb | 0.000145 | | vb_q0 | 0.000533 | | vb_q1 | 1.15e-05 | | vb_q2 | 4.43e-06 | | vb_q3 | 2.14e-06 | --------------------------- --------------------------- | grad_norm | 3.53 | | loss | 0.0032 | | loss_cal | 0.0579 | | loss_cal_q0 | 0.0582 | | loss_cal_q1 | 0.056 | | loss_cal_q2 | 0.0652 | | loss_cal_q3 | 0.0525 | | loss_diff | 0.00166 | | loss_diff_q0 | 0.00487 | | loss_diff_q1 | 0.00153 | | loss_diff_q2 | 0.000484 | | loss_diff_q3 | 0.000179 | | loss_q0 | 0.0116 | | loss_q1 | 0.00154 | | loss_q2 | 0.000488 | | loss_q3 | 0.000182 | | param_norm | 236 | | samples | 5.36e+04 | | step | 6.7e+03 | | vb | 0.00154 | | vb_q0 | 0.00673 | | vb_q1 | 1.14e-05 | | vb_q2 | 4.4e-06 | | vb_q3 | 2.28e-06 | --------------------------- --------------------------- | grad_norm | 3.31 | | loss | 0.00144 | | loss_cal | 0.0535 | | loss_cal_q0 | 0.0536 | | loss_cal_q1 | 0.0507 | | loss_cal_q2 | 0.0533 | | loss_cal_q3 | 0.0569 | | loss_diff | 0.00137 | | loss_diff_q0 | 0.00373 | | loss_diff_q1 | 0.00147 | | loss_diff_q2 | 0.000436 | | loss_diff_q3 | 0.000166 | | loss_q0 | 0.004 | | loss_q1 | 0.00148 | | loss_q2 | 0.00044 | | loss_q3 | 0.000168 | | param_norm | 236 | | samples | 5.44e+04 | | step | 6.8e+03 | | vb | 6.52e-05 | | vb_q0 | 0.00027 | | vb_q1 | 1.09e-05 | | vb_q2 | 3.99e-06 | | vb_q3 | 2.1e-06 | --------------------------- --------------------------- | grad_norm | 3.21 | | loss | 0.00157 | | loss_cal | 0.0542 | | loss_cal_q0 | 0.0505 | | loss_cal_q1 | 0.0492 | | loss_cal_q2 | 0.0543 | | loss_cal_q3 | 0.0617 | | loss_diff | 0.00147 | | loss_diff_q0 | 0.00396 | | loss_diff_q1 | 0.00149 | | loss_diff_q2 | 0.000399 | | loss_diff_q3 | 0.000152 | | loss_q0 | 0.00438 | | loss_q1 | 0.0015 | | loss_q2 | 0.000402 | | loss_q3 | 0.000154 | | param_norm | 236 | | samples | 5.52e+04 | | step | 6.9e+03 | | vb | 0.000108 | | vb_q0 | 0.00042 | | vb_q1 | 1.11e-05 | | vb_q2 | 3.66e-06 | | vb_q3 | 1.91e-06 | --------------------------- --------------------------- | grad_norm | 3.28 | | loss | 0.00277 | | loss_cal | 0.0528 | | loss_cal_q0 | 0.0567 | | loss_cal_q1 | 0.052 | | loss_cal_q2 | 0.0547 | | loss_cal_q3 | 0.0476 | | loss_diff | 0.00176 | | loss_diff_q0 | 0.00527 | | loss_diff_q1 | 0.00154 | | loss_diff_q2 | 0.000446 | | loss_diff_q3 | 0.00017 | | loss_q0 | 0.0095 | | loss_q1 | 0.00155 | | loss_q2 | 0.00045 | | loss_q3 | 0.000172 | | param_norm | 236 | | samples | 5.6e+04 | | step | 7e+03 | | vb | 0.001 | | vb_q0 | 0.00423 | | vb_q1 | 1.14e-05 | | vb_q2 | 4.1e-06 | | vb_q3 | 2.18e-06 | --------------------------- --------------------------- | grad_norm | 3.52 | | loss | 0.00182 | | loss_cal | 0.0559 | | loss_cal_q0 | 0.0552 | | loss_cal_q1 | 0.054 | | loss_cal_q2 | 0.0582 | | loss_cal_q3 | 0.0574 | | loss_diff | 0.00172 | | loss_diff_q0 | 0.0045 | | loss_diff_q1 | 0.00152 | | loss_diff_q2 | 0.00052 | | loss_diff_q3 | 0.000208 | | loss_q0 | 0.00487 | | loss_q1 | 0.00153 | | loss_q2 | 0.000525 | | loss_q3 | 0.00021 | | param_norm | 236 | | samples | 5.68e+04 | | step | 7.1e+03 | | vb | 9.8e-05 | | vb_q0 | 0.000364 | | vb_q1 | 1.13e-05 | | vb_q2 | 4.77e-06 | | vb_q3 | 2.63e-06 | --------------------------- --------------------------- | grad_norm | 3.54 | | loss | 0.0015 | | loss_cal | 0.0543 | | loss_cal_q0 | 0.0646 | | loss_cal_q1 | 0.0505 | | loss_cal_q2 | 0.0468 | | loss_cal_q3 | 0.0516 | | loss_diff | 0.00145 | | loss_diff_q0 | 0.00402 | | loss_diff_q1 | 0.00143 | | loss_diff_q2 | 0.000402 | | loss_diff_q3 | 0.000134 | | loss_q0 | 0.00417 | | loss_q1 | 0.00144 | | loss_q2 | 0.000405 | | loss_q3 | 0.000136 | | param_norm | 236 | | samples | 5.76e+04 | | step | 7.2e+03 | | vb | 4.12e-05 | | vb_q0 | 0.000156 | | vb_q1 | 1.06e-05 | | vb_q2 | 3.65e-06 | | vb_q3 | 1.71e-06 | --------------------------- --------------------------- | grad_norm | 3.28 | | loss | 0.00188 | | loss_cal | 0.0535 | | loss_cal_q0 | 0.054 | | loss_cal_q1 | 0.0493 | | loss_cal_q2 | 0.0569 | | loss_cal_q3 | 0.052 | | loss_diff | 0.00172 | | loss_diff_q0 | 0.00459 | | loss_diff_q1 | 0.00144 | | loss_diff_q2 | 0.000501 | | loss_diff_q3 | 0.000201 | | loss_q0 | 0.00524 | | loss_q1 | 0.00145 | | loss_q2 | 0.000506 | | loss_q3 | 0.000204 | | param_norm | 237 | | samples | 5.84e+04 | | step | 7.3e+03 | | vb | 0.000168 | | vb_q0 | 0.000649 | | vb_q1 | 1.07e-05 | | vb_q2 | 4.58e-06 | | vb_q3 | 2.58e-06 | --------------------------- --------------------------- | grad_norm | 3.25 | | loss | 0.00387 | | loss_cal | 0.0514 | | loss_cal_q0 | 0.0575 | | loss_cal_q1 | 0.0463 | | loss_cal_q2 | 0.0476 | | loss_cal_q3 | 0.0544 | | loss_diff | 0.00168 | | loss_diff_q0 | 0.0047 | | loss_diff_q1 | 0.00153 | | loss_diff_q2 | 0.000476 | | loss_diff_q3 | 0.000162 | | loss_q0 | 0.0138 | | loss_q1 | 0.00154 | | loss_q2 | 0.00048 | | loss_q3 | 0.000164 | | param_norm | 237 | | samples | 5.92e+04 | | step | 7.4e+03 | | vb | 0.00219 | | vb_q0 | 0.0091 | | vb_q1 | 1.14e-05 | | vb_q2 | 4.29e-06 | | vb_q3 | 2.08e-06 | --------------------------- --------------------------- | grad_norm | 3.11 | | loss | 0.00199 | | loss_cal | 0.0529 | | loss_cal_q0 | 0.052 | | loss_cal_q1 | 0.0477 | | loss_cal_q2 | 0.0506 | | loss_cal_q3 | 0.0596 | | loss_diff | 0.00166 | | loss_diff_q0 | 0.00437 | | loss_diff_q1 | 0.0015 | | loss_diff_q2 | 0.000499 | | loss_diff_q3 | 0.000198 | | loss_q0 | 0.0056 | | loss_q1 | 0.00151 | | loss_q2 | 0.000503 | | loss_q3 | 0.000201 | | param_norm | 237 | | samples | 6e+04 | | step | 7.5e+03 | | vb | 0.000329 | | vb_q0 | 0.00123 | | vb_q1 | 1.12e-05 | | vb_q2 | 4.5e-06 | | vb_q3 | 2.52e-06 | --------------------------- --------------------------- | grad_norm | 3.46 | | loss | 0.00247 | | loss_cal | 0.053 | | loss_cal_q0 | 0.0534 | | loss_cal_q1 | 0.0521 | | loss_cal_q2 | 0.0541 | | loss_cal_q3 | 0.0517 | | loss_diff | 0.00229 | | loss_diff_q0 | 0.00666 | | loss_diff_q1 | 0.00187 | | loss_diff_q2 | 0.000592 | | loss_diff_q3 | 0.000266 | | loss_q0 | 0.0074 | | loss_q1 | 0.00189 | | loss_q2 | 0.000597 | | loss_q3 | 0.000269 | | param_norm | 237 | | samples | 6.08e+04 | | step | 7.6e+03 | | vb | 0.000182 | | vb_q0 | 0.00074 | | vb_q1 | 1.4e-05 | | vb_q2 | 5.5e-06 | | vb_q3 | 3.37e-06 | --------------------------- --------------------------- | grad_norm | 3.1 | | loss | 0.00497 | | loss_cal | 0.0525 | | loss_cal_q0 | 0.0586 | | loss_cal_q1 | 0.0483 | | loss_cal_q2 | 0.0532 | | loss_cal_q3 | 0.0504 | | loss_diff | 0.0025 | | loss_diff_q0 | 0.00706 | | loss_diff_q1 | 0.00176 | | loss_diff_q2 | 0.000627 | | loss_diff_q3 | 0.000287 | | loss_q0 | 0.0166 | | loss_q1 | 0.00177 | | loss_q2 | 0.000633 | | loss_q3 | 0.00029 | | param_norm | 237 | | samples | 6.16e+04 | | step | 7.7e+03 | | vb | 0.00247 | | vb_q0 | 0.00952 | | vb_q1 | 1.31e-05 | | vb_q2 | 5.69e-06 | | vb_q3 | 3.67e-06 | --------------------------- --------------------------- | grad_norm | 3.43 | | loss | 0.00296 | | loss_cal | 0.0532 | | loss_cal_q0 | 0.0486 | | loss_cal_q1 | 0.0561 | | loss_cal_q2 | 0.0522 | | loss_cal_q3 | 0.0556 | | loss_diff | 0.00147 | | loss_diff_q0 | 0.00383 | | loss_diff_q1 | 0.00138 | | loss_diff_q2 | 0.00046 | | loss_diff_q3 | 0.000157 | | loss_q0 | 0.00958 | | loss_q1 | 0.00139 | | loss_q2 | 0.000465 | | loss_q3 | 0.000159 | | param_norm | 237 | | samples | 6.24e+04 | | step | 7.8e+03 | | vb | 0.00149 | | vb_q0 | 0.00574 | | vb_q1 | 1.03e-05 | | vb_q2 | 4.19e-06 | | vb_q3 | 1.98e-06 | --------------------------- --------------------------- | grad_norm | 3.17 | | loss | 0.00331 | | loss_cal | 0.0514 | | loss_cal_q0 | 0.0616 | | loss_cal_q1 | 0.0506 | | loss_cal_q2 | 0.0451 | | loss_cal_q3 | 0.0501 | | loss_diff | 0.00181 | | loss_diff_q0 | 0.00536 | | loss_diff_q1 | 0.00168 | | loss_diff_q2 | 0.00048 | | loss_diff_q3 | 0.000193 | | loss_q0 | 0.0121 | | loss_q1 | 0.00169 | | loss_q2 | 0.000484 | | loss_q3 | 0.000195 | | param_norm | 237 | | samples | 6.32e+04 | | step | 7.9e+03 | | vb | 0.00151 | | vb_q0 | 0.00671 | | vb_q1 | 1.27e-05 | | vb_q2 | 4.36e-06 | | vb_q3 | 2.47e-06 | --------------------------- --------------------------- | grad_norm | 3.08 | | loss | 0.00347 | | loss_cal | 0.0512 | | loss_cal_q0 | 0.0504 | | loss_cal_q1 | 0.0452 | | loss_cal_q2 | 0.0564 | | loss_cal_q3 | 0.0528 | | loss_diff | 0.00205 | | loss_diff_q0 | 0.00554 | | loss_diff_q1 | 0.00157 | | loss_diff_q2 | 0.00057 | | loss_diff_q3 | 0.000267 | | loss_q0 | 0.0108 | | loss_q1 | 0.00158 | | loss_q2 | 0.000575 | | loss_q3 | 0.000271 | | param_norm | 237 | | samples | 6.4e+04 | | step | 8e+03 | | vb | 0.00142 | | vb_q0 | 0.0053 | | vb_q1 | 1.18e-05 | | vb_q2 | 5.18e-06 | | vb_q3 | 3.39e-06 | --------------------------- --------------------------- | grad_norm | 3.39 | | loss | 0.00158 | | loss_cal | 0.052 | | loss_cal_q0 | 0.0472 | | loss_cal_q1 | 0.0561 | | loss_cal_q2 | 0.0509 | | loss_cal_q3 | 0.0532 | | loss_diff | 0.00149 | | loss_diff_q0 | 0.00391 | | loss_diff_q1 | 0.00149 | | loss_diff_q2 | 0.000425 | | loss_diff_q3 | 0.00015 | | loss_q0 | 0.00425 | | loss_q1 | 0.0015 | | loss_q2 | 0.000429 | | loss_q3 | 0.000152 | | param_norm | 237 | | samples | 6.48e+04 | | step | 8.1e+03 | | vb | 8.77e-05 | | vb_q0 | 0.00034 | | vb_q1 | 1.11e-05 | | vb_q2 | 3.9e-06 | | vb_q3 | 1.92e-06 | --------------------------- --------------------------- | grad_norm | 3.11 | | loss | 0.00132 | | loss_cal | 0.0519 | | loss_cal_q0 | 0.0515 | | loss_cal_q1 | 0.0506 | | loss_cal_q2 | 0.0485 | | loss_cal_q3 | 0.0534 | | loss_diff | 0.00128 | | loss_diff_q0 | 0.00358 | | loss_diff_q1 | 0.00135 | | loss_diff_q2 | 0.000386 | | loss_diff_q3 | 0.000116 | | loss_q0 | 0.00374 | | loss_q1 | 0.00136 | | loss_q2 | 0.00039 | | loss_q3 | 0.000117 | | param_norm | 237 | | samples | 6.56e+04 | | step | 8.2e+03 | | vb | 3.95e-05 | | vb_q0 | 0.000156 | | vb_q1 | 9.97e-06 | | vb_q2 | 3.47e-06 | | vb_q3 | 1.48e-06 | --------------------------- --------------------------- | grad_norm | 3.3 | | loss | 0.00147 | | loss_cal | 0.0503 | | loss_cal_q0 | 0.0486 | | loss_cal_q1 | 0.0531 | | loss_cal_q2 | 0.0486 | | loss_cal_q3 | 0.0521 | | loss_diff | 0.00141 | | loss_diff_q0 | 0.00371 | | loss_diff_q1 | 0.00138 | | loss_diff_q2 | 0.000404 | | loss_diff_q3 | 0.000131 | | loss_q0 | 0.00395 | | loss_q1 | 0.00139 | | loss_q2 | 0.000408 | | loss_q3 | 0.000133 | | param_norm | 237 | | samples | 6.64e+04 | | step | 8.3e+03 | | vb | 6.53e-05 | | vb_q0 | 0.00024 | | vb_q1 | 1.02e-05 | | vb_q2 | 3.7e-06 | | vb_q3 | 1.67e-06 | --------------------------- --------------------------- | grad_norm | 3.37 | | loss | 0.00147 | | loss_cal | 0.0507 | | loss_cal_q0 | 0.0495 | | loss_cal_q1 | 0.0478 | | loss_cal_q2 | 0.0513 | | loss_cal_q3 | 0.0534 | | loss_diff | 0.00141 | | loss_diff_q0 | 0.00366 | | loss_diff_q1 | 0.00145 | | loss_diff_q2 | 0.000384 | | loss_diff_q3 | 0.000126 | | loss_q0 | 0.00389 | | loss_q1 | 0.00146 | | loss_q2 | 0.000388 | | loss_q3 | 0.000127 | | param_norm | 237 | | samples | 6.72e+04 | | step | 8.4e+03 | | vb | 5.93e-05 | | vb_q0 | 0.000223 | | vb_q1 | 1.07e-05 | | vb_q2 | 3.52e-06 | | vb_q3 | 1.62e-06 | --------------------------- --------------------------- | grad_norm | 2.96 | | loss | 0.00515 | | loss_cal | 0.0511 | | loss_cal_q0 | 0.0547 | | loss_cal_q1 | 0.0496 | | loss_cal_q2 | 0.0516 | | loss_cal_q3 | 0.0472 | | loss_diff | 0.00213 | | loss_diff_q0 | 0.00579 | | loss_diff_q1 | 0.00157 | | loss_diff_q2 | 0.000486 | | loss_diff_q3 | 0.000215 | | loss_q0 | 0.0169 | | loss_q1 | 0.00158 | | loss_q2 | 0.00049 | | loss_q3 | 0.000217 | | param_norm | 237 | | samples | 6.8e+04 | | step | 8.5e+03 | | vb | 0.00302 | | vb_q0 | 0.0111 | | vb_q1 | 1.17e-05 | | vb_q2 | 4.5e-06 | | vb_q3 | 2.74e-06 | --------------------------- --------------------------- | grad_norm | 3.02 | | loss | 0.00319 | | loss_cal | 0.0486 | | loss_cal_q0 | 0.044 | | loss_cal_q1 | 0.05 | | loss_cal_q2 | 0.0494 | | loss_cal_q3 | 0.0508 | | loss_diff | 0.00202 | | loss_diff_q0 | 0.00571 | | loss_diff_q1 | 0.00157 | | loss_diff_q2 | 0.000484 | | loss_diff_q3 | 0.000228 | | loss_q0 | 0.0103 | | loss_q1 | 0.00158 | | loss_q2 | 0.000488 | | loss_q3 | 0.000231 | | param_norm | 237 | | samples | 6.88e+04 | | step | 8.6e+03 | | vb | 0.00117 | | vb_q0 | 0.00461 | | vb_q1 | 1.18e-05 | | vb_q2 | 4.47e-06 | | vb_q3 | 2.91e-06 | --------------------------- --------------------------- | grad_norm | 3.15 | | loss | 0.0026 | | loss_cal | 0.0499 | | loss_cal_q0 | 0.0475 | | loss_cal_q1 | 0.0483 | | loss_cal_q2 | 0.0483 | | loss_cal_q3 | 0.0543 | | loss_diff | 0.0022 | | loss_diff_q0 | 0.00675 | | loss_diff_q1 | 0.00177 | | loss_diff_q2 | 0.000658 | | loss_diff_q3 | 0.000341 | | loss_q0 | 0.00854 | | loss_q1 | 0.00178 | | loss_q2 | 0.000664 | | loss_q3 | 0.000346 | | param_norm | 237 | | samples | 6.96e+04 | | step | 8.7e+03 | | vb | 0.000398 | | vb_q0 | 0.00179 | | vb_q1 | 1.32e-05 | | vb_q2 | 6.05e-06 | | vb_q3 | 4.39e-06 | --------------------------- --------------------------- | grad_norm | 3.27 | | loss | 0.00741 | | loss_cal | 0.0508 | | loss_cal_q0 | 0.0529 | | loss_cal_q1 | 0.0508 | | loss_cal_q2 | 0.0535 | | loss_cal_q3 | 0.0463 | | loss_diff | 0.00253 | | loss_diff_q0 | 0.00791 | | loss_diff_q1 | 0.00155 | | loss_diff_q2 | 0.00053 | | loss_diff_q3 | 0.000244 | | loss_q0 | 0.0277 | | loss_q1 | 0.00156 | | loss_q2 | 0.000535 | | loss_q3 | 0.000247 | | param_norm | 237 | | samples | 7.04e+04 | | step | 8.8e+03 | | vb | 0.00488 | | vb_q0 | 0.0198 | | vb_q1 | 1.15e-05 | | vb_q2 | 4.85e-06 | | vb_q3 | 3.08e-06 | --------------------------- --------------------------- | grad_norm | 3 | | loss | 0.0094 | | loss_cal | 0.05 | | loss_cal_q0 | 0.0503 | | loss_cal_q1 | 0.0478 | | loss_cal_q2 | 0.0559 | | loss_cal_q3 | 0.045 | | loss_diff | 0.00543 | | loss_diff_q0 | 0.0163 | | loss_diff_q1 | 0.00333 | | loss_diff_q2 | 0.00131 | | loss_diff_q3 | 0.000748 | | loss_q0 | 0.032 | | loss_q1 | 0.00336 | | loss_q2 | 0.00132 | | loss_q3 | 0.000758 | | param_norm | 238 | | samples | 7.12e+04 | | step | 8.9e+03 | | vb | 0.00396 | | vb_q0 | 0.0157 | | vb_q1 | 2.49e-05 | | vb_q2 | 1.21e-05 | | vb_q3 | 9.66e-06 | --------------------------- --------------------------- | grad_norm | 3 | | loss | 0.00426 | | loss_cal | 0.0489 | | loss_cal_q0 | 0.0527 | | loss_cal_q1 | 0.0477 | | loss_cal_q2 | 0.0492 | | loss_cal_q3 | 0.0461 | | loss_diff | 0.00263 | | loss_diff_q0 | 0.00747 | | loss_diff_q1 | 0.00168 | | loss_diff_q2 | 0.000632 | | loss_diff_q3 | 0.000253 | | loss_q0 | 0.0135 | | loss_q1 | 0.00169 | | loss_q2 | 0.000637 | | loss_q3 | 0.000256 | | param_norm | 238 | | samples | 7.2e+04 | | step | 9e+03 | | vb | 0.00163 | | vb_q0 | 0.00607 | | vb_q1 | 1.24e-05 | | vb_q2 | 5.74e-06 | | vb_q3 | 3.22e-06 | --------------------------- --------------------------- | grad_norm | 2.98 | | loss | 0.00413 | | loss_cal | 0.0477 | | loss_cal_q0 | 0.0479 | | loss_cal_q1 | 0.0451 | | loss_cal_q2 | 0.0504 | | loss_cal_q3 | 0.0469 | | loss_diff | 0.00179 | | loss_diff_q0 | 0.00504 | | loss_diff_q1 | 0.00158 | | loss_diff_q2 | 0.000462 | | loss_diff_q3 | 0.000163 | | loss_q0 | 0.0145 | | loss_q1 | 0.00159 | | loss_q2 | 0.000466 | | loss_q3 | 0.000165 | | param_norm | 238 | | samples | 7.28e+04 | | step | 9.1e+03 | | vb | 0.00234 | | vb_q0 | 0.00948 | | vb_q1 | 1.17e-05 | | vb_q2 | 4.22e-06 | | vb_q3 | 2.08e-06 | --------------------------- --------------------------- | grad_norm | 2.96 | | loss | 0.00504 | | loss_cal | 0.0502 | | loss_cal_q0 | 0.0503 | | loss_cal_q1 | 0.047 | | loss_cal_q2 | 0.0533 | | loss_cal_q3 | 0.0495 | | loss_diff | 0.00213 | | loss_diff_q0 | 0.00622 | | loss_diff_q1 | 0.00149 | | loss_diff_q2 | 0.000506 | | loss_diff_q3 | 0.000202 | | loss_q0 | 0.0176 | | loss_q1 | 0.0015 | | loss_q2 | 0.000511 | | loss_q3 | 0.000205 | | param_norm | 238 | | samples | 7.36e+04 | | step | 9.2e+03 | | vb | 0.00291 | | vb_q0 | 0.0114 | | vb_q1 | 1.11e-05 | | vb_q2 | 4.62e-06 | | vb_q3 | 2.56e-06 | --------------------------- --------------------------- | grad_norm | 2.9 | | loss | 0.00259 | | loss_cal | 0.0473 | | loss_cal_q0 | 0.0497 | | loss_cal_q1 | 0.0449 | | loss_cal_q2 | 0.0413 | | loss_cal_q3 | 0.0514 | | loss_diff | 0.00141 | | loss_diff_q0 | 0.00355 | | loss_diff_q1 | 0.00152 | | loss_diff_q2 | 0.000401 | | loss_diff_q3 | 0.00013 | | loss_q0 | 0.00824 | | loss_q1 | 0.00153 | | loss_q2 | 0.000405 | | loss_q3 | 0.000131 | | param_norm | 238 | | samples | 7.44e+04 | | step | 9.3e+03 | | vb | 0.00118 | | vb_q0 | 0.00469 | | vb_q1 | 1.14e-05 | | vb_q2 | 3.64e-06 | | vb_q3 | 1.65e-06 | --------------------------- --------------------------- | grad_norm | 3.5 | | loss | 0.00386 | | loss_cal | 0.0478 | | loss_cal_q0 | 0.0488 | | loss_cal_q1 | 0.0483 | | loss_cal_q2 | 0.0449 | | loss_cal_q3 | 0.0497 | | loss_diff | 0.00199 | | loss_diff_q0 | 0.00613 | | loss_diff_q1 | 0.00152 | | loss_diff_q2 | 0.000459 | | loss_diff_q3 | 0.000167 | | loss_q0 | 0.0139 | | loss_q1 | 0.00153 | | loss_q2 | 0.000463 | | loss_q3 | 0.000169 | | param_norm | 238 | | samples | 7.52e+04 | | step | 9.4e+03 | | vb | 0.00186 | | vb_q0 | 0.00774 | | vb_q1 | 1.14e-05 | | vb_q2 | 4.19e-06 | | vb_q3 | 2.1e-06 | --------------------------- --------------------------- | grad_norm | 2.89 | | loss | 0.00146 | | loss_cal | 0.0487 | | loss_cal_q0 | 0.0528 | | loss_cal_q1 | 0.0459 | | loss_cal_q2 | 0.0445 | | loss_cal_q3 | 0.0517 | | loss_diff | 0.0014 | | loss_diff_q0 | 0.00356 | | loss_diff_q1 | 0.00144 | | loss_diff_q2 | 0.000437 | | loss_diff_q3 | 0.000137 | | loss_q0 | 0.00378 | | loss_q1 | 0.00146 | | loss_q2 | 0.000441 | | loss_q3 | 0.000139 | | param_norm | 238 | | samples | 7.6e+04 | | step | 9.5e+03 | | vb | 5.94e-05 | | vb_q0 | 0.000216 | | vb_q1 | 1.08e-05 | | vb_q2 | 3.91e-06 | | vb_q3 | 1.73e-06 | --------------------------- --------------------------- | grad_norm | 2.85 | | loss | 0.00139 | | loss_cal | 0.0473 | | loss_cal_q0 | 0.0428 | | loss_cal_q1 | 0.0466 | | loss_cal_q2 | 0.0491 | | loss_cal_q3 | 0.0505 | | loss_diff | 0.00133 | | loss_diff_q0 | 0.00345 | | loss_diff_q1 | 0.00142 | | loss_diff_q2 | 0.000385 | | loss_diff_q3 | 0.000125 | | loss_q0 | 0.00369 | | loss_q1 | 0.00143 | | loss_q2 | 0.000389 | | loss_q3 | 0.000126 | | param_norm | 238 | | samples | 7.68e+04 | | step | 9.6e+03 | | vb | 6.17e-05 | | vb_q0 | 0.000238 | | vb_q1 | 1.06e-05 | | vb_q2 | 3.52e-06 | | vb_q3 | 1.56e-06 | --------------------------- --------------------------- | grad_norm | 2.73 | | loss | 0.00155 | | loss_cal | 0.0437 | | loss_cal_q0 | 0.0453 | | loss_cal_q1 | 0.0447 | | loss_cal_q2 | 0.0447 | | loss_cal_q3 | 0.0408 | | loss_diff | 0.00151 | | loss_diff_q0 | 0.00395 | | loss_diff_q1 | 0.00148 | | loss_diff_q2 | 0.00039 | | loss_diff_q3 | 0.000129 | | loss_q0 | 0.00408 | | loss_q1 | 0.00149 | | loss_q2 | 0.000394 | | loss_q3 | 0.00013 | | param_norm | 238 | | samples | 7.76e+04 | | step | 9.7e+03 | | vb | 3.82e-05 | | vb_q0 | 0.000134 | | vb_q1 | 1.11e-05 | | vb_q2 | 3.59e-06 | | vb_q3 | 1.64e-06 | --------------------------- --------------------------- | grad_norm | 3.1 | | loss | 0.00131 | | loss_cal | 0.0489 | | loss_cal_q0 | 0.0494 | | loss_cal_q1 | 0.0514 | | loss_cal_q2 | 0.0487 | | loss_cal_q3 | 0.0456 | | loss_diff | 0.00126 | | loss_diff_q0 | 0.00309 | | loss_diff_q1 | 0.00129 | | loss_diff_q2 | 0.000369 | | loss_diff_q3 | 0.000102 | | loss_q0 | 0.00324 | | loss_q1 | 0.0013 | | loss_q2 | 0.000372 | | loss_q3 | 0.000103 | | param_norm | 238 | | samples | 7.84e+04 | | step | 9.8e+03 | | vb | 4.32e-05 | | vb_q0 | 0.00015 | | vb_q1 | 9.66e-06 | | vb_q2 | 3.37e-06 | | vb_q3 | 1.3e-06 | --------------------------- --------------------------- | grad_norm | 2.89 | | loss | 0.00277 | | loss_cal | 0.0438 | | loss_cal_q0 | 0.0471 | | loss_cal_q1 | 0.0381 | | loss_cal_q2 | 0.0427 | | loss_cal_q3 | 0.0471 | | loss_diff | 0.00158 | | loss_diff_q0 | 0.00408 | | loss_diff_q1 | 0.00151 | | loss_diff_q2 | 0.000417 | | loss_diff_q3 | 0.000135 | | loss_q0 | 0.00859 | | loss_q1 | 0.00152 | | loss_q2 | 0.000421 | | loss_q3 | 0.000136 | | param_norm | 238 | | samples | 7.92e+04 | | step | 9.9e+03 | | vb | 0.00119 | | vb_q0 | 0.00451 | | vb_q1 | 1.13e-05 | | vb_q2 | 3.78e-06 | | vb_q3 | 1.69e-06 | --------------------------- --------------------------- | grad_norm | 2.96 | | loss | 0.00523 | | loss_cal | 0.0472 | | loss_cal_q0 | 0.0449 | | loss_cal_q1 | 0.0471 | | loss_cal_q2 | 0.0501 | | loss_cal_q3 | 0.0472 | | loss_diff | 0.00219 | | loss_diff_q0 | 0.00652 | | loss_diff_q1 | 0.00164 | | loss_diff_q2 | 0.000634 | | loss_diff_q3 | 0.000329 | | loss_q0 | 0.0196 | | loss_q1 | 0.00165 | | loss_q2 | 0.00064 | | loss_q3 | 0.000333 | | param_norm | 238 | | samples | 8e+04 | | step | 1e+04 | | vb | 0.00304 | | vb_q0 | 0.0131 | | vb_q1 | 1.24e-05 | | vb_q2 | 5.77e-06 | | vb_q3 | 4.23e-06 | --------------------------- saving model 0... saving model 0.9999... --------------------------- | grad_norm | 2.9 | | loss | 0.00159 | | loss_cal | 0.0478 | | loss_cal_q0 | 0.0455 | | loss_cal_q1 | 0.0462 | | loss_cal_q2 | 0.0433 | | loss_cal_q3 | 0.0564 | | loss_diff | 0.00148 | | loss_diff_q0 | 0.00383 | | loss_diff_q1 | 0.00133 | | loss_diff_q2 | 0.000404 | | loss_diff_q3 | 0.000141 | | loss_q0 | 0.00423 | | loss_q1 | 0.00134 | | loss_q2 | 0.000408 | | loss_q3 | 0.000143 | | param_norm | 238 | | samples | 8.08e+04 | | step | 1.01e+04 | | vb | 0.000111 | | vb_q0 | 0.000405 | | vb_q1 | 9.97e-06 | | vb_q2 | 3.67e-06 | | vb_q3 | 1.78e-06 | --------------------------- --------------------------- | grad_norm | 2.98 | | loss | 0.00142 | | loss_cal | 0.0463 | | loss_cal_q0 | 0.0452 | | loss_cal_q1 | 0.0421 | | loss_cal_q2 | 0.0482 | | loss_cal_q3 | 0.0479 | | loss_diff | 0.00137 | | loss_diff_q0 | 0.00352 | | loss_diff_q1 | 0.00153 | | loss_diff_q2 | 0.000426 | | loss_diff_q3 | 0.000143 | | loss_q0 | 0.00368 | | loss_q1 | 0.00154 | | loss_q2 | 0.00043 | | loss_q3 | 0.000145 | | param_norm | 239 | | samples | 8.16e+04 | | step | 1.02e+04 | | vb | 4.33e-05 | | vb_q0 | 0.000163 | | vb_q1 | 1.14e-05 | | vb_q2 | 3.84e-06 | | vb_q3 | 1.81e-06 | --------------------------- --------------------------- | grad_norm | 2.79 | | loss | 0.00338 | | loss_cal | 0.0461 | | loss_cal_q0 | 0.0476 | | loss_cal_q1 | 0.0468 | | loss_cal_q2 | 0.0449 | | loss_cal_q3 | 0.0448 | | loss_diff | 0.00177 | | loss_diff_q0 | 0.0045 | | loss_diff_q1 | 0.00152 | | loss_diff_q2 | 0.000471 | | loss_diff_q3 | 0.000186 | | loss_q0 | 0.0105 | | loss_q1 | 0.00153 | | loss_q2 | 0.000475 | | loss_q3 | 0.000188 | | param_norm | 239 | | samples | 8.24e+04 | | step | 1.03e+04 | | vb | 0.00161 | | vb_q0 | 0.00603 | | vb_q1 | 1.14e-05 | | vb_q2 | 4.21e-06 | | vb_q3 | 2.37e-06 | --------------------------- --------------------------- | grad_norm | 2.93 | | loss | 0.00235 | | loss_cal | 0.0482 | | loss_cal_q0 | 0.0492 | | loss_cal_q1 | 0.0449 | | loss_cal_q2 | 0.0506 | | loss_cal_q3 | 0.0482 | | loss_diff | 0.00156 | | loss_diff_q0 | 0.00439 | | loss_diff_q1 | 0.00129 | | loss_diff_q2 | 0.000367 | | loss_diff_q3 | 0.000142 | | loss_q0 | 0.00749 | | loss_q1 | 0.0013 | | loss_q2 | 0.00037 | | loss_q3 | 0.000144 | | param_norm | 239 | | samples | 8.32e+04 | | step | 1.04e+04 | | vb | 0.000789 | | vb_q0 | 0.00309 | | vb_q1 | 9.57e-06 | | vb_q2 | 3.41e-06 | | vb_q3 | 1.81e-06 | --------------------------- --------------------------- | grad_norm | 3.41 | | loss | 0.00137 | | loss_cal | 0.0481 | | loss_cal_q0 | 0.0467 | | loss_cal_q1 | 0.0481 | | loss_cal_q2 | 0.0512 | | loss_cal_q3 | 0.0464 | | loss_diff | 0.00131 | | loss_diff_q0 | 0.00374 | | loss_diff_q1 | 0.00137 | | loss_diff_q2 | 0.000343 | | loss_diff_q3 | 0.000108 | | loss_q0 | 0.00396 | | loss_q1 | 0.00139 | | loss_q2 | 0.000346 | | loss_q3 | 0.000109 | | param_norm | 239 | | samples | 8.4e+04 | | step | 1.05e+04 | | vb | 5.28e-05 | | vb_q0 | 0.000218 | | vb_q1 | 1.02e-05 | | vb_q2 | 3.13e-06 | | vb_q3 | 1.37e-06 | --------------------------- --------------------------- | grad_norm | 2.87 | | loss | 0.0024 | | loss_cal | 0.0456 | | loss_cal_q0 | 0.047 | | loss_cal_q1 | 0.0475 | | loss_cal_q2 | 0.0466 | | loss_cal_q3 | 0.0417 | | loss_diff | 0.00134 | | loss_diff_q0 | 0.0038 | | loss_diff_q1 | 0.00136 | | loss_diff_q2 | 0.000387 | | loss_diff_q3 | 0.00013 | | loss_q0 | 0.00833 | | loss_q1 | 0.00137 | | loss_q2 | 0.000391 | | loss_q3 | 0.000132 | | param_norm | 239 | | samples | 8.48e+04 | | step | 1.06e+04 | | vb | 0.00105 | | vb_q0 | 0.00453 | | vb_q1 | 1.02e-05 | | vb_q2 | 3.5e-06 | | vb_q3 | 1.67e-06 | --------------------------- --------------------------- | grad_norm | 2.5 | | loss | 0.00143 | | loss_cal | 0.0447 | | loss_cal_q0 | 0.0476 | | loss_cal_q1 | 0.037 | | loss_cal_q2 | 0.0466 | | loss_cal_q3 | 0.0476 | | loss_diff | 0.00131 | | loss_diff_q0 | 0.00351 | | loss_diff_q1 | 0.00129 | | loss_diff_q2 | 0.000408 | | loss_diff_q3 | 0.000124 | | loss_q0 | 0.00404 | | loss_q1 | 0.0013 | | loss_q2 | 0.000411 | | loss_q3 | 0.000125 | | param_norm | 239 | | samples | 8.56e+04 | | step | 1.07e+04 | | vb | 0.000128 | | vb_q0 | 0.000536 | | vb_q1 | 9.67e-06 | | vb_q2 | 3.68e-06 | | vb_q3 | 1.56e-06 | --------------------------- --------------------------- | grad_norm | 2.94 | | loss | 0.00115 | | loss_cal | 0.0476 | | loss_cal_q0 | 0.0482 | | loss_cal_q1 | 0.0401 | | loss_cal_q2 | 0.0468 | | loss_cal_q3 | 0.0528 | | loss_diff | 0.00113 | | loss_diff_q0 | 0.00294 | | loss_diff_q1 | 0.00135 | | loss_diff_q2 | 0.000356 | | loss_diff_q3 | 0.000106 | | loss_q0 | 0.00305 | | loss_q1 | 0.00136 | | loss_q2 | 0.000359 | | loss_q3 | 0.000108 | | param_norm | 239 | | samples | 8.64e+04 | | step | 1.08e+04 | | vb | 2.84e-05 | | vb_q0 | 0.00011 | | vb_q1 | 1e-05 | | vb_q2 | 3.26e-06 | | vb_q3 | 1.34e-06 | --------------------------- --------------------------- | grad_norm | 2.5 | | loss | 0.00123 | | loss_cal | 0.0445 | | loss_cal_q0 | 0.0448 | | loss_cal_q1 | 0.0475 | | loss_cal_q2 | 0.0477 | | loss_cal_q3 | 0.0381 | | loss_diff | 0.00117 | | loss_diff_q0 | 0.0031 | | loss_diff_q1 | 0.00136 | | loss_diff_q2 | 0.000371 | | loss_diff_q3 | 0.000106 | | loss_q0 | 0.00337 | | loss_q1 | 0.00137 | | loss_q2 | 0.000375 | | loss_q3 | 0.000108 | | param_norm | 239 | | samples | 8.72e+04 | | step | 1.09e+04 | | vb | 6.61e-05 | | vb_q0 | 0.000273 | | vb_q1 | 1.01e-05 | | vb_q2 | 3.35e-06 | | vb_q3 | 1.35e-06 | --------------------------- --------------------------- | grad_norm | 2.83 | | loss | 0.00238 | | loss_cal | 0.0449 | | loss_cal_q0 | 0.0404 | | loss_cal_q1 | 0.0488 | | loss_cal_q2 | 0.0475 | | loss_cal_q3 | 0.0426 | | loss_diff | 0.00141 | | loss_diff_q0 | 0.00383 | | loss_diff_q1 | 0.00145 | | loss_diff_q2 | 0.00038 | | loss_diff_q3 | 0.000128 | | loss_q0 | 0.008 | | loss_q1 | 0.00146 | | loss_q2 | 0.000383 | | loss_q3 | 0.00013 | | param_norm | 239 | | samples | 8.8e+04 | | step | 1.1e+04 | | vb | 0.000975 | | vb_q0 | 0.00418 | | vb_q1 | 1.08e-05 | | vb_q2 | 3.47e-06 | | vb_q3 | 1.62e-06 | --------------------------- --------------------------- | grad_norm | 2.78 | | loss | 0.00167 | | loss_cal | 0.0442 | | loss_cal_q0 | 0.043 | | loss_cal_q1 | 0.0482 | | loss_cal_q2 | 0.0485 | | loss_cal_q3 | 0.0384 | | loss_diff | 0.00152 | | loss_diff_q0 | 0.00425 | | loss_diff_q1 | 0.00135 | | loss_diff_q2 | 0.000356 | | loss_diff_q3 | 0.000132 | | loss_q0 | 0.00482 | | loss_q1 | 0.00136 | | loss_q2 | 0.000359 | | loss_q3 | 0.000133 | | param_norm | 239 | | samples | 8.88e+04 | | step | 1.11e+04 | | vb | 0.000145 | | vb_q0 | 0.000564 | | vb_q1 | 1.01e-05 | | vb_q2 | 3.28e-06 | | vb_q3 | 1.68e-06 | --------------------------- --------------------------- | grad_norm | 2.91 | | loss | 0.00353 | | loss_cal | 0.0452 | | loss_cal_q0 | 0.0467 | | loss_cal_q1 | 0.0433 | | loss_cal_q2 | 0.0481 | | loss_cal_q3 | 0.0432 | | loss_diff | 0.00177 | | loss_diff_q0 | 0.00528 | | loss_diff_q1 | 0.00146 | | loss_diff_q2 | 0.000432 | | loss_diff_q3 | 0.000165 | | loss_q0 | 0.0128 | | loss_q1 | 0.00147 | | loss_q2 | 0.000436 | | loss_q3 | 0.000167 | | param_norm | 239 | | samples | 8.96e+04 | | step | 1.12e+04 | | vb | 0.00175 | | vb_q0 | 0.00753 | | vb_q1 | 1.09e-05 | | vb_q2 | 3.96e-06 | | vb_q3 | 2.1e-06 | --------------------------- --------------------------- | grad_norm | 2.68 | | loss | 0.00118 | | loss_cal | 0.0451 | | loss_cal_q0 | 0.0485 | | loss_cal_q1 | 0.0482 | | loss_cal_q2 | 0.0436 | | loss_cal_q3 | 0.0415 | | loss_diff | 0.00116 | | loss_diff_q0 | 0.00295 | | loss_diff_q1 | 0.0014 | | loss_diff_q2 | 0.000353 | | loss_diff_q3 | 0.000111 | | loss_q0 | 0.00306 | | loss_q1 | 0.00141 | | loss_q2 | 0.000356 | | loss_q3 | 0.000112 | | param_norm | 239 | | samples | 9.04e+04 | | step | 1.13e+04 | | vb | 2.76e-05 | | vb_q0 | 0.000104 | | vb_q1 | 1.04e-05 | | vb_q2 | 3.22e-06 | | vb_q3 | 1.41e-06 | --------------------------- --------------------------- | grad_norm | 3.08 | | loss | 0.00126 | | loss_cal | 0.0456 | | loss_cal_q0 | 0.0442 | | loss_cal_q1 | 0.0474 | | loss_cal_q2 | 0.0461 | | loss_cal_q3 | 0.0446 | | loss_diff | 0.00121 | | loss_diff_q0 | 0.00308 | | loss_diff_q1 | 0.0013 | | loss_diff_q2 | 0.000359 | | loss_diff_q3 | 0.000102 | | loss_q0 | 0.00329 | | loss_q1 | 0.00131 | | loss_q2 | 0.000363 | | loss_q3 | 0.000104 | | param_norm | 239 | | samples | 9.12e+04 | | step | 1.14e+04 | | vb | 5.7e-05 | | vb_q0 | 0.000218 | | vb_q1 | 9.75e-06 | | vb_q2 | 3.24e-06 | | vb_q3 | 1.3e-06 | --------------------------- --------------------------- | grad_norm | 2.75 | | loss | 0.00326 | | loss_cal | 0.0449 | | loss_cal_q0 | 0.0448 | | loss_cal_q1 | 0.0456 | | loss_cal_q2 | 0.0428 | | loss_cal_q3 | 0.0461 | | loss_diff | 0.00172 | | loss_diff_q0 | 0.00481 | | loss_diff_q1 | 0.00132 | | loss_diff_q2 | 0.000402 | | loss_diff_q3 | 0.000156 | | loss_q0 | 0.0107 | | loss_q1 | 0.00133 | | loss_q2 | 0.000405 | | loss_q3 | 0.000158 | | param_norm | 239 | | samples | 9.2e+04 | | step | 1.15e+04 | | vb | 0.00154 | | vb_q0 | 0.00588 | | vb_q1 | 9.89e-06 | | vb_q2 | 3.69e-06 | | vb_q3 | 1.99e-06 | --------------------------- --------------------------- | grad_norm | 2.69 | | loss | 0.0013 | | loss_cal | 0.0445 | | loss_cal_q0 | 0.0408 | | loss_cal_q1 | 0.0407 | | loss_cal_q2 | 0.0512 | | loss_cal_q3 | 0.0445 | | loss_diff | 0.00126 | | loss_diff_q0 | 0.00327 | | loss_diff_q1 | 0.00133 | | loss_diff_q2 | 0.00042 | | loss_diff_q3 | 0.000123 | | loss_q0 | 0.00342 | | loss_q1 | 0.00134 | | loss_q2 | 0.000424 | | loss_q3 | 0.000125 | | param_norm | 239 | | samples | 9.28e+04 | | step | 1.16e+04 | | vb | 3.96e-05 | | vb_q0 | 0.000145 | | vb_q1 | 9.88e-06 | | vb_q2 | 3.82e-06 | | vb_q3 | 1.57e-06 | --------------------------- --------------------------- | grad_norm | 2.71 | | loss | 0.00125 | | loss_cal | 0.044 | | loss_cal_q0 | 0.0428 | | loss_cal_q1 | 0.0488 | | loss_cal_q2 | 0.0426 | | loss_cal_q3 | 0.0416 | | loss_diff | 0.00118 | | loss_diff_q0 | 0.00299 | | loss_diff_q1 | 0.00129 | | loss_diff_q2 | 0.000359 | | loss_diff_q3 | 0.000108 | | loss_q0 | 0.00326 | | loss_q1 | 0.0013 | | loss_q2 | 0.000362 | | loss_q3 | 0.000109 | | param_norm | 239 | | samples | 9.36e+04 | | step | 1.17e+04 | | vb | 6.74e-05 | | vb_q0 | 0.000267 | | vb_q1 | 9.63e-06 | | vb_q2 | 3.27e-06 | | vb_q3 | 1.39e-06 | --------------------------- --------------------------- | grad_norm | 2.63 | | loss | 0.00234 | | loss_cal | 0.0416 | | loss_cal_q0 | 0.0382 | | loss_cal_q1 | 0.0483 | | loss_cal_q2 | 0.0381 | | loss_cal_q3 | 0.041 | | loss_diff | 0.00141 | | loss_diff_q0 | 0.00355 | | loss_diff_q1 | 0.00135 | | loss_diff_q2 | 0.000386 | | loss_diff_q3 | 0.000128 | | loss_q0 | 0.00709 | | loss_q1 | 0.00136 | | loss_q2 | 0.000389 | | loss_q3 | 0.00013 | | param_norm | 239 | | samples | 9.44e+04 | | step | 1.18e+04 | | vb | 0.000925 | | vb_q0 | 0.00354 | | vb_q1 | 1.01e-05 | | vb_q2 | 3.5e-06 | | vb_q3 | 1.62e-06 | --------------------------- --------------------------- | grad_norm | 2.75 | | loss | 0.00142 | | loss_cal | 0.0448 | | loss_cal_q0 | 0.044 | | loss_cal_q1 | 0.0447 | | loss_cal_q2 | 0.0461 | | loss_cal_q3 | 0.0444 | | loss_diff | 0.00131 | | loss_diff_q0 | 0.00342 | | loss_diff_q1 | 0.00137 | | loss_diff_q2 | 0.000368 | | loss_diff_q3 | 0.00012 | | loss_q0 | 0.00386 | | loss_q1 | 0.00139 | | loss_q2 | 0.000372 | | loss_q3 | 0.000122 | | param_norm | 239 | | samples | 9.52e+04 | | step | 1.19e+04 | | vb | 0.00011 | | vb_q0 | 0.000446 | | vb_q1 | 1.06e-05 | | vb_q2 | 3.35e-06 | | vb_q3 | 1.52e-06 | --------------------------- --------------------------- | grad_norm | 2.81 | | loss | 0.00124 | | loss_cal | 0.0439 | | loss_cal_q0 | 0.0472 | | loss_cal_q1 | 0.0415 | | loss_cal_q2 | 0.0489 | | loss_cal_q3 | 0.0383 | | loss_diff | 0.00117 | | loss_diff_q0 | 0.00318 | | loss_diff_q1 | 0.00122 | | loss_diff_q2 | 0.000361 | | loss_diff_q3 | 0.000117 | | loss_q0 | 0.00348 | | loss_q1 | 0.00123 | | loss_q2 | 0.000364 | | loss_q3 | 0.000118 | | param_norm | 239 | | samples | 9.6e+04 | | step | 1.2e+04 | | vb | 7.42e-05 | | vb_q0 | 0.000302 | | vb_q1 | 9.08e-06 | | vb_q2 | 3.28e-06 | | vb_q3 | 1.47e-06 | --------------------------- --------------------------- | grad_norm | 2.64 | | loss | 0.00442 | | loss_cal | 0.0431 | | loss_cal_q0 | 0.0439 | | loss_cal_q1 | 0.039 | | loss_cal_q2 | 0.0415 | | loss_cal_q3 | 0.048 | | loss_diff | 0.00198 | | loss_diff_q0 | 0.00599 | | loss_diff_q1 | 0.00146 | | loss_diff_q2 | 0.000452 | | loss_diff_q3 | 0.000224 | | loss_q0 | 0.0161 | | loss_q1 | 0.00147 | | loss_q2 | 0.000456 | | loss_q3 | 0.000227 | | param_norm | 239 | | samples | 9.68e+04 | | step | 1.21e+04 | | vb | 0.00244 | | vb_q0 | 0.0101 | | vb_q1 | 1.08e-05 | | vb_q2 | 4.12e-06 | | vb_q3 | 2.85e-06 | --------------------------- --------------------------- | grad_norm | 2.82 | | loss | 0.00166 | | loss_cal | 0.0425 | | loss_cal_q0 | 0.0437 | | loss_cal_q1 | 0.0397 | | loss_cal_q2 | 0.0423 | | loss_cal_q3 | 0.0448 | | loss_diff | 0.00151 | | loss_diff_q0 | 0.00387 | | loss_diff_q1 | 0.00137 | | loss_diff_q2 | 0.00047 | | loss_diff_q3 | 0.000199 | | loss_q0 | 0.00444 | | loss_q1 | 0.00138 | | loss_q2 | 0.000474 | | loss_q3 | 0.000202 | | param_norm | 239 | | samples | 9.76e+04 | | step | 1.22e+04 | | vb | 0.000149 | | vb_q0 | 0.000573 | | vb_q1 | 1.02e-05 | | vb_q2 | 4.23e-06 | | vb_q3 | 2.56e-06 | --------------------------- --------------------------- | grad_norm | 2.79 | | loss | 0.0013 | | loss_cal | 0.0441 | | loss_cal_q0 | 0.0435 | | loss_cal_q1 | 0.0381 | | loss_cal_q2 | 0.0488 | | loss_cal_q3 | 0.0464 | | loss_diff | 0.00124 | | loss_diff_q0 | 0.00338 | | loss_diff_q1 | 0.00141 | | loss_diff_q2 | 0.000383 | | loss_diff_q3 | 0.000135 | | loss_q0 | 0.00364 | | loss_q1 | 0.00142 | | loss_q2 | 0.000387 | | loss_q3 | 0.000137 | | param_norm | 239 | | samples | 9.84e+04 | | step | 1.23e+04 | | vb | 5.92e-05 | | vb_q0 | 0.000261 | | vb_q1 | 1.05e-05 | | vb_q2 | 3.52e-06 | | vb_q3 | 1.71e-06 | --------------------------- --------------------------- | grad_norm | 2.58 | | loss | 0.00261 | | loss_cal | 0.0441 | | loss_cal_q0 | 0.0442 | | loss_cal_q1 | 0.0422 | | loss_cal_q2 | 0.0431 | | loss_cal_q3 | 0.0457 | | loss_diff | 0.00149 | | loss_diff_q0 | 0.00373 | | loss_diff_q1 | 0.00134 | | loss_diff_q2 | 0.000385 | | loss_diff_q3 | 0.000136 | | loss_q0 | 0.0077 | | loss_q1 | 0.00135 | | loss_q2 | 0.000388 | | loss_q3 | 0.000138 | | param_norm | 240 | | samples | 9.92e+04 | | step | 1.24e+04 | | vb | 0.00112 | | vb_q0 | 0.00397 | | vb_q1 | 1e-05 | | vb_q2 | 3.49e-06 | | vb_q3 | 1.76e-06 | --------------------------- --------------------------- | grad_norm | 2.66 | | loss | 0.00399 | | loss_cal | 0.0427 | | loss_cal_q0 | 0.0446 | | loss_cal_q1 | 0.0439 | | loss_cal_q2 | 0.0378 | | loss_cal_q3 | 0.0436 | | loss_diff | 0.00208 | | loss_diff_q0 | 0.00559 | | loss_diff_q1 | 0.00154 | | loss_diff_q2 | 0.000493 | | loss_diff_q3 | 0.000217 | | loss_q0 | 0.0127 | | loss_q1 | 0.00155 | | loss_q2 | 0.000497 | | loss_q3 | 0.00022 | | param_norm | 240 | | samples | 1e+05 | | step | 1.25e+04 | | vb | 0.00191 | | vb_q0 | 0.00715 | | vb_q1 | 1.16e-05 | | vb_q2 | 4.47e-06 | | vb_q3 | 2.76e-06 | --------------------------- --------------------------- | grad_norm | 2.5 | | loss | 0.00133 | | loss_cal | 0.0414 | | loss_cal_q0 | 0.045 | | loss_cal_q1 | 0.0479 | | loss_cal_q2 | 0.0338 | | loss_cal_q3 | 0.0401 | | loss_diff | 0.00129 | | loss_diff_q0 | 0.00312 | | loss_diff_q1 | 0.00135 | | loss_diff_q2 | 0.000395 | | loss_diff_q3 | 0.000132 | | loss_q0 | 0.00326 | | loss_q1 | 0.00136 | | loss_q2 | 0.000398 | | loss_q3 | 0.000133 | | param_norm | 240 | | samples | 1.01e+05 | | step | 1.26e+04 | | vb | 3.95e-05 | | vb_q0 | 0.000135 | | vb_q1 | 1.01e-05 | | vb_q2 | 3.57e-06 | | vb_q3 | 1.66e-06 | --------------------------- --------------------------- | grad_norm | 2.43 | | loss | 0.00191 | | loss_cal | 0.0425 | | loss_cal_q0 | 0.0424 | | loss_cal_q1 | 0.0466 | | loss_cal_q2 | 0.0373 | | loss_cal_q3 | 0.0441 | | loss_diff | 0.00147 | | loss_diff_q0 | 0.00401 | | loss_diff_q1 | 0.00134 | | loss_diff_q2 | 0.000359 | | loss_diff_q3 | 0.000132 | | loss_q0 | 0.00574 | | loss_q1 | 0.00135 | | loss_q2 | 0.000363 | | loss_q3 | 0.000134 | | param_norm | 240 | | samples | 1.02e+05 | | step | 1.27e+04 | | vb | 0.00044 | | vb_q0 | 0.00174 | | vb_q1 | 1.01e-05 | | vb_q2 | 3.29e-06 | | vb_q3 | 1.67e-06 | --------------------------- --------------------------- | grad_norm | 3.1 | | loss | 0.00128 | | loss_cal | 0.0433 | | loss_cal_q0 | 0.044 | | loss_cal_q1 | 0.0444 | | loss_cal_q2 | 0.0388 | | loss_cal_q3 | 0.0454 | | loss_diff | 0.00124 | | loss_diff_q0 | 0.00348 | | loss_diff_q1 | 0.00133 | | loss_diff_q2 | 0.000373 | | loss_diff_q3 | 0.00012 | | loss_q0 | 0.00365 | | loss_q1 | 0.00134 | | loss_q2 | 0.000376 | | loss_q3 | 0.000122 | | param_norm | 240 | | samples | 1.02e+05 | | step | 1.28e+04 | | vb | 4.71e-05 | | vb_q0 | 0.000171 | | vb_q1 | 9.95e-06 | | vb_q2 | 3.34e-06 | | vb_q3 | 1.52e-06 | --------------------------- --------------------------- | grad_norm | 2.63 | | loss | 0.00139 | | loss_cal | 0.0427 | | loss_cal_q0 | 0.0415 | | loss_cal_q1 | 0.0378 | | loss_cal_q2 | 0.0443 | | loss_cal_q3 | 0.0474 | | loss_diff | 0.00129 | | loss_diff_q0 | 0.00349 | | loss_diff_q1 | 0.00135 | | loss_diff_q2 | 0.000351 | | loss_diff_q3 | 0.000113 | | loss_q0 | 0.00387 | | loss_q1 | 0.00136 | | loss_q2 | 0.000355 | | loss_q3 | 0.000115 | | param_norm | 240 | | samples | 1.03e+05 | | step | 1.29e+04 | | vb | 9.46e-05 | | vb_q0 | 0.000382 | | vb_q1 | 1.01e-05 | | vb_q2 | 3.18e-06 | | vb_q3 | 1.42e-06 | --------------------------- --------------------------- | grad_norm | 2.68 | | loss | 0.00147 | | loss_cal | 0.0436 | | loss_cal_q0 | 0.0435 | | loss_cal_q1 | 0.0479 | | loss_cal_q2 | 0.046 | | loss_cal_q3 | 0.0384 | | loss_diff | 0.00134 | | loss_diff_q0 | 0.00368 | | loss_diff_q1 | 0.00135 | | loss_diff_q2 | 0.000377 | | loss_diff_q3 | 0.000111 | | loss_q0 | 0.00419 | | loss_q1 | 0.00136 | | loss_q2 | 0.00038 | | loss_q3 | 0.000112 | | param_norm | 240 | | samples | 1.04e+05 | | step | 1.3e+04 | | vb | 0.000126 | | vb_q0 | 0.000503 | | vb_q1 | 1.01e-05 | | vb_q2 | 3.41e-06 | | vb_q3 | 1.43e-06 | --------------------------- --------------------------- | grad_norm | 2.6 | | loss | 0.00116 | | loss_cal | 0.0422 | | loss_cal_q0 | 0.04 | | loss_cal_q1 | 0.0456 | | loss_cal_q2 | 0.0457 | | loss_cal_q3 | 0.0376 | | loss_diff | 0.00113 | | loss_diff_q0 | 0.00294 | | loss_diff_q1 | 0.00129 | | loss_diff_q2 | 0.000348 | | loss_diff_q3 | 9.49e-05 | | loss_q0 | 0.00308 | | loss_q1 | 0.0013 | | loss_q2 | 0.000351 | | loss_q3 | 9.61e-05 | | param_norm | 240 | | samples | 1.05e+05 | | step | 1.31e+04 | | vb | 3.7e-05 | | vb_q0 | 0.000139 | | vb_q1 | 9.71e-06 | | vb_q2 | 3.18e-06 | | vb_q3 | 1.21e-06 | --------------------------- --------------------------- | grad_norm | 2.59 | | loss | 0.00202 | | loss_cal | 0.0426 | | loss_cal_q0 | 0.0414 | | loss_cal_q1 | 0.0436 | | loss_cal_q2 | 0.0411 | | loss_cal_q3 | 0.0452 | | loss_diff | 0.00145 | | loss_diff_q0 | 0.00403 | | loss_diff_q1 | 0.00136 | | loss_diff_q2 | 0.000387 | | loss_diff_q3 | 0.000123 | | loss_q0 | 0.00631 | | loss_q1 | 0.00137 | | loss_q2 | 0.000391 | | loss_q3 | 0.000125 | | param_norm | 240 | | samples | 1.06e+05 | | step | 1.32e+04 | | vb | 0.000575 | | vb_q0 | 0.00228 | | vb_q1 | 1.01e-05 | | vb_q2 | 3.5e-06 | | vb_q3 | 1.58e-06 | --------------------------- --------------------------- | grad_norm | 2.51 | | loss | 0.00114 | | loss_cal | 0.0408 | | loss_cal_q0 | 0.0425 | | loss_cal_q1 | 0.0373 | | loss_cal_q2 | 0.0393 | | loss_cal_q3 | 0.0443 | | loss_diff | 0.00107 | | loss_diff_q0 | 0.00283 | | loss_diff_q1 | 0.00131 | | loss_diff_q2 | 0.000338 | | loss_diff_q3 | 0.000103 | | loss_q0 | 0.00313 | | loss_q1 | 0.00132 | | loss_q2 | 0.000342 | | loss_q3 | 0.000104 | | param_norm | 240 | | samples | 1.06e+05 | | step | 1.33e+04 | | vb | 6.98e-05 | | vb_q0 | 0.000302 | | vb_q1 | 9.77e-06 | | vb_q2 | 3.07e-06 | | vb_q3 | 1.3e-06 | --------------------------- --------------------------- | grad_norm | 2.6 | | loss | 0.00184 | | loss_cal | 0.0416 | | loss_cal_q0 | 0.0396 | | loss_cal_q1 | 0.0449 | | loss_cal_q2 | 0.0421 | | loss_cal_q3 | 0.0402 | | loss_diff | 0.00141 | | loss_diff_q0 | 0.00403 | | loss_diff_q1 | 0.00136 | | loss_diff_q2 | 0.000387 | | loss_diff_q3 | 0.000153 | | loss_q0 | 0.00595 | | loss_q1 | 0.00137 | | loss_q2 | 0.000391 | | loss_q3 | 0.000155 | | param_norm | 240 | | samples | 1.07e+05 | | step | 1.34e+04 | | vb | 0.000438 | | vb_q0 | 0.00192 | | vb_q1 | 1.02e-05 | | vb_q2 | 3.56e-06 | | vb_q3 | 1.97e-06 | --------------------------- --------------------------- | grad_norm | 2.52 | | loss | 0.00158 | | loss_cal | 0.0404 | | loss_cal_q0 | 0.0408 | | loss_cal_q1 | 0.0433 | | loss_cal_q2 | 0.0387 | | loss_cal_q3 | 0.039 | | loss_diff | 0.00119 | | loss_diff_q0 | 0.00314 | | loss_diff_q1 | 0.00124 | | loss_diff_q2 | 0.000329 | | loss_diff_q3 | 9.67e-05 | | loss_q0 | 0.00476 | | loss_q1 | 0.00125 | | loss_q2 | 0.000332 | | loss_q3 | 9.79e-05 | | param_norm | 240 | | samples | 1.08e+05 | | step | 1.35e+04 | | vb | 0.000389 | | vb_q0 | 0.00161 | | vb_q1 | 9.3e-06 | | vb_q2 | 3.01e-06 | | vb_q3 | 1.23e-06 | --------------------------- --------------------------- | grad_norm | 2.52 | | loss | 0.00168 | | loss_cal | 0.0425 | | loss_cal_q0 | 0.0458 | | loss_cal_q1 | 0.0437 | | loss_cal_q2 | 0.0408 | | loss_cal_q3 | 0.0403 | | loss_diff | 0.00134 | | loss_diff_q0 | 0.0036 | | loss_diff_q1 | 0.00142 | | loss_diff_q2 | 0.000391 | | loss_diff_q3 | 0.000141 | | loss_q0 | 0.00505 | | loss_q1 | 0.00143 | | loss_q2 | 0.000394 | | loss_q3 | 0.000143 | | param_norm | 240 | | samples | 1.09e+05 | | step | 1.36e+04 | | vb | 0.000338 | | vb_q0 | 0.00146 | | vb_q1 | 1.06e-05 | | vb_q2 | 3.55e-06 | | vb_q3 | 1.79e-06 | --------------------------- --------------------------- | grad_norm | 2.52 | | loss | 0.00105 | | loss_cal | 0.0403 | | loss_cal_q0 | 0.0413 | | loss_cal_q1 | 0.0388 | | loss_cal_q2 | 0.0421 | | loss_cal_q3 | 0.0381 | | loss_diff | 0.00103 | | loss_diff_q0 | 0.00252 | | loss_diff_q1 | 0.00128 | | loss_diff_q2 | 0.000343 | | loss_diff_q3 | 9.12e-05 | | loss_q0 | 0.00257 | | loss_q1 | 0.00129 | | loss_q2 | 0.000346 | | loss_q3 | 9.23e-05 | | param_norm | 240 | | samples | 1.1e+05 | | step | 1.37e+04 | | vb | 1.7e-05 | | vb_q0 | 5.44e-05 | | vb_q1 | 9.52e-06 | | vb_q2 | 3.09e-06 | | vb_q3 | 1.15e-06 | --------------------------- --------------------------- | grad_norm | 2.49 | | loss | 0.00264 | | loss_cal | 0.0423 | | loss_cal_q0 | 0.0414 | | loss_cal_q1 | 0.0422 | | loss_cal_q2 | 0.0432 | | loss_cal_q3 | 0.0433 | | loss_diff | 0.00141 | | loss_diff_q0 | 0.00392 | | loss_diff_q1 | 0.00117 | | loss_diff_q2 | 0.000388 | | loss_diff_q3 | 0.000114 | | loss_q0 | 0.00865 | | loss_q1 | 0.00118 | | loss_q2 | 0.000392 | | loss_q3 | 0.000115 | | param_norm | 240 | | samples | 1.1e+05 | | step | 1.38e+04 | | vb | 0.00123 | | vb_q0 | 0.00473 | | vb_q1 | 8.71e-06 | | vb_q2 | 3.5e-06 | | vb_q3 | 1.46e-06 | --------------------------- --------------------------- | grad_norm | 2.84 | | loss | 0.00538 | | loss_cal | 0.0422 | | loss_cal_q0 | 0.0406 | | loss_cal_q1 | 0.0445 | | loss_cal_q2 | 0.0395 | | loss_cal_q3 | 0.0445 | | loss_diff | 0.00221 | | loss_diff_q0 | 0.00623 | | loss_diff_q1 | 0.00152 | | loss_diff_q2 | 0.000568 | | loss_diff_q3 | 0.000267 | | loss_q0 | 0.0184 | | loss_q1 | 0.00153 | | loss_q2 | 0.000573 | | loss_q3 | 0.000271 | | param_norm | 240 | | samples | 1.11e+05 | | step | 1.39e+04 | | vb | 0.00318 | | vb_q0 | 0.0122 | | vb_q1 | 1.13e-05 | | vb_q2 | 5.2e-06 | | vb_q3 | 3.42e-06 | --------------------------- --------------------------- | grad_norm | 2.48 | | loss | 0.0016 | | loss_cal | 0.0419 | | loss_cal_q0 | 0.0448 | | loss_cal_q1 | 0.0419 | | loss_cal_q2 | 0.0443 | | loss_cal_q3 | 0.0367 | | loss_diff | 0.00151 | | loss_diff_q0 | 0.00425 | | loss_diff_q1 | 0.00139 | | loss_diff_q2 | 0.000435 | | loss_diff_q3 | 0.000153 | | loss_q0 | 0.00459 | | loss_q1 | 0.0014 | | loss_q2 | 0.000439 | | loss_q3 | 0.000155 | | param_norm | 240 | | samples | 1.12e+05 | | step | 1.4e+04 | | vb | 8.67e-05 | | vb_q0 | 0.000338 | | vb_q1 | 1.03e-05 | | vb_q2 | 3.97e-06 | | vb_q3 | 1.94e-06 | --------------------------- --------------------------- | grad_norm | 2.62 | | loss | 0.00354 | | loss_cal | 0.0406 | | loss_cal_q0 | 0.0381 | | loss_cal_q1 | 0.0434 | | loss_cal_q2 | 0.0425 | | loss_cal_q3 | 0.0372 | | loss_diff | 0.00193 | | loss_diff_q0 | 0.00586 | | loss_diff_q1 | 0.00145 | | loss_diff_q2 | 0.00047 | | loss_diff_q3 | 0.0002 | | loss_q0 | 0.0128 | | loss_q1 | 0.00146 | | loss_q2 | 0.000475 | | loss_q3 | 0.000203 | | param_norm | 240 | | samples | 1.13e+05 | | step | 1.41e+04 | | vb | 0.00161 | | vb_q0 | 0.00695 | | vb_q1 | 1.09e-05 | | vb_q2 | 4.26e-06 | | vb_q3 | 2.55e-06 | --------------------------- --------------------------- | grad_norm | 2.45 | | loss | 0.00248 | | loss_cal | 0.0404 | | loss_cal_q0 | 0.0389 | | loss_cal_q1 | 0.0461 | | loss_cal_q2 | 0.0369 | | loss_cal_q3 | 0.0384 | | loss_diff | 0.00173 | | loss_diff_q0 | 0.00477 | | loss_diff_q1 | 0.00143 | | loss_diff_q2 | 0.000439 | | loss_diff_q3 | 0.000181 | | loss_q0 | 0.00771 | | loss_q1 | 0.00144 | | loss_q2 | 0.000443 | | loss_q3 | 0.000184 | | param_norm | 241 | | samples | 1.14e+05 | | step | 1.42e+04 | | vb | 0.000754 | | vb_q0 | 0.00294 | | vb_q1 | 1.06e-05 | | vb_q2 | 4.02e-06 | | vb_q3 | 2.32e-06 | --------------------------- --------------------------- | grad_norm | 2.45 | | loss | 0.00167 | | loss_cal | 0.0392 | | loss_cal_q0 | 0.0445 | | loss_cal_q1 | 0.0392 | | loss_cal_q2 | 0.0351 | | loss_cal_q3 | 0.0375 | | loss_diff | 0.00127 | | loss_diff_q0 | 0.00362 | | loss_diff_q1 | 0.00129 | | loss_diff_q2 | 0.000357 | | loss_diff_q3 | 0.000108 | | loss_q0 | 0.00537 | | loss_q1 | 0.0013 | | loss_q2 | 0.00036 | | loss_q3 | 0.000109 | | param_norm | 241 | | samples | 1.14e+05 | | step | 1.43e+04 | | vb | 0.000401 | | vb_q0 | 0.00175 | | vb_q1 | 9.59e-06 | | vb_q2 | 3.22e-06 | | vb_q3 | 1.36e-06 | --------------------------- --------------------------- | grad_norm | 2.41 | | loss | 0.00248 | | loss_cal | 0.0413 | | loss_cal_q0 | 0.0425 | | loss_cal_q1 | 0.0365 | | loss_cal_q2 | 0.0429 | | loss_cal_q3 | 0.0432 | | loss_diff | 0.00195 | | loss_diff_q0 | 0.00522 | | loss_diff_q1 | 0.00139 | | loss_diff_q2 | 0.000455 | | loss_diff_q3 | 0.000213 | | loss_q0 | 0.00711 | | loss_q1 | 0.0014 | | loss_q2 | 0.000459 | | loss_q3 | 0.000216 | | param_norm | 241 | | samples | 1.15e+05 | | step | 1.44e+04 | | vb | 0.000531 | | vb_q0 | 0.0019 | | vb_q1 | 1.04e-05 | | vb_q2 | 4.2e-06 | | vb_q3 | 2.75e-06 | --------------------------- --------------------------- | grad_norm | 2.45 | | loss | 0.00205 | | loss_cal | 0.0384 | | loss_cal_q0 | 0.0406 | | loss_cal_q1 | 0.0379 | | loss_cal_q2 | 0.0377 | | loss_cal_q3 | 0.0375 | | loss_diff | 0.00156 | | loss_diff_q0 | 0.00431 | | loss_diff_q1 | 0.00139 | | loss_diff_q2 | 0.000356 | | loss_diff_q3 | 0.000139 | | loss_q0 | 0.00625 | | loss_q1 | 0.0014 | | loss_q2 | 0.000359 | | loss_q3 | 0.00014 | | param_norm | 241 | | samples | 1.16e+05 | | step | 1.45e+04 | | vb | 0.000493 | | vb_q0 | 0.00194 | | vb_q1 | 1.04e-05 | | vb_q2 | 3.22e-06 | | vb_q3 | 1.75e-06 | --------------------------- --------------------------- | grad_norm | 2.44 | | loss | 0.00143 | | loss_cal | 0.0394 | | loss_cal_q0 | 0.0393 | | loss_cal_q1 | 0.0417 | | loss_cal_q2 | 0.0354 | | loss_cal_q3 | 0.0419 | | loss_diff | 0.00131 | | loss_diff_q0 | 0.00321 | | loss_diff_q1 | 0.00126 | | loss_diff_q2 | 0.000348 | | loss_diff_q3 | 0.000117 | | loss_q0 | 0.00363 | | loss_q1 | 0.00127 | | loss_q2 | 0.000351 | | loss_q3 | 0.000118 | | param_norm | 241 | | samples | 1.17e+05 | | step | 1.46e+04 | | vb | 0.000119 | | vb_q0 | 0.000416 | | vb_q1 | 9.47e-06 | | vb_q2 | 3.16e-06 | | vb_q3 | 1.49e-06 | --------------------------- --------------------------- | grad_norm | 2.35 | | loss | 0.00266 | | loss_cal | 0.0395 | | loss_cal_q0 | 0.0403 | | loss_cal_q1 | 0.0382 | | loss_cal_q2 | 0.0402 | | loss_cal_q3 | 0.0393 | | loss_diff | 0.00162 | | loss_diff_q0 | 0.00445 | | loss_diff_q1 | 0.00124 | | loss_diff_q2 | 0.000406 | | loss_diff_q3 | 0.000142 | | loss_q0 | 0.00848 | | loss_q1 | 0.00125 | | loss_q2 | 0.00041 | | loss_q3 | 0.000144 | | param_norm | 241 | | samples | 1.18e+05 | | step | 1.47e+04 | | vb | 0.00105 | | vb_q0 | 0.00403 | | vb_q1 | 9.46e-06 | | vb_q2 | 3.65e-06 | | vb_q3 | 1.81e-06 | --------------------------- --------------------------- | grad_norm | 2.38 | | loss | 0.00179 | | loss_cal | 0.0401 | | loss_cal_q0 | 0.0366 | | loss_cal_q1 | 0.0459 | | loss_cal_q2 | 0.0418 | | loss_cal_q3 | 0.0367 | | loss_diff | 0.00138 | | loss_diff_q0 | 0.00367 | | loss_diff_q1 | 0.00137 | | loss_diff_q2 | 0.000364 | | loss_diff_q3 | 0.000128 | | loss_q0 | 0.00524 | | loss_q1 | 0.00138 | | loss_q2 | 0.000367 | | loss_q3 | 0.000129 | | param_norm | 241 | | samples | 1.18e+05 | | step | 1.48e+04 | | vb | 0.000408 | | vb_q0 | 0.00157 | | vb_q1 | 1.03e-05 | | vb_q2 | 3.31e-06 | | vb_q3 | 1.63e-06 | --------------------------- --------------------------- | grad_norm | 2.39 | | loss | 0.00148 | | loss_cal | 0.0386 | | loss_cal_q0 | 0.0364 | | loss_cal_q1 | 0.0421 | | loss_cal_q2 | 0.0349 | | loss_cal_q3 | 0.0407 | | loss_diff | 0.00141 | | loss_diff_q0 | 0.00405 | | loss_diff_q1 | 0.00135 | | loss_diff_q2 | 0.000358 | | loss_diff_q3 | 0.000113 | | loss_q0 | 0.0043 | | loss_q1 | 0.00136 | | loss_q2 | 0.000361 | | loss_q3 | 0.000115 | | param_norm | 241 | | samples | 1.19e+05 | | step | 1.49e+04 | | vb | 6.26e-05 | | vb_q0 | 0.000249 | | vb_q1 | 1.01e-05 | | vb_q2 | 3.21e-06 | | vb_q3 | 1.45e-06 | --------------------------- --------------------------- | grad_norm | 2.46 | | loss | 0.00106 | | loss_cal | 0.0397 | | loss_cal_q0 | 0.0354 | | loss_cal_q1 | 0.0407 | | loss_cal_q2 | 0.0393 | | loss_cal_q3 | 0.0426 | | loss_diff | 0.00103 | | loss_diff_q0 | 0.0027 | | loss_diff_q1 | 0.0012 | | loss_diff_q2 | 0.000316 | | loss_diff_q3 | 8.06e-05 | | loss_q0 | 0.00282 | | loss_q1 | 0.0012 | | loss_q2 | 0.000319 | | loss_q3 | 8.16e-05 | | param_norm | 241 | | samples | 1.2e+05 | | step | 1.5e+04 | | vb | 3.14e-05 | | vb_q0 | 0.000122 | | vb_q1 | 8.94e-06 | | vb_q2 | 2.84e-06 | | vb_q3 | 1.01e-06 | --------------------------- saving model 0... saving model 0.9999... --------------------------- | grad_norm | 2.32 | | loss | 0.00149 | | loss_cal | 0.0388 | | loss_cal_q0 | 0.0409 | | loss_cal_q1 | 0.0395 | | loss_cal_q2 | 0.036 | | loss_cal_q3 | 0.0388 | | loss_diff | 0.00126 | | loss_diff_q0 | 0.00305 | | loss_diff_q1 | 0.00129 | | loss_diff_q2 | 0.000315 | | loss_diff_q3 | 0.000102 | | loss_q0 | 0.0039 | | loss_q1 | 0.0013 | | loss_q2 | 0.000318 | | loss_q3 | 0.000103 | | param_norm | 241 | | samples | 1.21e+05 | | step | 1.51e+04 | | vb | 0.000234 | | vb_q0 | 0.000842 | | vb_q1 | 9.68e-06 | | vb_q2 | 2.87e-06 | | vb_q3 | 1.29e-06 | --------------------------- --------------------------- | grad_norm | 2.19 | | loss | 0.00107 | | loss_cal | 0.039 | | loss_cal_q0 | 0.0374 | | loss_cal_q1 | 0.0387 | | loss_cal_q2 | 0.0396 | | loss_cal_q3 | 0.0406 | | loss_diff | 0.00104 | | loss_diff_q0 | 0.00247 | | loss_diff_q1 | 0.00124 | | loss_diff_q2 | 0.000335 | | loss_diff_q3 | 8.36e-05 | | loss_q0 | 0.00257 | | loss_q1 | 0.00125 | | loss_q2 | 0.000338 | | loss_q3 | 8.47e-05 | | param_norm | 241 | | samples | 1.22e+05 | | step | 1.52e+04 | | vb | 2.84e-05 | | vb_q0 | 0.000103 | | vb_q1 | 9.26e-06 | | vb_q2 | 3.02e-06 | | vb_q3 | 1.07e-06 | --------------------------- --------------------------- | grad_norm | 2.57 | | loss | 0.00185 | | loss_cal | 0.0408 | | loss_cal_q0 | 0.0383 | | loss_cal_q1 | 0.0443 | | loss_cal_q2 | 0.0409 | | loss_cal_q3 | 0.0398 | | loss_diff | 0.00131 | | loss_diff_q0 | 0.00326 | | loss_diff_q1 | 0.00126 | | loss_diff_q2 | 0.000313 | | loss_diff_q3 | 9.64e-05 | | loss_q0 | 0.00525 | | loss_q1 | 0.00127 | | loss_q2 | 0.000316 | | loss_q3 | 9.77e-05 | | param_norm | 241 | | samples | 1.22e+05 | | step | 1.53e+04 | | vb | 0.000546 | | vb_q0 | 0.00198 | | vb_q1 | 9.4e-06 | | vb_q2 | 2.87e-06 | | vb_q3 | 1.25e-06 | --------------------------- --------------------------- | grad_norm | 2.57 | | loss | 0.00272 | | loss_cal | 0.0384 | | loss_cal_q0 | 0.0373 | | loss_cal_q1 | 0.0367 | | loss_cal_q2 | 0.0442 | | loss_cal_q3 | 0.0353 | | loss_diff | 0.00161 | | loss_diff_q0 | 0.00447 | | loss_diff_q1 | 0.00135 | | loss_diff_q2 | 0.000407 | | loss_diff_q3 | 0.000151 | | loss_q0 | 0.00883 | | loss_q1 | 0.00136 | | loss_q2 | 0.00041 | | loss_q3 | 0.000153 | | param_norm | 241 | | samples | 1.23e+05 | | step | 1.54e+04 | | vb | 0.0011 | | vb_q0 | 0.00436 | | vb_q1 | 1.02e-05 | | vb_q2 | 3.72e-06 | | vb_q3 | 1.92e-06 | --------------------------- --------------------------- | grad_norm | 2.56 | | loss | 0.0017 | | loss_cal | 0.0393 | | loss_cal_q0 | 0.0419 | | loss_cal_q1 | 0.0375 | | loss_cal_q2 | 0.0395 | | loss_cal_q3 | 0.036 | | loss_diff | 0.00144 | | loss_diff_q0 | 0.00396 | | loss_diff_q1 | 0.00124 | | loss_diff_q2 | 0.000408 | | loss_diff_q3 | 0.000139 | | loss_q0 | 0.00498 | | loss_q1 | 0.00125 | | loss_q2 | 0.000412 | | loss_q3 | 0.000141 | | param_norm | 241 | | samples | 1.24e+05 | | step | 1.55e+04 | | vb | 0.000262 | | vb_q0 | 0.00102 | | vb_q1 | 9.43e-06 | | vb_q2 | 3.68e-06 | | vb_q3 | 1.76e-06 | --------------------------- --------------------------- | grad_norm | 2.35 | | loss | 0.00113 | | loss_cal | 0.0384 | | loss_cal_q0 | 0.0395 | | loss_cal_q1 | 0.0386 | | loss_cal_q2 | 0.0343 | | loss_cal_q3 | 0.0424 | | loss_diff | 0.00109 | | loss_diff_q0 | 0.00282 | | loss_diff_q1 | 0.00129 | | loss_diff_q2 | 0.000314 | | loss_diff_q3 | 9.22e-05 | | loss_q0 | 0.00295 | | loss_q1 | 0.0013 | | loss_q2 | 0.000317 | | loss_q3 | 9.34e-05 | | param_norm | 241 | | samples | 1.25e+05 | | step | 1.56e+04 | | vb | 3.18e-05 | | vb_q0 | 0.000123 | | vb_q1 | 9.61e-06 | | vb_q2 | 2.87e-06 | | vb_q3 | 1.17e-06 | --------------------------- --------------------------- | grad_norm | 2.23 | | loss | 0.00301 | | loss_cal | 0.0376 | | loss_cal_q0 | 0.0364 | | loss_cal_q1 | 0.0399 | | loss_cal_q2 | 0.0405 | | loss_cal_q3 | 0.0339 | | loss_diff | 0.0017 | | loss_diff_q0 | 0.00465 | | loss_diff_q1 | 0.00132 | | loss_diff_q2 | 0.000384 | | loss_diff_q3 | 0.000144 | | loss_q0 | 0.00953 | | loss_q1 | 0.00133 | | loss_q2 | 0.000388 | | loss_q3 | 0.000146 | | param_norm | 241 | | samples | 1.26e+05 | | step | 1.57e+04 | | vb | 0.0013 | | vb_q0 | 0.00488 | | vb_q1 | 9.8e-06 | | vb_q2 | 3.52e-06 | | vb_q3 | 1.82e-06 | --------------------------- --------------------------- | grad_norm | 2.43 | | loss | 0.00302 | | loss_cal | 0.0383 | | loss_cal_q0 | 0.042 | | loss_cal_q1 | 0.0364 | | loss_cal_q2 | 0.0371 | | loss_cal_q3 | 0.0383 | | loss_diff | 0.00177 | | loss_diff_q0 | 0.00524 | | loss_diff_q1 | 0.00146 | | loss_diff_q2 | 0.000446 | | loss_diff_q3 | 0.000182 | | loss_q0 | 0.0104 | | loss_q1 | 0.00147 | | loss_q2 | 0.00045 | | loss_q3 | 0.000184 | | param_norm | 241 | | samples | 1.26e+05 | | step | 1.58e+04 | | vb | 0.00125 | | vb_q0 | 0.00519 | | vb_q1 | 1.09e-05 | | vb_q2 | 4.05e-06 | | vb_q3 | 2.35e-06 | --------------------------- --------------------------- | grad_norm | 2.35 | | loss | 0.00203 | | loss_cal | 0.0377 | | loss_cal_q0 | 0.0377 | | loss_cal_q1 | 0.0383 | | loss_cal_q2 | 0.0406 | | loss_cal_q3 | 0.0349 | | loss_diff | 0.00158 | | loss_diff_q0 | 0.00459 | | loss_diff_q1 | 0.00133 | | loss_diff_q2 | 0.000381 | | loss_diff_q3 | 0.000129 | | loss_q0 | 0.00646 | | loss_q1 | 0.00134 | | loss_q2 | 0.000384 | | loss_q3 | 0.000131 | | param_norm | 241 | | samples | 1.27e+05 | | step | 1.59e+04 | | vb | 0.000448 | | vb_q0 | 0.00187 | | vb_q1 | 9.92e-06 | | vb_q2 | 3.48e-06 | | vb_q3 | 1.65e-06 | --------------------------- --------------------------- | grad_norm | 2.22 | | loss | 0.0018 | | loss_cal | 0.0377 | | loss_cal_q0 | 0.0375 | | loss_cal_q1 | 0.0369 | | loss_cal_q2 | 0.0372 | | loss_cal_q3 | 0.0392 | | loss_diff | 0.00142 | | loss_diff_q0 | 0.00383 | | loss_diff_q1 | 0.00128 | | loss_diff_q2 | 0.000352 | | loss_diff_q3 | 0.00011 | | loss_q0 | 0.00529 | | loss_q1 | 0.00129 | | loss_q2 | 0.000355 | | loss_q3 | 0.000111 | | param_norm | 242 | | samples | 1.28e+05 | | step | 1.6e+04 | | vb | 0.00038 | | vb_q0 | 0.00146 | | vb_q1 | 9.58e-06 | | vb_q2 | 3.17e-06 | | vb_q3 | 1.39e-06 | --------------------------- --------------------------- | grad_norm | 2.61 | | loss | 0.00124 | | loss_cal | 0.0376 | | loss_cal_q0 | 0.0379 | | loss_cal_q1 | 0.039 | | loss_cal_q2 | 0.038 | | loss_cal_q3 | 0.036 | | loss_diff | 0.00119 | | loss_diff_q0 | 0.00311 | | loss_diff_q1 | 0.00123 | | loss_diff_q2 | 0.000321 | | loss_diff_q3 | 8.69e-05 | | loss_q0 | 0.0033 | | loss_q1 | 0.00124 | | loss_q2 | 0.000324 | | loss_q3 | 8.8e-05 | | param_norm | 242 | | samples | 1.29e+05 | | step | 1.61e+04 | | vb | 4.83e-05 | | vb_q0 | 0.000185 | | vb_q1 | 9.18e-06 | | vb_q2 | 2.89e-06 | | vb_q3 | 1.09e-06 | --------------------------- --------------------------- | grad_norm | 2.35 | | loss | 0.00149 | | loss_cal | 0.039 | | loss_cal_q0 | 0.0358 | | loss_cal_q1 | 0.0401 | | loss_cal_q2 | 0.0386 | | loss_cal_q3 | 0.0423 | | loss_diff | 0.00117 | | loss_diff_q0 | 0.00312 | | loss_diff_q1 | 0.00116 | | loss_diff_q2 | 0.000303 | | loss_diff_q3 | 8.68e-05 | | loss_q0 | 0.0044 | | loss_q1 | 0.00117 | | loss_q2 | 0.000306 | | loss_q3 | 8.79e-05 | | param_norm | 242 | | samples | 1.3e+05 | | step | 1.62e+04 | | vb | 0.00032 | | vb_q0 | 0.00129 | | vb_q1 | 8.62e-06 | | vb_q2 | 2.77e-06 | | vb_q3 | 1.11e-06 | --------------------------- --------------------------- | grad_norm | 2.32 | | loss | 0.00188 | | loss_cal | 0.0378 | | loss_cal_q0 | 0.0422 | | loss_cal_q1 | 0.0374 | | loss_cal_q2 | 0.0332 | | loss_cal_q3 | 0.0378 | | loss_diff | 0.00139 | | loss_diff_q0 | 0.00339 | | loss_diff_q1 | 0.00127 | | loss_diff_q2 | 0.000329 | | loss_diff_q3 | 0.00013 | | loss_q0 | 0.00517 | | loss_q1 | 0.00128 | | loss_q2 | 0.000332 | | loss_q3 | 0.000131 | | param_norm | 242 | | samples | 1.3e+05 | | step | 1.63e+04 | | vb | 0.000494 | | vb_q0 | 0.00178 | | vb_q1 | 9.42e-06 | | vb_q2 | 3.02e-06 | | vb_q3 | 1.66e-06 | --------------------------- --------------------------- | grad_norm | 2.22 | | loss | 0.00111 | | loss_cal | 0.0381 | | loss_cal_q0 | 0.0433 | | loss_cal_q1 | 0.0358 | | loss_cal_q2 | 0.0369 | | loss_cal_q3 | 0.0344 | | loss_diff | 0.00108 | | loss_diff_q0 | 0.00255 | | loss_diff_q1 | 0.0012 | | loss_diff_q2 | 0.000313 | | loss_diff_q3 | 8.59e-05 | | loss_q0 | 0.00266 | | loss_q1 | 0.00121 | | loss_q2 | 0.000316 | | loss_q3 | 8.7e-05 | | param_norm | 242 | | samples | 1.31e+05 | | step | 1.64e+04 | | vb | 3.02e-05 | | vb_q0 | 0.000101 | | vb_q1 | 8.93e-06 | | vb_q2 | 2.83e-06 | | vb_q3 | 1.09e-06 | --------------------------- --------------------------- | grad_norm | 2.22 | | loss | 0.00118 | | loss_cal | 0.0374 | | loss_cal_q0 | 0.0442 | | loss_cal_q1 | 0.0291 | | loss_cal_q2 | 0.0369 | | loss_cal_q3 | 0.0369 | | loss_diff | 0.00114 | | loss_diff_q0 | 0.00299 | | loss_diff_q1 | 0.0012 | | loss_diff_q2 | 0.000319 | | loss_diff_q3 | 9.59e-05 | | loss_q0 | 0.00313 | | loss_q1 | 0.00121 | | loss_q2 | 0.000322 | | loss_q3 | 9.71e-05 | | param_norm | 242 | | samples | 1.32e+05 | | step | 1.65e+04 | | vb | 3.91e-05 | | vb_q0 | 0.000143 | | vb_q1 | 8.94e-06 | | vb_q2 | 2.89e-06 | | vb_q3 | 1.22e-06 | --------------------------- --------------------------- | grad_norm | 2.62 | | loss | 0.00114 | | loss_cal | 0.0378 | | loss_cal_q0 | 0.0373 | | loss_cal_q1 | 0.0375 | | loss_cal_q2 | 0.0425 | | loss_cal_q3 | 0.0338 | | loss_diff | 0.000988 | | loss_diff_q0 | 0.00244 | | loss_diff_q1 | 0.0012 | | loss_diff_q2 | 0.000306 | | loss_diff_q3 | 7.27e-05 | | loss_q0 | 0.00307 | | loss_q1 | 0.00121 | | loss_q2 | 0.000308 | | loss_q3 | 7.36e-05 | | param_norm | 242 | | samples | 1.33e+05 | | step | 1.66e+04 | | vb | 0.000154 | | vb_q0 | 0.000628 | | vb_q1 | 8.93e-06 | | vb_q2 | 2.75e-06 | | vb_q3 | 9.23e-07 | --------------------------- --------------------------- | grad_norm | 2.29 | | loss | 0.00127 | | loss_cal | 0.0367 | | loss_cal_q0 | 0.0343 | | loss_cal_q1 | 0.0359 | | loss_cal_q2 | 0.0386 | | loss_cal_q3 | 0.0377 | | loss_diff | 0.00109 | | loss_diff_q0 | 0.00266 | | loss_diff_q1 | 0.00116 | | loss_diff_q2 | 0.000343 | | loss_diff_q3 | 8.94e-05 | | loss_q0 | 0.00335 | | loss_q1 | 0.00117 | | loss_q2 | 0.000347 | | loss_q3 | 9.06e-05 | | param_norm | 242 | | samples | 1.34e+05 | | step | 1.67e+04 | | vb | 0.000179 | | vb_q0 | 0.000688 | | vb_q1 | 8.63e-06 | | vb_q2 | 3.08e-06 | | vb_q3 | 1.13e-06 | --------------------------- --------------------------- | grad_norm | 2.49 | | loss | 0.00232 | | loss_cal | 0.0386 | | loss_cal_q0 | 0.0418 | | loss_cal_q1 | 0.0329 | | loss_cal_q2 | 0.0378 | | loss_cal_q3 | 0.0408 | | loss_diff | 0.00143 | | loss_diff_q0 | 0.00415 | | loss_diff_q1 | 0.00126 | | loss_diff_q2 | 0.000377 | | loss_diff_q3 | 0.000135 | | loss_q0 | 0.00778 | | loss_q1 | 0.00127 | | loss_q2 | 0.00038 | | loss_q3 | 0.000137 | | param_norm | 242 | | samples | 1.34e+05 | | step | 1.68e+04 | | vb | 0.000894 | | vb_q0 | 0.00363 | | vb_q1 | 9.41e-06 | | vb_q2 | 3.41e-06 | | vb_q3 | 1.72e-06 | --------------------------- --------------------------- | grad_norm | 2.26 | | loss | 0.00297 | | loss_cal | 0.0347 | | loss_cal_q0 | 0.035 | | loss_cal_q1 | 0.0331 | | loss_cal_q2 | 0.0353 | | loss_cal_q3 | 0.0361 | | loss_diff | 0.00202 | | loss_diff_q0 | 0.00616 | | loss_diff_q1 | 0.00138 | | loss_diff_q2 | 0.000466 | | loss_diff_q3 | 0.000211 | | loss_q0 | 0.0101 | | loss_q1 | 0.00139 | | loss_q2 | 0.00047 | | loss_q3 | 0.000214 | | param_norm | 242 | | samples | 1.35e+05 | | step | 1.69e+04 | | vb | 0.000953 | | vb_q0 | 0.00395 | | vb_q1 | 1.03e-05 | | vb_q2 | 4.27e-06 | | vb_q3 | 2.71e-06 | --------------------------- --------------------------- | grad_norm | 2.51 | | loss | 0.00143 | | loss_cal | 0.0378 | | loss_cal_q0 | 0.0368 | | loss_cal_q1 | 0.0357 | | loss_cal_q2 | 0.0372 | | loss_cal_q3 | 0.0418 | | loss_diff | 0.00119 | | loss_diff_q0 | 0.00296 | | loss_diff_q1 | 0.00123 | | loss_diff_q2 | 0.000335 | | loss_diff_q3 | 9.49e-05 | | loss_q0 | 0.00391 | | loss_q1 | 0.00123 | | loss_q2 | 0.000338 | | loss_q3 | 9.61e-05 | | param_norm | 242 | | samples | 1.36e+05 | | step | 1.7e+04 | | vb | 0.000247 | | vb_q0 | 0.000951 | | vb_q1 | 9.12e-06 | | vb_q2 | 3.02e-06 | | vb_q3 | 1.21e-06 | --------------------------- --------------------------- | grad_norm | 2.38 | | loss | 0.00132 | | loss_cal | 0.037 | | loss_cal_q0 | 0.0354 | | loss_cal_q1 | 0.0352 | | loss_cal_q2 | 0.0394 | | loss_cal_q3 | 0.0351 | | loss_diff | 0.00123 | | loss_diff_q0 | 0.00301 | | loss_diff_q1 | 0.00124 | | loss_diff_q2 | 0.000321 | | loss_diff_q3 | 9.45e-05 | | loss_q0 | 0.00335 | | loss_q1 | 0.00125 | | loss_q2 | 0.000323 | | loss_q3 | 9.57e-05 | | param_norm | 242 | | samples | 1.37e+05 | | step | 1.71e+04 | | vb | 9.3e-05 | | vb_q0 | 0.00034 | | vb_q1 | 9.23e-06 | | vb_q2 | 2.91e-06 | | vb_q3 | 1.22e-06 | --------------------------- --------------------------- | grad_norm | 1.93 | | loss | 0.0013 | | loss_cal | 0.0367 | | loss_cal_q0 | 0.0372 | | loss_cal_q1 | 0.0363 | | loss_cal_q2 | 0.0377 | | loss_cal_q3 | 0.0361 | | loss_diff | 0.00125 | | loss_diff_q0 | 0.00322 | | loss_diff_q1 | 0.00123 | | loss_diff_q2 | 0.000354 | | loss_diff_q3 | 9.72e-05 | | loss_q0 | 0.00339 | | loss_q1 | 0.00124 | | loss_q2 | 0.000357 | | loss_q3 | 9.84e-05 | | param_norm | 242 | | samples | 1.38e+05 | | step | 1.72e+04 | | vb | 4.89e-05 | | vb_q0 | 0.000176 | | vb_q1 | 9.13e-06 | | vb_q2 | 3.19e-06 | | vb_q3 | 1.23e-06 | --------------------------- --------------------------- | grad_norm | 2.25 | | loss | 0.00105 | | loss_cal | 0.0371 | | loss_cal_q0 | 0.0397 | | loss_cal_q1 | 0.0345 | | loss_cal_q2 | 0.0342 | | loss_cal_q3 | 0.0401 | | loss_diff | 0.00102 | | loss_diff_q0 | 0.00248 | | loss_diff_q1 | 0.00116 | | loss_diff_q2 | 0.000312 | | loss_diff_q3 | 9.06e-05 | | loss_q0 | 0.00258 | | loss_q1 | 0.00117 | | loss_q2 | 0.000315 | | loss_q3 | 9.17e-05 | | param_norm | 242 | | samples | 1.38e+05 | | step | 1.73e+04 | | vb | 2.88e-05 | | vb_q0 | 0.000101 | | vb_q1 | 8.58e-06 | | vb_q2 | 2.81e-06 | | vb_q3 | 1.15e-06 | --------------------------- --------------------------- | grad_norm | 2.34 | | loss | 0.00155 | | loss_cal | 0.0363 | | loss_cal_q0 | 0.0421 | | loss_cal_q1 | 0.0341 | | loss_cal_q2 | 0.0332 | | loss_cal_q3 | 0.0359 | | loss_diff | 0.00143 | | loss_diff_q0 | 0.00429 | | loss_diff_q1 | 0.0013 | | loss_diff_q2 | 0.000335 | | loss_diff_q3 | 0.000126 | | loss_q0 | 0.0048 | | loss_q1 | 0.00131 | | loss_q2 | 0.000338 | | loss_q3 | 0.000128 | | param_norm | 242 | | samples | 1.39e+05 | | step | 1.74e+04 | | vb | 0.000126 | | vb_q0 | 0.000512 | | vb_q1 | 9.72e-06 | | vb_q2 | 3.05e-06 | | vb_q3 | 1.61e-06 | --------------------------- --------------------------- | grad_norm | 2.32 | | loss | 0.0011 | | loss_cal | 0.0355 | | loss_cal_q0 | 0.0372 | | loss_cal_q1 | 0.034 | | loss_cal_q2 | 0.031 | | loss_cal_q3 | 0.0411 | | loss_diff | 0.00106 | | loss_diff_q0 | 0.00252 | | loss_diff_q1 | 0.00126 | | loss_diff_q2 | 0.000324 | | loss_diff_q3 | 8.69e-05 | | loss_q0 | 0.00263 | | loss_q1 | 0.00127 | | loss_q2 | 0.000327 | | loss_q3 | 8.8e-05 | | param_norm | 242 | | samples | 1.4e+05 | | step | 1.75e+04 | | vb | 3.19e-05 | | vb_q0 | 0.000111 | | vb_q1 | 9.35e-06 | | vb_q2 | 2.94e-06 | | vb_q3 | 1.1e-06 | --------------------------- --------------------------- | grad_norm | 2.48 | | loss | 0.00235 | | loss_cal | 0.0366 | | loss_cal_q0 | 0.0368 | | loss_cal_q1 | 0.0348 | | loss_cal_q2 | 0.0389 | | loss_cal_q3 | 0.0364 | | loss_diff | 0.00157 | | loss_diff_q0 | 0.00472 | | loss_diff_q1 | 0.00123 | | loss_diff_q2 | 0.00043 | | loss_diff_q3 | 0.000142 | | loss_q0 | 0.008 | | loss_q1 | 0.00124 | | loss_q2 | 0.000434 | | loss_q3 | 0.000144 | | param_norm | 242 | | samples | 1.41e+05 | | step | 1.76e+04 | | vb | 0.000776 | | vb_q0 | 0.00329 | | vb_q1 | 9.15e-06 | | vb_q2 | 3.83e-06 | | vb_q3 | 1.82e-06 | --------------------------- --------------------------- | grad_norm | 2.17 | | loss | 0.00117 | | loss_cal | 0.0365 | | loss_cal_q0 | 0.0363 | | loss_cal_q1 | 0.0389 | | loss_cal_q2 | 0.0347 | | loss_cal_q3 | 0.035 | | loss_diff | 0.0011 | | loss_diff_q0 | 0.00268 | | loss_diff_q1 | 0.00118 | | loss_diff_q2 | 0.000316 | | loss_diff_q3 | 8.69e-05 | | loss_q0 | 0.00296 | | loss_q1 | 0.00119 | | loss_q2 | 0.000319 | | loss_q3 | 8.8e-05 | | param_norm | 242 | | samples | 1.42e+05 | | step | 1.77e+04 | | vb | 7.5e-05 | | vb_q0 | 0.000281 | | vb_q1 | 8.77e-06 | | vb_q2 | 2.85e-06 | | vb_q3 | 1.09e-06 | --------------------------- --------------------------- | grad_norm | 2.39 | | loss | 0.00234 | | loss_cal | 0.0377 | | loss_cal_q0 | 0.0339 | | loss_cal_q1 | 0.041 | | loss_cal_q2 | 0.036 | | loss_cal_q3 | 0.0408 | | loss_diff | 0.00132 | | loss_diff_q0 | 0.00345 | | loss_diff_q1 | 0.00123 | | loss_diff_q2 | 0.000338 | | loss_diff_q3 | 0.000113 | | loss_q0 | 0.00733 | | loss_q1 | 0.00124 | | loss_q2 | 0.000341 | | loss_q3 | 0.000115 | | param_norm | 243 | | samples | 1.42e+05 | | step | 1.78e+04 | | vb | 0.00102 | | vb_q0 | 0.00388 | | vb_q1 | 9.09e-06 | | vb_q2 | 3.05e-06 | | vb_q3 | 1.45e-06 | --------------------------- --------------------------- | grad_norm | 2.07 | | loss | 0.00155 | | loss_cal | 0.0346 | | loss_cal_q0 | 0.0384 | | loss_cal_q1 | 0.0329 | | loss_cal_q2 | 0.0352 | | loss_cal_q3 | 0.0321 | | loss_diff | 0.00151 | | loss_diff_q0 | 0.00358 | | loss_diff_q1 | 0.0015 | | loss_diff_q2 | 0.000483 | | loss_diff_q3 | 0.000188 | | loss_q0 | 0.00372 | | loss_q1 | 0.00152 | | loss_q2 | 0.000487 | | loss_q3 | 0.00019 | | param_norm | 243 | | samples | 1.43e+05 | | step | 1.79e+04 | | vb | 4.3e-05 | | vb_q0 | 0.000145 | | vb_q1 | 1.12e-05 | | vb_q2 | 4.4e-06 | | vb_q3 | 2.4e-06 | --------------------------- --------------------------- | grad_norm | 2.09 | | loss | 0.00106 | | loss_cal | 0.0351 | | loss_cal_q0 | 0.0348 | | loss_cal_q1 | 0.031 | | loss_cal_q2 | 0.0351 | | loss_cal_q3 | 0.0388 | | loss_diff | 0.00104 | | loss_diff_q0 | 0.00246 | | loss_diff_q1 | 0.00115 | | loss_diff_q2 | 0.000347 | | loss_diff_q3 | 9.15e-05 | | loss_q0 | 0.00253 | | loss_q1 | 0.00116 | | loss_q2 | 0.00035 | | loss_q3 | 9.26e-05 | | param_norm | 243 | | samples | 1.44e+05 | | step | 1.8e+04 | | vb | 2.17e-05 | | vb_q0 | 6.93e-05 | | vb_q1 | 8.54e-06 | | vb_q2 | 3.1e-06 | | vb_q3 | 1.15e-06 | --------------------------- --------------------------- | grad_norm | 2.18 | | loss | 0.00129 | | loss_cal | 0.0347 | | loss_cal_q0 | 0.0344 | | loss_cal_q1 | 0.0341 | | loss_cal_q2 | 0.0347 | | loss_cal_q3 | 0.0361 | | loss_diff | 0.00123 | | loss_diff_q0 | 0.00321 | | loss_diff_q1 | 0.00127 | | loss_diff_q2 | 0.000357 | | loss_diff_q3 | 0.000109 | | loss_q0 | 0.00347 | | loss_q1 | 0.00128 | | loss_q2 | 0.000361 | | loss_q3 | 0.00011 | | param_norm | 243 | | samples | 1.45e+05 | | step | 1.81e+04 | | vb | 6.34e-05 | | vb_q0 | 0.000252 | | vb_q1 | 9.45e-06 | | vb_q2 | 3.25e-06 | | vb_q3 | 1.38e-06 | --------------------------- --------------------------- | grad_norm | 2.1 | | loss | 0.00108 | | loss_cal | 0.0348 | | loss_cal_q0 | 0.0362 | | loss_cal_q1 | 0.0361 | | loss_cal_q2 | 0.0311 | | loss_cal_q3 | 0.0364 | | loss_diff | 0.00104 | | loss_diff_q0 | 0.00248 | | loss_diff_q1 | 0.00116 | | loss_diff_q2 | 0.000327 | | loss_diff_q3 | 8.86e-05 | | loss_q0 | 0.00265 | | loss_q1 | 0.00117 | | loss_q2 | 0.00033 | | loss_q3 | 8.97e-05 | | param_norm | 243 | | samples | 1.46e+05 | | step | 1.82e+04 | | vb | 4.65e-05 | | vb_q0 | 0.00017 | | vb_q1 | 8.65e-06 | | vb_q2 | 2.94e-06 | | vb_q3 | 1.11e-06 | --------------------------- --------------------------- | grad_norm | 2.42 | | loss | 0.00202 | | loss_cal | 0.0367 | | loss_cal_q0 | 0.0369 | | loss_cal_q1 | 0.0318 | | loss_cal_q2 | 0.0396 | | loss_cal_q3 | 0.0379 | | loss_diff | 0.00132 | | loss_diff_q0 | 0.00357 | | loss_diff_q1 | 0.0012 | | loss_diff_q2 | 0.000394 | | loss_diff_q3 | 0.000132 | | loss_q0 | 0.00628 | | loss_q1 | 0.00121 | | loss_q2 | 0.000397 | | loss_q3 | 0.000134 | | param_norm | 243 | | samples | 1.46e+05 | | step | 1.83e+04 | | vb | 0.000696 | | vb_q0 | 0.00272 | | vb_q1 | 8.96e-06 | | vb_q2 | 3.58e-06 | | vb_q3 | 1.68e-06 | --------------------------- --------------------------- | grad_norm | 2.13 | | loss | 0.0103 | | loss_cal | 0.0348 | | loss_cal_q0 | 0.0365 | | loss_cal_q1 | 0.0314 | | loss_cal_q2 | 0.036 | | loss_cal_q3 | 0.0357 | | loss_diff | 0.00326 | | loss_diff_q0 | 0.0102 | | loss_diff_q1 | 0.00162 | | loss_diff_q2 | 0.000614 | | loss_diff_q3 | 0.000308 | | loss_q0 | 0.0371 | | loss_q1 | 0.00163 | | loss_q2 | 0.000619 | | loss_q3 | 0.000312 | | param_norm | 243 | | samples | 1.47e+05 | | step | 1.84e+04 | | vb | 0.007 | | vb_q0 | 0.0269 | | vb_q1 | 1.22e-05 | | vb_q2 | 5.6e-06 | | vb_q3 | 3.98e-06 | --------------------------- --------------------------- | grad_norm | 2.12 | | loss | 0.00267 | | loss_cal | 0.034 | | loss_cal_q0 | 0.0342 | | loss_cal_q1 | 0.0344 | | loss_cal_q2 | 0.0297 | | loss_cal_q3 | 0.0369 | | loss_diff | 0.00185 | | loss_diff_q0 | 0.00522 | | loss_diff_q1 | 0.00135 | | loss_diff_q2 | 0.00034 | | loss_diff_q3 | 0.000139 | | loss_q0 | 0.0083 | | loss_q1 | 0.00136 | | loss_q2 | 0.000343 | | loss_q3 | 0.000141 | | param_norm | 243 | | samples | 1.48e+05 | | step | 1.85e+04 | | vb | 0.000823 | | vb_q0 | 0.00308 | | vb_q1 | 1.02e-05 | | vb_q2 | 3.16e-06 | | vb_q3 | 1.74e-06 | --------------------------- --------------------------- | grad_norm | 2.54 | | loss | 0.0162 | | loss_cal | 0.0363 | | loss_cal_q0 | 0.0379 | | loss_cal_q1 | 0.0353 | | loss_cal_q2 | 0.0361 | | loss_cal_q3 | 0.0352 | | loss_diff | 0.00696 | | loss_diff_q0 | 0.0147 | | loss_diff_q1 | 0.00211 | | loss_diff_q2 | 0.000817 | | loss_diff_q3 | 0.000416 | | loss_q0 | 0.0336 | | loss_q1 | 0.00213 | | loss_q2 | 0.000824 | | loss_q3 | 0.000421 | | param_norm | 243 | | samples | 1.49e+05 | | step | 1.86e+04 | | vb | 0.00926 | | vb_q0 | 0.0189 | | vb_q1 | 1.63e-05 | | vb_q2 | 7.49e-06 | | vb_q3 | 5.33e-06 | --------------------------- --------------------------- | grad_norm | 2.13 | | loss | 0.00168 | | loss_cal | 0.0353 | | loss_cal_q0 | 0.0323 | | loss_cal_q1 | 0.0375 | | loss_cal_q2 | 0.0366 | | loss_cal_q3 | 0.035 | | loss_diff | 0.00156 | | loss_diff_q0 | 0.0043 | | loss_diff_q1 | 0.00133 | | loss_diff_q2 | 0.000422 | | loss_diff_q3 | 0.000158 | | loss_q0 | 0.00478 | | loss_q1 | 0.00134 | | loss_q2 | 0.000426 | | loss_q3 | 0.00016 | | param_norm | 244 | | samples | 1.5e+05 | | step | 1.87e+04 | | vb | 0.000124 | | vb_q0 | 0.000484 | | vb_q1 | 1.03e-05 | | vb_q2 | 3.87e-06 | | vb_q3 | 2.01e-06 | --------------------------- --------------------------- | grad_norm | 2.05 | | loss | 0.00136 | | loss_cal | 0.0349 | | loss_cal_q0 | 0.0366 | | loss_cal_q1 | 0.0333 | | loss_cal_q2 | 0.0338 | | loss_cal_q3 | 0.0358 | | loss_diff | 0.00123 | | loss_diff_q0 | 0.0032 | | loss_diff_q1 | 0.00128 | | loss_diff_q2 | 0.000329 | | loss_diff_q3 | 0.000113 | | loss_q0 | 0.00371 | | loss_q1 | 0.00129 | | loss_q2 | 0.000332 | | loss_q3 | 0.000114 | | param_norm | 244 | | samples | 1.5e+05 | | step | 1.88e+04 | | vb | 0.000137 | | vb_q0 | 0.000507 | | vb_q1 | 9.76e-06 | | vb_q2 | 3.02e-06 | | vb_q3 | 1.44e-06 | --------------------------- --------------------------- | grad_norm | 2.12 | | loss | 0.00209 | | loss_cal | 0.035 | | loss_cal_q0 | 0.0365 | | loss_cal_q1 | 0.0361 | | loss_cal_q2 | 0.0339 | | loss_cal_q3 | 0.0326 | | loss_diff | 0.00184 | | loss_diff_q0 | 0.00516 | | loss_diff_q1 | 0.00127 | | loss_diff_q2 | 0.000364 | | loss_diff_q3 | 0.000139 | | loss_q0 | 0.00608 | | loss_q1 | 0.00128 | | loss_q2 | 0.000367 | | loss_q3 | 0.000141 | | param_norm | 244 | | samples | 1.51e+05 | | step | 1.89e+04 | | vb | 0.000252 | | vb_q0 | 0.000921 | | vb_q1 | 9.84e-06 | | vb_q2 | 3.36e-06 | | vb_q3 | 1.78e-06 | --------------------------- --------------------------- | grad_norm | 2.21 | | loss | 0.0012 | | loss_cal | 0.0346 | | loss_cal_q0 | 0.035 | | loss_cal_q1 | 0.0337 | | loss_cal_q2 | 0.0344 | | loss_cal_q3 | 0.0333 | | loss_diff | 0.00115 | | loss_diff_q0 | 0.00282 | | loss_diff_q1 | 0.00115 | | loss_diff_q2 | 0.000304 | | loss_diff_q3 | 9.11e-05 | | loss_q0 | 0.00302 | | loss_q1 | 0.00116 | | loss_q2 | 0.000306 | | loss_q3 | 9.23e-05 | | param_norm | 244 | | samples | 1.52e+05 | | step | 1.9e+04 | | vb | 5.62e-05 | | vb_q0 | 0.0002 | | vb_q1 | 8.69e-06 | | vb_q2 | 2.79e-06 | | vb_q3 | 1.15e-06 | --------------------------- --------------------------- | grad_norm | 2.16 | | loss | 0.00276 | | loss_cal | 0.0339 | | loss_cal_q0 | 0.0359 | | loss_cal_q1 | 0.0356 | | loss_cal_q2 | 0.0324 | | loss_cal_q3 | 0.0313 | | loss_diff | 0.00121 | | loss_diff_q0 | 0.00327 | | loss_diff_q1 | 0.00115 | | loss_diff_q2 | 0.000314 | | loss_diff_q3 | 9.22e-05 | | loss_q0 | 0.00941 | | loss_q1 | 0.00115 | | loss_q2 | 0.000317 | | loss_q3 | 9.34e-05 | | param_norm | 244 | | samples | 1.53e+05 | | step | 1.91e+04 | | vb | 0.00155 | | vb_q0 | 0.00614 | | vb_q1 | 8.58e-06 | | vb_q2 | 2.84e-06 | | vb_q3 | 1.16e-06 | --------------------------- --------------------------- | grad_norm | 2.08 | | loss | 0.00202 | | loss_cal | 0.0346 | | loss_cal_q0 | 0.0306 | | loss_cal_q1 | 0.038 | | loss_cal_q2 | 0.0343 | | loss_cal_q3 | 0.036 | | loss_diff | 0.00129 | | loss_diff_q0 | 0.00335 | | loss_diff_q1 | 0.00121 | | loss_diff_q2 | 0.000318 | | loss_diff_q3 | 0.000109 | | loss_q0 | 0.00609 | | loss_q1 | 0.00122 | | loss_q2 | 0.000321 | | loss_q3 | 0.00011 | | param_norm | 244 | | samples | 1.54e+05 | | step | 1.92e+04 | | vb | 0.000735 | | vb_q0 | 0.00274 | | vb_q1 | 9.02e-06 | | vb_q2 | 2.92e-06 | | vb_q3 | 1.41e-06 | --------------------------- --------------------------- | grad_norm | 2.13 | | loss | 0.000993 | | loss_cal | 0.0335 | | loss_cal_q0 | 0.0359 | | loss_cal_q1 | 0.0362 | | loss_cal_q2 | 0.0312 | | loss_cal_q3 | 0.0307 | | loss_diff | 0.000971 | | loss_diff_q0 | 0.00259 | | loss_diff_q1 | 0.00111 | | loss_diff_q2 | 0.000284 | | loss_diff_q3 | 7.34e-05 | | loss_q0 | 0.00268 | | loss_q1 | 0.00112 | | loss_q2 | 0.000287 | | loss_q3 | 7.43e-05 | | param_norm | 244 | | samples | 1.54e+05 | | step | 1.93e+04 | | vb | 2.2e-05 | | vb_q0 | 8.42e-05 | | vb_q1 | 8.28e-06 | | vb_q2 | 2.58e-06 | | vb_q3 | 9.24e-07 | --------------------------- --------------------------- | grad_norm | 2.24 | | loss | 0.00117 | | loss_cal | 0.0357 | | loss_cal_q0 | 0.0317 | | loss_cal_q1 | 0.0389 | | loss_cal_q2 | 0.0372 | | loss_cal_q3 | 0.0361 | | loss_diff | 0.0011 | | loss_diff_q0 | 0.00258 | | loss_diff_q1 | 0.00114 | | loss_diff_q2 | 0.000287 | | loss_diff_q3 | 7.81e-05 | | loss_q0 | 0.00283 | | loss_q1 | 0.00115 | | loss_q2 | 0.00029 | | loss_q3 | 7.91e-05 | | param_norm | 244 | | samples | 1.55e+05 | | step | 1.94e+04 | | vb | 7.37e-05 | | vb_q0 | 0.000251 | | vb_q1 | 8.53e-06 | | vb_q2 | 2.64e-06 | | vb_q3 | 9.86e-07 | --------------------------- --------------------------- | grad_norm | 2.24 | | loss | 0.00241 | | loss_cal | 0.0361 | | loss_cal_q0 | 0.0341 | | loss_cal_q1 | 0.0418 | | loss_cal_q2 | 0.0357 | | loss_cal_q3 | 0.0326 | | loss_diff | 0.00132 | | loss_diff_q0 | 0.00346 | | loss_diff_q1 | 0.00129 | | loss_diff_q2 | 0.000321 | | loss_diff_q3 | 0.000103 | | loss_q0 | 0.00772 | | loss_q1 | 0.0013 | | loss_q2 | 0.000324 | | loss_q3 | 0.000104 | | param_norm | 244 | | samples | 1.56e+05 | | step | 1.95e+04 | | vb | 0.00108 | | vb_q0 | 0.00426 | | vb_q1 | 9.78e-06 | | vb_q2 | 2.97e-06 | | vb_q3 | 1.29e-06 | --------------------------- --------------------------- | grad_norm | 2.07 | | loss | 0.00302 | | loss_cal | 0.0329 | | loss_cal_q0 | 0.0322 | | loss_cal_q1 | 0.0352 | | loss_cal_q2 | 0.032 | | loss_cal_q3 | 0.0325 | | loss_diff | 0.00171 | | loss_diff_q0 | 0.00496 | | loss_diff_q1 | 0.00145 | | loss_diff_q2 | 0.000455 | | loss_diff_q3 | 0.000214 | | loss_q0 | 0.0106 | | loss_q1 | 0.00146 | | loss_q2 | 0.000459 | | loss_q3 | 0.000217 | | param_norm | 244 | | samples | 1.57e+05 | | step | 1.96e+04 | | vb | 0.00131 | | vb_q0 | 0.00559 | | vb_q1 | 1.11e-05 | | vb_q2 | 4.18e-06 | | vb_q3 | 2.73e-06 | --------------------------- --------------------------- | grad_norm | 2.04 | | loss | 0.00183 | | loss_cal | 0.0339 | | loss_cal_q0 | 0.0292 | | loss_cal_q1 | 0.0334 | | loss_cal_q2 | 0.0409 | | loss_cal_q3 | 0.0328 | | loss_diff | 0.00148 | | loss_diff_q0 | 0.00414 | | loss_diff_q1 | 0.00127 | | loss_diff_q2 | 0.000431 | | loss_diff_q3 | 0.000152 | | loss_q0 | 0.00555 | | loss_q1 | 0.00128 | | loss_q2 | 0.000434 | | loss_q3 | 0.000153 | | param_norm | 244 | | samples | 1.58e+05 | | step | 1.97e+04 | | vb | 0.000351 | | vb_q0 | 0.00141 | | vb_q1 | 9.73e-06 | | vb_q2 | 3.94e-06 | | vb_q3 | 1.96e-06 | --------------------------- --------------------------- | grad_norm | 2.18 | | loss | 0.00178 | | loss_cal | 0.0351 | | loss_cal_q0 | 0.0339 | | loss_cal_q1 | 0.0374 | | loss_cal_q2 | 0.0337 | | loss_cal_q3 | 0.0351 | | loss_diff | 0.00121 | | loss_diff_q0 | 0.00317 | | loss_diff_q1 | 0.00117 | | loss_diff_q2 | 0.000319 | | loss_diff_q3 | 9.98e-05 | | loss_q0 | 0.0054 | | loss_q1 | 0.00118 | | loss_q2 | 0.000322 | | loss_q3 | 0.000101 | | param_norm | 244 | | samples | 1.58e+05 | | step | 1.98e+04 | | vb | 0.000566 | | vb_q0 | 0.00223 | | vb_q1 | 8.78e-06 | | vb_q2 | 2.91e-06 | | vb_q3 | 1.26e-06 | --------------------------- --------------------------- | grad_norm | 1.87 | | loss | 0.00184 | | loss_cal | 0.0319 | | loss_cal_q0 | 0.0322 | | loss_cal_q1 | 0.0333 | | loss_cal_q2 | 0.0312 | | loss_cal_q3 | 0.0309 | | loss_diff | 0.00127 | | loss_diff_q0 | 0.00331 | | loss_diff_q1 | 0.00118 | | loss_diff_q2 | 0.000327 | | loss_diff_q3 | 0.000109 | | loss_q0 | 0.00543 | | loss_q1 | 0.00118 | | loss_q2 | 0.00033 | | loss_q3 | 0.00011 | | param_norm | 244 | | samples | 1.59e+05 | | step | 1.99e+04 | | vb | 0.000567 | | vb_q0 | 0.00213 | | vb_q1 | 8.77e-06 | | vb_q2 | 2.96e-06 | | vb_q3 | 1.38e-06 | --------------------------- --------------------------- | grad_norm | 2.29 | | loss | 0.00111 | | loss_cal | 0.0324 | | loss_cal_q0 | 0.0318 | | loss_cal_q1 | 0.0314 | | loss_cal_q2 | 0.0316 | | loss_cal_q3 | 0.0356 | | loss_diff | 0.00107 | | loss_diff_q0 | 0.00252 | | loss_diff_q1 | 0.00117 | | loss_diff_q2 | 0.000316 | | loss_diff_q3 | 9.41e-05 | | loss_q0 | 0.00264 | | loss_q1 | 0.00118 | | loss_q2 | 0.000319 | | loss_q3 | 9.53e-05 | | param_norm | 244 | | samples | 1.6e+05 | | step | 2e+04 | | vb | 3.37e-05 | | vb_q0 | 0.000118 | | vb_q1 | 8.72e-06 | | vb_q2 | 2.87e-06 | | vb_q3 | 1.18e-06 | --------------------------- saving model 0... saving model 0.9999... --------------------------- | grad_norm | 1.93 | | loss | 0.00249 | | loss_cal | 0.0318 | | loss_cal_q0 | 0.0304 | | loss_cal_q1 | 0.0331 | | loss_cal_q2 | 0.0317 | | loss_cal_q3 | 0.0322 | | loss_diff | 0.0015 | | loss_diff_q0 | 0.00426 | | loss_diff_q1 | 0.00126 | | loss_diff_q2 | 0.000369 | | loss_diff_q3 | 0.000139 | | loss_q0 | 0.00827 | | loss_q1 | 0.00127 | | loss_q2 | 0.000372 | | loss_q3 | 0.000141 | | param_norm | 244 | | samples | 1.61e+05 | | step | 2.01e+04 | | vb | 0.000995 | | vb_q0 | 0.00401 | | vb_q1 | 9.36e-06 | | vb_q2 | 3.33e-06 | | vb_q3 | 1.76e-06 | --------------------------- --------------------------- | grad_norm | 2.18 | | loss | 0.00105 | | loss_cal | 0.0335 | | loss_cal_q0 | 0.034 | | loss_cal_q1 | 0.0317 | | loss_cal_q2 | 0.0339 | | loss_cal_q3 | 0.0343 | | loss_diff | 0.001 | | loss_diff_q0 | 0.00261 | | loss_diff_q1 | 0.00118 | | loss_diff_q2 | 0.000324 | | loss_diff_q3 | 7.93e-05 | | loss_q0 | 0.00283 | | loss_q1 | 0.00119 | | loss_q2 | 0.000326 | | loss_q3 | 8.03e-05 | | param_norm | 244 | | samples | 1.62e+05 | | step | 2.02e+04 | | vb | 5.11e-05 | | vb_q0 | 0.000215 | | vb_q1 | 8.83e-06 | | vb_q2 | 2.89e-06 | | vb_q3 | 1.01e-06 | --------------------------- --------------------------- | grad_norm | 1.94 | | loss | 0.00106 | | loss_cal | 0.0342 | | loss_cal_q0 | 0.0321 | | loss_cal_q1 | 0.0339 | | loss_cal_q2 | 0.036 | | loss_cal_q3 | 0.0354 | | loss_diff | 0.00104 | | loss_diff_q0 | 0.0025 | | loss_diff_q1 | 0.00115 | | loss_diff_q2 | 0.000318 | | loss_diff_q3 | 8.9e-05 | | loss_q0 | 0.00257 | | loss_q1 | 0.00116 | | loss_q2 | 0.00032 | | loss_q3 | 9.01e-05 | | param_norm | 244 | | samples | 1.62e+05 | | step | 2.03e+04 | | vb | 2.08e-05 | | vb_q0 | 7e-05 | | vb_q1 | 8.56e-06 | | vb_q2 | 2.86e-06 | | vb_q3 | 1.12e-06 | --------------------------- --------------------------- | grad_norm | 1.9 | | loss | 0.000953 | | loss_cal | 0.032 | | loss_cal_q0 | 0.0344 | | loss_cal_q1 | 0.028 | | loss_cal_q2 | 0.0369 | | loss_cal_q3 | 0.0291 | | loss_diff | 0.000937 | | loss_diff_q0 | 0.00206 | | loss_diff_q1 | 0.00118 | | loss_diff_q2 | 0.000293 | | loss_diff_q3 | 6.9e-05 | | loss_q0 | 0.0021 | | loss_q1 | 0.00119 | | loss_q2 | 0.000296 | | loss_q3 | 6.99e-05 | | param_norm | 244 | | samples | 1.63e+05 | | step | 2.04e+04 | | vb | 1.55e-05 | | vb_q0 | 4.57e-05 | | vb_q1 | 8.77e-06 | | vb_q2 | 2.66e-06 | | vb_q3 | 8.84e-07 | --------------------------- --------------------------- | grad_norm | 1.85 | | loss | 0.00232 | | loss_cal | 0.0326 | | loss_cal_q0 | 0.0341 | | loss_cal_q1 | 0.0302 | | loss_cal_q2 | 0.0353 | | loss_cal_q3 | 0.0313 | | loss_diff | 0.0015 | | loss_diff_q0 | 0.00385 | | loss_diff_q1 | 0.00128 | | loss_diff_q2 | 0.000369 | | loss_diff_q3 | 0.000149 | | loss_q0 | 0.00688 | | loss_q1 | 0.00129 | | loss_q2 | 0.000372 | | loss_q3 | 0.000151 | | param_norm | 244 | | samples | 1.64e+05 | | step | 2.05e+04 | | vb | 0.000821 | | vb_q0 | 0.00303 | | vb_q1 | 9.5e-06 | | vb_q2 | 3.37e-06 | | vb_q3 | 1.89e-06 | --------------------------- --------------------------- | grad_norm | 2.32 | | loss | 0.0013 | | loss_cal | 0.0327 | | loss_cal_q0 | 0.0341 | | loss_cal_q1 | 0.0325 | | loss_cal_q2 | 0.0307 | | loss_cal_q3 | 0.0316 | | loss_diff | 0.00119 | | loss_diff_q0 | 0.0031 | | loss_diff_q1 | 0.00114 | | loss_diff_q2 | 0.000307 | | loss_diff_q3 | 0.000104 | | loss_q0 | 0.00352 | | loss_q1 | 0.00115 | | loss_q2 | 0.00031 | | loss_q3 | 0.000106 | | param_norm | 244 | | samples | 1.65e+05 | | step | 2.06e+04 | | vb | 0.00011 | | vb_q0 | 0.000419 | | vb_q1 | 8.52e-06 | | vb_q2 | 2.81e-06 | | vb_q3 | 1.33e-06 | --------------------------- --------------------------- | grad_norm | 1.9 | | loss | 0.00221 | | loss_cal | 0.0327 | | loss_cal_q0 | 0.0293 | | loss_cal_q1 | 0.0333 | | loss_cal_q2 | 0.0327 | | loss_cal_q3 | 0.0359 | | loss_diff | 0.00151 | | loss_diff_q0 | 0.00365 | | loss_diff_q1 | 0.00128 | | loss_diff_q2 | 0.000357 | | loss_diff_q3 | 0.000133 | | loss_q0 | 0.00613 | | loss_q1 | 0.00129 | | loss_q2 | 0.000361 | | loss_q3 | 0.000134 | | param_norm | 245 | | samples | 1.66e+05 | | step | 2.07e+04 | | vb | 0.000705 | | vb_q0 | 0.00248 | | vb_q1 | 9.52e-06 | | vb_q2 | 3.24e-06 | | vb_q3 | 1.66e-06 | --------------------------- --------------------------- | grad_norm | 1.96 | | loss | 0.0011 | | loss_cal | 0.0322 | | loss_cal_q0 | 0.0303 | | loss_cal_q1 | 0.0301 | | loss_cal_q2 | 0.0325 | | loss_cal_q3 | 0.035 | | loss_diff | 0.00108 | | loss_diff_q0 | 0.00267 | | loss_diff_q1 | 0.00125 | | loss_diff_q2 | 0.00033 | | loss_diff_q3 | 0.000117 | | loss_q0 | 0.00277 | | loss_q1 | 0.00126 | | loss_q2 | 0.000333 | | loss_q3 | 0.000119 | | param_norm | 245 | | samples | 1.66e+05 | | step | 2.08e+04 | | vb | 2.77e-05 | | vb_q0 | 9.75e-05 | | vb_q1 | 9.29e-06 | | vb_q2 | 2.98e-06 | | vb_q3 | 1.49e-06 | --------------------------- --------------------------- | grad_norm | 1.97 | | loss | 0.00154 | | loss_cal | 0.0335 | | loss_cal_q0 | 0.0342 | | loss_cal_q1 | 0.0333 | | loss_cal_q2 | 0.0321 | | loss_cal_q3 | 0.0344 | | loss_diff | 0.00104 | | loss_diff_q0 | 0.003 | | loss_diff_q1 | 0.00111 | | loss_diff_q2 | 0.000303 | | loss_diff_q3 | 8.67e-05 | | loss_q0 | 0.00526 | | loss_q1 | 0.00112 | | loss_q2 | 0.000306 | | loss_q3 | 8.78e-05 | | param_norm | 245 | | samples | 1.67e+05 | | step | 2.09e+04 | | vb | 0.000498 | | vb_q0 | 0.00226 | | vb_q1 | 8.28e-06 | | vb_q2 | 2.71e-06 | | vb_q3 | 1.1e-06 | --------------------------- --------------------------- | grad_norm | 1.92 | | loss | 0.00328 | | loss_cal | 0.0319 | | loss_cal_q0 | 0.0309 | | loss_cal_q1 | 0.0294 | | loss_cal_q2 | 0.036 | | loss_cal_q3 | 0.0321 | | loss_diff | 0.00133 | | loss_diff_q0 | 0.00357 | | loss_diff_q1 | 0.00112 | | loss_diff_q2 | 0.000344 | | loss_diff_q3 | 9.98e-05 | | loss_q0 | 0.011 | | loss_q1 | 0.00113 | | loss_q2 | 0.000347 | | loss_q3 | 0.000101 | | param_norm | 245 | | samples | 1.68e+05 | | step | 2.1e+04 | | vb | 0.00195 | | vb_q0 | 0.00743 | | vb_q1 | 8.35e-06 | | vb_q2 | 3.1e-06 | | vb_q3 | 1.26e-06 | --------------------------- --------------------------- | grad_norm | 2.12 | | loss | 0.00257 | | loss_cal | 0.032 | | loss_cal_q0 | 0.0319 | | loss_cal_q1 | 0.0316 | | loss_cal_q2 | 0.0294 | | loss_cal_q3 | 0.0358 | | loss_diff | 0.00152 | | loss_diff_q0 | 0.00446 | | loss_diff_q1 | 0.00121 | | loss_diff_q2 | 0.000351 | | loss_diff_q3 | 0.000162 | | loss_q0 | 0.0087 | | loss_q1 | 0.00122 | | loss_q2 | 0.000354 | | loss_q3 | 0.000164 | | param_norm | 245 | | samples | 1.69e+05 | | step | 2.11e+04 | | vb | 0.00105 | | vb_q0 | 0.00424 | | vb_q1 | 9.05e-06 | | vb_q2 | 3.19e-06 | | vb_q3 | 2.06e-06 | --------------------------- --------------------------- | grad_norm | 2.28 | | loss | 0.00227 | | loss_cal | 0.0347 | | loss_cal_q0 | 0.0346 | | loss_cal_q1 | 0.0323 | | loss_cal_q2 | 0.0348 | | loss_cal_q3 | 0.0362 | | loss_diff | 0.00154 | | loss_diff_q0 | 0.00464 | | loss_diff_q1 | 0.00128 | | loss_diff_q2 | 0.00036 | | loss_diff_q3 | 0.000147 | | loss_q0 | 0.0077 | | loss_q1 | 0.00129 | | loss_q2 | 0.000363 | | loss_q3 | 0.000149 | | param_norm | 245 | | samples | 1.7e+05 | | step | 2.12e+04 | | vb | 0.000727 | | vb_q0 | 0.00306 | | vb_q1 | 9.51e-06 | | vb_q2 | 3.28e-06 | | vb_q3 | 1.86e-06 | --------------------------- --------------------------- | grad_norm | 2 | | loss | 0.00121 | | loss_cal | 0.0317 | | loss_cal_q0 | 0.0309 | | loss_cal_q1 | 0.0315 | | loss_cal_q2 | 0.0318 | | loss_cal_q3 | 0.031 | | loss_diff | 0.00101 | | loss_diff_q0 | 0.00269 | | loss_diff_q1 | 0.00113 | | loss_diff_q2 | 0.000329 | | loss_diff_q3 | 7.55e-05 | | loss_q0 | 0.0035 | | loss_q1 | 0.00114 | | loss_q2 | 0.000332 | | loss_q3 | 7.65e-05 | | param_norm | 245 | | samples | 1.7e+05 | | step | 2.13e+04 | | vb | 0.000194 | | vb_q0 | 0.000815 | | vb_q1 | 8.43e-06 | | vb_q2 | 2.93e-06 | | vb_q3 | 9.65e-07 | --------------------------- --------------------------- | grad_norm | 1.81 | | loss | 0.0012 | | loss_cal | 0.0333 | | loss_cal_q0 | 0.0316 | | loss_cal_q1 | 0.0313 | | loss_cal_q2 | 0.0365 | | loss_cal_q3 | 0.0342 | | loss_diff | 0.00104 | | loss_diff_q0 | 0.00259 | | loss_diff_q1 | 0.00111 | | loss_diff_q2 | 0.00031 | | loss_diff_q3 | 7.13e-05 | | loss_q0 | 0.00317 | | loss_q1 | 0.00112 | | loss_q2 | 0.000312 | | loss_q3 | 7.22e-05 | | param_norm | 245 | | samples | 1.71e+05 | | step | 2.14e+04 | | vb | 0.000158 | | vb_q0 | 0.000582 | | vb_q1 | 8.32e-06 | | vb_q2 | 2.81e-06 | | vb_q3 | 9.18e-07 | --------------------------- --------------------------- | grad_norm | 2.05 | | loss | 0.000927 | | loss_cal | 0.0328 | | loss_cal_q0 | 0.0333 | | loss_cal_q1 | 0.0307 | | loss_cal_q2 | 0.0307 | | loss_cal_q3 | 0.0364 | | loss_diff | 0.000905 | | loss_diff_q0 | 0.00237 | | loss_diff_q1 | 0.00108 | | loss_diff_q2 | 0.000284 | | loss_diff_q3 | 8.45e-05 | | loss_q0 | 0.00245 | | loss_q1 | 0.00109 | | loss_q2 | 0.000287 | | loss_q3 | 8.56e-05 | | param_norm | 245 | | samples | 1.72e+05 | | step | 2.15e+04 | | vb | 2.18e-05 | | vb_q0 | 8.1e-05 | | vb_q1 | 8.05e-06 | | vb_q2 | 2.6e-06 | | vb_q3 | 1.05e-06 | --------------------------- --------------------------- | grad_norm | 1.92 | | loss | 0.0011 | | loss_cal | 0.0307 | | loss_cal_q0 | 0.0291 | | loss_cal_q1 | 0.0322 | | loss_cal_q2 | 0.0322 | | loss_cal_q3 | 0.0296 | | loss_diff | 0.00104 | | loss_diff_q0 | 0.00255 | | loss_diff_q1 | 0.00117 | | loss_diff_q2 | 0.00031 | | loss_diff_q3 | 8.18e-05 | | loss_q0 | 0.00277 | | loss_q1 | 0.00117 | | loss_q2 | 0.000312 | | loss_q3 | 8.29e-05 | | param_norm | 245 | | samples | 1.73e+05 | | step | 2.16e+04 | | vb | 6.05e-05 | | vb_q0 | 0.00022 | | vb_q1 | 8.68e-06 | | vb_q2 | 2.77e-06 | | vb_q3 | 1.03e-06 | --------------------------- --------------------------- | grad_norm | 1.92 | | loss | 0.00114 | | loss_cal | 0.0321 | | loss_cal_q0 | 0.0291 | | loss_cal_q1 | 0.031 | | loss_cal_q2 | 0.0335 | | loss_cal_q3 | 0.0331 | | loss_diff | 0.000996 | | loss_diff_q0 | 0.00251 | | loss_diff_q1 | 0.00117 | | loss_diff_q2 | 0.000315 | | loss_diff_q3 | 7.17e-05 | | loss_q0 | 0.00312 | | loss_q1 | 0.00118 | | loss_q2 | 0.000318 | | loss_q3 | 7.26e-05 | | param_norm | 245 | | samples | 1.74e+05 | | step | 2.17e+04 | | vb | 0.000148 | | vb_q0 | 0.000613 | | vb_q1 | 8.73e-06 | | vb_q2 | 2.83e-06 | | vb_q3 | 9.08e-07 | --------------------------- --------------------------- | grad_norm | 1.91 | | loss | 0.00118 | | loss_cal | 0.0319 | | loss_cal_q0 | 0.0308 | | loss_cal_q1 | 0.0343 | | loss_cal_q2 | 0.0339 | | loss_cal_q3 | 0.0286 | | loss_diff | 0.00111 | | loss_diff_q0 | 0.00305 | | loss_diff_q1 | 0.00111 | | loss_diff_q2 | 0.000321 | | loss_diff_q3 | 9.6e-05 | | loss_q0 | 0.00334 | | loss_q1 | 0.00112 | | loss_q2 | 0.000324 | | loss_q3 | 9.72e-05 | | param_norm | 245 | | samples | 1.74e+05 | | step | 2.18e+04 | | vb | 7.12e-05 | | vb_q0 | 0.000285 | | vb_q1 | 8.21e-06 | | vb_q2 | 2.91e-06 | | vb_q3 | 1.22e-06 | --------------------------- --------------------------- | grad_norm | 2.06 | | loss | 0.00113 | | loss_cal | 0.0323 | | loss_cal_q0 | 0.0335 | | loss_cal_q1 | 0.0309 | | loss_cal_q2 | 0.0291 | | loss_cal_q3 | 0.0358 | | loss_diff | 0.00109 | | loss_diff_q0 | 0.00264 | | loss_diff_q1 | 0.00111 | | loss_diff_q2 | 0.000304 | | loss_diff_q3 | 8.9e-05 | | loss_q0 | 0.00277 | | loss_q1 | 0.00112 | | loss_q2 | 0.000306 | | loss_q3 | 9.01e-05 | | param_norm | 245 | | samples | 1.75e+05 | | step | 2.19e+04 | | vb | 3.78e-05 | | vb_q0 | 0.000131 | | vb_q1 | 8.2e-06 | | vb_q2 | 2.75e-06 | | vb_q3 | 1.13e-06 | --------------------------- --------------------------- | grad_norm | 1.98 | | loss | 0.00111 | | loss_cal | 0.033 | | loss_cal_q0 | 0.0303 | | loss_cal_q1 | 0.0295 | | loss_cal_q2 | 0.0347 | | loss_cal_q3 | 0.0343 | | loss_diff | 0.00104 | | loss_diff_q0 | 0.00252 | | loss_diff_q1 | 0.00118 | | loss_diff_q2 | 0.000327 | | loss_diff_q3 | 8.8e-05 | | loss_q0 | 0.00276 | | loss_q1 | 0.00119 | | loss_q2 | 0.00033 | | loss_q3 | 8.92e-05 | | param_norm | 245 | | samples | 1.76e+05 | | step | 2.2e+04 | | vb | 6.71e-05 | | vb_q0 | 0.000242 | | vb_q1 | 8.76e-06 | | vb_q2 | 2.95e-06 | | vb_q3 | 1.13e-06 | --------------------------- --------------------------- | grad_norm | 1.83 | | loss | 0.00131 | | loss_cal | 0.0295 | | loss_cal_q0 | 0.0281 | | loss_cal_q1 | 0.0294 | | loss_cal_q2 | 0.0316 | | loss_cal_q3 | 0.0292 | | loss_diff | 0.00114 | | loss_diff_q0 | 0.00287 | | loss_diff_q1 | 0.00118 | | loss_diff_q2 | 0.000305 | | loss_diff_q3 | 8.69e-05 | | loss_q0 | 0.00353 | | loss_q1 | 0.00119 | | loss_q2 | 0.000308 | | loss_q3 | 8.8e-05 | | param_norm | 245 | | samples | 1.77e+05 | | step | 2.21e+04 | | vb | 0.000173 | | vb_q0 | 0.000656 | | vb_q1 | 8.81e-06 | | vb_q2 | 2.75e-06 | | vb_q3 | 1.1e-06 | --------------------------- --------------------------- | grad_norm | 2.35 | | loss | 0.000996 | | loss_cal | 0.0325 | | loss_cal_q0 | 0.0355 | | loss_cal_q1 | 0.0288 | | loss_cal_q2 | 0.0356 | | loss_cal_q3 | 0.0298 | | loss_diff | 0.000969 | | loss_diff_q0 | 0.00241 | | loss_diff_q1 | 0.00101 | | loss_diff_q2 | 0.000289 | | loss_diff_q3 | 7.77e-05 | | loss_q0 | 0.0025 | | loss_q1 | 0.00102 | | loss_q2 | 0.000292 | | loss_q3 | 7.87e-05 | | param_norm | 245 | | samples | 1.78e+05 | | step | 2.22e+04 | | vb | 2.7e-05 | | vb_q0 | 9.1e-05 | | vb_q1 | 7.5e-06 | | vb_q2 | 2.63e-06 | | vb_q3 | 9.84e-07 | --------------------------- --------------------------- | grad_norm | 1.99 | | loss | 0.00158 | | loss_cal | 0.0323 | | loss_cal_q0 | 0.0361 | | loss_cal_q1 | 0.032 | | loss_cal_q2 | 0.0326 | | loss_cal_q3 | 0.029 | | loss_diff | 0.00133 | | loss_diff_q0 | 0.00355 | | loss_diff_q1 | 0.00118 | | loss_diff_q2 | 0.000323 | | loss_diff_q3 | 0.000114 | | loss_q0 | 0.0045 | | loss_q1 | 0.00119 | | loss_q2 | 0.000326 | | loss_q3 | 0.000116 | | param_norm | 245 | | samples | 1.78e+05 | | step | 2.23e+04 | | vb | 0.000256 | | vb_q0 | 0.000953 | | vb_q1 | 8.75e-06 | | vb_q2 | 2.96e-06 | | vb_q3 | 1.45e-06 | --------------------------- --------------------------- | grad_norm | 1.97 | | loss | 0.00161 | | loss_cal | 0.0318 | | loss_cal_q0 | 0.0334 | | loss_cal_q1 | 0.0301 | | loss_cal_q2 | 0.029 | | loss_cal_q3 | 0.0336 | | loss_diff | 0.00119 | | loss_diff_q0 | 0.00331 | | loss_diff_q1 | 0.00113 | | loss_diff_q2 | 0.000331 | | loss_diff_q3 | 0.000108 | | loss_q0 | 0.00504 | | loss_q1 | 0.00114 | | loss_q2 | 0.000334 | | loss_q3 | 0.000109 | | param_norm | 245 | | samples | 1.79e+05 | | step | 2.24e+04 | | vb | 0.000415 | | vb_q0 | 0.00172 | | vb_q1 | 8.33e-06 | | vb_q2 | 2.99e-06 | | vb_q3 | 1.37e-06 | --------------------------- --------------------------- | grad_norm | 1.65 | | loss | 0.000978 | | loss_cal | 0.0306 | | loss_cal_q0 | 0.0302 | | loss_cal_q1 | 0.0297 | | loss_cal_q2 | 0.0328 | | loss_cal_q3 | 0.0295 | | loss_diff | 0.000942 | | loss_diff_q0 | 0.00239 | | loss_diff_q1 | 0.0011 | | loss_diff_q2 | 0.000286 | | loss_diff_q3 | 7.41e-05 | | loss_q0 | 0.00253 | | loss_q1 | 0.0011 | | loss_q2 | 0.000289 | | loss_q3 | 7.51e-05 | | param_norm | 245 | | samples | 1.8e+05 | | step | 2.25e+04 | | vb | 3.62e-05 | | vb_q0 | 0.000138 | | vb_q1 | 8.14e-06 | | vb_q2 | 2.62e-06 | | vb_q3 | 9.33e-07 | --------------------------- --------------------------- | grad_norm | 2.15 | | loss | 0.00112 | | loss_cal | 0.0315 | | loss_cal_q0 | 0.0299 | | loss_cal_q1 | 0.0309 | | loss_cal_q2 | 0.0318 | | loss_cal_q3 | 0.0334 | | loss_diff | 0.000961 | | loss_diff_q0 | 0.00233 | | loss_diff_q1 | 0.00112 | | loss_diff_q2 | 0.000296 | | loss_diff_q3 | 7.96e-05 | | loss_q0 | 0.00299 | | loss_q1 | 0.00113 | | loss_q2 | 0.000299 | | loss_q3 | 8.07e-05 | | param_norm | 245 | | samples | 1.81e+05 | | step | 2.26e+04 | | vb | 0.000163 | | vb_q0 | 0.000662 | | vb_q1 | 8.26e-06 | | vb_q2 | 2.64e-06 | | vb_q3 | 1.01e-06 | --------------------------- --------------------------- | grad_norm | 1.8 | | loss | 0.00126 | | loss_cal | 0.0308 | | loss_cal_q0 | 0.0326 | | loss_cal_q1 | 0.0317 | | loss_cal_q2 | 0.0308 | | loss_cal_q3 | 0.028 | | loss_diff | 0.00119 | | loss_diff_q0 | 0.00296 | | loss_diff_q1 | 0.00121 | | loss_diff_q2 | 0.000314 | | loss_diff_q3 | 0.000109 | | loss_q0 | 0.0032 | | loss_q1 | 0.00122 | | loss_q2 | 0.000317 | | loss_q3 | 0.00011 | | param_norm | 246 | | samples | 1.82e+05 | | step | 2.27e+04 | | vb | 6.63e-05 | | vb_q0 | 0.000238 | | vb_q1 | 9.01e-06 | | vb_q2 | 2.88e-06 | | vb_q3 | 1.4e-06 | --------------------------- --------------------------- | grad_norm | 1.91 | | loss | 0.000997 | | loss_cal | 0.0319 | | loss_cal_q0 | 0.0274 | | loss_cal_q1 | 0.0337 | | loss_cal_q2 | 0.0334 | | loss_cal_q3 | 0.0331 | | loss_diff | 0.000943 | | loss_diff_q0 | 0.0025 | | loss_diff_q1 | 0.00105 | | loss_diff_q2 | 0.000301 | | loss_diff_q3 | 8.15e-05 | | loss_q0 | 0.00271 | | loss_q1 | 0.00106 | | loss_q2 | 0.000304 | | loss_q3 | 8.25e-05 | | param_norm | 246 | | samples | 1.82e+05 | | step | 2.28e+04 | | vb | 5.31e-05 | | vb_q0 | 0.000214 | | vb_q1 | 7.78e-06 | | vb_q2 | 2.71e-06 | | vb_q3 | 1.04e-06 | --------------------------- --------------------------- | grad_norm | 1.68 | | loss | 0.00105 | | loss_cal | 0.0304 | | loss_cal_q0 | 0.0274 | | loss_cal_q1 | 0.0287 | | loss_cal_q2 | 0.034 | | loss_cal_q3 | 0.0326 | | loss_diff | 0.000996 | | loss_diff_q0 | 0.00233 | | loss_diff_q1 | 0.00112 | | loss_diff_q2 | 0.000288 | | loss_diff_q3 | 7.13e-05 | | loss_q0 | 0.00251 | | loss_q1 | 0.00112 | | loss_q2 | 0.00029 | | loss_q3 | 7.22e-05 | | param_norm | 246 | | samples | 1.83e+05 | | step | 2.29e+04 | | vb | 4.94e-05 | | vb_q0 | 0.000178 | | vb_q1 | 8.3e-06 | | vb_q2 | 2.59e-06 | | vb_q3 | 9e-07 | --------------------------- --------------------------- | grad_norm | 1.73 | | loss | 0.00126 | | loss_cal | 0.0295 | | loss_cal_q0 | 0.0281 | | loss_cal_q1 | 0.0305 | | loss_cal_q2 | 0.0298 | | loss_cal_q3 | 0.0294 | | loss_diff | 0.00112 | | loss_diff_q0 | 0.0028 | | loss_diff_q1 | 0.00117 | | loss_diff_q2 | 0.000296 | | loss_diff_q3 | 7.66e-05 | | loss_q0 | 0.00334 | | loss_q1 | 0.00118 | | loss_q2 | 0.000299 | | loss_q3 | 7.76e-05 | | param_norm | 246 | | samples | 1.84e+05 | | step | 2.3e+04 | | vb | 0.000143 | | vb_q0 | 0.000541 | | vb_q1 | 8.67e-06 | | vb_q2 | 2.66e-06 | | vb_q3 | 9.81e-07 | --------------------------- --------------------------- | grad_norm | 1.76 | | loss | 0.00148 | | loss_cal | 0.0288 | | loss_cal_q0 | 0.0283 | | loss_cal_q1 | 0.0286 | | loss_cal_q2 | 0.031 | | loss_cal_q3 | 0.0263 | | loss_diff | 0.00118 | | loss_diff_q0 | 0.00294 | | loss_diff_q1 | 0.00118 | | loss_diff_q2 | 0.00033 | | loss_diff_q3 | 9.77e-05 | | loss_q0 | 0.00406 | | loss_q1 | 0.00119 | | loss_q2 | 0.000332 | | loss_q3 | 9.9e-05 | | param_norm | 246 | | samples | 1.85e+05 | | step | 2.31e+04 | | vb | 0.000299 | | vb_q0 | 0.00113 | | vb_q1 | 8.77e-06 | | vb_q2 | 2.95e-06 | | vb_q3 | 1.24e-06 | --------------------------- --------------------------- | grad_norm | 1.82 | | loss | 0.000971 | | loss_cal | 0.0311 | | loss_cal_q0 | 0.0322 | | loss_cal_q1 | 0.0322 | | loss_cal_q2 | 0.0311 | | loss_cal_q3 | 0.029 | | loss_diff | 0.000952 | | loss_diff_q0 | 0.00237 | | loss_diff_q1 | 0.00116 | | loss_diff_q2 | 0.000302 | | loss_diff_q3 | 7.39e-05 | | loss_q0 | 0.00243 | | loss_q1 | 0.00117 | | loss_q2 | 0.000304 | | loss_q3 | 7.48e-05 | | param_norm | 246 | | samples | 1.86e+05 | | step | 2.32e+04 | | vb | 1.84e-05 | | vb_q0 | 6.58e-05 | | vb_q1 | 8.6e-06 | | vb_q2 | 2.74e-06 | | vb_q3 | 9.32e-07 | --------------------------- --------------------------- | grad_norm | 1.91 | | loss | 0.00204 | | loss_cal | 0.0313 | | loss_cal_q0 | 0.0293 | | loss_cal_q1 | 0.0346 | | loss_cal_q2 | 0.0329 | | loss_cal_q3 | 0.028 | | loss_diff | 0.0014 | | loss_diff_q0 | 0.00385 | | loss_diff_q1 | 0.00119 | | loss_diff_q2 | 0.000362 | | loss_diff_q3 | 0.000136 | | loss_q0 | 0.00632 | | loss_q1 | 0.0012 | | loss_q2 | 0.000365 | | loss_q3 | 0.000137 | | param_norm | 246 | | samples | 1.86e+05 | | step | 2.33e+04 | | vb | 0.000637 | | vb_q0 | 0.00247 | | vb_q1 | 8.86e-06 | | vb_q2 | 3.28e-06 | | vb_q3 | 1.72e-06 | --------------------------- --------------------------- | grad_norm | 1.61 | | loss | 0.00284 | | loss_cal | 0.0299 | | loss_cal_q0 | 0.0282 | | loss_cal_q1 | 0.0317 | | loss_cal_q2 | 0.031 | | loss_cal_q3 | 0.0291 | | loss_diff | 0.00142 | | loss_diff_q0 | 0.00366 | | loss_diff_q1 | 0.00118 | | loss_diff_q2 | 0.000364 | | loss_diff_q3 | 0.000131 | | loss_q0 | 0.00877 | | loss_q1 | 0.00119 | | loss_q2 | 0.000367 | | loss_q3 | 0.000133 | | param_norm | 246 | | samples | 1.87e+05 | | step | 2.34e+04 | | vb | 0.00142 | | vb_q0 | 0.0051 | | vb_q1 | 8.81e-06 | | vb_q2 | 3.32e-06 | | vb_q3 | 1.71e-06 | --------------------------- --------------------------- | grad_norm | 1.78 | | loss | 0.00324 | | loss_cal | 0.03 | | loss_cal_q0 | 0.0322 | | loss_cal_q1 | 0.0295 | | loss_cal_q2 | 0.03 | | loss_cal_q3 | 0.0282 | | loss_diff | 0.00163 | | loss_diff_q0 | 0.00448 | | loss_diff_q1 | 0.00132 | | loss_diff_q2 | 0.000439 | | loss_diff_q3 | 0.00017 | | loss_q0 | 0.0108 | | loss_q1 | 0.00133 | | loss_q2 | 0.000443 | | loss_q3 | 0.000172 | | param_norm | 246 | | samples | 1.88e+05 | | step | 2.35e+04 | | vb | 0.00161 | | vb_q0 | 0.00634 | | vb_q1 | 1e-05 | | vb_q2 | 3.98e-06 | | vb_q3 | 2.16e-06 | --------------------------- --------------------------- | grad_norm | 2.13 | | loss | 0.00258 | | loss_cal | 0.0303 | | loss_cal_q0 | 0.0286 | | loss_cal_q1 | 0.0343 | | loss_cal_q2 | 0.0271 | | loss_cal_q3 | 0.0315 | | loss_diff | 0.00142 | | loss_diff_q0 | 0.00368 | | loss_diff_q1 | 0.00134 | | loss_diff_q2 | 0.000383 | | loss_diff_q3 | 0.000152 | | loss_q0 | 0.00818 | | loss_q1 | 0.00135 | | loss_q2 | 0.000386 | | loss_q3 | 0.000154 | | param_norm | 246 | | samples | 1.89e+05 | | step | 2.36e+04 | | vb | 0.00116 | | vb_q0 | 0.00451 | | vb_q1 | 1.03e-05 | | vb_q2 | 3.44e-06 | | vb_q3 | 1.92e-06 | --------------------------- --------------------------- | grad_norm | 1.88 | | loss | 0.00158 | | loss_cal | 0.0309 | | loss_cal_q0 | 0.0301 | | loss_cal_q1 | 0.0336 | | loss_cal_q2 | 0.0303 | | loss_cal_q3 | 0.0302 | | loss_diff | 0.00133 | | loss_diff_q0 | 0.00377 | | loss_diff_q1 | 0.00109 | | loss_diff_q2 | 0.000379 | | loss_diff_q3 | 0.000169 | | loss_q0 | 0.00478 | | loss_q1 | 0.0011 | | loss_q2 | 0.000383 | | loss_q3 | 0.000171 | | param_norm | 246 | | samples | 1.9e+05 | | step | 2.37e+04 | | vb | 0.000253 | | vb_q0 | 0.001 | | vb_q1 | 8.22e-06 | | vb_q2 | 3.43e-06 | | vb_q3 | 2.17e-06 | --------------------------- --------------------------- | grad_norm | 1.99 | | loss | 0.00105 | | loss_cal | 0.0309 | | loss_cal_q0 | 0.0319 | | loss_cal_q1 | 0.0291 | | loss_cal_q2 | 0.0333 | | loss_cal_q3 | 0.0293 | | loss_diff | 0.00101 | | loss_diff_q0 | 0.00258 | | loss_diff_q1 | 0.00114 | | loss_diff_q2 | 0.000326 | | loss_diff_q3 | 9.35e-05 | | loss_q0 | 0.00273 | | loss_q1 | 0.00115 | | loss_q2 | 0.000328 | | loss_q3 | 9.47e-05 | | param_norm | 246 | | samples | 1.9e+05 | | step | 2.38e+04 | | vb | 3.9e-05 | | vb_q0 | 0.000149 | | vb_q1 | 8.64e-06 | | vb_q2 | 2.97e-06 | | vb_q3 | 1.19e-06 | --------------------------- --------------------------- | grad_norm | 1.6 | | loss | 0.0094 | | loss_cal | 0.0283 | | loss_cal_q0 | 0.0289 | | loss_cal_q1 | 0.0296 | | loss_cal_q2 | 0.0282 | | loss_cal_q3 | 0.0256 | | loss_diff | 0.0038 | | loss_diff_q0 | 0.0103 | | loss_diff_q1 | 0.00158 | | loss_diff_q2 | 0.00061 | | loss_diff_q3 | 0.000339 | | loss_q0 | 0.0235 | | loss_q1 | 0.00159 | | loss_q2 | 0.000616 | | loss_q3 | 0.000343 | | param_norm | 246 | | samples | 1.91e+05 | | step | 2.39e+04 | | vb | 0.0056 | | vb_q0 | 0.0133 | | vb_q1 | 1.2e-05 | | vb_q2 | 5.62e-06 | | vb_q3 | 4.35e-06 | --------------------------- --------------------------- | grad_norm | 1.73 | | loss | 0.00298 | | loss_cal | 0.0284 | | loss_cal_q0 | 0.0268 | | loss_cal_q1 | 0.0302 | | loss_cal_q2 | 0.0282 | | loss_cal_q3 | 0.0289 | | loss_diff | 0.00174 | | loss_diff_q0 | 0.00502 | | loss_diff_q1 | 0.00126 | | loss_diff_q2 | 0.000388 | | loss_diff_q3 | 0.000127 | | loss_q0 | 0.00988 | | loss_q1 | 0.00127 | | loss_q2 | 0.000391 | | loss_q3 | 0.000129 | | param_norm | 246 | | samples | 1.92e+05 | | step | 2.4e+04 | | vb | 0.00124 | | vb_q0 | 0.00485 | | vb_q1 | 9.44e-06 | | vb_q2 | 3.5e-06 | | vb_q3 | 1.62e-06 | --------------------------- --------------------------- | grad_norm | 1.62 | | loss | 0.00132 | | loss_cal | 0.0292 | | loss_cal_q0 | 0.0304 | | loss_cal_q1 | 0.0299 | | loss_cal_q2 | 0.0272 | | loss_cal_q3 | 0.0293 | | loss_diff | 0.0012 | | loss_diff_q0 | 0.00319 | | loss_diff_q1 | 0.00119 | | loss_diff_q2 | 0.000323 | | loss_diff_q3 | 0.000115 | | loss_q0 | 0.00368 | | loss_q1 | 0.0012 | | loss_q2 | 0.000326 | | loss_q3 | 0.000117 | | param_norm | 246 | | samples | 1.93e+05 | | step | 2.41e+04 | | vb | 0.00012 | | vb_q0 | 0.000486 | | vb_q1 | 9.1e-06 | | vb_q2 | 2.96e-06 | | vb_q3 | 1.47e-06 | --------------------------- --------------------------- | grad_norm | 1.75 | | loss | 0.00274 | | loss_cal | 0.0294 | | loss_cal_q0 | 0.0292 | | loss_cal_q1 | 0.03 | | loss_cal_q2 | 0.0288 | | loss_cal_q3 | 0.0295 | | loss_diff | 0.00134 | | loss_diff_q0 | 0.00372 | | loss_diff_q1 | 0.00118 | | loss_diff_q2 | 0.000314 | | loss_diff_q3 | 8.96e-05 | | loss_q0 | 0.00931 | | loss_q1 | 0.00119 | | loss_q2 | 0.000317 | | loss_q3 | 9.08e-05 | | param_norm | 247 | | samples | 1.94e+05 | | step | 2.42e+04 | | vb | 0.0014 | | vb_q0 | 0.00558 | | vb_q1 | 8.83e-06 | | vb_q2 | 2.84e-06 | | vb_q3 | 1.14e-06 | --------------------------- --------------------------- | grad_norm | 1.85 | | loss | 0.00107 | | loss_cal | 0.0303 | | loss_cal_q0 | 0.0301 | | loss_cal_q1 | 0.0304 | | loss_cal_q2 | 0.0298 | | loss_cal_q3 | 0.0308 | | loss_diff | 0.00101 | | loss_diff_q0 | 0.00266 | | loss_diff_q1 | 0.00116 | | loss_diff_q2 | 0.000309 | | loss_diff_q3 | 7.91e-05 | | loss_q0 | 0.00286 | | loss_q1 | 0.00117 | | loss_q2 | 0.000312 | | loss_q3 | 8.01e-05 | | param_norm | 247 | | samples | 1.94e+05 | | step | 2.43e+04 | | vb | 5.09e-05 | | vb_q0 | 0.000201 | | vb_q1 | 8.62e-06 | | vb_q2 | 2.78e-06 | | vb_q3 | 1e-06 | --------------------------- --------------------------- | grad_norm | 1.88 | | loss | 0.000992 | | loss_cal | 0.03 | | loss_cal_q0 | 0.033 | | loss_cal_q1 | 0.0289 | | loss_cal_q2 | 0.0295 | | loss_cal_q3 | 0.0286 | | loss_diff | 0.000966 | | loss_diff_q0 | 0.00224 | | loss_diff_q1 | 0.00117 | | loss_diff_q2 | 0.000281 | | loss_diff_q3 | 7.29e-05 | | loss_q0 | 0.00233 | | loss_q1 | 0.00118 | | loss_q2 | 0.000283 | | loss_q3 | 7.38e-05 | | param_norm | 247 | | samples | 1.95e+05 | | step | 2.44e+04 | | vb | 2.65e-05 | | vb_q0 | 9.15e-05 | | vb_q1 | 8.73e-06 | | vb_q2 | 2.55e-06 | | vb_q3 | 9.26e-07 | --------------------------- --------------------------- | grad_norm | 1.8 | | loss | 0.00138 | | loss_cal | 0.0289 | | loss_cal_q0 | 0.0302 | | loss_cal_q1 | 0.0272 | | loss_cal_q2 | 0.029 | | loss_cal_q3 | 0.029 | | loss_diff | 0.00106 | | loss_diff_q0 | 0.00265 | | loss_diff_q1 | 0.00117 | | loss_diff_q2 | 0.000297 | | loss_diff_q3 | 7.28e-05 | | loss_q0 | 0.00394 | | loss_q1 | 0.00118 | | loss_q2 | 0.0003 | | loss_q3 | 7.37e-05 | | param_norm | 247 | | samples | 1.96e+05 | | step | 2.45e+04 | | vb | 0.000326 | | vb_q0 | 0.00129 | | vb_q1 | 8.72e-06 | | vb_q2 | 2.69e-06 | | vb_q3 | 9.26e-07 | --------------------------- --------------------------- | grad_norm | 1.51 | | loss | 0.000941 | | loss_cal | 0.0277 | | loss_cal_q0 | 0.0257 | | loss_cal_q1 | 0.0289 | | loss_cal_q2 | 0.0273 | | loss_cal_q3 | 0.0288 | | loss_diff | 0.000906 | | loss_diff_q0 | 0.00222 | | loss_diff_q1 | 0.00106 | | loss_diff_q2 | 0.000267 | | loss_diff_q3 | 6.46e-05 | | loss_q0 | 0.00234 | | loss_q1 | 0.00106 | | loss_q2 | 0.00027 | | loss_q3 | 6.54e-05 | | param_norm | 247 | | samples | 1.97e+05 | | step | 2.46e+04 | | vb | 3.46e-05 | | vb_q0 | 0.000124 | | vb_q1 | 7.84e-06 | | vb_q2 | 2.43e-06 | | vb_q3 | 8.13e-07 | --------------------------- --------------------------- | grad_norm | 1.76 | | loss | 0.00172 | | loss_cal | 0.0289 | | loss_cal_q0 | 0.028 | | loss_cal_q1 | 0.03 | | loss_cal_q2 | 0.028 | | loss_cal_q3 | 0.0292 | | loss_diff | 0.00112 | | loss_diff_q0 | 0.00302 | | loss_diff_q1 | 0.00111 | | loss_diff_q2 | 0.000285 | | loss_diff_q3 | 7.16e-05 | | loss_q0 | 0.00535 | | loss_q1 | 0.00112 | | loss_q2 | 0.000287 | | loss_q3 | 7.26e-05 | | param_norm | 247 | | samples | 1.98e+05 | | step | 2.47e+04 | | vb | 0.000601 | | vb_q0 | 0.00234 | | vb_q1 | 8.26e-06 | | vb_q2 | 2.56e-06 | | vb_q3 | 9.11e-07 | --------------------------- --------------------------- | grad_norm | 1.8 | | loss | 0.00109 | | loss_cal | 0.0287 | | loss_cal_q0 | 0.0297 | | loss_cal_q1 | 0.0294 | | loss_cal_q2 | 0.0272 | | loss_cal_q3 | 0.0286 | | loss_diff | 0.00104 | | loss_diff_q0 | 0.00269 | | loss_diff_q1 | 0.00115 | | loss_diff_q2 | 0.000293 | | loss_diff_q3 | 8.42e-05 | | loss_q0 | 0.00289 | | loss_q1 | 0.00116 | | loss_q2 | 0.000296 | | loss_q3 | 8.53e-05 | | param_norm | 247 | | samples | 1.98e+05 | | step | 2.48e+04 | | vb | 5.22e-05 | | vb_q0 | 0.000199 | | vb_q1 | 8.53e-06 | | vb_q2 | 2.67e-06 | | vb_q3 | 1.07e-06 | --------------------------- --------------------------- | grad_norm | 1.89 | | loss | 0.00102 | | loss_cal | 0.0298 | | loss_cal_q0 | 0.0296 | | loss_cal_q1 | 0.03 | | loss_cal_q2 | 0.0315 | | loss_cal_q3 | 0.0284 | | loss_diff | 0.000982 | | loss_diff_q0 | 0.00229 | | loss_diff_q1 | 0.00107 | | loss_diff_q2 | 0.000305 | | loss_diff_q3 | 7.3e-05 | | loss_q0 | 0.00243 | | loss_q1 | 0.00108 | | loss_q2 | 0.000308 | | loss_q3 | 7.4e-05 | | param_norm | 247 | | samples | 1.99e+05 | | step | 2.49e+04 | | vb | 3.78e-05 | | vb_q0 | 0.000132 | | vb_q1 | 7.98e-06 | | vb_q2 | 2.75e-06 | | vb_q3 | 9.37e-07 | --------------------------- --------------------------- | grad_norm | 1.82 | | loss | 0.0014 | | loss_cal | 0.0291 | | loss_cal_q0 | 0.0276 | | loss_cal_q1 | 0.0289 | | loss_cal_q2 | 0.0293 | | loss_cal_q3 | 0.0308 | | loss_diff | 0.00107 | | loss_diff_q0 | 0.00265 | | loss_diff_q1 | 0.00111 | | loss_diff_q2 | 0.000294 | | loss_diff_q3 | 9.42e-05 | | loss_q0 | 0.00389 | | loss_q1 | 0.00111 | | loss_q2 | 0.000297 | | loss_q3 | 9.54e-05 | | param_norm | 247 | | samples | 2e+05 | | step | 2.5e+04 | | vb | 0.000333 | | vb_q0 | 0.00124 | | vb_q1 | 8.2e-06 | | vb_q2 | 2.69e-06 | | vb_q3 | 1.19e-06 | --------------------------- saving model 0... saving model 0.9999... --------------------------- | grad_norm | 1.73 | | loss | 0.00201 | | loss_cal | 0.0283 | | loss_cal_q0 | 0.0312 | | loss_cal_q1 | 0.0254 | | loss_cal_q2 | 0.0276 | | loss_cal_q3 | 0.0295 | | loss_diff | 0.00109 | | loss_diff_q0 | 0.00316 | | loss_diff_q1 | 0.00106 | | loss_diff_q2 | 0.000277 | | loss_diff_q3 | 6.95e-05 | | loss_q0 | 0.0072 | | loss_q1 | 0.00106 | | loss_q2 | 0.000279 | | loss_q3 | 7.04e-05 | | param_norm | 247 | | samples | 2.01e+05 | | step | 2.51e+04 | | vb | 0.000922 | | vb_q0 | 0.00404 | | vb_q1 | 7.85e-06 | | vb_q2 | 2.51e-06 | | vb_q3 | 8.73e-07 | --------------------------- --------------------------- | grad_norm | 1.82 | | loss | 0.00356 | | loss_cal | 0.0295 | | loss_cal_q0 | 0.0296 | | loss_cal_q1 | 0.0286 | | loss_cal_q2 | 0.0281 | | loss_cal_q3 | 0.0303 | | loss_diff | 0.00217 | | loss_diff_q0 | 0.00667 | | loss_diff_q1 | 0.00145 | | loss_diff_q2 | 0.000503 | | loss_diff_q3 | 0.000258 | | loss_q0 | 0.0125 | | loss_q1 | 0.00146 | | loss_q2 | 0.000508 | | loss_q3 | 0.000261 | | param_norm | 247 | | samples | 2.02e+05 | | step | 2.52e+04 | | vb | 0.00139 | | vb_q0 | 0.00581 | | vb_q1 | 1.08e-05 | | vb_q2 | 4.57e-06 | | vb_q3 | 3.3e-06 | --------------------------- --------------------------- | grad_norm | 1.61 | | loss | 0.00116 | | loss_cal | 0.0281 | | loss_cal_q0 | 0.0278 | | loss_cal_q1 | 0.0283 | | loss_cal_q2 | 0.0256 | | loss_cal_q3 | 0.0307 | | loss_diff | 0.0011 | | loss_diff_q0 | 0.00294 | | loss_diff_q1 | 0.00107 | | loss_diff_q2 | 0.000311 | | loss_diff_q3 | 9.23e-05 | | loss_q0 | 0.00317 | | loss_q1 | 0.00108 | | loss_q2 | 0.000314 | | loss_q3 | 9.35e-05 | | param_norm | 247 | | samples | 2.02e+05 | | step | 2.53e+04 | | vb | 6e-05 | | vb_q0 | 0.000229 | | vb_q1 | 7.97e-06 | | vb_q2 | 2.81e-06 | | vb_q3 | 1.18e-06 | --------------------------- --------------------------- | grad_norm | 1.99 | | loss | 0.00484 | | loss_cal | 0.0301 | | loss_cal_q0 | 0.0307 | | loss_cal_q1 | 0.0293 | | loss_cal_q2 | 0.0332 | | loss_cal_q3 | 0.027 | | loss_diff | 0.00218 | | loss_diff_q0 | 0.00597 | | loss_diff_q1 | 0.00156 | | loss_diff_q2 | 0.000437 | | loss_diff_q3 | 0.000205 | | loss_q0 | 0.0158 | | loss_q1 | 0.00157 | | loss_q2 | 0.000441 | | loss_q3 | 0.000208 | | param_norm | 247 | | samples | 2.03e+05 | | step | 2.54e+04 | | vb | 0.00266 | | vb_q0 | 0.00983 | | vb_q1 | 1.19e-05 | | vb_q2 | 4.01e-06 | | vb_q3 | 2.67e-06 | --------------------------- --------------------------- | grad_norm | 1.63 | | loss | 0.00156 | | loss_cal | 0.0274 | | loss_cal_q0 | 0.0274 | | loss_cal_q1 | 0.0274 | | loss_cal_q2 | 0.028 | | loss_cal_q3 | 0.0265 | | loss_diff | 0.00146 | | loss_diff_q0 | 0.00421 | | loss_diff_q1 | 0.0013 | | loss_diff_q2 | 0.000407 | | loss_diff_q3 | 0.00016 | | loss_q0 | 0.00461 | | loss_q1 | 0.00131 | | loss_q2 | 0.000411 | | loss_q3 | 0.000162 | | param_norm | 248 | | samples | 2.04e+05 | | step | 2.55e+04 | | vb | 9.83e-05 | | vb_q0 | 0.0004 | | vb_q1 | 1.05e-05 | | vb_q2 | 3.74e-06 | | vb_q3 | 2.05e-06 | --------------------------- --------------------------- | grad_norm | 1.53 | | loss | 0.0031 | | loss_cal | 0.0275 | | loss_cal_q0 | 0.0271 | | loss_cal_q1 | 0.0274 | | loss_cal_q2 | 0.0277 | | loss_cal_q3 | 0.0279 | | loss_diff | 0.00125 | | loss_diff_q0 | 0.00356 | | loss_diff_q1 | 0.00117 | | loss_diff_q2 | 0.000342 | | loss_diff_q3 | 0.000116 | | loss_q0 | 0.0115 | | loss_q1 | 0.00118 | | loss_q2 | 0.000345 | | loss_q3 | 0.000117 | | param_norm | 248 | | samples | 2.05e+05 | | step | 2.56e+04 | | vb | 0.00185 | | vb_q0 | 0.00797 | | vb_q1 | 8.95e-06 | | vb_q2 | 3.11e-06 | | vb_q3 | 1.48e-06 | --------------------------- --------------------------- | grad_norm | 1.54 | | loss | 0.00327 | | loss_cal | 0.0268 | | loss_cal_q0 | 0.0277 | | loss_cal_q1 | 0.0267 | | loss_cal_q2 | 0.0263 | | loss_cal_q3 | 0.0266 | | loss_diff | 0.0017 | | loss_diff_q0 | 0.00564 | | loss_diff_q1 | 0.0012 | | loss_diff_q2 | 0.000361 | | loss_diff_q3 | 0.000125 | | loss_q0 | 0.0124 | | loss_q1 | 0.0012 | | loss_q2 | 0.000364 | | loss_q3 | 0.000126 | | param_norm | 248 | | samples | 2.06e+05 | | step | 2.57e+04 | | vb | 0.00157 | | vb_q0 | 0.00678 | | vb_q1 | 9.01e-06 | | vb_q2 | 3.29e-06 | | vb_q3 | 1.59e-06 | --------------------------- --------------------------- | grad_norm | 1.74 | | loss | 0.00139 | | loss_cal | 0.0295 | | loss_cal_q0 | 0.0307 | | loss_cal_q1 | 0.0263 | | loss_cal_q2 | 0.032 | | loss_cal_q3 | 0.029 | | loss_diff | 0.00128 | | loss_diff_q0 | 0.00332 | | loss_diff_q1 | 0.00122 | | loss_diff_q2 | 0.000372 | | loss_diff_q3 | 0.000122 | | loss_q0 | 0.00373 | | loss_q1 | 0.00123 | | loss_q2 | 0.000376 | | loss_q3 | 0.000124 | | param_norm | 248 | | samples | 2.06e+05 | | step | 2.58e+04 | | vb | 0.000111 | | vb_q0 | 0.000411 | | vb_q1 | 9.24e-06 | | vb_q2 | 3.36e-06 | | vb_q3 | 1.54e-06 | --------------------------- --------------------------- | grad_norm | 1.5 | | loss | 0.00164 | | loss_cal | 0.0265 | | loss_cal_q0 | 0.0245 | | loss_cal_q1 | 0.0277 | | loss_cal_q2 | 0.0245 | | loss_cal_q3 | 0.029 | | loss_diff | 0.00119 | | loss_diff_q0 | 0.00327 | | loss_diff_q1 | 0.00121 | | loss_diff_q2 | 0.000306 | | loss_diff_q3 | 9.21e-05 | | loss_q0 | 0.00525 | | loss_q1 | 0.00122 | | loss_q2 | 0.000309 | | loss_q3 | 9.33e-05 | | param_norm | 248 | | samples | 2.07e+05 | | step | 2.59e+04 | | vb | 0.000451 | | vb_q0 | 0.00198 | | vb_q1 | 9.02e-06 | | vb_q2 | 2.76e-06 | | vb_q3 | 1.18e-06 | --------------------------- --------------------------- | grad_norm | 1.6 | | loss | 0.001 | | loss_cal | 0.0275 | | loss_cal_q0 | 0.027 | | loss_cal_q1 | 0.026 | | loss_cal_q2 | 0.0285 | | loss_cal_q3 | 0.0286 | | loss_diff | 0.00097 | | loss_diff_q0 | 0.00236 | | loss_diff_q1 | 0.00108 | | loss_diff_q2 | 0.000274 | | loss_diff_q3 | 7.75e-05 | | loss_q0 | 0.00246 | | loss_q1 | 0.00109 | | loss_q2 | 0.000277 | | loss_q3 | 7.84e-05 | | param_norm | 248 | | samples | 2.08e+05 | | step | 2.6e+04 | | vb | 3.02e-05 | | vb_q0 | 0.000106 | | vb_q1 | 8.05e-06 | | vb_q2 | 2.5e-06 | | vb_q3 | 9.76e-07 | --------------------------- --------------------------- | grad_norm | 1.71 | | loss | 0.000955 | | loss_cal | 0.0279 | | loss_cal_q0 | 0.0274 | | loss_cal_q1 | 0.0267 | | loss_cal_q2 | 0.0281 | | loss_cal_q3 | 0.0293 | | loss_diff | 0.000918 | | loss_diff_q0 | 0.00209 | | loss_diff_q1 | 0.00114 | | loss_diff_q2 | 0.000309 | | loss_diff_q3 | 7.3e-05 | | loss_q0 | 0.00222 | | loss_q1 | 0.00115 | | loss_q2 | 0.000311 | | loss_q3 | 7.39e-05 | | param_norm | 248 | | samples | 2.09e+05 | | step | 2.61e+04 | | vb | 3.75e-05 | | vb_q0 | 0.000137 | | vb_q1 | 8.53e-06 | | vb_q2 | 2.76e-06 | | vb_q3 | 9.26e-07 | --------------------------- --------------------------- | grad_norm | 1.62 | | loss | 0.00142 | | loss_cal | 0.0279 | | loss_cal_q0 | 0.0297 | | loss_cal_q1 | 0.0268 | | loss_cal_q2 | 0.0286 | | loss_cal_q3 | 0.0262 | | loss_diff | 0.00113 | | loss_diff_q0 | 0.00289 | | loss_diff_q1 | 0.00116 | | loss_diff_q2 | 0.000306 | | loss_diff_q3 | 8.35e-05 | | loss_q0 | 0.00397 | | loss_q1 | 0.00117 | | loss_q2 | 0.000308 | | loss_q3 | 8.46e-05 | | param_norm | 248 | | samples | 2.1e+05 | | step | 2.62e+04 | | vb | 0.000288 | | vb_q0 | 0.00108 | | vb_q1 | 8.65e-06 | | vb_q2 | 2.77e-06 | | vb_q3 | 1.05e-06 | --------------------------- --------------------------- | grad_norm | 1.82 | | loss | 0.00141 | | loss_cal | 0.0285 | | loss_cal_q0 | 0.0276 | | loss_cal_q1 | 0.0287 | | loss_cal_q2 | 0.0274 | | loss_cal_q3 | 0.0299 | | loss_diff | 0.00105 | | loss_diff_q0 | 0.00291 | | loss_diff_q1 | 0.00106 | | loss_diff_q2 | 0.00027 | | loss_diff_q3 | 8.17e-05 | | loss_q0 | 0.0045 | | loss_q1 | 0.00107 | | loss_q2 | 0.000272 | | loss_q3 | 8.27e-05 | | param_norm | 248 | | samples | 2.1e+05 | | step | 2.63e+04 | | vb | 0.000367 | | vb_q0 | 0.00159 | | vb_q1 | 7.89e-06 | | vb_q2 | 2.47e-06 | | vb_q3 | 1.03e-06 | --------------------------- --------------------------- | grad_norm | 1.57 | | loss | 0.000998 | | loss_cal | 0.0278 | | loss_cal_q0 | 0.0283 | | loss_cal_q1 | 0.0274 | | loss_cal_q2 | 0.0298 | | loss_cal_q3 | 0.0257 | | loss_diff | 0.000955 | | loss_diff_q0 | 0.0024 | | loss_diff_q1 | 0.0011 | | loss_diff_q2 | 0.000302 | | loss_diff_q3 | 7.6e-05 | | loss_q0 | 0.00256 | | loss_q1 | 0.00111 | | loss_q2 | 0.000305 | | loss_q3 | 7.69e-05 | | param_norm | 248 | | samples | 2.11e+05 | | step | 2.64e+04 | | vb | 4.32e-05 | | vb_q0 | 0.000164 | | vb_q1 | 8.2e-06 | | vb_q2 | 2.69e-06 | | vb_q3 | 9.55e-07 | --------------------------- --------------------------- | grad_norm | 1.47 | | loss | 0.00195 | | loss_cal | 0.0265 | | loss_cal_q0 | 0.0275 | | loss_cal_q1 | 0.0269 | | loss_cal_q2 | 0.0245 | | loss_cal_q3 | 0.0265 | | loss_diff | 0.00125 | | loss_diff_q0 | 0.00291 | | loss_diff_q1 | 0.00115 | | loss_diff_q2 | 0.000286 | | loss_diff_q3 | 9.7e-05 | | loss_q0 | 0.00414 | | loss_q1 | 0.00116 | | loss_q2 | 0.000288 | | loss_q3 | 9.83e-05 | | param_norm | 248 | | samples | 2.12e+05 | | step | 2.65e+04 | | vb | 0.000692 | | vb_q0 | 0.00123 | | vb_q1 | 8.58e-06 | | vb_q2 | 2.61e-06 | | vb_q3 | 1.24e-06 | --------------------------- --------------------------- | grad_norm | 1.68 | | loss | 0.00118 | | loss_cal | 0.0273 | | loss_cal_q0 | 0.0263 | | loss_cal_q1 | 0.0281 | | loss_cal_q2 | 0.0272 | | loss_cal_q3 | 0.0277 | | loss_diff | 0.00111 | | loss_diff_q0 | 0.00286 | | loss_diff_q1 | 0.00108 | | loss_diff_q2 | 0.000287 | | loss_diff_q3 | 7.87e-05 | | loss_q0 | 0.00311 | | loss_q1 | 0.00109 | | loss_q2 | 0.00029 | | loss_q3 | 7.97e-05 | | param_norm | 248 | | samples | 2.13e+05 | | step | 2.66e+04 | | vb | 6.86e-05 | | vb_q0 | 0.000248 | | vb_q1 | 8.02e-06 | | vb_q2 | 2.61e-06 | | vb_q3 | 1e-06 | --------------------------- --------------------------- | grad_norm | 1.55 | | loss | 0.00208 | | loss_cal | 0.0268 | | loss_cal_q0 | 0.0273 | | loss_cal_q1 | 0.0266 | | loss_cal_q2 | 0.0253 | | loss_cal_q3 | 0.0276 | | loss_diff | 0.00125 | | loss_diff_q0 | 0.00333 | | loss_diff_q1 | 0.00113 | | loss_diff_q2 | 0.000272 | | loss_diff_q3 | 8.03e-05 | | loss_q0 | 0.00637 | | loss_q1 | 0.00114 | | loss_q2 | 0.000274 | | loss_q3 | 8.13e-05 | | param_norm | 248 | | samples | 2.14e+05 | | step | 2.67e+04 | | vb | 0.000831 | | vb_q0 | 0.00304 | | vb_q1 | 8.39e-06 | | vb_q2 | 2.46e-06 | | vb_q3 | 1.02e-06 | --------------------------- --------------------------- | grad_norm | 1.78 | | loss | 0.00121 | | loss_cal | 0.0288 | | loss_cal_q0 | 0.0335 | | loss_cal_q1 | 0.0285 | | loss_cal_q2 | 0.0278 | | loss_cal_q3 | 0.0254 | | loss_diff | 0.00114 | | loss_diff_q0 | 0.00309 | | loss_diff_q1 | 0.00114 | | loss_diff_q2 | 0.000296 | | loss_diff_q3 | 8.47e-05 | | loss_q0 | 0.00337 | | loss_q1 | 0.00114 | | loss_q2 | 0.000298 | | loss_q3 | 8.57e-05 | | param_norm | 248 | | samples | 2.14e+05 | | step | 2.68e+04 | | vb | 7.18e-05 | | vb_q0 | 0.000287 | | vb_q1 | 8.45e-06 | | vb_q2 | 2.66e-06 | | vb_q3 | 1.08e-06 | --------------------------- --------------------------- | grad_norm | 1.57 | | loss | 0.00101 | | loss_cal | 0.0267 | | loss_cal_q0 | 0.026 | | loss_cal_q1 | 0.0275 | | loss_cal_q2 | 0.0276 | | loss_cal_q3 | 0.0256 | | loss_diff | 0.000964 | | loss_diff_q0 | 0.00259 | | loss_diff_q1 | 0.00103 | | loss_diff_q2 | 0.000274 | | loss_diff_q3 | 7.06e-05 | | loss_q0 | 0.00278 | | loss_q1 | 0.00104 | | loss_q2 | 0.000276 | | loss_q3 | 7.15e-05 | | param_norm | 248 | | samples | 2.15e+05 | | step | 2.69e+04 | | vb | 4.64e-05 | | vb_q0 | 0.000189 | | vb_q1 | 7.67e-06 | | vb_q2 | 2.47e-06 | | vb_q3 | 9.12e-07 | --------------------------- --------------------------- | grad_norm | 1.67 | | loss | 0.00394 | | loss_cal | 0.0265 | | loss_cal_q0 | 0.0255 | | loss_cal_q1 | 0.0269 | | loss_cal_q2 | 0.0267 | | loss_cal_q3 | 0.0268 | | loss_diff | 0.00149 | | loss_diff_q0 | 0.00441 | | loss_diff_q1 | 0.00119 | | loss_diff_q2 | 0.000296 | | loss_diff_q3 | 0.000106 | | loss_q0 | 0.0144 | | loss_q1 | 0.0012 | | loss_q2 | 0.000298 | | loss_q3 | 0.000108 | | param_norm | 248 | | samples | 2.16e+05 | | step | 2.7e+04 | | vb | 0.00246 | | vb_q0 | 0.01 | | vb_q1 | 8.8e-06 | | vb_q2 | 2.69e-06 | | vb_q3 | 1.35e-06 | --------------------------- --------------------------- | grad_norm | 1.65 | | loss | 0.00124 | | loss_cal | 0.0277 | | loss_cal_q0 | 0.0284 | | loss_cal_q1 | 0.0271 | | loss_cal_q2 | 0.0272 | | loss_cal_q3 | 0.0274 | | loss_diff | 0.00103 | | loss_diff_q0 | 0.00253 | | loss_diff_q1 | 0.00109 | | loss_diff_q2 | 0.000276 | | loss_diff_q3 | 8.2e-05 | | loss_q0 | 0.00336 | | loss_q1 | 0.0011 | | loss_q2 | 0.000279 | | loss_q3 | 8.31e-05 | | param_norm | 248 | | samples | 2.17e+05 | | step | 2.71e+04 | | vb | 0.000217 | | vb_q0 | 0.000824 | | vb_q1 | 8.13e-06 | | vb_q2 | 2.5e-06 | | vb_q3 | 1.04e-06 | --------------------------- --------------------------- | grad_norm | 1.57 | | loss | 0.00104 | | loss_cal | 0.0268 | | loss_cal_q0 | 0.0251 | | loss_cal_q1 | 0.0247 | | loss_cal_q2 | 0.0275 | | loss_cal_q3 | 0.0303 | | loss_diff | 0.000982 | | loss_diff_q0 | 0.0024 | | loss_diff_q1 | 0.00111 | | loss_diff_q2 | 0.000282 | | loss_diff_q3 | 7.08e-05 | | loss_q0 | 0.00262 | | loss_q1 | 0.00112 | | loss_q2 | 0.000284 | | loss_q3 | 7.17e-05 | | param_norm | 248 | | samples | 2.18e+05 | | step | 2.72e+04 | | vb | 5.91e-05 | | vb_q0 | 0.00022 | | vb_q1 | 8.3e-06 | | vb_q2 | 2.55e-06 | | vb_q3 | 9.02e-07 | --------------------------- --------------------------- | grad_norm | 1.64 | | loss | 0.000972 | | loss_cal | 0.0275 | | loss_cal_q0 | 0.0264 | | loss_cal_q1 | 0.0263 | | loss_cal_q2 | 0.0275 | | loss_cal_q3 | 0.0302 | | loss_diff | 0.000926 | | loss_diff_q0 | 0.00224 | | loss_diff_q1 | 0.00104 | | loss_diff_q2 | 0.000262 | | loss_diff_q3 | 6.49e-05 | | loss_q0 | 0.00242 | | loss_q1 | 0.00104 | | loss_q2 | 0.000264 | | loss_q3 | 6.57e-05 | | param_norm | 248 | | samples | 2.18e+05 | | step | 2.73e+04 | | vb | 4.64e-05 | | vb_q0 | 0.000177 | | vb_q1 | 7.73e-06 | | vb_q2 | 2.39e-06 | | vb_q3 | 8.25e-07 | --------------------------- --------------------------- | grad_norm | 1.51 | | loss | 0.00187 | | loss_cal | 0.0265 | | loss_cal_q0 | 0.0282 | | loss_cal_q1 | 0.0242 | | loss_cal_q2 | 0.0247 | | loss_cal_q3 | 0.0288 | | loss_diff | 0.00115 | | loss_diff_q0 | 0.00318 | | loss_diff_q1 | 0.00106 | | loss_diff_q2 | 0.00027 | | loss_diff_q3 | 9.17e-05 | | loss_q0 | 0.00597 | | loss_q1 | 0.00107 | | loss_q2 | 0.000272 | | loss_q3 | 9.29e-05 | | param_norm | 248 | | samples | 2.19e+05 | | step | 2.74e+04 | | vb | 0.000718 | | vb_q0 | 0.00279 | | vb_q1 | 7.85e-06 | | vb_q2 | 2.45e-06 | | vb_q3 | 1.17e-06 | --------------------------- --------------------------- | grad_norm | 1.76 | | loss | 0.00132 | | loss_cal | 0.0277 | | loss_cal_q0 | 0.0263 | | loss_cal_q1 | 0.0262 | | loss_cal_q2 | 0.0299 | | loss_cal_q3 | 0.0288 | | loss_diff | 0.00108 | | loss_diff_q0 | 0.00298 | | loss_diff_q1 | 0.0011 | | loss_diff_q2 | 0.000276 | | loss_diff_q3 | 7.16e-05 | | loss_q0 | 0.00396 | | loss_q1 | 0.00111 | | loss_q2 | 0.000278 | | loss_q3 | 7.25e-05 | | param_norm | 249 | | samples | 2.2e+05 | | step | 2.75e+04 | | vb | 0.000239 | | vb_q0 | 0.000979 | | vb_q1 | 8.21e-06 | | vb_q2 | 2.5e-06 | | vb_q3 | 9.06e-07 | --------------------------- --------------------------- | grad_norm | 1.58 | | loss | 0.00131 | | loss_cal | 0.028 | | loss_cal_q0 | 0.0276 | | loss_cal_q1 | 0.0327 | | loss_cal_q2 | 0.0237 | | loss_cal_q3 | 0.0254 | | loss_diff | 0.00103 | | loss_diff_q0 | 0.00256 | | loss_diff_q1 | 0.00119 | | loss_diff_q2 | 0.00026 | | loss_diff_q3 | 6.95e-05 | | loss_q0 | 0.00371 | | loss_q1 | 0.0012 | | loss_q2 | 0.000262 | | loss_q3 | 7.04e-05 | | param_norm | 249 | | samples | 2.21e+05 | | step | 2.76e+04 | | vb | 0.000286 | | vb_q0 | 0.00115 | | vb_q1 | 8.83e-06 | | vb_q2 | 2.35e-06 | | vb_q3 | 8.8e-07 | --------------------------- --------------------------- | grad_norm | 1.39 | | loss | 0.00213 | | loss_cal | 0.0257 | | loss_cal_q0 | 0.026 | | loss_cal_q1 | 0.025 | | loss_cal_q2 | 0.0264 | | loss_cal_q3 | 0.0259 | | loss_diff | 0.00127 | | loss_diff_q0 | 0.00366 | | loss_diff_q1 | 0.00117 | | loss_diff_q2 | 0.000316 | | loss_diff_q3 | 0.000111 | | loss_q0 | 0.00749 | | loss_q1 | 0.00118 | | loss_q2 | 0.000318 | | loss_q3 | 0.000113 | | param_norm | 249 | | samples | 2.22e+05 | | step | 2.77e+04 | | vb | 0.000861 | | vb_q0 | 0.00383 | | vb_q1 | 8.72e-06 | | vb_q2 | 2.85e-06 | | vb_q3 | 1.44e-06 | --------------------------- --------------------------- | grad_norm | 1.65 | | loss | 0.00093 | | loss_cal | 0.0272 | | loss_cal_q0 | 0.027 | | loss_cal_q1 | 0.0269 | | loss_cal_q2 | 0.0262 | | loss_cal_q3 | 0.0288 | | loss_diff | 0.000907 | | loss_diff_q0 | 0.00215 | | loss_diff_q1 | 0.00113 | | loss_diff_q2 | 0.000269 | | loss_diff_q3 | 7.41e-05 | | loss_q0 | 0.00223 | | loss_q1 | 0.00113 | | loss_q2 | 0.000272 | | loss_q3 | 7.5e-05 | | param_norm | 249 | | samples | 2.22e+05 | | step | 2.78e+04 | | vb | 2.28e-05 | | vb_q0 | 7.88e-05 | | vb_q1 | 8.35e-06 | | vb_q2 | 2.44e-06 | | vb_q3 | 9.48e-07 | --------------------------- --------------------------- | grad_norm | 1.68 | | loss | 0.000955 | | loss_cal | 0.0269 | | loss_cal_q0 | 0.0237 | | loss_cal_q1 | 0.0278 | | loss_cal_q2 | 0.0284 | | loss_cal_q3 | 0.0279 | | loss_diff | 0.000921 | | loss_diff_q0 | 0.00211 | | loss_diff_q1 | 0.00109 | | loss_diff_q2 | 0.000296 | | loss_diff_q3 | 8.41e-05 | | loss_q0 | 0.00223 | | loss_q1 | 0.0011 | | loss_q2 | 0.000298 | | loss_q3 | 8.52e-05 | | param_norm | 249 | | samples | 2.23e+05 | | step | 2.79e+04 | | vb | 3.41e-05 | | vb_q0 | 0.000118 | | vb_q1 | 8.06e-06 | | vb_q2 | 2.7e-06 | | vb_q3 | 1.07e-06 | --------------------------- --------------------------- | grad_norm | 1.58 | | loss | 0.00102 | | loss_cal | 0.0267 | | loss_cal_q0 | 0.0263 | | loss_cal_q1 | 0.0247 | | loss_cal_q2 | 0.0274 | | loss_cal_q3 | 0.0287 | | loss_diff | 0.000982 | | loss_diff_q0 | 0.00239 | | loss_diff_q1 | 0.00107 | | loss_diff_q2 | 0.000292 | | loss_diff_q3 | 7.87e-05 | | loss_q0 | 0.00252 | | loss_q1 | 0.00108 | | loss_q2 | 0.000295 | | loss_q3 | 7.97e-05 | | param_norm | 249 | | samples | 2.24e+05 | | step | 2.8e+04 | | vb | 3.66e-05 | | vb_q0 | 0.00013 | | vb_q1 | 7.96e-06 | | vb_q2 | 2.62e-06 | | vb_q3 | 1.01e-06 | --------------------------- --------------------------- | grad_norm | 1.69 | | loss | 0.00112 | | loss_cal | 0.0282 | | loss_cal_q0 | 0.0263 | | loss_cal_q1 | 0.0289 | | loss_cal_q2 | 0.0308 | | loss_cal_q3 | 0.0271 | | loss_diff | 0.00105 | | loss_diff_q0 | 0.00263 | | loss_diff_q1 | 0.00114 | | loss_diff_q2 | 0.000289 | | loss_diff_q3 | 8.79e-05 | | loss_q0 | 0.00286 | | loss_q1 | 0.00115 | | loss_q2 | 0.000292 | | loss_q3 | 8.91e-05 | | param_norm | 249 | | samples | 2.25e+05 | | step | 2.81e+04 | | vb | 6.39e-05 | | vb_q0 | 0.000231 | | vb_q1 | 8.48e-06 | | vb_q2 | 2.63e-06 | | vb_q3 | 1.12e-06 | --------------------------- --------------------------- | grad_norm | 1.58 | | loss | 0.000979 | | loss_cal | 0.0268 | | loss_cal_q0 | 0.0281 | | loss_cal_q1 | 0.0258 | | loss_cal_q2 | 0.0254 | | loss_cal_q3 | 0.0284 | | loss_diff | 0.000934 | | loss_diff_q0 | 0.00239 | | loss_diff_q1 | 0.00111 | | loss_diff_q2 | 0.000289 | | loss_diff_q3 | 7.32e-05 | | loss_q0 | 0.00258 | | loss_q1 | 0.00112 | | loss_q2 | 0.000292 | | loss_q3 | 7.41e-05 | | param_norm | 249 | | samples | 2.26e+05 | | step | 2.82e+04 | | vb | 4.56e-05 | | vb_q0 | 0.000185 | | vb_q1 | 8.26e-06 | | vb_q2 | 2.58e-06 | | vb_q3 | 9.28e-07 | --------------------------- --------------------------- | grad_norm | 1.54 | | loss | 0.0226 | | loss_cal | 0.0256 | | loss_cal_q0 | 0.0253 | | loss_cal_q1 | 0.0237 | | loss_cal_q2 | 0.0268 | | loss_cal_q3 | 0.0268 | | loss_diff | 0.00472 | | loss_diff_q0 | 0.016 | | loss_diff_q1 | 0.00184 | | loss_diff_q2 | 0.000725 | | loss_diff_q3 | 0.000343 | | loss_q0 | 0.0883 | | loss_q1 | 0.00185 | | loss_q2 | 0.000732 | | loss_q3 | 0.000348 | | param_norm | 249 | | samples | 2.26e+05 | | step | 2.83e+04 | | vb | 0.0179 | | vb_q0 | 0.0724 | | vb_q1 | 1.37e-05 | | vb_q2 | 6.59e-06 | | vb_q3 | 4.4e-06 | --------------------------- --------------------------- | grad_norm | 1.58 | | loss | 0.00124 | | loss_cal | 0.0272 | | loss_cal_q0 | 0.0297 | | loss_cal_q1 | 0.025 | | loss_cal_q2 | 0.0263 | | loss_cal_q3 | 0.0268 | | loss_diff | 0.0012 | | loss_diff_q0 | 0.00305 | | loss_diff_q1 | 0.00131 | | loss_diff_q2 | 0.00032 | | loss_diff_q3 | 0.000113 | | loss_q0 | 0.00319 | | loss_q1 | 0.00132 | | loss_q2 | 0.000323 | | loss_q3 | 0.000115 | | param_norm | 249 | | samples | 2.27e+05 | | step | 2.84e+04 | | vb | 3.72e-05 | | vb_q0 | 0.000135 | | vb_q1 | 9.78e-06 | | vb_q2 | 2.94e-06 | | vb_q3 | 1.43e-06 | --------------------------- --------------------------- | grad_norm | 1.65 | | loss | 0.00199 | | loss_cal | 0.0264 | | loss_cal_q0 | 0.0269 | | loss_cal_q1 | 0.0278 | | loss_cal_q2 | 0.0262 | | loss_cal_q3 | 0.0246 | | loss_diff | 0.00128 | | loss_diff_q0 | 0.00359 | | loss_diff_q1 | 0.00121 | | loss_diff_q2 | 0.000294 | | loss_diff_q3 | 8.79e-05 | | loss_q0 | 0.0065 | | loss_q1 | 0.00122 | | loss_q2 | 0.000296 | | loss_q3 | 8.9e-05 | | param_norm | 249 | | samples | 2.28e+05 | | step | 2.85e+04 | | vb | 0.000708 | | vb_q0 | 0.0029 | | vb_q1 | 9.06e-06 | | vb_q2 | 2.68e-06 | | vb_q3 | 1.11e-06 | --------------------------- --------------------------- | grad_norm | 1.52 | | loss | 0.000983 | | loss_cal | 0.0249 | | loss_cal_q0 | 0.0261 | | loss_cal_q1 | 0.0253 | | loss_cal_q2 | 0.0248 | | loss_cal_q3 | 0.0239 | | loss_diff | 0.000958 | | loss_diff_q0 | 0.00242 | | loss_diff_q1 | 0.00105 | | loss_diff_q2 | 0.000282 | | loss_diff_q3 | 7.21e-05 | | loss_q0 | 0.00251 | | loss_q1 | 0.00106 | | loss_q2 | 0.000284 | | loss_q3 | 7.3e-05 | | param_norm | 249 | | samples | 2.29e+05 | | step | 2.86e+04 | | vb | 2.56e-05 | | vb_q0 | 9.08e-05 | | vb_q1 | 7.88e-06 | | vb_q2 | 2.55e-06 | | vb_q3 | 9.18e-07 | --------------------------- --------------------------- | grad_norm | 1.5 | | loss | 0.00111 | | loss_cal | 0.0263 | | loss_cal_q0 | 0.0297 | | loss_cal_q1 | 0.0265 | | loss_cal_q2 | 0.0229 | | loss_cal_q3 | 0.0253 | | loss_diff | 0.00107 | | loss_diff_q0 | 0.0025 | | loss_diff_q1 | 0.00121 | | loss_diff_q2 | 0.000271 | | loss_diff_q3 | 7.52e-05 | | loss_q0 | 0.00263 | | loss_q1 | 0.00122 | | loss_q2 | 0.000273 | | loss_q3 | 7.62e-05 | | param_norm | 249 | | samples | 2.3e+05 | | step | 2.87e+04 | | vb | 3.96e-05 | | vb_q0 | 0.000134 | | vb_q1 | 9.02e-06 | | vb_q2 | 2.47e-06 | | vb_q3 | 9.6e-07 | --------------------------- --------------------------- | grad_norm | 1.44 | | loss | 0.00148 | | loss_cal | 0.0258 | | loss_cal_q0 | 0.0246 | | loss_cal_q1 | 0.0271 | | loss_cal_q2 | 0.0235 | | loss_cal_q3 | 0.0278 | | loss_diff | 0.00108 | | loss_diff_q0 | 0.00283 | | loss_diff_q1 | 0.00108 | | loss_diff_q2 | 0.000293 | | loss_diff_q3 | 8.41e-05 | | loss_q0 | 0.0044 | | loss_q1 | 0.00109 | | loss_q2 | 0.000296 | | loss_q3 | 8.52e-05 | | param_norm | 249 | | samples | 2.3e+05 | | step | 2.88e+04 | | vb | 0.000395 | | vb_q0 | 0.00158 | | vb_q1 | 8.03e-06 | | vb_q2 | 2.64e-06 | | vb_q3 | 1.07e-06 | --------------------------- --------------------------- | grad_norm | 1.42 | | loss | 0.00101 | | loss_cal | 0.0259 | | loss_cal_q0 | 0.0261 | | loss_cal_q1 | 0.0235 | | loss_cal_q2 | 0.0282 | | loss_cal_q3 | 0.0259 | | loss_diff | 0.000955 | | loss_diff_q0 | 0.00229 | | loss_diff_q1 | 0.00106 | | loss_diff_q2 | 0.000262 | | loss_diff_q3 | 6.64e-05 | | loss_q0 | 0.00247 | | loss_q1 | 0.00107 | | loss_q2 | 0.000264 | | loss_q3 | 6.72e-05 | | param_norm | 249 | | samples | 2.31e+05 | | step | 2.89e+04 | | vb | 5.05e-05 | | vb_q0 | 0.000178 | | vb_q1 | 7.86e-06 | | vb_q2 | 2.38e-06 | | vb_q3 | 8.28e-07 | --------------------------- --------------------------- | grad_norm | 1.45 | | loss | 0.000888 | | loss_cal | 0.0239 | | loss_cal_q0 | 0.0255 | | loss_cal_q1 | 0.0253 | | loss_cal_q2 | 0.0214 | | loss_cal_q3 | 0.0239 | | loss_diff | 0.000867 | | loss_diff_q0 | 0.00216 | | loss_diff_q1 | 0.00102 | | loss_diff_q2 | 0.00025 | | loss_diff_q3 | 7.45e-05 | | loss_q0 | 0.00223 | | loss_q1 | 0.00103 | | loss_q2 | 0.000252 | | loss_q3 | 7.55e-05 | | param_norm | 250 | | samples | 2.32e+05 | | step | 2.9e+04 | | vb | 2.14e-05 | | vb_q0 | 7.74e-05 | | vb_q1 | 7.64e-06 | | vb_q2 | 2.25e-06 | | vb_q3 | 9.34e-07 | --------------------------- --------------------------- | grad_norm | 1.3 | | loss | 0.000966 | | loss_cal | 0.0249 | | loss_cal_q0 | 0.0242 | | loss_cal_q1 | 0.024 | | loss_cal_q2 | 0.0254 | | loss_cal_q3 | 0.0261 | | loss_diff | 0.000938 | | loss_diff_q0 | 0.00233 | | loss_diff_q1 | 0.00101 | | loss_diff_q2 | 0.000276 | | loss_diff_q3 | 6.67e-05 | | loss_q0 | 0.00243 | | loss_q1 | 0.00102 | | loss_q2 | 0.000278 | | loss_q3 | 6.75e-05 | | param_norm | 250 | | samples | 2.33e+05 | | step | 2.91e+04 | | vb | 2.81e-05 | | vb_q0 | 0.000101 | | vb_q1 | 7.55e-06 | | vb_q2 | 2.5e-06 | | vb_q3 | 8.4e-07 | --------------------------- --------------------------- | grad_norm | 1.55 | | loss | 0.00108 | | loss_cal | 0.0258 | | loss_cal_q0 | 0.0253 | | loss_cal_q1 | 0.0248 | | loss_cal_q2 | 0.0276 | | loss_cal_q3 | 0.0254 | | loss_diff | 0.000998 | | loss_diff_q0 | 0.00262 | | loss_diff_q1 | 0.00108 | | loss_diff_q2 | 0.00026 | | loss_diff_q3 | 6.29e-05 | | loss_q0 | 0.00294 | | loss_q1 | 0.00109 | | loss_q2 | 0.000263 | | loss_q3 | 6.37e-05 | | param_norm | 250 | | samples | 2.34e+05 | | step | 2.92e+04 | | vb | 7.77e-05 | | vb_q0 | 0.000313 | | vb_q1 | 8.06e-06 | | vb_q2 | 2.37e-06 | | vb_q3 | 7.94e-07 | --------------------------- --------------------------- | grad_norm | 1.51 | | loss | 0.00099 | | loss_cal | 0.025 | | loss_cal_q0 | 0.0223 | | loss_cal_q1 | 0.0271 | | loss_cal_q2 | 0.0245 | | loss_cal_q3 | 0.0272 | | loss_diff | 0.000965 | | loss_diff_q0 | 0.00223 | | loss_diff_q1 | 0.00111 | | loss_diff_q2 | 0.000251 | | loss_diff_q3 | 6.33e-05 | | loss_q0 | 0.00231 | | loss_q1 | 0.00112 | | loss_q2 | 0.000254 | | loss_q3 | 6.41e-05 | | param_norm | 250 | | samples | 2.34e+05 | | step | 2.93e+04 | | vb | 2.46e-05 | | vb_q0 | 7.95e-05 | | vb_q1 | 8.26e-06 | | vb_q2 | 2.27e-06 | | vb_q3 | 8.06e-07 | --------------------------- --------------------------- | grad_norm | 1.59 | | loss | 0.000914 | | loss_cal | 0.0259 | | loss_cal_q0 | 0.0261 | | loss_cal_q1 | 0.024 | | loss_cal_q2 | 0.0263 | | loss_cal_q3 | 0.0272 | | loss_diff | 0.00089 | | loss_diff_q0 | 0.00217 | | loss_diff_q1 | 0.00105 | | loss_diff_q2 | 0.000285 | | loss_diff_q3 | 7.06e-05 | | loss_q0 | 0.00225 | | loss_q1 | 0.00106 | | loss_q2 | 0.000287 | | loss_q3 | 7.15e-05 | | param_norm | 250 | | samples | 2.35e+05 | | step | 2.94e+04 | | vb | 2.38e-05 | | vb_q0 | 8.33e-05 | | vb_q1 | 7.83e-06 | | vb_q2 | 2.56e-06 | | vb_q3 | 8.97e-07 | --------------------------- --------------------------- | grad_norm | 1.49 | | loss | 0.00257 | | loss_cal | 0.0253 | | loss_cal_q0 | 0.0231 | | loss_cal_q1 | 0.0263 | | loss_cal_q2 | 0.0255 | | loss_cal_q3 | 0.0263 | | loss_diff | 0.00113 | | loss_diff_q0 | 0.00317 | | loss_diff_q1 | 0.00107 | | loss_diff_q2 | 0.000285 | | loss_diff_q3 | 6.93e-05 | | loss_q0 | 0.00899 | | loss_q1 | 0.00107 | | loss_q2 | 0.000288 | | loss_q3 | 7.02e-05 | | param_norm | 250 | | samples | 2.36e+05 | | step | 2.95e+04 | | vb | 0.00144 | | vb_q0 | 0.00582 | | vb_q1 | 7.93e-06 | | vb_q2 | 2.58e-06 | | vb_q3 | 8.78e-07 | --------------------------- --------------------------- | grad_norm | 1.49 | | loss | 0.00218 | | loss_cal | 0.0252 | | loss_cal_q0 | 0.0259 | | loss_cal_q1 | 0.0234 | | loss_cal_q2 | 0.0259 | | loss_cal_q3 | 0.0258 | | loss_diff | 0.00127 | | loss_diff_q0 | 0.00363 | | loss_diff_q1 | 0.00101 | | loss_diff_q2 | 0.000298 | | loss_diff_q3 | 7.86e-05 | | loss_q0 | 0.00728 | | loss_q1 | 0.00102 | | loss_q2 | 0.000301 | | loss_q3 | 7.96e-05 | | param_norm | 250 | | samples | 2.37e+05 | | step | 2.96e+04 | | vb | 0.000907 | | vb_q0 | 0.00365 | | vb_q1 | 7.53e-06 | | vb_q2 | 2.68e-06 | | vb_q3 | 1.01e-06 | --------------------------- --------------------------- | grad_norm | 1.38 | | loss | 0.00287 | | loss_cal | 0.0244 | | loss_cal_q0 | 0.0268 | | loss_cal_q1 | 0.0218 | | loss_cal_q2 | 0.0251 | | loss_cal_q3 | 0.0235 | | loss_diff | 0.0014 | | loss_diff_q0 | 0.0039 | | loss_diff_q1 | 0.00106 | | loss_diff_q2 | 0.000295 | | loss_diff_q3 | 8.77e-05 | | loss_q0 | 0.00931 | | loss_q1 | 0.00107 | | loss_q2 | 0.000298 | | loss_q3 | 8.88e-05 | | param_norm | 250 | | samples | 2.38e+05 | | step | 2.97e+04 | | vb | 0.00147 | | vb_q0 | 0.0054 | | vb_q1 | 7.94e-06 | | vb_q2 | 2.68e-06 | | vb_q3 | 1.11e-06 | --------------------------- --------------------------- | grad_norm | 1.51 | | loss | 0.00102 | | loss_cal | 0.0251 | | loss_cal_q0 | 0.0248 | | loss_cal_q1 | 0.0241 | | loss_cal_q2 | 0.0251 | | loss_cal_q3 | 0.0254 | | loss_diff | 0.000967 | | loss_diff_q0 | 0.00266 | | loss_diff_q1 | 0.00103 | | loss_diff_q2 | 0.000286 | | loss_diff_q3 | 8.17e-05 | | loss_q0 | 0.00289 | | loss_q1 | 0.00104 | | loss_q2 | 0.000289 | | loss_q3 | 8.27e-05 | | param_norm | 250 | | samples | 2.38e+05 | | step | 2.98e+04 | | vb | 5.47e-05 | | vb_q0 | 0.000228 | | vb_q1 | 7.67e-06 | | vb_q2 | 2.58e-06 | | vb_q3 | 1.05e-06 | --------------------------- --------------------------- | grad_norm | 1.41 | | loss | 0.00137 | | loss_cal | 0.0251 | | loss_cal_q0 | 0.0258 | | loss_cal_q1 | 0.0241 | | loss_cal_q2 | 0.0258 | | loss_cal_q3 | 0.0248 | | loss_diff | 0.00103 | | loss_diff_q0 | 0.00262 | | loss_diff_q1 | 0.00107 | | loss_diff_q2 | 0.000276 | | loss_diff_q3 | 6.36e-05 | | loss_q0 | 0.00394 | | loss_q1 | 0.00108 | | loss_q2 | 0.000279 | | loss_q3 | 6.44e-05 | | param_norm | 250 | | samples | 2.39e+05 | | step | 2.99e+04 | | vb | 0.000341 | | vb_q0 | 0.00132 | | vb_q1 | 7.93e-06 | | vb_q2 | 2.53e-06 | | vb_q3 | 8.05e-07 | --------------------------- --------------------------- | grad_norm | 1.51 | | loss | 0.00139 | | loss_cal | 0.0258 | | loss_cal_q0 | 0.0283 | | loss_cal_q1 | 0.0264 | | loss_cal_q2 | 0.0239 | | loss_cal_q3 | 0.025 | | loss_diff | 0.00104 | | loss_diff_q0 | 0.00297 | | loss_diff_q1 | 0.00105 | | loss_diff_q2 | 0.000266 | | loss_diff_q3 | 7.88e-05 | | loss_q0 | 0.00448 | | loss_q1 | 0.00106 | | loss_q2 | 0.000268 | | loss_q3 | 7.99e-05 | | param_norm | 250 | | samples | 2.4e+05 | | step | 3e+04 | | vb | 0.000347 | | vb_q0 | 0.00151 | | vb_q1 | 7.8e-06 | | vb_q2 | 2.43e-06 | | vb_q3 | 1e-06 | --------------------------- saving model 0... saving model 0.9999... --------------------------- | grad_norm | 1.4 | | loss | 0.000914 | | loss_cal | 0.0241 | | loss_cal_q0 | 0.0242 | | loss_cal_q1 | 0.027 | | loss_cal_q2 | 0.0232 | | loss_cal_q3 | 0.0226 | | loss_diff | 0.00088 | | loss_diff_q0 | 0.00223 | | loss_diff_q1 | 0.00103 | | loss_diff_q2 | 0.000258 | | loss_diff_q3 | 6.92e-05 | | loss_q0 | 0.00236 | | loss_q1 | 0.00104 | | loss_q2 | 0.000261 | | loss_q3 | 7.01e-05 | | param_norm | 250 | | samples | 2.41e+05 | | step | 3.01e+04 | | vb | 3.4e-05 | | vb_q0 | 0.000127 | | vb_q1 | 7.63e-06 | | vb_q2 | 2.34e-06 | | vb_q3 | 8.92e-07 | --------------------------- --------------------------- | grad_norm | 1.9 | | loss | 0.00125 | | loss_cal | 0.0248 | | loss_cal_q0 | 0.0235 | | loss_cal_q1 | 0.0241 | | loss_cal_q2 | 0.0259 | | loss_cal_q3 | 0.0259 | | loss_diff | 0.00103 | | loss_diff_q0 | 0.0026 | | loss_diff_q1 | 0.000999 | | loss_diff_q2 | 0.000269 | | loss_diff_q3 | 7.59e-05 | | loss_q0 | 0.00342 | | loss_q1 | 0.00101 | | loss_q2 | 0.000271 | | loss_q3 | 7.68e-05 | | param_norm | 250 | | samples | 2.42e+05 | | step | 3.02e+04 | | vb | 0.000221 | | vb_q0 | 0.000815 | | vb_q1 | 7.45e-06 | | vb_q2 | 2.43e-06 | | vb_q3 | 9.5e-07 | --------------------------- --------------------------- | grad_norm | 1.44 | | loss | 0.000894 | | loss_cal | 0.0255 | | loss_cal_q0 | 0.0283 | | loss_cal_q1 | 0.0267 | | loss_cal_q2 | 0.025 | | loss_cal_q3 | 0.0219 | | loss_diff | 0.000867 | | loss_diff_q0 | 0.00209 | | loss_diff_q1 | 0.00105 | | loss_diff_q2 | 0.00025 | | loss_diff_q3 | 6.69e-05 | | loss_q0 | 0.00218 | | loss_q1 | 0.00105 | | loss_q2 | 0.000253 | | loss_q3 | 6.77e-05 | | param_norm | 250 | | samples | 2.42e+05 | | step | 3.03e+04 | | vb | 2.71e-05 | | vb_q0 | 9.56e-05 | | vb_q1 | 7.78e-06 | | vb_q2 | 2.27e-06 | | vb_q3 | 8.39e-07 | --------------------------- --------------------------- | grad_norm | 1.45 | | loss | 0.00101 | | loss_cal | 0.0249 | | loss_cal_q0 | 0.0256 | | loss_cal_q1 | 0.0248 | | loss_cal_q2 | 0.0263 | | loss_cal_q3 | 0.023 | | loss_diff | 0.000963 | | loss_diff_q0 | 0.00239 | | loss_diff_q1 | 0.00113 | | loss_diff_q2 | 0.00027 | | loss_diff_q3 | 8.32e-05 | | loss_q0 | 0.00258 | | loss_q1 | 0.00114 | | loss_q2 | 0.000273 | | loss_q3 | 8.42e-05 | | param_norm | 250 | | samples | 2.43e+05 | | step | 3.04e+04 | | vb | 4.98e-05 | | vb_q0 | 0.00019 | | vb_q1 | 8.42e-06 | | vb_q2 | 2.46e-06 | | vb_q3 | 1.05e-06 | --------------------------- --------------------------- | grad_norm | 1.46 | | loss | 0.00133 | | loss_cal | 0.0252 | | loss_cal_q0 | 0.0246 | | loss_cal_q1 | 0.0299 | | loss_cal_q2 | 0.0208 | | loss_cal_q3 | 0.0258 | | loss_diff | 0.00098 | | loss_diff_q0 | 0.00242 | | loss_diff_q1 | 0.00107 | | loss_diff_q2 | 0.00026 | | loss_diff_q3 | 7.01e-05 | | loss_q0 | 0.00375 | | loss_q1 | 0.00108 | | loss_q2 | 0.000263 | | loss_q3 | 7.1e-05 | | param_norm | 250 | | samples | 2.44e+05 | | step | 3.05e+04 | | vb | 0.000353 | | vb_q0 | 0.00133 | | vb_q1 | 7.93e-06 | | vb_q2 | 2.35e-06 | | vb_q3 | 9e-07 | --------------------------- --------------------------- | grad_norm | 1.41 | | loss | 0.000884 | | loss_cal | 0.0241 | | loss_cal_q0 | 0.0229 | | loss_cal_q1 | 0.0245 | | loss_cal_q2 | 0.0239 | | loss_cal_q3 | 0.0252 | | loss_diff | 0.00086 | | loss_diff_q0 | 0.00217 | | loss_diff_q1 | 0.00108 | | loss_diff_q2 | 0.000256 | | loss_diff_q3 | 7.07e-05 | | loss_q0 | 0.00226 | | loss_q1 | 0.00109 | | loss_q2 | 0.000258 | | loss_q3 | 7.16e-05 | | param_norm | 250 | | samples | 2.45e+05 | | step | 3.06e+04 | | vb | 2.37e-05 | | vb_q0 | 9.15e-05 | | vb_q1 | 8.02e-06 | | vb_q2 | 2.33e-06 | | vb_q3 | 9.05e-07 | --------------------------- --------------------------- | grad_norm | 1.79 | | loss | 0.00138 | | loss_cal | 0.0249 | | loss_cal_q0 | 0.0246 | | loss_cal_q1 | 0.0231 | | loss_cal_q2 | 0.0259 | | loss_cal_q3 | 0.0264 | | loss_diff | 0.00103 | | loss_diff_q0 | 0.00243 | | loss_diff_q1 | 0.00114 | | loss_diff_q2 | 0.000256 | | loss_diff_q3 | 6.87e-05 | | loss_q0 | 0.00369 | | loss_q1 | 0.00115 | | loss_q2 | 0.000259 | | loss_q3 | 6.96e-05 | | param_norm | 250 | | samples | 2.46e+05 | | step | 3.07e+04 | | vb | 0.000348 | | vb_q0 | 0.00125 | | vb_q1 | 8.45e-06 | | vb_q2 | 2.34e-06 | | vb_q3 | 8.86e-07 | --------------------------- --------------------------- | grad_norm | 1.38 | | loss | 0.00115 | | loss_cal | 0.024 | | loss_cal_q0 | 0.0253 | | loss_cal_q1 | 0.0253 | | loss_cal_q2 | 0.022 | | loss_cal_q3 | 0.0229 | | loss_diff | 0.0011 | | loss_diff_q0 | 0.00274 | | loss_diff_q1 | 0.00114 | | loss_diff_q2 | 0.000272 | | loss_diff_q3 | 7.47e-05 | | loss_q0 | 0.00292 | | loss_q1 | 0.00115 | | loss_q2 | 0.000275 | | loss_q3 | 7.57e-05 | | param_norm | 250 | | samples | 2.46e+05 | | step | 3.08e+04 | | vb | 5.01e-05 | | vb_q0 | 0.000177 | | vb_q1 | 8.47e-06 | | vb_q2 | 2.44e-06 | | vb_q3 | 9.67e-07 | --------------------------- --------------------------- | grad_norm | 1.46 | | loss | 0.00153 | | loss_cal | 0.0252 | | loss_cal_q0 | 0.0254 | | loss_cal_q1 | 0.0256 | | loss_cal_q2 | 0.0238 | | loss_cal_q3 | 0.0262 | | loss_diff | 0.00108 | | loss_diff_q0 | 0.00297 | | loss_diff_q1 | 0.00105 | | loss_diff_q2 | 0.000296 | | loss_diff_q3 | 8.58e-05 | | loss_q0 | 0.0048 | | loss_q1 | 0.00105 | | loss_q2 | 0.000298 | | loss_q3 | 8.69e-05 | | param_norm | 250 | | samples | 2.47e+05 | | step | 3.09e+04 | | vb | 0.000453 | | vb_q0 | 0.00183 | | vb_q1 | 7.8e-06 | | vb_q2 | 2.65e-06 | | vb_q3 | 1.09e-06 | --------------------------- --------------------------- | grad_norm | 1.4 | | loss | 0.00124 | | loss_cal | 0.0246 | | loss_cal_q0 | 0.0273 | | loss_cal_q1 | 0.0254 | | loss_cal_q2 | 0.0233 | | loss_cal_q3 | 0.022 | | loss_diff | 0.00116 | | loss_diff_q0 | 0.00294 | | loss_diff_q1 | 0.00107 | | loss_diff_q2 | 0.00029 | | loss_diff_q3 | 7.8e-05 | | loss_q0 | 0.00323 | | loss_q1 | 0.00108 | | loss_q2 | 0.000293 | | loss_q3 | 7.9e-05 | | param_norm | 250 | | samples | 2.48e+05 | | step | 3.1e+04 | | vb | 8.01e-05 | | vb_q0 | 0.000286 | | vb_q1 | 7.96e-06 | | vb_q2 | 2.61e-06 | | vb_q3 | 9.91e-07 | --------------------------- --------------------------- | grad_norm | 1.4 | | loss | 0.00111 | | loss_cal | 0.0236 | | loss_cal_q0 | 0.0219 | | loss_cal_q1 | 0.0253 | | loss_cal_q2 | 0.0244 | | loss_cal_q3 | 0.0226 | | loss_diff | 0.00105 | | loss_diff_q0 | 0.00251 | | loss_diff_q1 | 0.00116 | | loss_diff_q2 | 0.00027 | | loss_diff_q3 | 7.7e-05 | | loss_q0 | 0.00271 | | loss_q1 | 0.00117 | | loss_q2 | 0.000273 | | loss_q3 | 7.8e-05 | | param_norm | 251 | | samples | 2.49e+05 | | step | 3.11e+04 | | vb | 5.96e-05 | | vb_q0 | 0.000208 | | vb_q1 | 8.6e-06 | | vb_q2 | 2.45e-06 | | vb_q3 | 9.72e-07 | --------------------------- --------------------------- | grad_norm | 1.37 | | loss | 0.00313 | | loss_cal | 0.024 | | loss_cal_q0 | 0.0231 | | loss_cal_q1 | 0.0227 | | loss_cal_q2 | 0.0248 | | loss_cal_q3 | 0.0255 | | loss_diff | 0.00139 | | loss_diff_q0 | 0.00464 | | loss_diff_q1 | 0.00112 | | loss_diff_q2 | 0.000324 | | loss_diff_q3 | 9.51e-05 | | loss_q0 | 0.013 | | loss_q1 | 0.00113 | | loss_q2 | 0.000327 | | loss_q3 | 9.64e-05 | | param_norm | 251 | | samples | 2.5e+05 | | step | 3.12e+04 | | vb | 0.00174 | | vb_q0 | 0.00838 | | vb_q1 | 8.31e-06 | | vb_q2 | 2.9e-06 | | vb_q3 | 1.22e-06 | --------------------------- --------------------------- | grad_norm | 1.33 | | loss | 0.00145 | | loss_cal | 0.023 | | loss_cal_q0 | 0.0217 | | loss_cal_q1 | 0.0244 | | loss_cal_q2 | 0.0224 | | loss_cal_q3 | 0.0238 | | loss_diff | 0.00134 | | loss_diff_q0 | 0.00363 | | loss_diff_q1 | 0.00124 | | loss_diff_q2 | 0.000315 | | loss_diff_q3 | 0.000117 | | loss_q0 | 0.00405 | | loss_q1 | 0.00125 | | loss_q2 | 0.000318 | | loss_q3 | 0.000119 | | param_norm | 251 | | samples | 2.5e+05 | | step | 3.13e+04 | | vb | 0.000109 | | vb_q0 | 0.000417 | | vb_q1 | 9.3e-06 | | vb_q2 | 2.86e-06 | | vb_q3 | 1.48e-06 | --------------------------- --------------------------- | grad_norm | 1.72 | | loss | 0.000898 | | loss_cal | 0.0246 | | loss_cal_q0 | 0.0228 | | loss_cal_q1 | 0.0255 | | loss_cal_q2 | 0.0269 | | loss_cal_q3 | 0.0235 | | loss_diff | 0.000877 | | loss_diff_q0 | 0.00209 | | loss_diff_q1 | 0.00103 | | loss_diff_q2 | 0.000279 | | loss_diff_q3 | 6.73e-05 | | loss_q0 | 0.00217 | | loss_q1 | 0.00104 | | loss_q2 | 0.000282 | | loss_q3 | 6.82e-05 | | param_norm | 251 | | samples | 2.51e+05 | | step | 3.14e+04 | | vb | 2.15e-05 | | vb_q0 | 7.32e-05 | | vb_q1 | 7.73e-06 | | vb_q2 | 2.5e-06 | | vb_q3 | 8.51e-07 | --------------------------- --------------------------- | grad_norm | 1.43 | | loss | 0.00201 | | loss_cal | 0.0246 | | loss_cal_q0 | 0.0253 | | loss_cal_q1 | 0.0267 | | loss_cal_q2 | 0.0245 | | loss_cal_q3 | 0.0213 | | loss_diff | 0.00115 | | loss_diff_q0 | 0.0031 | | loss_diff_q1 | 0.00108 | | loss_diff_q2 | 0.000307 | | loss_diff_q3 | 8.64e-05 | | loss_q0 | 0.00666 | | loss_q1 | 0.00108 | | loss_q2 | 0.00031 | | loss_q3 | 8.75e-05 | | param_norm | 251 | | samples | 2.52e+05 | | step | 3.15e+04 | | vb | 0.000865 | | vb_q0 | 0.00355 | | vb_q1 | 8e-06 | | vb_q2 | 2.76e-06 | | vb_q3 | 1.09e-06 | --------------------------- --------------------------- | grad_norm | 1.35 | | loss | 0.00105 | | loss_cal | 0.023 | | loss_cal_q0 | 0.0229 | | loss_cal_q1 | 0.0233 | | loss_cal_q2 | 0.0241 | | loss_cal_q3 | 0.0222 | | loss_diff | 0.000981 | | loss_diff_q0 | 0.0025 | | loss_diff_q1 | 0.0011 | | loss_diff_q2 | 0.000278 | | loss_diff_q3 | 7.49e-05 | | loss_q0 | 0.00277 | | loss_q1 | 0.00111 | | loss_q2 | 0.00028 | | loss_q3 | 7.58e-05 | | param_norm | 251 | | samples | 2.53e+05 | | step | 3.16e+04 | | vb | 6.97e-05 | | vb_q0 | 0.000267 | | vb_q1 | 8.31e-06 | | vb_q2 | 2.52e-06 | | vb_q3 | 9.46e-07 | --------------------------- --------------------------- | grad_norm | 1.29 | | loss | 0.000914 | | loss_cal | 0.0239 | | loss_cal_q0 | 0.0257 | | loss_cal_q1 | 0.0243 | | loss_cal_q2 | 0.0224 | | loss_cal_q3 | 0.0231 | | loss_diff | 0.000891 | | loss_diff_q0 | 0.00203 | | loss_diff_q1 | 0.00112 | | loss_diff_q2 | 0.000256 | | loss_diff_q3 | 7.33e-05 | | loss_q0 | 0.0021 | | loss_q1 | 0.00113 | | loss_q2 | 0.000259 | | loss_q3 | 7.42e-05 | | param_norm | 251 | | samples | 2.54e+05 | | step | 3.17e+04 | | vb | 2.3e-05 | | vb_q0 | 7.44e-05 | | vb_q1 | 8.37e-06 | | vb_q2 | 2.33e-06 | | vb_q3 | 9.28e-07 | --------------------------- --------------------------- | grad_norm | 1.46 | | loss | 0.00102 | | loss_cal | 0.0241 | | loss_cal_q0 | 0.0266 | | loss_cal_q1 | 0.024 | | loss_cal_q2 | 0.0235 | | loss_cal_q3 | 0.0219 | | loss_diff | 0.00098 | | loss_diff_q0 | 0.00235 | | loss_diff_q1 | 0.00114 | | loss_diff_q2 | 0.000286 | | loss_diff_q3 | 7.41e-05 | | loss_q0 | 0.0025 | | loss_q1 | 0.00115 | | loss_q2 | 0.000288 | | loss_q3 | 7.51e-05 | | param_norm | 251 | | samples | 2.54e+05 | | step | 3.18e+04 | | vb | 4.3e-05 | | vb_q0 | 0.000148 | | vb_q1 | 8.51e-06 | | vb_q2 | 2.56e-06 | | vb_q3 | 9.41e-07 | --------------------------- --------------------------- | grad_norm | 1.82 | | loss | 0.00112 | | loss_cal | 0.0257 | | loss_cal_q0 | 0.0244 | | loss_cal_q1 | 0.0268 | | loss_cal_q2 | 0.0255 | | loss_cal_q3 | 0.026 | | loss_diff | 0.000906 | | loss_diff_q0 | 0.00244 | | loss_diff_q1 | 0.000989 | | loss_diff_q2 | 0.000261 | | loss_diff_q3 | 7.39e-05 | | loss_q0 | 0.00338 | | loss_q1 | 0.000997 | | loss_q2 | 0.000264 | | loss_q3 | 7.48e-05 | | param_norm | 251 | | samples | 2.55e+05 | | step | 3.19e+04 | | vb | 0.000219 | | vb_q0 | 0.000943 | | vb_q1 | 7.34e-06 | | vb_q2 | 2.39e-06 | | vb_q3 | 9.32e-07 | --------------------------- --------------------------- | grad_norm | 1.36 | | loss | 0.000948 | | loss_cal | 0.0233 | | loss_cal_q0 | 0.0224 | | loss_cal_q1 | 0.0229 | | loss_cal_q2 | 0.0246 | | loss_cal_q3 | 0.0233 | | loss_diff | 0.00091 | | loss_diff_q0 | 0.00212 | | loss_diff_q1 | 0.00103 | | loss_diff_q2 | 0.00027 | | loss_diff_q3 | 7.18e-05 | | loss_q0 | 0.00225 | | loss_q1 | 0.00104 | | loss_q2 | 0.000272 | | loss_q3 | 7.27e-05 | | param_norm | 251 | | samples | 2.56e+05 | | step | 3.2e+04 | | vb | 3.81e-05 | | vb_q0 | 0.000131 | | vb_q1 | 7.67e-06 | | vb_q2 | 2.42e-06 | | vb_q3 | 9.12e-07 | --------------------------- --------------------------- | grad_norm | 1.37 | | loss | 0.00113 | | loss_cal | 0.0251 | | loss_cal_q0 | 0.0232 | | loss_cal_q1 | 0.0243 | | loss_cal_q2 | 0.024 | | loss_cal_q3 | 0.0259 | | loss_diff | 0.000961 | | loss_diff_q0 | 0.00237 | | loss_diff_q1 | 0.00106 | | loss_diff_q2 | 0.000269 | | loss_diff_q3 | 7.21e-05 | | loss_q0 | 0.00301 | | loss_q1 | 0.00106 | | loss_q2 | 0.000271 | | loss_q3 | 7.3e-05 | | param_norm | 251 | | samples | 2.57e+05 | | step | 3.21e+04 | | vb | 0.000169 | | vb_q0 | 0.000633 | | vb_q1 | 7.87e-06 | | vb_q2 | 2.42e-06 | | vb_q3 | 9.18e-07 | --------------------------- --------------------------- | grad_norm | 1.32 | | loss | 0.00089 | | loss_cal | 0.0232 | | loss_cal_q0 | 0.0244 | | loss_cal_q1 | 0.0246 | | loss_cal_q2 | 0.0213 | | loss_cal_q3 | 0.0225 | | loss_diff | 0.00087 | | loss_diff_q0 | 0.00206 | | loss_diff_q1 | 0.00105 | | loss_diff_q2 | 0.000243 | | loss_diff_q3 | 6.47e-05 | | loss_q0 | 0.00212 | | loss_q1 | 0.00106 | | loss_q2 | 0.000246 | | loss_q3 | 6.55e-05 | | param_norm | 251 | | samples | 2.58e+05 | | step | 3.22e+04 | | vb | 1.92e-05 | | vb_q0 | 6.41e-05 | | vb_q1 | 7.81e-06 | | vb_q2 | 2.21e-06 | | vb_q3 | 8.19e-07 | --------------------------- --------------------------- | grad_norm | 1.42 | | loss | 0.00128 | | loss_cal | 0.0235 | | loss_cal_q0 | 0.0223 | | loss_cal_q1 | 0.0249 | | loss_cal_q2 | 0.0268 | | loss_cal_q3 | 0.0212 | | loss_diff | 0.000957 | | loss_diff_q0 | 0.00227 | | loss_diff_q1 | 0.00107 | | loss_diff_q2 | 0.000243 | | loss_diff_q3 | 6.08e-05 | | loss_q0 | 0.00338 | | loss_q1 | 0.00108 | | loss_q2 | 0.000245 | | loss_q3 | 6.15e-05 | | param_norm | 251 | | samples | 2.58e+05 | | step | 3.23e+04 | | vb | 0.000318 | | vb_q0 | 0.00112 | | vb_q1 | 7.98e-06 | | vb_q2 | 2.22e-06 | | vb_q3 | 7.68e-07 | --------------------------- --------------------------- | grad_norm | 1.46 | | loss | 0.00259 | | loss_cal | 0.0237 | | loss_cal_q0 | 0.0251 | | loss_cal_q1 | 0.0245 | | loss_cal_q2 | 0.0238 | | loss_cal_q3 | 0.0216 | | loss_diff | 0.00126 | | loss_diff_q0 | 0.00345 | | loss_diff_q1 | 0.00113 | | loss_diff_q2 | 0.000332 | | loss_diff_q3 | 0.000144 | | loss_q0 | 0.00879 | | loss_q1 | 0.00114 | | loss_q2 | 0.000335 | | loss_q3 | 0.000146 | | param_norm | 251 | | samples | 2.59e+05 | | step | 3.24e+04 | | vb | 0.00133 | | vb_q0 | 0.00534 | | vb_q1 | 8.4e-06 | | vb_q2 | 3.01e-06 | | vb_q3 | 1.83e-06 | --------------------------- --------------------------- | grad_norm | 1.36 | | loss | 0.00219 | | loss_cal | 0.0228 | | loss_cal_q0 | 0.0229 | | loss_cal_q1 | 0.0225 | | loss_cal_q2 | 0.0223 | | loss_cal_q3 | 0.0235 | | loss_diff | 0.00116 | | loss_diff_q0 | 0.00306 | | loss_diff_q1 | 0.00108 | | loss_diff_q2 | 0.000283 | | loss_diff_q3 | 0.000102 | | loss_q0 | 0.00692 | | loss_q1 | 0.00108 | | loss_q2 | 0.000285 | | loss_q3 | 0.000103 | | param_norm | 251 | | samples | 2.6e+05 | | step | 3.25e+04 | | vb | 0.00102 | | vb_q0 | 0.00385 | | vb_q1 | 8.13e-06 | | vb_q2 | 2.59e-06 | | vb_q3 | 1.29e-06 | --------------------------- --------------------------- | grad_norm | 1.34 | | loss | 0.000891 | | loss_cal | 0.0238 | | loss_cal_q0 | 0.0216 | | loss_cal_q1 | 0.0263 | | loss_cal_q2 | 0.0234 | | loss_cal_q3 | 0.0235 | | loss_diff | 0.000874 | | loss_diff_q0 | 0.00227 | | loss_diff_q1 | 0.00107 | | loss_diff_q2 | 0.000297 | | loss_diff_q3 | 7.71e-05 | | loss_q0 | 0.00233 | | loss_q1 | 0.00108 | | loss_q2 | 0.0003 | | loss_q3 | 7.81e-05 | | param_norm | 251 | | samples | 2.61e+05 | | step | 3.26e+04 | | vb | 1.65e-05 | | vb_q0 | 5.82e-05 | | vb_q1 | 8.08e-06 | | vb_q2 | 2.67e-06 | | vb_q3 | 9.84e-07 | --------------------------- --------------------------- | grad_norm | 1.3 | | loss | 0.00156 | | loss_cal | 0.0219 | | loss_cal_q0 | 0.0198 | | loss_cal_q1 | 0.0209 | | loss_cal_q2 | 0.0236 | | loss_cal_q3 | 0.0228 | | loss_diff | 0.00116 | | loss_diff_q0 | 0.00314 | | loss_diff_q1 | 0.00111 | | loss_diff_q2 | 0.000307 | | loss_diff_q3 | 9.52e-05 | | loss_q0 | 0.00462 | | loss_q1 | 0.00112 | | loss_q2 | 0.00031 | | loss_q3 | 9.64e-05 | | param_norm | 251 | | samples | 2.62e+05 | | step | 3.27e+04 | | vb | 0.000397 | | vb_q0 | 0.00148 | | vb_q1 | 8.34e-06 | | vb_q2 | 2.77e-06 | | vb_q3 | 1.21e-06 | --------------------------- --------------------------- | grad_norm | 1.42 | | loss | 0.00141 | | loss_cal | 0.0236 | | loss_cal_q0 | 0.0254 | | loss_cal_q1 | 0.0227 | | loss_cal_q2 | 0.0223 | | loss_cal_q3 | 0.0239 | | loss_diff | 0.00115 | | loss_diff_q0 | 0.00316 | | loss_diff_q1 | 0.00101 | | loss_diff_q2 | 0.000302 | | loss_diff_q3 | 0.000106 | | loss_q0 | 0.00418 | | loss_q1 | 0.00101 | | loss_q2 | 0.000305 | | loss_q3 | 0.000107 | | param_norm | 251 | | samples | 2.62e+05 | | step | 3.28e+04 | | vb | 0.000258 | | vb_q0 | 0.00102 | | vb_q1 | 7.57e-06 | | vb_q2 | 2.71e-06 | | vb_q3 | 1.35e-06 | --------------------------- --------------------------- | grad_norm | 1.42 | | loss | 0.00133 | | loss_cal | 0.0234 | | loss_cal_q0 | 0.023 | | loss_cal_q1 | 0.0224 | | loss_cal_q2 | 0.0237 | | loss_cal_q3 | 0.0235 | | loss_diff | 0.00105 | | loss_diff_q0 | 0.00272 | | loss_diff_q1 | 0.00107 | | loss_diff_q2 | 0.000274 | | loss_diff_q3 | 7.08e-05 | | loss_q0 | 0.00381 | | loss_q1 | 0.00108 | | loss_q2 | 0.000276 | | loss_q3 | 7.17e-05 | | param_norm | 251 | | samples | 2.63e+05 | | step | 3.29e+04 | | vb | 0.000284 | | vb_q0 | 0.00109 | | vb_q1 | 8.07e-06 | | vb_q2 | 2.47e-06 | | vb_q3 | 9.03e-07 | --------------------------- --------------------------- | grad_norm | 1.41 | | loss | 0.00121 | | loss_cal | 0.0239 | | loss_cal_q0 | 0.0234 | | loss_cal_q1 | 0.0241 | | loss_cal_q2 | 0.0233 | | loss_cal_q3 | 0.0246 | | loss_diff | 0.001 | | loss_diff_q0 | 0.00258 | | loss_diff_q1 | 0.00106 | | loss_diff_q2 | 0.000264 | | loss_diff_q3 | 8.12e-05 | | loss_q0 | 0.00339 | | loss_q1 | 0.00107 | | loss_q2 | 0.000267 | | loss_q3 | 8.23e-05 | | param_norm | 252 | | samples | 2.64e+05 | | step | 3.3e+04 | | vb | 0.000207 | | vb_q0 | 0.000807 | | vb_q1 | 7.95e-06 | | vb_q2 | 2.4e-06 | | vb_q3 | 1.04e-06 | --------------------------- --------------------------- | grad_norm | 1.47 | | loss | 0.00211 | | loss_cal | 0.0233 | | loss_cal_q0 | 0.0228 | | loss_cal_q1 | 0.0235 | | loss_cal_q2 | 0.0239 | | loss_cal_q3 | 0.0231 | | loss_diff | 0.00134 | | loss_diff_q0 | 0.00333 | | loss_diff_q1 | 0.00109 | | loss_diff_q2 | 0.000324 | | loss_diff_q3 | 0.000102 | | loss_q0 | 0.00603 | | loss_q1 | 0.0011 | | loss_q2 | 0.000327 | | loss_q3 | 0.000104 | | param_norm | 252 | | samples | 2.65e+05 | | step | 3.31e+04 | | vb | 0.000777 | | vb_q0 | 0.0027 | | vb_q1 | 8.19e-06 | | vb_q2 | 2.97e-06 | | vb_q3 | 1.32e-06 | --------------------------- --------------------------- | grad_norm | 1.25 | | loss | 0.00117 | | loss_cal | 0.0225 | | loss_cal_q0 | 0.0231 | | loss_cal_q1 | 0.0224 | | loss_cal_q2 | 0.0235 | | loss_cal_q3 | 0.0211 | | loss_diff | 0.00112 | | loss_diff_q0 | 0.00303 | | loss_diff_q1 | 0.00104 | | loss_diff_q2 | 0.00029 | | loss_diff_q3 | 0.00011 | | loss_q0 | 0.0032 | | loss_q1 | 0.00105 | | loss_q2 | 0.000292 | | loss_q3 | 0.000111 | | param_norm | 252 | | samples | 2.66e+05 | | step | 3.32e+04 | | vb | 4.57e-05 | | vb_q0 | 0.000171 | | vb_q1 | 7.8e-06 | | vb_q2 | 2.64e-06 | | vb_q3 | 1.4e-06 | --------------------------- --------------------------- | grad_norm | 1.23 | | loss | 0.00152 | | loss_cal | 0.0218 | | loss_cal_q0 | 0.0217 | | loss_cal_q1 | 0.0227 | | loss_cal_q2 | 0.0217 | | loss_cal_q3 | 0.0211 | | loss_diff | 0.00108 | | loss_diff_q0 | 0.00285 | | loss_diff_q1 | 0.00115 | | loss_diff_q2 | 0.000283 | | loss_diff_q3 | 8.29e-05 | | loss_q0 | 0.00465 | | loss_q1 | 0.00116 | | loss_q2 | 0.000286 | | loss_q3 | 8.4e-05 | | param_norm | 252 | | samples | 2.66e+05 | | step | 3.33e+04 | | vb | 0.000445 | | vb_q0 | 0.0018 | | vb_q1 | 8.61e-06 | | vb_q2 | 2.56e-06 | | vb_q3 | 1.07e-06 | --------------------------- --------------------------- | grad_norm | 1.34 | | loss | 0.00148 | | loss_cal | 0.0222 | | loss_cal_q0 | 0.0217 | | loss_cal_q1 | 0.0235 | | loss_cal_q2 | 0.0211 | | loss_cal_q3 | 0.0216 | | loss_diff | 0.0011 | | loss_diff_q0 | 0.00285 | | loss_diff_q1 | 0.00103 | | loss_diff_q2 | 0.000268 | | loss_diff_q3 | 7.78e-05 | | loss_q0 | 0.00429 | | loss_q1 | 0.00104 | | loss_q2 | 0.000271 | | loss_q3 | 7.88e-05 | | param_norm | 252 | | samples | 2.67e+05 | | step | 3.34e+04 | | vb | 0.000381 | | vb_q0 | 0.00144 | | vb_q1 | 7.69e-06 | | vb_q2 | 2.4e-06 | | vb_q3 | 9.83e-07 | --------------------------- --------------------------- | grad_norm | 1.5 | | loss | 0.00182 | | loss_cal | 0.0241 | | loss_cal_q0 | 0.0224 | | loss_cal_q1 | 0.0239 | | loss_cal_q2 | 0.0259 | | loss_cal_q3 | 0.0246 | | loss_diff | 0.00116 | | loss_diff_q0 | 0.0031 | | loss_diff_q1 | 0.00111 | | loss_diff_q2 | 0.000288 | | loss_diff_q3 | 7.57e-05 | | loss_q0 | 0.00568 | | loss_q1 | 0.00112 | | loss_q2 | 0.00029 | | loss_q3 | 7.67e-05 | | param_norm | 252 | | samples | 2.68e+05 | | step | 3.35e+04 | | vb | 0.000657 | | vb_q0 | 0.00258 | | vb_q1 | 8.28e-06 | | vb_q2 | 2.58e-06 | | vb_q3 | 9.68e-07 | --------------------------- --------------------------- | grad_norm | 1.27 | | loss | 0.00123 | | loss_cal | 0.0209 | | loss_cal_q0 | 0.0206 | | loss_cal_q1 | 0.02 | | loss_cal_q2 | 0.0211 | | loss_cal_q3 | 0.0221 | | loss_diff | 0.001 | | loss_diff_q0 | 0.00259 | | loss_diff_q1 | 0.000974 | | loss_diff_q2 | 0.000262 | | loss_diff_q3 | 7.61e-05 | | loss_q0 | 0.00348 | | loss_q1 | 0.000981 | | loss_q2 | 0.000264 | | loss_q3 | 7.71e-05 | | param_norm | 252 | | samples | 2.69e+05 | | step | 3.36e+04 | | vb | 0.000226 | | vb_q0 | 0.000885 | | vb_q1 | 7.3e-06 | | vb_q2 | 2.36e-06 | | vb_q3 | 9.6e-07 | --------------------------- --------------------------- | grad_norm | 1.24 | | loss | 0.000989 | | loss_cal | 0.0221 | | loss_cal_q0 | 0.0206 | | loss_cal_q1 | 0.0227 | | loss_cal_q2 | 0.0209 | | loss_cal_q3 | 0.0238 | | loss_diff | 0.000946 | | loss_diff_q0 | 0.00254 | | loss_diff_q1 | 0.00101 | | loss_diff_q2 | 0.000246 | | loss_diff_q3 | 7.05e-05 | | loss_q0 | 0.0027 | | loss_q1 | 0.00102 | | loss_q2 | 0.000248 | | loss_q3 | 7.14e-05 | | param_norm | 252 | | samples | 2.7e+05 | | step | 3.37e+04 | | vb | 4.28e-05 | | vb_q0 | 0.000166 | | vb_q1 | 7.52e-06 | | vb_q2 | 2.24e-06 | | vb_q3 | 8.94e-07 | --------------------------- --------------------------- | grad_norm | 1.29 | | loss | 0.00109 | | loss_cal | 0.0218 | | loss_cal_q0 | 0.0224 | | loss_cal_q1 | 0.0231 | | loss_cal_q2 | 0.0191 | | loss_cal_q3 | 0.0219 | | loss_diff | 0.000894 | | loss_diff_q0 | 0.00239 | | loss_diff_q1 | 0.000981 | | loss_diff_q2 | 0.000248 | | loss_diff_q3 | 6.14e-05 | | loss_q0 | 0.00323 | | loss_q1 | 0.000988 | | loss_q2 | 0.000251 | | loss_q3 | 6.22e-05 | | param_norm | 252 | | samples | 2.7e+05 | | step | 3.38e+04 | | vb | 0.000194 | | vb_q0 | 0.000847 | | vb_q1 | 7.32e-06 | | vb_q2 | 2.22e-06 | | vb_q3 | 7.82e-07 | --------------------------- --------------------------- | grad_norm | 1.22 | | loss | 0.00109 | | loss_cal | 0.0215 | | loss_cal_q0 | 0.0212 | | loss_cal_q1 | 0.0223 | | loss_cal_q2 | 0.0221 | | loss_cal_q3 | 0.0203 | | loss_diff | 0.000931 | | loss_diff_q0 | 0.00255 | | loss_diff_q1 | 0.00107 | | loss_diff_q2 | 0.000266 | | loss_diff_q3 | 7.63e-05 | | loss_q0 | 0.00324 | | loss_q1 | 0.00108 | | loss_q2 | 0.000269 | | loss_q3 | 7.73e-05 | | param_norm | 252 | | samples | 2.71e+05 | | step | 3.39e+04 | | vb | 0.000155 | | vb_q0 | 0.000682 | | vb_q1 | 7.99e-06 | | vb_q2 | 2.41e-06 | | vb_q3 | 9.69e-07 | --------------------------- --------------------------- | grad_norm | 1.27 | | loss | 0.00149 | | loss_cal | 0.0216 | | loss_cal_q0 | 0.0205 | | loss_cal_q1 | 0.0197 | | loss_cal_q2 | 0.0206 | | loss_cal_q3 | 0.0254 | | loss_diff | 0.00129 | | loss_diff_q0 | 0.00359 | | loss_diff_q1 | 0.001 | | loss_diff_q2 | 0.000289 | | loss_diff_q3 | 0.000109 | | loss_q0 | 0.00432 | | loss_q1 | 0.00101 | | loss_q2 | 0.000291 | | loss_q3 | 0.00011 | | param_norm | 252 | | samples | 2.72e+05 | | step | 3.4e+04 | | vb | 0.000196 | | vb_q0 | 0.000731 | | vb_q1 | 7.48e-06 | | vb_q2 | 2.62e-06 | | vb_q3 | 1.39e-06 | --------------------------- --------------------------- | grad_norm | 1.37 | | loss | 0.00143 | | loss_cal | 0.0226 | | loss_cal_q0 | 0.0217 | | loss_cal_q1 | 0.0223 | | loss_cal_q2 | 0.0237 | | loss_cal_q3 | 0.0222 | | loss_diff | 0.00107 | | loss_diff_q0 | 0.0028 | | loss_diff_q1 | 0.00101 | | loss_diff_q2 | 0.000278 | | loss_diff_q3 | 9.35e-05 | | loss_q0 | 0.00417 | | loss_q1 | 0.00102 | | loss_q2 | 0.000281 | | loss_q3 | 9.47e-05 | | param_norm | 252 | | samples | 2.73e+05 | | step | 3.41e+04 | | vb | 0.000363 | | vb_q0 | 0.00137 | | vb_q1 | 7.52e-06 | | vb_q2 | 2.55e-06 | | vb_q3 | 1.19e-06 | --------------------------- --------------------------- | grad_norm | 1.22 | | loss | 0.000877 | | loss_cal | 0.0211 | | loss_cal_q0 | 0.0203 | | loss_cal_q1 | 0.0206 | | loss_cal_q2 | 0.0211 | | loss_cal_q3 | 0.0217 | | loss_diff | 0.000854 | | loss_diff_q0 | 0.00218 | | loss_diff_q1 | 0.00104 | | loss_diff_q2 | 0.000231 | | loss_diff_q3 | 5.93e-05 | | loss_q0 | 0.00226 | | loss_q1 | 0.00105 | | loss_q2 | 0.000233 | | loss_q3 | 6.01e-05 | | param_norm | 252 | | samples | 2.74e+05 | | step | 3.42e+04 | | vb | 2.23e-05 | | vb_q0 | 7.9e-05 | | vb_q1 | 7.76e-06 | | vb_q2 | 2.09e-06 | | vb_q3 | 7.55e-07 | --------------------------- --------------------------- | grad_norm | 1.31 | | loss | 0.00154 | | loss_cal | 0.0216 | | loss_cal_q0 | 0.0213 | | loss_cal_q1 | 0.022 | | loss_cal_q2 | 0.0224 | | loss_cal_q3 | 0.0211 | | loss_diff | 0.00113 | | loss_diff_q0 | 0.00341 | | loss_diff_q1 | 0.00103 | | loss_diff_q2 | 0.000274 | | loss_diff_q3 | 7.9e-05 | | loss_q0 | 0.00523 | | loss_q1 | 0.00104 | | loss_q2 | 0.000277 | | loss_q3 | 8e-05 | | param_norm | 252 | | samples | 2.74e+05 | | step | 3.43e+04 | | vb | 0.000413 | | vb_q0 | 0.00182 | | vb_q1 | 7.66e-06 | | vb_q2 | 2.46e-06 | | vb_q3 | 1e-06 | --------------------------- --------------------------- | grad_norm | 1.35 | | loss | 0.00123 | | loss_cal | 0.0224 | | loss_cal_q0 | 0.0224 | | loss_cal_q1 | 0.0208 | | loss_cal_q2 | 0.0225 | | loss_cal_q3 | 0.0239 | | loss_diff | 0.00102 | | loss_diff_q0 | 0.00263 | | loss_diff_q1 | 0.00105 | | loss_diff_q2 | 0.000273 | | loss_diff_q3 | 7.01e-05 | | loss_q0 | 0.00344 | | loss_q1 | 0.00105 | | loss_q2 | 0.000275 | | loss_q3 | 7.09e-05 | | param_norm | 252 | | samples | 2.75e+05 | | step | 3.44e+04 | | vb | 0.000212 | | vb_q0 | 0.000809 | | vb_q1 | 7.8e-06 | | vb_q2 | 2.45e-06 | | vb_q3 | 8.78e-07 | --------------------------- --------------------------- | grad_norm | 1.28 | | loss | 0.00127 | | loss_cal | 0.0222 | | loss_cal_q0 | 0.0212 | | loss_cal_q1 | 0.0229 | | loss_cal_q2 | 0.0223 | | loss_cal_q3 | 0.0225 | | loss_diff | 0.000988 | | loss_diff_q0 | 0.00248 | | loss_diff_q1 | 0.00108 | | loss_diff_q2 | 0.000279 | | loss_diff_q3 | 7.82e-05 | | loss_q0 | 0.00359 | | loss_q1 | 0.00109 | | loss_q2 | 0.000282 | | loss_q3 | 7.92e-05 | | param_norm | 252 | | samples | 2.76e+05 | | step | 3.45e+04 | | vb | 0.000287 | | vb_q0 | 0.00111 | | vb_q1 | 7.99e-06 | | vb_q2 | 2.54e-06 | | vb_q3 | 1e-06 | --------------------------- --------------------------- | grad_norm | 1.28 | | loss | 0.00177 | | loss_cal | 0.0212 | | loss_cal_q0 | 0.0212 | | loss_cal_q1 | 0.0203 | | loss_cal_q2 | 0.0201 | | loss_cal_q3 | 0.0235 | | loss_diff | 0.00121 | | loss_diff_q0 | 0.00278 | | loss_diff_q1 | 0.00102 | | loss_diff_q2 | 0.000275 | | loss_diff_q3 | 0.000101 | | loss_q0 | 0.00385 | | loss_q1 | 0.00103 | | loss_q2 | 0.000278 | | loss_q3 | 0.000102 | | param_norm | 252 | | samples | 2.77e+05 | | step | 3.46e+04 | | vb | 0.000557 | | vb_q0 | 0.00107 | | vb_q1 | 7.59e-06 | | vb_q2 | 2.51e-06 | | vb_q3 | 1.28e-06 | --------------------------- --------------------------- | grad_norm | 1.32 | | loss | 0.0241 | | loss_cal | 0.0209 | | loss_cal_q0 | 0.0212 | | loss_cal_q1 | 0.0215 | | loss_cal_q2 | 0.0193 | | loss_cal_q3 | 0.0216 | | loss_diff | 0.00198 | | loss_diff_q0 | 0.00586 | | loss_diff_q1 | 0.00121 | | loss_diff_q2 | 0.000369 | | loss_diff_q3 | 0.000171 | | loss_q0 | 0.0888 | | loss_q1 | 0.00122 | | loss_q2 | 0.000373 | | loss_q3 | 0.000173 | | param_norm | 252 | | samples | 2.78e+05 | | step | 3.47e+04 | | vb | 0.0221 | | vb_q0 | 0.0829 | | vb_q1 | 9.06e-06 | | vb_q2 | 3.34e-06 | | vb_q3 | 2.2e-06 | --------------------------- --------------------------- | grad_norm | 1.2 | | loss | 0.00138 | | loss_cal | 0.0214 | | loss_cal_q0 | 0.022 | | loss_cal_q1 | 0.022 | | loss_cal_q2 | 0.0191 | | loss_cal_q3 | 0.0223 | | loss_diff | 0.00131 | | loss_diff_q0 | 0.00375 | | loss_diff_q1 | 0.00128 | | loss_diff_q2 | 0.000342 | | loss_diff_q3 | 0.00013 | | loss_q0 | 0.00403 | | loss_q1 | 0.00129 | | loss_q2 | 0.000345 | | loss_q3 | 0.000132 | | param_norm | 253 | | samples | 2.78e+05 | | step | 3.48e+04 | | vb | 6.99e-05 | | vb_q0 | 0.000285 | | vb_q1 | 9.53e-06 | | vb_q2 | 3.06e-06 | | vb_q3 | 1.67e-06 | --------------------------- --------------------------- | grad_norm | 1.31 | | loss | 0.00119 | | loss_cal | 0.0212 | | loss_cal_q0 | 0.0208 | | loss_cal_q1 | 0.0184 | | loss_cal_q2 | 0.0232 | | loss_cal_q3 | 0.0225 | | loss_diff | 0.00111 | | loss_diff_q0 | 0.00308 | | loss_diff_q1 | 0.00107 | | loss_diff_q2 | 0.000278 | | loss_diff_q3 | 8.08e-05 | | loss_q0 | 0.00338 | | loss_q1 | 0.00107 | | loss_q2 | 0.000281 | | loss_q3 | 8.19e-05 | | param_norm | 253 | | samples | 2.79e+05 | | step | 3.49e+04 | | vb | 7.59e-05 | | vb_q0 | 0.000301 | | vb_q1 | 7.92e-06 | | vb_q2 | 2.53e-06 | | vb_q3 | 1.03e-06 | --------------------------- --------------------------- | grad_norm | 1.34 | | loss | 0.000973 | | loss_cal | 0.0218 | | loss_cal_q0 | 0.0215 | | loss_cal_q1 | 0.0221 | | loss_cal_q2 | 0.0217 | | loss_cal_q3 | 0.0217 | | loss_diff | 0.000939 | | loss_diff_q0 | 0.00235 | | loss_diff_q1 | 0.00112 | | loss_diff_q2 | 0.000276 | | loss_diff_q3 | 7.56e-05 | | loss_q0 | 0.00248 | | loss_q1 | 0.00112 | | loss_q2 | 0.000279 | | loss_q3 | 7.65e-05 | | param_norm | 253 | | samples | 2.8e+05 | | step | 3.5e+04 | | vb | 3.49e-05 | | vb_q0 | 0.000134 | | vb_q1 | 8.3e-06 | | vb_q2 | 2.48e-06 | | vb_q3 | 9.56e-07 | --------------------------- saving model 0... saving model 0.9999... --------------------------- | grad_norm | 1.39 | | loss | 0.000887 | | loss_cal | 0.0235 | | loss_cal_q0 | 0.0199 | | loss_cal_q1 | 0.0231 | | loss_cal_q2 | 0.0236 | | loss_cal_q3 | 0.0256 | | loss_diff | 0.000864 | | loss_diff_q0 | 0.002 | | loss_diff_q1 | 0.00106 | | loss_diff_q2 | 0.000269 | | loss_diff_q3 | 6.39e-05 | | loss_q0 | 0.00208 | | loss_q1 | 0.00107 | | loss_q2 | 0.000272 | | loss_q3 | 6.47e-05 | | param_norm | 253 | | samples | 2.81e+05 | | step | 3.51e+04 | | vb | 2.3e-05 | | vb_q0 | 7.99e-05 | | vb_q1 | 7.88e-06 | | vb_q2 | 2.42e-06 | | vb_q3 | 8.06e-07 | --------------------------- --------------------------- | grad_norm | 1.27 | | loss | 0.000934 | | loss_cal | 0.0211 | | loss_cal_q0 | 0.0204 | | loss_cal_q1 | 0.022 | | loss_cal_q2 | 0.0214 | | loss_cal_q3 | 0.0208 | | loss_diff | 0.000911 | | loss_diff_q0 | 0.00219 | | loss_diff_q1 | 0.00102 | | loss_diff_q2 | 0.000248 | | loss_diff_q3 | 7.38e-05 | | loss_q0 | 0.00227 | | loss_q1 | 0.00103 | | loss_q2 | 0.00025 | | loss_q3 | 7.48e-05 | | param_norm | 253 | | samples | 2.82e+05 | | step | 3.52e+04 | | vb | 2.34e-05 | | vb_q0 | 8.04e-05 | | vb_q1 | 7.59e-06 | | vb_q2 | 2.26e-06 | | vb_q3 | 9.36e-07 | --------------------------- --------------------------- | grad_norm | 1.32 | | loss | 0.0012 | | loss_cal | 0.0213 | | loss_cal_q0 | 0.0235 | | loss_cal_q1 | 0.0198 | | loss_cal_q2 | 0.0197 | | loss_cal_q3 | 0.0213 | | loss_diff | 0.0011 | | loss_diff_q0 | 0.00307 | | loss_diff_q1 | 0.00104 | | loss_diff_q2 | 0.000264 | | loss_diff_q3 | 8.04e-05 | | loss_q0 | 0.00348 | | loss_q1 | 0.00105 | | loss_q2 | 0.000266 | | loss_q3 | 8.14e-05 | | param_norm | 253 | | samples | 2.82e+05 | | step | 3.53e+04 | | vb | 0.000102 | | vb_q0 | 0.000402 | | vb_q1 | 7.75e-06 | | vb_q2 | 2.39e-06 | | vb_q3 | 1.04e-06 | --------------------------- --------------------------- | grad_norm | 1.35 | | loss | 0.00102 | | loss_cal | 0.0206 | | loss_cal_q0 | 0.0206 | | loss_cal_q1 | 0.0208 | | loss_cal_q2 | 0.0208 | | loss_cal_q3 | 0.0207 | | loss_diff | 0.000961 | | loss_diff_q0 | 0.00228 | | loss_diff_q1 | 0.00106 | | loss_diff_q2 | 0.000258 | | loss_diff_q3 | 6.66e-05 | | loss_q0 | 0.00249 | | loss_q1 | 0.00107 | | loss_q2 | 0.00026 | | loss_q3 | 6.75e-05 | | param_norm | 253 | | samples | 2.83e+05 | | step | 3.54e+04 | | vb | 6.02e-05 | | vb_q0 | 0.000209 | | vb_q1 | 7.92e-06 | | vb_q2 | 2.31e-06 | | vb_q3 | 8.44e-07 | --------------------------- --------------------------- | grad_norm | 1.31 | | loss | 0.00185 | | loss_cal | 0.0209 | | loss_cal_q0 | 0.0198 | | loss_cal_q1 | 0.0226 | | loss_cal_q2 | 0.0221 | | loss_cal_q3 | 0.019 | | loss_diff | 0.000941 | | loss_diff_q0 | 0.00218 | | loss_diff_q1 | 0.00112 | | loss_diff_q2 | 0.000249 | | loss_diff_q3 | 6.21e-05 | | loss_q0 | 0.00569 | | loss_q1 | 0.00113 | | loss_q2 | 0.000251 | | loss_q3 | 6.29e-05 | | param_norm | 253 | | samples | 2.84e+05 | | step | 3.55e+04 | | vb | 0.000912 | | vb_q0 | 0.00351 | | vb_q1 | 8.49e-06 | | vb_q2 | 2.25e-06 | | vb_q3 | 7.93e-07 | --------------------------- --------------------------- | grad_norm | 1.41 | | loss | 0.00101 | | loss_cal | 0.0216 | | loss_cal_q0 | 0.0215 | | loss_cal_q1 | 0.0215 | | loss_cal_q2 | 0.0227 | | loss_cal_q3 | 0.0203 | | loss_diff | 0.000928 | | loss_diff_q0 | 0.00246 | | loss_diff_q1 | 0.00102 | | loss_diff_q2 | 0.000254 | | loss_diff_q3 | 6.99e-05 | | loss_q0 | 0.00281 | | loss_q1 | 0.00103 | | loss_q2 | 0.000256 | | loss_q3 | 7.08e-05 | | param_norm | 253 | | samples | 2.85e+05 | | step | 3.56e+04 | | vb | 8.34e-05 | | vb_q0 | 0.000351 | | vb_q1 | 7.67e-06 | | vb_q2 | 2.33e-06 | | vb_q3 | 8.91e-07 | --------------------------- --------------------------- | grad_norm | 1.32 | | loss | 0.00104 | | loss_cal | 0.0224 | | loss_cal_q0 | 0.0215 | | loss_cal_q1 | 0.022 | | loss_cal_q2 | 0.0225 | | loss_cal_q3 | 0.0235 | | loss_diff | 0.000941 | | loss_diff_q0 | 0.00267 | | loss_diff_q1 | 0.000991 | | loss_diff_q2 | 0.000262 | | loss_diff_q3 | 7.16e-05 | | loss_q0 | 0.00309 | | loss_q1 | 0.000998 | | loss_q2 | 0.000264 | | loss_q3 | 7.25e-05 | | param_norm | 253 | | samples | 2.86e+05 | | step | 3.57e+04 | | vb | 9.7e-05 | | vb_q0 | 0.000415 | | vb_q1 | 7.37e-06 | | vb_q2 | 2.37e-06 | | vb_q3 | 9.19e-07 | --------------------------- --------------------------- | grad_norm | 1.27 | | loss | 0.00235 | | loss_cal | 0.0208 | | loss_cal_q0 | 0.021 | | loss_cal_q1 | 0.0213 | | loss_cal_q2 | 0.0183 | | loss_cal_q3 | 0.0226 | | loss_diff | 0.00105 | | loss_diff_q0 | 0.00294 | | loss_diff_q1 | 0.001 | | loss_diff_q2 | 0.000256 | | loss_diff_q3 | 6.86e-05 | | loss_q0 | 0.00809 | | loss_q1 | 0.00101 | | loss_q2 | 0.000258 | | loss_q3 | 6.95e-05 | | param_norm | 253 | | samples | 2.86e+05 | | step | 3.58e+04 | | vb | 0.0013 | | vb_q0 | 0.00515 | | vb_q1 | 7.48e-06 | | vb_q2 | 2.28e-06 | | vb_q3 | 8.75e-07 | --------------------------- --------------------------- | grad_norm | 1.28 | | loss | 0.00435 | | loss_cal | 0.02 | | loss_cal_q0 | 0.0194 | | loss_cal_q1 | 0.0181 | | loss_cal_q2 | 0.023 | | loss_cal_q3 | 0.0194 | | loss_diff | 0.00272 | | loss_diff_q0 | 0.008 | | loss_diff_q1 | 0.00155 | | loss_diff_q2 | 0.000608 | | loss_diff_q3 | 0.000303 | | loss_q0 | 0.0142 | | loss_q1 | 0.00157 | | loss_q2 | 0.000614 | | loss_q3 | 0.000306 | | param_norm | 253 | | samples | 2.87e+05 | | step | 3.59e+04 | | vb | 0.00164 | | vb_q0 | 0.00619 | | vb_q1 | 1.24e-05 | | vb_q2 | 5.57e-06 | | vb_q3 | 3.88e-06 | --------------------------- --------------------------- | grad_norm | 1.38 | | loss | 0.00166 | | loss_cal | 0.0213 | | loss_cal_q0 | 0.0219 | | loss_cal_q1 | 0.0219 | | loss_cal_q2 | 0.021 | | loss_cal_q3 | 0.0207 | | loss_diff | 0.00118 | | loss_diff_q0 | 0.00319 | | loss_diff_q1 | 0.00114 | | loss_diff_q2 | 0.000302 | | loss_diff_q3 | 9.36e-05 | | loss_q0 | 0.00505 | | loss_q1 | 0.00115 | | loss_q2 | 0.000305 | | loss_q3 | 9.48e-05 | | param_norm | 253 | | samples | 2.88e+05 | | step | 3.6e+04 | | vb | 0.000478 | | vb_q0 | 0.00186 | | vb_q1 | 8.53e-06 | | vb_q2 | 2.7e-06 | | vb_q3 | 1.21e-06 | --------------------------- --------------------------- | grad_norm | 1.38 | | loss | 0.00154 | | loss_cal | 0.021 | | loss_cal_q0 | 0.02 | | loss_cal_q1 | 0.0218 | | loss_cal_q2 | 0.0209 | | loss_cal_q3 | 0.0211 | | loss_diff | 0.00105 | | loss_diff_q0 | 0.00268 | | loss_diff_q1 | 0.00103 | | loss_diff_q2 | 0.000245 | | loss_diff_q3 | 7.02e-05 | | loss_q0 | 0.00457 | | loss_q1 | 0.00104 | | loss_q2 | 0.000247 | | loss_q3 | 7.11e-05 | | param_norm | 253 | | samples | 2.89e+05 | | step | 3.61e+04 | | vb | 0.000487 | | vb_q0 | 0.00189 | | vb_q1 | 7.69e-06 | | vb_q2 | 2.24e-06 | | vb_q3 | 8.81e-07 | --------------------------- --------------------------- | grad_norm | 1.29 | | loss | 0.000907 | | loss_cal | 0.0207 | | loss_cal_q0 | 0.0217 | | loss_cal_q1 | 0.0196 | | loss_cal_q2 | 0.0212 | | loss_cal_q3 | 0.0207 | | loss_diff | 0.000886 | | loss_diff_q0 | 0.00214 | | loss_diff_q1 | 0.00101 | | loss_diff_q2 | 0.000238 | | loss_diff_q3 | 5.85e-05 | | loss_q0 | 0.00222 | | loss_q1 | 0.00102 | | loss_q2 | 0.000241 | | loss_q3 | 5.92e-05 | | param_norm | 253 | | samples | 2.9e+05 | | step | 3.62e+04 | | vb | 2.13e-05 | | vb_q0 | 7.32e-05 | | vb_q1 | 7.47e-06 | | vb_q2 | 2.17e-06 | | vb_q3 | 7.45e-07 | --------------------------- --------------------------- | grad_norm | 1.17 | | loss | 0.0021 | | loss_cal | 0.0206 | | loss_cal_q0 | 0.0226 | | loss_cal_q1 | 0.021 | | loss_cal_q2 | 0.0191 | | loss_cal_q3 | 0.0198 | | loss_diff | 0.00111 | | loss_diff_q0 | 0.00322 | | loss_diff_q1 | 0.00102 | | loss_diff_q2 | 0.000261 | | loss_diff_q3 | 7.62e-05 | | loss_q0 | 0.00744 | | loss_q1 | 0.00103 | | loss_q2 | 0.000264 | | loss_q3 | 7.72e-05 | | param_norm | 253 | | samples | 2.9e+05 | | step | 3.63e+04 | | vb | 0.000989 | | vb_q0 | 0.00422 | | vb_q1 | 7.56e-06 | | vb_q2 | 2.34e-06 | | vb_q3 | 9.76e-07 | --------------------------- --------------------------- | grad_norm | 1.16 | | loss | 0.001 | | loss_cal | 0.0199 | | loss_cal_q0 | 0.0201 | | loss_cal_q1 | 0.019 | | loss_cal_q2 | 0.0203 | | loss_cal_q3 | 0.0202 | | loss_diff | 0.000963 | | loss_diff_q0 | 0.00242 | | loss_diff_q1 | 0.00103 | | loss_diff_q2 | 0.000262 | | loss_diff_q3 | 6.73e-05 | | loss_q0 | 0.00257 | | loss_q1 | 0.00104 | | loss_q2 | 0.000264 | | loss_q3 | 6.82e-05 | | param_norm | 254 | | samples | 2.91e+05 | | step | 3.64e+04 | | vb | 4.02e-05 | | vb_q0 | 0.000145 | | vb_q1 | 7.64e-06 | | vb_q2 | 2.36e-06 | | vb_q3 | 8.53e-07 | --------------------------- --------------------------- | grad_norm | 1.32 | | loss | 0.000925 | | loss_cal | 0.0208 | | loss_cal_q0 | 0.0193 | | loss_cal_q1 | 0.0197 | | loss_cal_q2 | 0.0218 | | loss_cal_q3 | 0.022 | | loss_diff | 0.00088 | | loss_diff_q0 | 0.00218 | | loss_diff_q1 | 0.00103 | | loss_diff_q2 | 0.000243 | | loss_diff_q3 | 6.25e-05 | | loss_q0 | 0.00234 | | loss_q1 | 0.00104 | | loss_q2 | 0.000245 | | loss_q3 | 6.33e-05 | | param_norm | 254 | | samples | 2.92e+05 | | step | 3.65e+04 | | vb | 4.43e-05 | | vb_q0 | 0.000165 | | vb_q1 | 7.6e-06 | | vb_q2 | 2.2e-06 | | vb_q3 | 7.86e-07 | --------------------------- --------------------------- | grad_norm | 1.08 | | loss | 0.00104 | | loss_cal | 0.0191 | | loss_cal_q0 | 0.0204 | | loss_cal_q1 | 0.0184 | | loss_cal_q2 | 0.0196 | | loss_cal_q3 | 0.0182 | | loss_diff | 0.000896 | | loss_diff_q0 | 0.00215 | | loss_diff_q1 | 0.000993 | | loss_diff_q2 | 0.000248 | | loss_diff_q3 | 7.08e-05 | | loss_q0 | 0.0027 | | loss_q1 | 0.001 | | loss_q2 | 0.00025 | | loss_q3 | 7.17e-05 | | param_norm | 254 | | samples | 2.93e+05 | | step | 3.66e+04 | | vb | 0.000145 | | vb_q0 | 0.000548 | | vb_q1 | 7.36e-06 | | vb_q2 | 2.24e-06 | | vb_q3 | 8.96e-07 | --------------------------- --------------------------- | grad_norm | 1.13 | | loss | 0.00106 | | loss_cal | 0.0196 | | loss_cal_q0 | 0.0212 | | loss_cal_q1 | 0.018 | | loss_cal_q2 | 0.0186 | | loss_cal_q3 | 0.0204 | | loss_diff | 0.000881 | | loss_diff_q0 | 0.00228 | | loss_diff_q1 | 0.000937 | | loss_diff_q2 | 0.000225 | | loss_diff_q3 | 6.17e-05 | | loss_q0 | 0.00298 | | loss_q1 | 0.000944 | | loss_q2 | 0.000227 | | loss_q3 | 6.25e-05 | | param_norm | 254 | | samples | 2.94e+05 | | step | 3.67e+04 | | vb | 0.000179 | | vb_q0 | 0.000699 | | vb_q1 | 6.96e-06 | | vb_q2 | 2.03e-06 | | vb_q3 | 7.77e-07 | --------------------------- --------------------------- | grad_norm | 1.15 | | loss | 0.000835 | | loss_cal | 0.0194 | | loss_cal_q0 | 0.0212 | | loss_cal_q1 | 0.0181 | | loss_cal_q2 | 0.0202 | | loss_cal_q3 | 0.0183 | | loss_diff | 0.00082 | | loss_diff_q0 | 0.00204 | | loss_diff_q1 | 0.00099 | | loss_diff_q2 | 0.000259 | | loss_diff_q3 | 5.94e-05 | | loss_q0 | 0.00209 | | loss_q1 | 0.000997 | | loss_q2 | 0.000261 | | loss_q3 | 6.02e-05 | | param_norm | 254 | | samples | 2.94e+05 | | step | 3.68e+04 | | vb | 1.51e-05 | | vb_q0 | 5.25e-05 | | vb_q1 | 7.31e-06 | | vb_q2 | 2.33e-06 | | vb_q3 | 7.59e-07 | --------------------------- --------------------------- | grad_norm | 1.2 | | loss | 0.000848 | | loss_cal | 0.0199 | | loss_cal_q0 | 0.0187 | | loss_cal_q1 | 0.0204 | | loss_cal_q2 | 0.0195 | | loss_cal_q3 | 0.021 | | loss_diff | 0.000828 | | loss_diff_q0 | 0.00195 | | loss_diff_q1 | 0.00104 | | loss_diff_q2 | 0.000253 | | loss_diff_q3 | 5.89e-05 | | loss_q0 | 0.00202 | | loss_q1 | 0.00105 | | loss_q2 | 0.000256 | | loss_q3 | 5.96e-05 | | param_norm | 254 | | samples | 2.95e+05 | | step | 3.69e+04 | | vb | 2.06e-05 | | vb_q0 | 7.31e-05 | | vb_q1 | 7.69e-06 | | vb_q2 | 2.24e-06 | | vb_q3 | 7.48e-07 | --------------------------- --------------------------- | grad_norm | 1.42 | | loss | 0.00134 | | loss_cal | 0.0217 | | loss_cal_q0 | 0.0218 | | loss_cal_q1 | 0.0206 | | loss_cal_q2 | 0.0226 | | loss_cal_q3 | 0.0205 | | loss_diff | 0.00105 | | loss_diff_q0 | 0.00273 | | loss_diff_q1 | 0.00106 | | loss_diff_q2 | 0.000275 | | loss_diff_q3 | 7.51e-05 | | loss_q0 | 0.0039 | | loss_q1 | 0.00107 | | loss_q2 | 0.000277 | | loss_q3 | 7.6e-05 | | param_norm | 254 | | samples | 2.96e+05 | | step | 3.7e+04 | | vb | 0.000293 | | vb_q0 | 0.00117 | | vb_q1 | 7.88e-06 | | vb_q2 | 2.49e-06 | | vb_q3 | 9.54e-07 | --------------------------- --------------------------- | grad_norm | 1.2 | | loss | 0.000837 | | loss_cal | 0.0199 | | loss_cal_q0 | 0.0194 | | loss_cal_q1 | 0.0193 | | loss_cal_q2 | 0.0196 | | loss_cal_q3 | 0.0213 | | loss_diff | 0.000809 | | loss_diff_q0 | 0.00199 | | loss_diff_q1 | 0.000992 | | loss_diff_q2 | 0.000241 | | loss_diff_q3 | 7.02e-05 | | loss_q0 | 0.0021 | | loss_q1 | 0.000999 | | loss_q2 | 0.000243 | | loss_q3 | 7.11e-05 | | param_norm | 254 | | samples | 2.97e+05 | | step | 3.71e+04 | | vb | 2.82e-05 | | vb_q0 | 0.000107 | | vb_q1 | 7.36e-06 | | vb_q2 | 2.16e-06 | | vb_q3 | 8.93e-07 | --------------------------- --------------------------- | grad_norm | 1.13 | | loss | 0.00102 | | loss_cal | 0.0196 | | loss_cal_q0 | 0.019 | | loss_cal_q1 | 0.019 | | loss_cal_q2 | 0.0196 | | loss_cal_q3 | 0.0206 | | loss_diff | 0.000906 | | loss_diff_q0 | 0.00225 | | loss_diff_q1 | 0.000991 | | loss_diff_q2 | 0.000262 | | loss_diff_q3 | 6.88e-05 | | loss_q0 | 0.00268 | | loss_q1 | 0.000998 | | loss_q2 | 0.000264 | | loss_q3 | 6.97e-05 | | param_norm | 254 | | samples | 2.98e+05 | | step | 3.72e+04 | | vb | 0.000112 | | vb_q0 | 0.000431 | | vb_q1 | 7.33e-06 | | vb_q2 | 2.35e-06 | | vb_q3 | 8.82e-07 | --------------------------- --------------------------- | grad_norm | 1.17 | | loss | 0.000795 | | loss_cal | 0.0193 | | loss_cal_q0 | 0.0184 | | loss_cal_q1 | 0.0201 | | loss_cal_q2 | 0.0201 | | loss_cal_q3 | 0.0189 | | loss_diff | 0.000781 | | loss_diff_q0 | 0.00189 | | loss_diff_q1 | 0.000962 | | loss_diff_q2 | 0.000237 | | loss_diff_q3 | 6.19e-05 | | loss_q0 | 0.00193 | | loss_q1 | 0.000969 | | loss_q2 | 0.000239 | | loss_q3 | 6.27e-05 | | param_norm | 254 | | samples | 2.98e+05 | | step | 3.73e+04 | | vb | 1.33e-05 | | vb_q0 | 4.38e-05 | | vb_q1 | 7.12e-06 | | vb_q2 | 2.16e-06 | | vb_q3 | 7.83e-07 | --------------------------- --------------------------- | grad_norm | 1.15 | | loss | 0.00173 | | loss_cal | 0.0199 | | loss_cal_q0 | 0.0191 | | loss_cal_q1 | 0.0193 | | loss_cal_q2 | 0.0198 | | loss_cal_q3 | 0.0212 | | loss_diff | 0.00123 | | loss_diff_q0 | 0.0033 | | loss_diff_q1 | 0.00107 | | loss_diff_q2 | 0.000305 | | loss_diff_q3 | 8.8e-05 | | loss_q0 | 0.00518 | | loss_q1 | 0.00108 | | loss_q2 | 0.000308 | | loss_q3 | 8.91e-05 | | param_norm | 254 | | samples | 2.99e+05 | | step | 3.74e+04 | | vb | 0.000506 | | vb_q0 | 0.00187 | | vb_q1 | 8.05e-06 | | vb_q2 | 2.73e-06 | | vb_q3 | 1.11e-06 | --------------------------- --------------------------- | grad_norm | 1.19 | | loss | 0.0145 | | loss_cal | 0.0193 | | loss_cal_q0 | 0.0194 | | loss_cal_q1 | 0.0194 | | loss_cal_q2 | 0.0204 | | loss_cal_q3 | 0.0181 | | loss_diff | 0.00383 | | loss_diff_q0 | 0.0107 | | loss_diff_q1 | 0.00197 | | loss_diff_q2 | 0.000975 | | loss_diff_q3 | 0.000894 | | loss_q0 | 0.0508 | | loss_q1 | 0.00198 | | loss_q2 | 0.000984 | | loss_q3 | 0.000905 | | param_norm | 254 | | samples | 3e+05 | | step | 3.75e+04 | | vb | 0.0107 | | vb_q0 | 0.0401 | | vb_q1 | 1.5e-05 | | vb_q2 | 9.06e-06 | | vb_q3 | 1.14e-05 | --------------------------- --------------------------- | grad_norm | 1.41 | | loss | 0.015 | | loss_cal | 0.0203 | | loss_cal_q0 | 0.0206 | | loss_cal_q1 | 0.0193 | | loss_cal_q2 | 0.0201 | | loss_cal_q3 | 0.0212 | | loss_diff | 0.00833 | | loss_diff_q0 | 0.0309 | | loss_diff_q1 | 0.0039 | | loss_diff_q2 | 0.0016 | | loss_diff_q3 | 0.00124 | | loss_q0 | 0.0623 | | loss_q1 | 0.00393 | | loss_q2 | 0.00161 | | loss_q3 | 0.00126 | | param_norm | 255 | | samples | 3.01e+05 | | step | 3.76e+04 | | vb | 0.00671 | | vb_q0 | 0.0313 | | vb_q1 | 3.05e-05 | | vb_q2 | 1.47e-05 | | vb_q3 | 1.62e-05 | --------------------------- --------------------------- | grad_norm | 1.08 | | loss | 0.0103 | | loss_cal | 0.0184 | | loss_cal_q0 | 0.0187 | | loss_cal_q1 | 0.0193 | | loss_cal_q2 | 0.0184 | | loss_cal_q3 | 0.0168 | | loss_diff | 0.00491 | | loss_diff_q0 | 0.0167 | | loss_diff_q1 | 0.00167 | | loss_diff_q2 | 0.000482 | | loss_diff_q3 | 0.000214 | | loss_q0 | 0.0375 | | loss_q1 | 0.00168 | | loss_q2 | 0.000487 | | loss_q3 | 0.000217 | | param_norm | 256 | | samples | 3.02e+05 | | step | 3.77e+04 | | vb | 0.00542 | | vb_q0 | 0.0208 | | vb_q1 | 1.32e-05 | | vb_q2 | 4.43e-06 | | vb_q3 | 2.74e-06 | --------------------------- --------------------------- | grad_norm | 1.28 | | loss | 0.0151 | | loss_cal | 0.0188 | | loss_cal_q0 | 0.0195 | | loss_cal_q1 | 0.0176 | | loss_cal_q2 | 0.0196 | | loss_cal_q3 | 0.0183 | | loss_diff | 0.00565 | | loss_diff_q0 | 0.0151 | | loss_diff_q1 | 0.00321 | | loss_diff_q2 | 0.00153 | | loss_diff_q3 | 0.00127 | | loss_q0 | 0.0492 | | loss_q1 | 0.00324 | | loss_q2 | 0.00154 | | loss_q3 | 0.00129 | | param_norm | 256 | | samples | 3.02e+05 | | step | 3.78e+04 | | vb | 0.00947 | | vb_q0 | 0.0341 | | vb_q1 | 2.51e-05 | | vb_q2 | 1.39e-05 | | vb_q3 | 1.6e-05 | --------------------------- --------------------------- | grad_norm | 1.26 | | loss | 0.0087 | | loss_cal | 0.0196 | | loss_cal_q0 | 0.0194 | | loss_cal_q1 | 0.0191 | | loss_cal_q2 | 0.021 | | loss_cal_q3 | 0.0188 | | loss_diff | 0.00463 | | loss_diff_q0 | 0.0156 | | loss_diff_q1 | 0.00173 | | loss_diff_q2 | 0.000664 | | loss_diff_q3 | 0.000355 | | loss_q0 | 0.0318 | | loss_q1 | 0.00174 | | loss_q2 | 0.00067 | | loss_q3 | 0.00036 | | param_norm | 257 | | samples | 3.03e+05 | | step | 3.79e+04 | | vb | 0.00407 | | vb_q0 | 0.0162 | | vb_q1 | 1.34e-05 | | vb_q2 | 6.14e-06 | | vb_q3 | 4.52e-06 | --------------------------- --------------------------- | grad_norm | 1.1 | | loss | 0.00531 | | loss_cal | 0.0186 | | loss_cal_q0 | 0.0187 | | loss_cal_q1 | 0.0181 | | loss_cal_q2 | 0.0188 | | loss_cal_q3 | 0.0181 | | loss_diff | 0.00265 | | loss_diff_q0 | 0.00918 | | loss_diff_q1 | 0.00153 | | loss_diff_q2 | 0.000419 | | loss_diff_q3 | 0.000197 | | loss_q0 | 0.021 | | loss_q1 | 0.00154 | | loss_q2 | 0.000422 | | loss_q3 | 0.0002 | | param_norm | 257 | | samples | 3.04e+05 | | step | 3.8e+04 | | vb | 0.00266 | | vb_q0 | 0.0118 | | vb_q1 | 1.14e-05 | | vb_q2 | 3.87e-06 | | vb_q3 | 2.52e-06 | --------------------------- --------------------------- | grad_norm | 1.15 | | loss | 0.00468 | | loss_cal | 0.0193 | | loss_cal_q0 | 0.0198 | | loss_cal_q1 | 0.0192 | | loss_cal_q2 | 0.0193 | | loss_cal_q3 | 0.0189 | | loss_diff | 0.00326 | | loss_diff_q0 | 0.0107 | | loss_diff_q1 | 0.0014 | | loss_diff_q2 | 0.000386 | | loss_diff_q3 | 0.000168 | | loss_q0 | 0.0161 | | loss_q1 | 0.00141 | | loss_q2 | 0.000389 | | loss_q3 | 0.00017 | | param_norm | 257 | | samples | 3.05e+05 | | step | 3.81e+04 | | vb | 0.00142 | | vb_q0 | 0.00541 | | vb_q1 | 1.04e-05 | | vb_q2 | 3.52e-06 | | vb_q3 | 2.14e-06 | --------------------------- --------------------------- | grad_norm | 1.26 | | loss | 0.0168 | | loss_cal | 0.0192 | | loss_cal_q0 | 0.0194 | | loss_cal_q1 | 0.0184 | | loss_cal_q2 | 0.019 | | loss_cal_q3 | 0.02 | | loss_diff | 0.00693 | | loss_diff_q0 | 0.0251 | | loss_diff_q1 | 0.00145 | | loss_diff_q2 | 0.000429 | | loss_diff_q3 | 0.000193 | | loss_q0 | 0.0637 | | loss_q1 | 0.00146 | | loss_q2 | 0.000433 | | loss_q3 | 0.000196 | | param_norm | 257 | | samples | 3.06e+05 | | step | 3.82e+04 | | vb | 0.00986 | | vb_q0 | 0.0387 | | vb_q1 | 1.08e-05 | | vb_q2 | 3.89e-06 | | vb_q3 | 2.51e-06 | --------------------------- --------------------------- | grad_norm | 1.11 | | loss | 0.00145 | | loss_cal | 0.0188 | | loss_cal_q0 | 0.0198 | | loss_cal_q1 | 0.0193 | | loss_cal_q2 | 0.0178 | | loss_cal_q3 | 0.0184 | | loss_diff | 0.00137 | | loss_diff_q0 | 0.00361 | | loss_diff_q1 | 0.00139 | | loss_diff_q2 | 0.000393 | | loss_diff_q3 | 0.000172 | | loss_q0 | 0.00393 | | loss_q1 | 0.0014 | | loss_q2 | 0.000396 | | loss_q3 | 0.000174 | | param_norm | 258 | | samples | 3.06e+05 | | step | 3.83e+04 | | vb | 8.16e-05 | | vb_q0 | 0.000318 | | vb_q1 | 1.03e-05 | | vb_q2 | 3.59e-06 | | vb_q3 | 2.21e-06 | --------------------------- --------------------------- | grad_norm | 1.04 | | loss | 0.00354 | | loss_cal | 0.0188 | | loss_cal_q0 | 0.0189 | | loss_cal_q1 | 0.0187 | | loss_cal_q2 | 0.0187 | | loss_cal_q3 | 0.0188 | | loss_diff | 0.00157 | | loss_diff_q0 | 0.0048 | | loss_diff_q1 | 0.00125 | | loss_diff_q2 | 0.000332 | | loss_diff_q3 | 0.000119 | | loss_q0 | 0.0133 | | loss_q1 | 0.00126 | | loss_q2 | 0.000335 | | loss_q3 | 0.00012 | | param_norm | 258 | | samples | 3.07e+05 | | step | 3.84e+04 | | vb | 0.00198 | | vb_q0 | 0.00848 | | vb_q1 | 9.28e-06 | | vb_q2 | 3.03e-06 | | vb_q3 | 1.51e-06 | --------------------------- --------------------------- | grad_norm | 1.14 | | loss | 0.00128 | | loss_cal | 0.0187 | | loss_cal_q0 | 0.0177 | | loss_cal_q1 | 0.0193 | | loss_cal_q2 | 0.0178 | | loss_cal_q3 | 0.0198 | | loss_diff | 0.00113 | | loss_diff_q0 | 0.00329 | | loss_diff_q1 | 0.00122 | | loss_diff_q2 | 0.00033 | | loss_diff_q3 | 0.000106 | | loss_q0 | 0.00398 | | loss_q1 | 0.00123 | | loss_q2 | 0.000333 | | loss_q3 | 0.000107 | | param_norm | 258 | | samples | 3.08e+05 | | step | 3.85e+04 | | vb | 0.000148 | | vb_q0 | 0.00069 | | vb_q1 | 9.13e-06 | | vb_q2 | 2.97e-06 | | vb_q3 | 1.33e-06 | --------------------------- --------------------------- | grad_norm | 1.13 | | loss | 0.00728 | | loss_cal | 0.0187 | | loss_cal_q0 | 0.0193 | | loss_cal_q1 | 0.0178 | | loss_cal_q2 | 0.0177 | | loss_cal_q3 | 0.0194 | | loss_diff | 0.00236 | | loss_diff_q0 | 0.00731 | | loss_diff_q1 | 0.00125 | | loss_diff_q2 | 0.000322 | | loss_diff_q3 | 0.000103 | | loss_q0 | 0.0256 | | loss_q1 | 0.00126 | | loss_q2 | 0.000325 | | loss_q3 | 0.000104 | | param_norm | 258 | | samples | 3.09e+05 | | step | 3.86e+04 | | vb | 0.00491 | | vb_q0 | 0.0183 | | vb_q1 | 9.37e-06 | | vb_q2 | 2.92e-06 | | vb_q3 | 1.31e-06 | --------------------------- --------------------------- | grad_norm | 1.07 | | loss | 0.00587 | | loss_cal | 0.0179 | | loss_cal_q0 | 0.0174 | | loss_cal_q1 | 0.0164 | | loss_cal_q2 | 0.0193 | | loss_cal_q3 | 0.0181 | | loss_diff | 0.00158 | | loss_diff_q0 | 0.00461 | | loss_diff_q1 | 0.00113 | | loss_diff_q2 | 0.000297 | | loss_diff_q3 | 9.81e-05 | | loss_q0 | 0.0206 | | loss_q1 | 0.00114 | | loss_q2 | 0.0003 | | loss_q3 | 9.93e-05 | | param_norm | 258 | | samples | 3.1e+05 | | step | 3.87e+04 | | vb | 0.00428 | | vb_q0 | 0.016 | | vb_q1 | 8.39e-06 | | vb_q2 | 2.71e-06 | | vb_q3 | 1.25e-06 | --------------------------- --------------------------- | grad_norm | 1.13 | | loss | 0.00593 | | loss_cal | 0.0193 | | loss_cal_q0 | 0.0206 | | loss_cal_q1 | 0.0185 | | loss_cal_q2 | 0.0197 | | loss_cal_q3 | 0.018 | | loss_diff | 0.00173 | | loss_diff_q0 | 0.00569 | | loss_diff_q1 | 0.00115 | | loss_diff_q2 | 0.000322 | | loss_diff_q3 | 0.000111 | | loss_q0 | 0.0238 | | loss_q1 | 0.00115 | | loss_q2 | 0.000325 | | loss_q3 | 0.000112 | | param_norm | 258 | | samples | 3.1e+05 | | step | 3.88e+04 | | vb | 0.0042 | | vb_q0 | 0.0181 | | vb_q1 | 8.56e-06 | | vb_q2 | 2.92e-06 | | vb_q3 | 1.39e-06 | --------------------------- --------------------------- | grad_norm | 1.13 | | loss | 0.000951 | | loss_cal | 0.0186 | | loss_cal_q0 | 0.0187 | | loss_cal_q1 | 0.0171 | | loss_cal_q2 | 0.0178 | | loss_cal_q3 | 0.0201 | | loss_diff | 0.000916 | | loss_diff_q0 | 0.00243 | | loss_diff_q1 | 0.00103 | | loss_diff_q2 | 0.000262 | | loss_diff_q3 | 8.47e-05 | | loss_q0 | 0.00257 | | loss_q1 | 0.00103 | | loss_q2 | 0.000265 | | loss_q3 | 8.58e-05 | | param_norm | 258 | | samples | 3.11e+05 | | step | 3.89e+04 | | vb | 3.55e-05 | | vb_q0 | 0.000143 | | vb_q1 | 7.7e-06 | | vb_q2 | 2.4e-06 | | vb_q3 | 1.08e-06 | --------------------------- --------------------------- | grad_norm | 1.21 | | loss | 0.00704 | | loss_cal | 0.0183 | | loss_cal_q0 | 0.0182 | | loss_cal_q1 | 0.0195 | | loss_cal_q2 | 0.0174 | | loss_cal_q3 | 0.0184 | | loss_diff | 0.00202 | | loss_diff_q0 | 0.00619 | | loss_diff_q1 | 0.00117 | | loss_diff_q2 | 0.000306 | | loss_diff_q3 | 0.000115 | | loss_q0 | 0.0253 | | loss_q1 | 0.00118 | | loss_q2 | 0.000309 | | loss_q3 | 0.000116 | | param_norm | 258 | | samples | 3.12e+05 | | step | 3.9e+04 | | vb | 0.00502 | | vb_q0 | 0.0191 | | vb_q1 | 8.83e-06 | | vb_q2 | 2.79e-06 | | vb_q3 | 1.48e-06 | --------------------------- --------------------------- | grad_norm | 1.31 | | loss | 0.00292 | | loss_cal | 0.0187 | | loss_cal_q0 | 0.0183 | | loss_cal_q1 | 0.0191 | | loss_cal_q2 | 0.0174 | | loss_cal_q3 | 0.0187 | | loss_diff | 0.0013 | | loss_diff_q0 | 0.00357 | | loss_diff_q1 | 0.00121 | | loss_diff_q2 | 0.000323 | | loss_diff_q3 | 0.00011 | | loss_q0 | 0.0101 | | loss_q1 | 0.00122 | | loss_q2 | 0.000326 | | loss_q3 | 0.000112 | | param_norm | 258 | | samples | 3.13e+05 | | step | 3.91e+04 | | vb | 0.00162 | | vb_q0 | 0.00655 | | vb_q1 | 9.23e-06 | | vb_q2 | 2.94e-06 | | vb_q3 | 1.41e-06 | --------------------------- --------------------------- | grad_norm | 1.28 | | loss | 0.0026 | | loss_cal | 0.0198 | | loss_cal_q0 | 0.0187 | | loss_cal_q1 | 0.019 | | loss_cal_q2 | 0.0213 | | loss_cal_q3 | 0.0203 | | loss_diff | 0.00119 | | loss_diff_q0 | 0.00356 | | loss_diff_q1 | 0.00107 | | loss_diff_q2 | 0.000278 | | loss_diff_q3 | 8.35e-05 | | loss_q0 | 0.00965 | | loss_q1 | 0.00108 | | loss_q2 | 0.000281 | | loss_q3 | 8.46e-05 | | param_norm | 259 | | samples | 3.14e+05 | | step | 3.92e+04 | | vb | 0.00141 | | vb_q0 | 0.00609 | | vb_q1 | 8.06e-06 | | vb_q2 | 2.53e-06 | | vb_q3 | 1.07e-06 | --------------------------- --------------------------- | grad_norm | 1.17 | | loss | 0.00121 | | loss_cal | 0.0185 | | loss_cal_q0 | 0.0191 | | loss_cal_q1 | 0.0182 | | loss_cal_q2 | 0.0185 | | loss_cal_q3 | 0.0185 | | loss_diff | 0.00104 | | loss_diff_q0 | 0.00282 | | loss_diff_q1 | 0.00112 | | loss_diff_q2 | 0.000298 | | loss_diff_q3 | 9.01e-05 | | loss_q0 | 0.00355 | | loss_q1 | 0.00113 | | loss_q2 | 0.000301 | | loss_q3 | 9.13e-05 | | param_norm | 259 | | samples | 3.14e+05 | | step | 3.93e+04 | | vb | 0.000175 | | vb_q0 | 0.000726 | | vb_q1 | 8.37e-06 | | vb_q2 | 2.64e-06 | | vb_q3 | 1.14e-06 | --------------------------- --------------------------- | grad_norm | 1.11 | | loss | 0.00499 | | loss_cal | 0.0187 | | loss_cal_q0 | 0.0185 | | loss_cal_q1 | 0.0182 | | loss_cal_q2 | 0.019 | | loss_cal_q3 | 0.0191 | | loss_diff | 0.00168 | | loss_diff_q0 | 0.00563 | | loss_diff_q1 | 0.00104 | | loss_diff_q2 | 0.00027 | | loss_diff_q3 | 8.65e-05 | | loss_q0 | 0.0199 | | loss_q1 | 0.00105 | | loss_q2 | 0.000273 | | loss_q3 | 8.77e-05 | | param_norm | 259 | | samples | 3.15e+05 | | step | 3.94e+04 | | vb | 0.00331 | | vb_q0 | 0.0143 | | vb_q1 | 7.74e-06 | | vb_q2 | 2.46e-06 | | vb_q3 | 1.1e-06 | --------------------------- --------------------------- | grad_norm | 1.17 | | loss | 0.00277 | | loss_cal | 0.0181 | | loss_cal_q0 | 0.0178 | | loss_cal_q1 | 0.0194 | | loss_cal_q2 | 0.0176 | | loss_cal_q3 | 0.0177 | | loss_diff | 0.00119 | | loss_diff_q0 | 0.00322 | | loss_diff_q1 | 0.00116 | | loss_diff_q2 | 0.000275 | | loss_diff_q3 | 9.27e-05 | | loss_q0 | 0.00956 | | loss_q1 | 0.00117 | | loss_q2 | 0.000277 | | loss_q3 | 9.39e-05 | | param_norm | 259 | | samples | 3.16e+05 | | step | 3.95e+04 | | vb | 0.00158 | | vb_q0 | 0.00634 | | vb_q1 | 8.61e-06 | | vb_q2 | 2.51e-06 | | vb_q3 | 1.18e-06 | --------------------------- --------------------------- | grad_norm | 1.19 | | loss | 0.00101 | | loss_cal | 0.0188 | | loss_cal_q0 | 0.0193 | | loss_cal_q1 | 0.0181 | | loss_cal_q2 | 0.0187 | | loss_cal_q3 | 0.0189 | | loss_diff | 0.000962 | | loss_diff_q0 | 0.00231 | | loss_diff_q1 | 0.00105 | | loss_diff_q2 | 0.000254 | | loss_diff_q3 | 7.61e-05 | | loss_q0 | 0.00247 | | loss_q1 | 0.00106 | | loss_q2 | 0.000256 | | loss_q3 | 7.7e-05 | | param_norm | 259 | | samples | 3.17e+05 | | step | 3.96e+04 | | vb | 4.55e-05 | | vb_q0 | 0.000162 | | vb_q1 | 7.82e-06 | | vb_q2 | 2.32e-06 | | vb_q3 | 9.65e-07 | --------------------------- --------------------------- | grad_norm | 1.11 | | loss | 0.00112 | | loss_cal | 0.0178 | | loss_cal_q0 | 0.0167 | | loss_cal_q1 | 0.0182 | | loss_cal_q2 | 0.0186 | | loss_cal_q3 | 0.0179 | | loss_diff | 0.000974 | | loss_diff_q0 | 0.00237 | | loss_diff_q1 | 0.00107 | | loss_diff_q2 | 0.000264 | | loss_diff_q3 | 7.01e-05 | | loss_q0 | 0.00289 | | loss_q1 | 0.00108 | | loss_q2 | 0.000267 | | loss_q3 | 7.1e-05 | | param_norm | 259 | | samples | 3.18e+05 | | step | 3.97e+04 | | vb | 0.000143 | | vb_q0 | 0.000514 | | vb_q1 | 7.96e-06 | | vb_q2 | 2.38e-06 | | vb_q3 | 8.98e-07 | --------------------------- --------------------------- | grad_norm | 1.11 | | loss | 0.0009 | | loss_cal | 0.0181 | | loss_cal_q0 | 0.0182 | | loss_cal_q1 | 0.0178 | | loss_cal_q2 | 0.0187 | | loss_cal_q3 | 0.0176 | | loss_diff | 0.000871 | | loss_diff_q0 | 0.00226 | | loss_diff_q1 | 0.000984 | | loss_diff_q2 | 0.000265 | | loss_diff_q3 | 7.19e-05 | | loss_q0 | 0.00237 | | loss_q1 | 0.000992 | | loss_q2 | 0.000267 | | loss_q3 | 7.28e-05 | | param_norm | 259 | | samples | 3.18e+05 | | step | 3.98e+04 | | vb | 2.9e-05 | | vb_q0 | 0.000112 | | vb_q1 | 7.39e-06 | | vb_q2 | 2.4e-06 | | vb_q3 | 9.16e-07 | --------------------------- --------------------------- | grad_norm | 1.15 | | loss | 0.0024 | | loss_cal | 0.0183 | | loss_cal_q0 | 0.0171 | | loss_cal_q1 | 0.0183 | | loss_cal_q2 | 0.0189 | | loss_cal_q3 | 0.0183 | | loss_diff | 0.00102 | | loss_diff_q0 | 0.00255 | | loss_diff_q1 | 0.00111 | | loss_diff_q2 | 0.000256 | | loss_diff_q3 | 7.02e-05 | | loss_q0 | 0.00805 | | loss_q1 | 0.00112 | | loss_q2 | 0.000259 | | loss_q3 | 7.11e-05 | | param_norm | 259 | | samples | 3.19e+05 | | step | 3.99e+04 | | vb | 0.00138 | | vb_q0 | 0.00549 | | vb_q1 | 8.3e-06 | | vb_q2 | 2.32e-06 | | vb_q3 | 8.83e-07 | --------------------------- --------------------------- | grad_norm | 1.29 | | loss | 0.00123 | | loss_cal | 0.02 | | loss_cal_q0 | 0.0187 | | loss_cal_q1 | 0.0189 | | loss_cal_q2 | 0.0204 | | loss_cal_q3 | 0.0219 | | loss_diff | 0.000953 | | loss_diff_q0 | 0.00246 | | loss_diff_q1 | 0.000991 | | loss_diff_q2 | 0.000263 | | loss_diff_q3 | 6.7e-05 | | loss_q0 | 0.00352 | | loss_q1 | 0.000998 | | loss_q2 | 0.000265 | | loss_q3 | 6.79e-05 | | param_norm | 259 | | samples | 3.2e+05 | | step | 4e+04 | | vb | 0.000278 | | vb_q0 | 0.00107 | | vb_q1 | 7.42e-06 | | vb_q2 | 2.37e-06 | | vb_q3 | 8.45e-07 | --------------------------- saving model 0... saving model 0.9999... --------------------------- | grad_norm | 1.12 | | loss | 0.00252 | | loss_cal | 0.018 | | loss_cal_q0 | 0.0187 | | loss_cal_q1 | 0.0171 | | loss_cal_q2 | 0.0178 | | loss_cal_q3 | 0.0182 | | loss_diff | 0.00104 | | loss_diff_q0 | 0.00279 | | loss_diff_q1 | 0.00102 | | loss_diff_q2 | 0.000248 | | loss_diff_q3 | 7.01e-05 | | loss_q0 | 0.00867 | | loss_q1 | 0.00103 | | loss_q2 | 0.00025 | | loss_q3 | 7.1e-05 | | param_norm | 259 | | samples | 3.21e+05 | | step | 4.01e+04 | | vb | 0.00148 | | vb_q0 | 0.00588 | | vb_q1 | 7.67e-06 | | vb_q2 | 2.26e-06 | | vb_q3 | 8.92e-07 | --------------------------- --------------------------- | grad_norm | 1.05 | | loss | 0.00109 | | loss_cal | 0.0174 | | loss_cal_q0 | 0.0175 | | loss_cal_q1 | 0.0176 | | loss_cal_q2 | 0.017 | | loss_cal_q3 | 0.0176 | | loss_diff | 0.000984 | | loss_diff_q0 | 0.00261 | | loss_diff_q1 | 0.00104 | | loss_diff_q2 | 0.000273 | | loss_diff_q3 | 7.72e-05 | | loss_q0 | 0.00304 | | loss_q1 | 0.00105 | | loss_q2 | 0.000275 | | loss_q3 | 7.82e-05 | | param_norm | 259 | | samples | 3.22e+05 | | step | 4.02e+04 | | vb | 0.000108 | | vb_q0 | 0.000434 | | vb_q1 | 7.82e-06 | | vb_q2 | 2.45e-06 | | vb_q3 | 9.81e-07 | --------------------------- --------------------------- | grad_norm | 1.25 | | loss | 0.000999 | | loss_cal | 0.0183 | | loss_cal_q0 | 0.0185 | | loss_cal_q1 | 0.0182 | | loss_cal_q2 | 0.0179 | | loss_cal_q3 | 0.0183 | | loss_diff | 0.000926 | | loss_diff_q0 | 0.00241 | | loss_diff_q1 | 0.000974 | | loss_diff_q2 | 0.000259 | | loss_diff_q3 | 7.18e-05 | | loss_q0 | 0.00269 | | loss_q1 | 0.000981 | | loss_q2 | 0.000262 | | loss_q3 | 7.27e-05 | | param_norm | 259 | | samples | 3.22e+05 | | step | 4.03e+04 | | vb | 7.31e-05 | | vb_q0 | 0.000278 | | vb_q1 | 7.24e-06 | | vb_q2 | 2.35e-06 | | vb_q3 | 9.02e-07 | --------------------------- --------------------------- | grad_norm | 0.977 | | loss | 0.00202 | | loss_cal | 0.0167 | | loss_cal_q0 | 0.0167 | | loss_cal_q1 | 0.0165 | | loss_cal_q2 | 0.0166 | | loss_cal_q3 | 0.0171 | | loss_diff | 0.000931 | | loss_diff_q0 | 0.00246 | | loss_diff_q1 | 0.000985 | | loss_diff_q2 | 0.000249 | | loss_diff_q3 | 7.1e-05 | | loss_q0 | 0.00688 | | loss_q1 | 0.000992 | | loss_q2 | 0.000251 | | loss_q3 | 7.19e-05 | | param_norm | 259 | | samples | 3.23e+05 | | step | 4.04e+04 | | vb | 0.00109 | | vb_q0 | 0.00443 | | vb_q1 | 7.34e-06 | | vb_q2 | 2.24e-06 | | vb_q3 | 8.96e-07 | --------------------------- --------------------------- | grad_norm | 1.26 | | loss | 0.00125 | | loss_cal | 0.0188 | | loss_cal_q0 | 0.0185 | | loss_cal_q1 | 0.0186 | | loss_cal_q2 | 0.0175 | | loss_cal_q3 | 0.0204 | | loss_diff | 0.00105 | | loss_diff_q0 | 0.00271 | | loss_diff_q1 | 0.00105 | | loss_diff_q2 | 0.000237 | | loss_diff_q3 | 7.39e-05 | | loss_q0 | 0.00341 | | loss_q1 | 0.00106 | | loss_q2 | 0.000239 | | loss_q3 | 7.49e-05 | | param_norm | 259 | | samples | 3.24e+05 | | step | 4.05e+04 | | vb | 0.000194 | | vb_q0 | 0.00071 | | vb_q1 | 7.82e-06 | | vb_q2 | 2.17e-06 | | vb_q3 | 9.36e-07 | --------------------------- --------------------------- | grad_norm | 1.18 | | loss | 0.000937 | | loss_cal | 0.0174 | | loss_cal_q0 | 0.017 | | loss_cal_q1 | 0.0161 | | loss_cal_q2 | 0.0177 | | loss_cal_q3 | 0.019 | | loss_diff | 0.000885 | | loss_diff_q0 | 0.00239 | | loss_diff_q1 | 0.00094 | | loss_diff_q2 | 0.000241 | | loss_diff_q3 | 6.78e-05 | | loss_q0 | 0.0026 | | loss_q1 | 0.000947 | | loss_q2 | 0.000243 | | loss_q3 | 6.87e-05 | | param_norm | 259 | | samples | 3.25e+05 | | step | 4.06e+04 | | vb | 5.17e-05 | | vb_q0 | 0.000204 | | vb_q1 | 7.03e-06 | | vb_q2 | 2.19e-06 | | vb_q3 | 8.58e-07 | --------------------------- --------------------------- | grad_norm | 1.09 | | loss | 0.00222 | | loss_cal | 0.0177 | | loss_cal_q0 | 0.018 | | loss_cal_q1 | 0.0183 | | loss_cal_q2 | 0.0174 | | loss_cal_q3 | 0.0171 | | loss_diff | 0.00114 | | loss_diff_q0 | 0.00313 | | loss_diff_q1 | 0.000969 | | loss_diff_q2 | 0.000246 | | loss_diff_q3 | 8.05e-05 | | loss_q0 | 0.00733 | | loss_q1 | 0.000976 | | loss_q2 | 0.000248 | | loss_q3 | 8.15e-05 | | param_norm | 259 | | samples | 3.26e+05 | | step | 4.07e+04 | | vb | 0.00108 | | vb_q0 | 0.0042 | | vb_q1 | 7.25e-06 | | vb_q2 | 2.24e-06 | | vb_q3 | 1.04e-06 | --------------------------- --------------------------- | grad_norm | 1.11 | | loss | 0.00163 | | loss_cal | 0.0177 | | loss_cal_q0 | 0.0175 | | loss_cal_q1 | 0.0176 | | loss_cal_q2 | 0.017 | | loss_cal_q3 | 0.0188 | | loss_diff | 0.000936 | | loss_diff_q0 | 0.00258 | | loss_diff_q1 | 0.00103 | | loss_diff_q2 | 0.000248 | | loss_diff_q3 | 7.27e-05 | | loss_q0 | 0.00551 | | loss_q1 | 0.00104 | | loss_q2 | 0.00025 | | loss_q3 | 7.36e-05 | | param_norm | 259 | | samples | 3.26e+05 | | step | 4.08e+04 | | vb | 0.000691 | | vb_q0 | 0.00293 | | vb_q1 | 7.71e-06 | | vb_q2 | 2.23e-06 | | vb_q3 | 9.21e-07 | --------------------------- --------------------------- | grad_norm | 1.08 | | loss | 0.000764 | | loss_cal | 0.0174 | | loss_cal_q0 | 0.0169 | | loss_cal_q1 | 0.0181 | | loss_cal_q2 | 0.0183 | | loss_cal_q3 | 0.0163 | | loss_diff | 0.00075 | | loss_diff_q0 | 0.00174 | | loss_diff_q1 | 0.00102 | | loss_diff_q2 | 0.000253 | | loss_diff_q3 | 6.6e-05 | | loss_q0 | 0.00178 | | loss_q1 | 0.00103 | | loss_q2 | 0.000256 | | loss_q3 | 6.68e-05 | | param_norm | 259 | | samples | 3.27e+05 | | step | 4.09e+04 | | vb | 1.42e-05 | | vb_q0 | 4.81e-05 | | vb_q1 | 7.63e-06 | | vb_q2 | 2.28e-06 | | vb_q3 | 8.39e-07 | --------------------------- --------------------------- | grad_norm | 0.957 | | loss | 0.000985 | | loss_cal | 0.0167 | | loss_cal_q0 | 0.0167 | | loss_cal_q1 | 0.0165 | | loss_cal_q2 | 0.018 | | loss_cal_q3 | 0.0156 | | loss_diff | 0.000864 | | loss_diff_q0 | 0.00215 | | loss_diff_q1 | 0.000981 | | loss_diff_q2 | 0.000245 | | loss_diff_q3 | 7.4e-05 | | loss_q0 | 0.00262 | | loss_q1 | 0.000988 | | loss_q2 | 0.000247 | | loss_q3 | 7.49e-05 | | param_norm | 259 | | samples | 3.28e+05 | | step | 4.1e+04 | | vb | 0.000122 | | vb_q0 | 0.000472 | | vb_q1 | 7.36e-06 | | vb_q2 | 2.22e-06 | | vb_q3 | 9.3e-07 | --------------------------- --------------------------- | grad_norm | 1.03 | | loss | 0.00327 | | loss_cal | 0.0166 | | loss_cal_q0 | 0.016 | | loss_cal_q1 | 0.0181 | | loss_cal_q2 | 0.0159 | | loss_cal_q3 | 0.0166 | | loss_diff | 0.00117 | | loss_diff_q0 | 0.00328 | | loss_diff_q1 | 0.000995 | | loss_diff_q2 | 0.000258 | | loss_diff_q3 | 8.41e-05 | | loss_q0 | 0.0114 | | loss_q1 | 0.001 | | loss_q2 | 0.00026 | | loss_q3 | 8.52e-05 | | param_norm | 259 | | samples | 3.29e+05 | | step | 4.11e+04 | | vb | 0.0021 | | vb_q0 | 0.00817 | | vb_q1 | 7.4e-06 | | vb_q2 | 2.34e-06 | | vb_q3 | 1.07e-06 | --------------------------- --------------------------- | grad_norm | 1.17 | | loss | 0.00202 | | loss_cal | 0.0175 | | loss_cal_q0 | 0.0166 | | loss_cal_q1 | 0.0191 | | loss_cal_q2 | 0.0173 | | loss_cal_q3 | 0.017 | | loss_diff | 0.00106 | | loss_diff_q0 | 0.00261 | | loss_diff_q1 | 0.00107 | | loss_diff_q2 | 0.00028 | | loss_diff_q3 | 9.85e-05 | | loss_q0 | 0.00612 | | loss_q1 | 0.00108 | | loss_q2 | 0.000282 | | loss_q3 | 9.98e-05 | | param_norm | 259 | | samples | 3.3e+05 | | step | 4.12e+04 | | vb | 0.00096 | | vb_q0 | 0.00351 | | vb_q1 | 8.02e-06 | | vb_q2 | 2.53e-06 | | vb_q3 | 1.25e-06 | --------------------------- --------------------------- | grad_norm | 1.05 | | loss | 0.00131 | | loss_cal | 0.0173 | | loss_cal_q0 | 0.0183 | | loss_cal_q1 | 0.0165 | | loss_cal_q2 | 0.0172 | | loss_cal_q3 | 0.0175 | | loss_diff | 0.000973 | | loss_diff_q0 | 0.00267 | | loss_diff_q1 | 0.001 | | loss_diff_q2 | 0.000267 | | loss_diff_q3 | 9.54e-05 | | loss_q0 | 0.00406 | | loss_q1 | 0.00101 | | loss_q2 | 0.00027 | | loss_q3 | 9.66e-05 | | param_norm | 259 | | samples | 3.3e+05 | | step | 4.13e+04 | | vb | 0.000334 | | vb_q0 | 0.00139 | | vb_q1 | 7.52e-06 | | vb_q2 | 2.42e-06 | | vb_q3 | 1.21e-06 | --------------------------- --------------------------- | grad_norm | 1.04 | | loss | 0.000835 | | loss_cal | 0.0176 | | loss_cal_q0 | 0.017 | | loss_cal_q1 | 0.017 | | loss_cal_q2 | 0.0183 | | loss_cal_q3 | 0.0181 | | loss_diff | 0.000801 | | loss_diff_q0 | 0.00206 | | loss_diff_q1 | 0.00101 | | loss_diff_q2 | 0.000234 | | loss_diff_q3 | 5.85e-05 | | loss_q0 | 0.00219 | | loss_q1 | 0.00101 | | loss_q2 | 0.000236 | | loss_q3 | 5.93e-05 | | param_norm | 259 | | samples | 3.31e+05 | | step | 4.14e+04 | | vb | 3.41e-05 | | vb_q0 | 0.000135 | | vb_q1 | 7.47e-06 | | vb_q2 | 2.11e-06 | | vb_q3 | 7.5e-07 | --------------------------- --------------------------- | grad_norm | 1.13 | | loss | 0.00331 | | loss_cal | 0.0175 | | loss_cal_q0 | 0.0174 | | loss_cal_q1 | 0.0171 | | loss_cal_q2 | 0.018 | | loss_cal_q3 | 0.0169 | | loss_diff | 0.00124 | | loss_diff_q0 | 0.00366 | | loss_diff_q1 | 0.000984 | | loss_diff_q2 | 0.000259 | | loss_diff_q3 | 8.34e-05 | | loss_q0 | 0.0121 | | loss_q1 | 0.000992 | | loss_q2 | 0.000262 | | loss_q3 | 8.44e-05 | | param_norm | 259 | | samples | 3.32e+05 | | step | 4.15e+04 | | vb | 0.00207 | | vb_q0 | 0.00841 | | vb_q1 | 7.31e-06 | | vb_q2 | 2.35e-06 | | vb_q3 | 1.06e-06 | --------------------------- --------------------------- | grad_norm | 1.07 | | loss | 0.00486 | | loss_cal | 0.017 | | loss_cal_q0 | 0.0184 | | loss_cal_q1 | 0.0167 | | loss_cal_q2 | 0.0166 | | loss_cal_q3 | 0.0159 | | loss_diff | 0.00167 | | loss_diff_q0 | 0.00466 | | loss_diff_q1 | 0.00113 | | loss_diff_q2 | 0.000287 | | loss_diff_q3 | 0.000106 | | loss_q0 | 0.0158 | | loss_q1 | 0.00114 | | loss_q2 | 0.000289 | | loss_q3 | 0.000107 | | param_norm | 259 | | samples | 3.33e+05 | | step | 4.16e+04 | | vb | 0.00319 | | vb_q0 | 0.0112 | | vb_q1 | 8.4e-06 | | vb_q2 | 2.62e-06 | | vb_q3 | 1.35e-06 | --------------------------- --------------------------- | grad_norm | 1.02 | | loss | 0.000953 | | loss_cal | 0.0174 | | loss_cal_q0 | 0.0178 | | loss_cal_q1 | 0.0177 | | loss_cal_q2 | 0.0168 | | loss_cal_q3 | 0.017 | | loss_diff | 0.00091 | | loss_diff_q0 | 0.00224 | | loss_diff_q1 | 0.000988 | | loss_diff_q2 | 0.000258 | | loss_diff_q3 | 6.73e-05 | | loss_q0 | 0.0024 | | loss_q1 | 0.000995 | | loss_q2 | 0.00026 | | loss_q3 | 6.81e-05 | | param_norm | 260 | | samples | 3.34e+05 | | step | 4.17e+04 | | vb | 4.29e-05 | | vb_q0 | 0.000154 | | vb_q1 | 7.34e-06 | | vb_q2 | 2.3e-06 | | vb_q3 | 8.53e-07 | --------------------------- --------------------------- | grad_norm | 1.06 | | loss | 0.00279 | | loss_cal | 0.0172 | | loss_cal_q0 | 0.0163 | | loss_cal_q1 | 0.0176 | | loss_cal_q2 | 0.016 | | loss_cal_q3 | 0.019 | | loss_diff | 0.00103 | | loss_diff_q0 | 0.00276 | | loss_diff_q1 | 0.001 | | loss_diff_q2 | 0.000231 | | loss_diff_q3 | 6.76e-05 | | loss_q0 | 0.00962 | | loss_q1 | 0.00101 | | loss_q2 | 0.000233 | | loss_q3 | 6.85e-05 | | param_norm | 260 | | samples | 3.34e+05 | | step | 4.18e+04 | | vb | 0.00175 | | vb_q0 | 0.00686 | | vb_q1 | 7.43e-06 | | vb_q2 | 2.09e-06 | | vb_q3 | 8.48e-07 | --------------------------- --------------------------- | grad_norm | 1.04 | | loss | 0.00129 | | loss_cal | 0.0166 | | loss_cal_q0 | 0.0167 | | loss_cal_q1 | 0.0168 | | loss_cal_q2 | 0.0166 | | loss_cal_q3 | 0.0164 | | loss_diff | 0.000952 | | loss_diff_q0 | 0.00247 | | loss_diff_q1 | 0.00104 | | loss_diff_q2 | 0.000237 | | loss_diff_q3 | 6.62e-05 | | loss_q0 | 0.00383 | | loss_q1 | 0.00105 | | loss_q2 | 0.000239 | | loss_q3 | 6.7e-05 | | param_norm | 260 | | samples | 3.35e+05 | | step | 4.19e+04 | | vb | 0.00034 | | vb_q0 | 0.00136 | | vb_q1 | 7.96e-06 | | vb_q2 | 2.15e-06 | | vb_q3 | 8.35e-07 | --------------------------- --------------------------- | grad_norm | 1 | | loss | 0.000881 | | loss_cal | 0.0164 | | loss_cal_q0 | 0.0156 | | loss_cal_q1 | 0.0167 | | loss_cal_q2 | 0.0166 | | loss_cal_q3 | 0.0163 | | loss_diff | 0.000836 | | loss_diff_q0 | 0.00219 | | loss_diff_q1 | 0.000988 | | loss_diff_q2 | 0.000228 | | loss_diff_q3 | 6.67e-05 | | loss_q0 | 0.00237 | | loss_q1 | 0.000995 | | loss_q2 | 0.00023 | | loss_q3 | 6.75e-05 | | param_norm | 260 | | samples | 3.36e+05 | | step | 4.2e+04 | | vb | 4.5e-05 | | vb_q0 | 0.000181 | | vb_q1 | 7.35e-06 | | vb_q2 | 2.08e-06 | | vb_q3 | 8.52e-07 | --------------------------- --------------------------- | grad_norm | 1.11 | | loss | 0.00093 | | loss_cal | 0.017 | | loss_cal_q0 | 0.017 | | loss_cal_q1 | 0.0173 | | loss_cal_q2 | 0.016 | | loss_cal_q3 | 0.0176 | | loss_diff | 0.000883 | | loss_diff_q0 | 0.0021 | | loss_diff_q1 | 0.000947 | | loss_diff_q2 | 0.000251 | | loss_diff_q3 | 5.9e-05 | | loss_q0 | 0.00226 | | loss_q1 | 0.000954 | | loss_q2 | 0.000253 | | loss_q3 | 5.98e-05 | | param_norm | 260 | | samples | 3.37e+05 | | step | 4.21e+04 | | vb | 4.66e-05 | | vb_q0 | 0.000159 | | vb_q1 | 7.03e-06 | | vb_q2 | 2.23e-06 | | vb_q3 | 7.44e-07 | --------------------------- --------------------------- | grad_norm | 1.1 | | loss | 0.000834 | | loss_cal | 0.0174 | | loss_cal_q0 | 0.0173 | | loss_cal_q1 | 0.0168 | | loss_cal_q2 | 0.0175 | | loss_cal_q3 | 0.0177 | | loss_diff | 0.000806 | | loss_diff_q0 | 0.00196 | | loss_diff_q1 | 0.00094 | | loss_diff_q2 | 0.000265 | | loss_diff_q3 | 6.01e-05 | | loss_q0 | 0.00206 | | loss_q1 | 0.000947 | | loss_q2 | 0.000267 | | loss_q3 | 6.08e-05 | | param_norm | 260 | | samples | 3.38e+05 | | step | 4.22e+04 | | vb | 2.77e-05 | | vb_q0 | 9.82e-05 | | vb_q1 | 6.99e-06 | | vb_q2 | 2.36e-06 | | vb_q3 | 7.68e-07 | --------------------------- --------------------------- | grad_norm | 0.94 | | loss | 0.00152 | | loss_cal | 0.0161 | | loss_cal_q0 | 0.0168 | | loss_cal_q1 | 0.0157 | | loss_cal_q2 | 0.0166 | | loss_cal_q3 | 0.0154 | | loss_diff | 0.000863 | | loss_diff_q0 | 0.0023 | | loss_diff_q1 | 0.000921 | | loss_diff_q2 | 0.000231 | | loss_diff_q3 | 7.04e-05 | | loss_q0 | 0.00495 | | loss_q1 | 0.000928 | | loss_q2 | 0.000234 | | loss_q3 | 7.13e-05 | | param_norm | 260 | | samples | 3.38e+05 | | step | 4.23e+04 | | vb | 0.000655 | | vb_q0 | 0.00265 | | vb_q1 | 6.85e-06 | | vb_q2 | 2.11e-06 | | vb_q3 | 8.94e-07 | --------------------------- --------------------------- | grad_norm | 1.09 | | loss | 0.00256 | | loss_cal | 0.0163 | | loss_cal_q0 | 0.0163 | | loss_cal_q1 | 0.0157 | | loss_cal_q2 | 0.0169 | | loss_cal_q3 | 0.016 | | loss_diff | 0.00137 | | loss_diff_q0 | 0.00403 | | loss_diff_q1 | 0.0012 | | loss_diff_q2 | 0.000359 | | loss_diff_q3 | 0.000163 | | loss_q0 | 0.00915 | | loss_q1 | 0.00121 | | loss_q2 | 0.000362 | | loss_q3 | 0.000166 | | param_norm | 260 | | samples | 3.39e+05 | | step | 4.24e+04 | | vb | 0.00119 | | vb_q0 | 0.00512 | | vb_q1 | 9.33e-06 | | vb_q2 | 3.26e-06 | | vb_q3 | 2.12e-06 | --------------------------- --------------------------- | grad_norm | 0.988 | | loss | 0.00336 | | loss_cal | 0.0165 | | loss_cal_q0 | 0.0173 | | loss_cal_q1 | 0.0165 | | loss_cal_q2 | 0.0163 | | loss_cal_q3 | 0.0159 | | loss_diff | 0.00136 | | loss_diff_q0 | 0.0036 | | loss_diff_q1 | 0.00102 | | loss_diff_q2 | 0.000277 | | loss_diff_q3 | 9.03e-05 | | loss_q0 | 0.0107 | | loss_q1 | 0.00103 | | loss_q2 | 0.00028 | | loss_q3 | 9.15e-05 | | param_norm | 260 | | samples | 3.4e+05 | | step | 4.25e+04 | | vb | 0.00199 | | vb_q0 | 0.00714 | | vb_q1 | 7.72e-06 | | vb_q2 | 2.51e-06 | | vb_q3 | 1.14e-06 | --------------------------- --------------------------- | grad_norm | 1.11 | | loss | 0.00535 | | loss_cal | 0.0166 | | loss_cal_q0 | 0.0157 | | loss_cal_q1 | 0.0169 | | loss_cal_q2 | 0.0167 | | loss_cal_q3 | 0.0172 | | loss_diff | 0.00185 | | loss_diff_q0 | 0.00527 | | loss_diff_q1 | 0.00111 | | loss_diff_q2 | 0.000272 | | loss_diff_q3 | 9.34e-05 | | loss_q0 | 0.0177 | | loss_q1 | 0.00112 | | loss_q2 | 0.000274 | | loss_q3 | 9.46e-05 | | param_norm | 260 | | samples | 3.41e+05 | | step | 4.26e+04 | | vb | 0.0035 | | vb_q0 | 0.0124 | | vb_q1 | 8.28e-06 | | vb_q2 | 2.47e-06 | | vb_q3 | 1.19e-06 | --------------------------- --------------------------- | grad_norm | 1.01 | | loss | 0.0015 | | loss_cal | 0.0159 | | loss_cal_q0 | 0.0168 | | loss_cal_q1 | 0.0158 | | loss_cal_q2 | 0.0157 | | loss_cal_q3 | 0.0151 | | loss_diff | 0.00126 | | loss_diff_q0 | 0.0034 | | loss_diff_q1 | 0.00115 | | loss_diff_q2 | 0.000284 | | loss_diff_q3 | 0.000106 | | loss_q0 | 0.0043 | | loss_q1 | 0.00116 | | loss_q2 | 0.000287 | | loss_q3 | 0.000108 | | param_norm | 260 | | samples | 3.42e+05 | | step | 4.27e+04 | | vb | 0.000236 | | vb_q0 | 0.000894 | | vb_q1 | 8.54e-06 | | vb_q2 | 2.57e-06 | | vb_q3 | 1.36e-06 | --------------------------- --------------------------- | grad_norm | 1.06 | | loss | 0.00708 | | loss_cal | 0.0166 | | loss_cal_q0 | 0.0164 | | loss_cal_q1 | 0.017 | | loss_cal_q2 | 0.0177 | | loss_cal_q3 | 0.0155 | | loss_diff | 0.00255 | | loss_diff_q0 | 0.00795 | | loss_diff_q1 | 0.00117 | | loss_diff_q2 | 0.000325 | | loss_diff_q3 | 0.000115 | | loss_q0 | 0.0246 | | loss_q1 | 0.00118 | | loss_q2 | 0.000328 | | loss_q3 | 0.000116 | | param_norm | 260 | | samples | 3.42e+05 | | step | 4.28e+04 | | vb | 0.00454 | | vb_q0 | 0.0166 | | vb_q1 | 8.69e-06 | | vb_q2 | 2.94e-06 | | vb_q3 | 1.45e-06 | --------------------------- --------------------------- | grad_norm | 1.13 | | loss | 0.00337 | | loss_cal | 0.0164 | | loss_cal_q0 | 0.0158 | | loss_cal_q1 | 0.0181 | | loss_cal_q2 | 0.0164 | | loss_cal_q3 | 0.0154 | | loss_diff | 0.00136 | | loss_diff_q0 | 0.0042 | | loss_diff_q1 | 0.00113 | | loss_diff_q2 | 0.000285 | | loss_diff_q3 | 9.05e-05 | | loss_q0 | 0.0126 | | loss_q1 | 0.00114 | | loss_q2 | 0.000287 | | loss_q3 | 9.17e-05 | | param_norm | 260 | | samples | 3.43e+05 | | step | 4.29e+04 | | vb | 0.00201 | | vb_q0 | 0.00843 | | vb_q1 | 8.42e-06 | | vb_q2 | 2.59e-06 | | vb_q3 | 1.15e-06 | ---------------------------