From b77c89f14efa6e742d3e38955878a68d4c243016 Mon Sep 17 00:00:00 2001
From: Tianyu Liu <lty@fb.com>
Date: Fri, 16 Feb 2024 17:38:04 -0800
Subject: [PATCH] Update on "add memory metrics to TensorBoard"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

<img width="1391" alt="Screenshot 2024-02-15 at 5 19 09 PM" src="https://github.com/pytorch-labs/torchtrain/assets/150487191/af8a2efb-13ff-4e8f-84f2-b245784747ed">

[ghstack-poisoned]
---
 train.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/train.py b/train.py
index e4d3c01b4..faabc2d8d 100644
--- a/train.py
+++ b/train.py
@@ -222,8 +222,8 @@ def main(args):
 gpu_mem_stats = gpu_metrics.get_current_stats(return_data=True)
 
 metrics = {
-    "loss/global_avg": global_avg_loss,
-    "loss/global_max": global_max_loss,
+    "loss_metrics/global_avg_loss": global_avg_loss,
+    "loss_metrics/global_max_loss": global_max_loss,
     "wps": wps,
     "memory_current/active(%)": gpu_mem_stats.active_curr,
     "memory_current/allocated(%)": gpu_mem_stats.allocated_curr,