From 33afafc90dbe522ba2b12998e99ea1adca5cf3b8 Mon Sep 17 00:00:00 2001
From: jianan-gu
Date: Mon, 4 Dec 2023 13:12:30 +0800
Subject: [PATCH] Update acc CMD example in doc (#2303)

* Update README.md

* Update run_accuracy.py

* Update run_accuracy_with_deepspeed.py
---
 examples/cpu/inference/python/llm/README.md                       | 2 +-
 .../python/llm/distributed/run_accuracy_with_deepspeed.py         | 6 +++---
 .../inference/python/llm/single_instance/run_accuracy.py          | 6 +++---
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/examples/cpu/inference/python/llm/README.md b/examples/cpu/inference/python/llm/README.md
index 201043307..1a420ea4c 100644
--- a/examples/cpu/inference/python/llm/README.md
+++ b/examples/cpu/inference/python/llm/README.md
@@ -262,7 +262,7 @@ OMP_NUM_THREADS= numactl -m -C python ru
 # Please also add "--int8-bf16-mixed" if your model is quantized with this flag

 # An example of llama2 7b model:
-OMP_NUM_THREADS=56 numactl -m 0 -C 0-55 python run_accuracy.py -m meta-llama/Llama-2-7b-hf --quantized-model-path "./saved_results/best_model.pt" --dtype int8 --accuracy-only --jit --int8 --tasks lambada_openai
+OMP_NUM_THREADS=56 numactl -m 0 -C 0-55 python run_accuracy.py -m meta-llama/Llama-2-7b-hf --quantized-model-path "./saved_results/best_model.pt" --dtype int8 --accuracy-only --jit --tasks lambada_openai
 ```
 ### Distributed with DeepSpeed (autoTP)
 ### Prepare:
diff --git a/examples/cpu/inference/python/llm/distributed/run_accuracy_with_deepspeed.py b/examples/cpu/inference/python/llm/distributed/run_accuracy_with_deepspeed.py
index e383aee59..7e7130dd1 100644
--- a/examples/cpu/inference/python/llm/distributed/run_accuracy_with_deepspeed.py
+++ b/examples/cpu/inference/python/llm/distributed/run_accuracy_with_deepspeed.py
@@ -580,7 +580,7 @@ def _model_call(
         if self._with_jit and self.iter == 0:
             with torch.inference_mode(), torch.no_grad(), torch.cpu.amp.autocast(
                 enabled=True
-                if args.int8_bf16_mixed or self._dtype == torch.bfloat16
+                if args.int8_bf16_mixed or self._dtype == "bfloat16"
                 else False,
             ):
                 if self._dtype != "int8":
@@ -677,7 +677,7 @@ def _model_call(
             ):
                 with torch.inference_mode(), torch.no_grad(), torch.cpu.amp.autocast(
                     enabled=True
-                    if args.int8_bf16_mixed or self._dtype == torch.bfloat16
+                    if args.int8_bf16_mixed or self._dtype == "bfloat16"
                     else False,
                 ):
                     if self._with_jit:
@@ -693,7 +693,7 @@ def _model_call(
         else:
             with torch.inference_mode(), torch.no_grad(), torch.cpu.amp.autocast(
                 enabled=True
-                if args.int8_bf16_mixed or self._dtype == torch.bfloat16
+                if args.int8_bf16_mixed or self._dtype == "bfloat16"
                 else False,
             ):
                 if self._with_jit:
diff --git a/examples/cpu/inference/python/llm/single_instance/run_accuracy.py b/examples/cpu/inference/python/llm/single_instance/run_accuracy.py
index 620e9420c..6967ccba6 100644
--- a/examples/cpu/inference/python/llm/single_instance/run_accuracy.py
+++ b/examples/cpu/inference/python/llm/single_instance/run_accuracy.py
@@ -417,7 +417,7 @@ def _model_call(
         if self._with_jit and self.iter == 0:
             with torch.inference_mode(), torch.no_grad(), torch.cpu.amp.autocast(
                 enabled=True
-                if args.int8_bf16_mixed or self._dtype == torch.bfloat16
+                if args.int8_bf16_mixed or self._dtype == "bfloat16"
                 else False,
             ):
                 if self._dtype != "int8":
@@ -514,7 +514,7 @@ def _model_call(
             ):
                 with torch.inference_mode(), torch.no_grad(), torch.cpu.amp.autocast(
                     enabled=True
-                    if args.int8_bf16_mixed or self._dtype == torch.bfloat16
+                    if args.int8_bf16_mixed or self._dtype == "bfloat16"
                     else False,
                 ):
                     if self._with_jit:
@@ -530,7 +530,7 @@ def _model_call(
         else:
             with torch.inference_mode(), torch.no_grad(), torch.cpu.amp.autocast(
                 enabled=True
-                if args.int8_bf16_mixed or self._dtype == torch.bfloat16
+                if args.int8_bf16_mixed or self._dtype == "bfloat16"
                 else False,
            ):
                 if self._with_jit:
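
A note on the Python change above: as the surrounding context lines show (e.g. the check `if self._dtype != "int8":`), self._dtype holds the requested dtype as a plain string, so the old comparison against the torch.bfloat16 dtype object was always False and autocast was never enabled for bfloat16 runs. Below is a minimal sketch of the difference, with a hypothetical dtype_arg variable standing in for self._dtype; it is an illustration under those assumptions, not code from the patch.

    import torch

    # dtype_arg stands in for self._dtype, which is parsed from the
    # command line as a plain string such as "bfloat16" or "int8".
    dtype_arg = "bfloat16"

    # Old check: a str never compares equal to a torch.dtype object, so
    # this is always False and autocast stayed disabled for bf16 runs.
    print(dtype_arg == torch.bfloat16)  # False

    # New check: string-to-string comparison matches as intended.
    print(dtype_arg == "bfloat16")      # True

    # With the corrected condition, CPU autocast really turns on for bf16.
    with torch.cpu.amp.autocast(enabled=(dtype_arg == "bfloat16")):
        out = torch.matmul(torch.randn(2, 2), torch.randn(2, 2))
        print(out.dtype)                # torch.bfloat16 inside autocast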