From 33afafc90dbe522ba2b12998e99ea1adca5cf3b8 Mon Sep 17 00:00:00 2001
From: jianan-gu
Date: Mon, 4 Dec 2023 13:12:30 +0800
Subject: [PATCH] Update acc CMD example in doc (#2303)

* Update README.md

* Update run_accuracy.py

* Update run_accuracy_with_deepspeed.py
---
 examples/cpu/inference/python/llm/README.md                       | 2 +-
 .../python/llm/distributed/run_accuracy_with_deepspeed.py         | 6 +++---
 .../inference/python/llm/single_instance/run_accuracy.py          | 6 +++---
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/examples/cpu/inference/python/llm/README.md b/examples/cpu/inference/python/llm/README.md
index 201043307..1a420ea4c 100644
--- a/examples/cpu/inference/python/llm/README.md
+++ b/examples/cpu/inference/python/llm/README.md
@@ -262,7 +262,7 @@ OMP_NUM_THREADS= numactl -m -C python ru
 # Please also add "--int8-bf16-mixed" if your model is quantized with this flag

 # An example of llama2 7b model:
-OMP_NUM_THREADS=56 numactl -m 0 -C 0-55 python run_accuracy.py -m meta-llama/Llama-2-7b-hf --quantized-model-path "./saved_results/best_model.pt" --dtype int8 --accuracy-only --jit --int8 --tasks lambada_openai
+OMP_NUM_THREADS=56 numactl -m 0 -C 0-55 python run_accuracy.py -m meta-llama/Llama-2-7b-hf --quantized-model-path "./saved_results/best_model.pt" --dtype int8 --accuracy-only --jit --tasks lambada_openai
 ```
 ### Distributed with DeepSpeed (autoTP)
 ### Prepare:
diff --git a/examples/cpu/inference/python/llm/distributed/run_accuracy_with_deepspeed.py b/examples/cpu/inference/python/llm/distributed/run_accuracy_with_deepspeed.py
index e383aee59..7e7130dd1 100644
--- a/examples/cpu/inference/python/llm/distributed/run_accuracy_with_deepspeed.py
+++ b/examples/cpu/inference/python/llm/distributed/run_accuracy_with_deepspeed.py
@@ -580,7 +580,7 @@ def _model_call(
         if self._with_jit and self.iter == 0:
             with torch.inference_mode(), torch.no_grad(), torch.cpu.amp.autocast(
                 enabled=True
-                if args.int8_bf16_mixed or self._dtype == torch.bfloat16
+                if args.int8_bf16_mixed or self._dtype == "bfloat16"
                 else False,
             ):
                 if self._dtype != "int8":
@@ -677,7 +677,7 @@ def _model_call(
             ):
                 with torch.inference_mode(), torch.no_grad(), torch.cpu.amp.autocast(
                     enabled=True
-                    if args.int8_bf16_mixed or self._dtype == torch.bfloat16
+                    if args.int8_bf16_mixed or self._dtype == "bfloat16"
                     else False,
                 ):
                     if self._with_jit:
@@ -693,7 +693,7 @@ def _model_call(
         else:
             with torch.inference_mode(), torch.no_grad(), torch.cpu.amp.autocast(
                 enabled=True
-                if args.int8_bf16_mixed or self._dtype == torch.bfloat16
+                if args.int8_bf16_mixed or self._dtype == "bfloat16"
                 else False,
             ):
                 if self._with_jit:
diff --git a/examples/cpu/inference/python/llm/single_instance/run_accuracy.py b/examples/cpu/inference/python/llm/single_instance/run_accuracy.py
index 620e9420c..6967ccba6 100644
--- a/examples/cpu/inference/python/llm/single_instance/run_accuracy.py
+++ b/examples/cpu/inference/python/llm/single_instance/run_accuracy.py
@@ -417,7 +417,7 @@ def _model_call(
         if self._with_jit and self.iter == 0:
             with torch.inference_mode(), torch.no_grad(), torch.cpu.amp.autocast(
                 enabled=True
-                if args.int8_bf16_mixed or self._dtype == torch.bfloat16
+                if args.int8_bf16_mixed or self._dtype == "bfloat16"
                 else False,
             ):
                 if self._dtype != "int8":
@@ -514,7 +514,7 @@ def _model_call(
             ):
                 with torch.inference_mode(), torch.no_grad(), torch.cpu.amp.autocast(
                     enabled=True
-                    if args.int8_bf16_mixed or self._dtype == torch.bfloat16
+                    if args.int8_bf16_mixed or self._dtype == "bfloat16"
                     else False,
                 ):
                     if self._with_jit:
@@ -530,7 +530,7 @@ def _model_call(
         else:
             with torch.inference_mode(), torch.no_grad(), torch.cpu.amp.autocast(
                 enabled=True
-                if args.int8_bf16_mixed or self._dtype == torch.bfloat16
+                if args.int8_bf16_mixed or self._dtype == "bfloat16"
                 else False,
            ):
                 if self._with_jit:
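
A note on the Python change above: as the surrounding context lines show (e.g. the check `if self._dtype != "int8":`), self._dtype holds the requested dtype as a plain string, so the old comparison against the torch.bfloat16 dtype object was always False and autocast was never enabled for bfloat16 runs. Below is a minimal sketch of the difference, with a hypothetical dtype_arg variable standing in for self._dtype; it is an illustration under those assumptions, not code from the patch.

    import torch

    # dtype_arg stands in for self._dtype, which is parsed from the
    # command line as a plain string such as "bfloat16" or "int8".
    dtype_arg = "bfloat16"

    # Old check: a str never compares equal to a torch.dtype object, so
    # this is always False and autocast stayed disabled for bf16 runs.
    print(dtype_arg == torch.bfloat16)  # False

    # New check: string-to-string comparison matches as intended.
    print(dtype_arg == "bfloat16")      # True

    # With the corrected condition, CPU autocast really turns on for bf16.
    with torch.cpu.amp.autocast(enabled=(dtype_arg == "bfloat16")):
        out = torch.matmul(torch.randn(2, 2), torch.randn(2, 2))
        print(out.dtype)                # torch.bfloat16 inside autocast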