Macos+kludge (pytorch#670)
* add ET runner to benchmark

* remove spurious end

* add mps runner and groupsize kludge

* adjust groupsize

* fortify runners

* handle device for export_et
mikekgfb authored and malfet committed Jul 17, 2024
1 parent 6198828 commit 3eb5db7
Showing 4 changed files with 61 additions and 6 deletions.
@@ -32,11 +32,11 @@ jobs:
         echo "::group::Run This"
         grep -v login ./readme-commands.sh | sed -e '1,$s/llama3/stories15M/g' > ./we-run-this.sh
-        echo "******************************************"
+        echo "*******************************************"
         cat ./we-run-this.sh
-        echo "******************************************"
+        echo "*******************************************"
         bash -x ./we-run-this.sh
         echo "tests complete"
-        echo "******************************************"
+        echo "*******************************************"
         echo "::endgroup::"
47 changes: 47 additions & 0 deletions .github/workflows/run-readme2-mps.yml
@@ -0,0 +1,47 @@
name: Run the README instructions - with stories - to ensure they work

on:
  pull_request:
  push:
    branches:
      - main
  workflow_dispatch:

jobs:
  test-mps:
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    with:
      runner: macos-14-xlarge
      script: |
        set -x
        # NS: Remove previous installation of torch first,
        # as this script does not install anything into the conda env but rather as a system dep
        pip3 uninstall -y torch || true
        set -eou pipefail
        echo "::group::Print machine info"
        uname -a
        sysctl machdep.cpu.brand_string
        sysctl machdep.cpu.core_count
        echo "::endgroup::"
        # echo "::group::Install newer objcopy that supports --set-section-alignment"
        # yum install -y devtoolset-10-binutils
        # export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
        # echo "::endgroup::"
        echo "::group::Create script"
        python3 scripts/process-readme.py > ./readme-commands.sh
        echo "exit 1" >> ./readme-commands.sh
        echo "::endgroup::"
        echo "::group::Run This"
        grep -v login ./readme-commands.sh | sed -e '1,$s/llama3/stories15M/g' > ./we-run-this.sh
        echo "*******************************************"
        cat ./we-run-this.sh
        echo "*******************************************"
        bash -x ./we-run-this.sh
        echo "tests complete"
        echo "*******************************************"
        echo "::endgroup::"
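The workflow above depends on `scripts/process-readme.py` turning the README's fenced code blocks into a single runnable script. The script itself is not part of this diff, so the sketch below is only an assumed, minimal version of that idea: collect fenced shell blocks from markdown text and concatenate them.

```python
import re

FENCE = "`" * 3  # a markdown triple-backtick fence, built programmatically

def extract_shell_commands(readme_text: str) -> str:
    # Collect the bodies of fenced shell/bash (or unlabeled) code blocks so
    # they can be replayed as one script. This is a guess at what
    # scripts/process-readme.py does; the real script is not shown in this diff.
    pattern = FENCE + r"(?:shell|bash)?\n(.*?)" + FENCE
    return "\n".join(re.findall(pattern, readme_text, re.DOTALL))

# A tiny stand-in README with two fenced blocks.
readme = "\n".join([
    "Intro text.",
    FENCE + "shell",
    "echo hello",
    FENCE,
    "More prose.",
    FENCE,
    "echo world",
    FENCE,
])

print(extract_shell_commands(readme))
```

Note that the workflow then post-processes the result with `grep -v login` and a `sed` substitution of `llama3` with `stories15M`, so the README's big-model commands run against a tiny test model on CI.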
2 changes: 2 additions & 0 deletions README.md
@@ -197,6 +197,8 @@ export TORCHCHAT_ROOT=${PWD}
 ### Export for mobile
 The following example uses the Llama3 8B Instruct model.
 
+[shell default]: echo '{"embedding": {"bitwidth": 4, "groupsize" : 32}, "linear:a8w4dq": {"groupsize" : 32}}' >./config/data/mobile.json
+
 ```
 # Export
 python3 torchchat.py export llama3 --quantize config/data/mobile.json --output-pte-path llama3.pte
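The README change writes a quantization recipe (the "groupsize kludge" from the commit title) to `config/data/mobile.json`, dropping the group size to 32 for both the embedding and linear quantizers. A small sketch of loading and sanity-checking such a recipe — the allowed group-size set here is an assumption for illustration, not something stated in the diff:

```python
import json

# The exact recipe the README change writes to config/data/mobile.json
# (copied from the diff above).
recipe = json.loads(
    '{"embedding": {"bitwidth": 4, "groupsize" : 32}, '
    '"linear:a8w4dq": {"groupsize" : 32}}'
)

def check_groupsizes(recipe: dict, allowed=(32, 64, 128, 256)) -> None:
    # Verify every quantizer entry uses a group size from an allowed set.
    # The allowed values are a hypothetical constraint for this example.
    for name, options in recipe.items():
        gs = options.get("groupsize")
        if gs is not None and gs not in allowed:
            raise ValueError(f"{name}: unsupported groupsize {gs}")

check_groupsizes(recipe)
print(recipe["embedding"]["groupsize"], recipe["linear:a8w4dq"]["groupsize"])  # → 32 32
```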
12 changes: 9 additions & 3 deletions cli.py
@@ -312,9 +312,15 @@ def arg_init(args):

     # if we specify dtype in quantization recipe, replicate it as args.dtype
     args.dtype = args.quantize.get("precision", {}).get("dtype", args.dtype)
-    args.device = get_device_str(
-        args.quantize.get("executor", {}).get("accelerator", args.device)
-    )
+
+    if args.output_pte_path:
+        if args.device not in ["cpu", "fast"]:
+            raise RuntimeError("Device not supported by ExecuTorch")
+        args.device = "cpu"
+    else:
+        args.device = get_device_str(
+            args.quantize.get("executor", {}).get("accelerator", args.device)
+        )
 
     if hasattr(args, "seed") and args.seed:
         torch.manual_seed(args.seed)
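The cli.py hunk changes device resolution: when an ExecuTorch `.pte` export is requested, only `cpu` (or the `fast` alias) is accepted and the device is pinned to `cpu`; otherwise the quantization recipe's `executor` entry may still override the device. A standalone sketch of that logic, where `get_device_str` and the args object are simplified stand-ins for the real torchchat helpers:

```python
from types import SimpleNamespace

def get_device_str(device: str) -> str:
    # Simplified stand-in: torchchat's real helper also resolves "fast" to the
    # best available backend (e.g. cuda/mps/cpu); here "fast" just maps to cpu.
    return "cpu" if device == "fast" else device

def resolve_device(args) -> str:
    # Mirrors the control flow added to arg_init in this commit.
    if args.output_pte_path:
        if args.device not in ["cpu", "fast"]:
            raise RuntimeError("Device not supported by ExecuTorch")
        return "cpu"
    return get_device_str(
        args.quantize.get("executor", {}).get("accelerator", args.device)
    )

et_args = SimpleNamespace(output_pte_path="llama3.pte", device="fast", quantize={})
print(resolve_device(et_args))  # ExecuTorch export is pinned to "cpu"

eager_args = SimpleNamespace(output_pte_path=None, device="mps", quantize={})
print(resolve_device(eager_args))  # no .pte export, so the device stands
```

Requesting a `.pte` export with an unsupported device (say `cuda`) raises `RuntimeError("Device not supported by ExecuTorch")`, which is the guard this commit introduces.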
