diff --git a/.github/workflows/et.yml b/.github/workflows/et.yml
index 378c6544e9..9bf723dbf2 100644
--- a/.github/workflows/et.yml
+++ b/.github/workflows/et.yml
@@ -73,3 +73,51 @@ jobs:
           cat ${PWD}/output_et
 
           echo "Tests complete."
+
+      - name: Run inference
+        run: |
+          # Stop at the first failure: every case reuses one .pte path, so a failed export must not be followed by generate on a stale file.
+          set -e
+
+          export MODEL_PATH=checkpoints/stories15M/stories15M.pt
+          export MODEL_NAME=stories15M
+          export MODEL_DIR=/tmp
+          PTE_PATH="${MODEL_DIR}/${MODEL_NAME}.pte"
+
+          # Print a three-line banner; $1 is the (pre-padded) middle line.
+          banner() {
+            echo "******************************************"
+            printf '%s\n' "$1"
+            echo "******************************************"
+          }
+
+          # Export the checkpoint to ${PTE_PATH}, run generate.py against that file,
+          # and print the captured output. Any arguments are handed to export.py
+          # ahead of the path options (used below for --quant).
+          export_and_generate() {
+            python export.py "$@" --checkpoint-path "${MODEL_PATH}" --output-pte-path "${PTE_PATH}"
+            python generate.py --checkpoint-path "${MODEL_PATH}" --temperature 0 --pte-path "${PTE_PATH}" > ./output_et
+            cat ./output_et
+          }
+
+          # Baseline: export without any --quant option.
+          export_and_generate
+
+          banner "******* Emb: channel-wise quantized ******"
+          export_and_generate --quant '{"embedding" : {"bitwidth": 8, "group_size": 0}}'
+
+          banner "******** Emb: group-wise quantized *******"
+          export_and_generate --quant '{"embedding" : {"bitwidth": 8, "group_size": 8}}'
+
+          banner "******* INT8 channel-wise quantized ******"
+          export_and_generate --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 0}}'
+
+          banner "******** INT8 group-wise quantized *******"
+          export_and_generate --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 8}}'
+
+          banner "******** INT4 group-wise quantized *******"
+          export_and_generate --quant '{"linear:int4" : {"group_size": 32}}'
+
+          echo "tests complete"
+          echo "******************************************"
+