diff --git a/.github/workflows/cpu-basic-install-prepare-train-inf-test.yml b/.github/workflows/cpu-basic-install-prepare-train-inf-test.yml
new file mode 100644
index 0000000000..39d2318941
--- /dev/null
+++ b/.github/workflows/cpu-basic-install-prepare-train-inf-test.yml
@@ -0,0 +1,35 @@
+name: Basic Pytorch Installation, Data Prep, CPU Training, CPU Inference
+on: [push, pull_request]
+jobs:
+  Install-Dependencies_Data-Prep_CPU-Training_CPU-Inference:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out repository code
+        uses: actions/checkout@v4
+      - run: echo "${{ github.repository }} repository has been cloned to the runner."
+      - run: echo "Currently on ${{ github.ref }} branch"
+      - name: ls of directory
+        run: |
+          ls ${{ github.workspace }}
+      # Caching pip dependencies
+      - name: Cache pip dependencies
+        uses: actions/cache@v3
+        with:
+          path: ~/.cache/pip
+          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements_cpu.txt') }}
+          restore-keys: |
+            ${{ runner.os }}-pip-
+      - name: Install CPU Dependencies
+        run: |
+          python3 -m pip install --upgrade pip
+          python3 -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+          # requirements_cpu.txt pins "+cpu" builds of torch/torchaudio/torchvision; those only exist on the PyTorch CPU index
+          python3 -m pip install -r requirements_cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
+      - name: Run Small Network on CPU
+        run: |
+          python3 data/shakespeare_char/prepare.py
+          python3 train.py --out_dir=out --device=cpu --eval_interval=2 --log_interval=1 --block_size=2 --batch_size=2 --n_layer=2 --n_head=2 --n_embd=16 --max_iters=3 --lr_decay_iters=2 --dropout=0.0
+      - name: Run CPU Inference
+        run: |
+          python3 sample.py --device=cpu --out_dir="out"
+
diff --git a/requirements_cpu.txt b/requirements_cpu.txt
new file mode 100644
index 0000000000..15132a5550
--- /dev/null
+++ b/requirements_cpu.txt
@@ -0,0 +1,77 @@
+absl-py==2.0.0
+aiohttp==3.8.6
+aiosignal==1.3.1
+appdirs==1.4.4
+async-timeout==4.0.3
+attrs==23.1.0
+black==23.10.1
+cachetools==5.3.2
+certifi==2022.12.7
+charset-normalizer==2.1.1
+click==8.1.7
+datasets==2.14.6
+dill==0.3.7
+docker-pycreds==0.4.0
+filelock==3.9.0
+frozenlist==1.4.0
+fsspec==2023.4.0
+gitdb==4.0.11
+GitPython==3.1.40
+google-auth==2.23.4
+google-auth-oauthlib==1.1.0
+greenlet==3.0.1
+grpcio==1.59.2
+huggingface-hub==0.17.3
+idna==3.4
+Jinja2==3.1.2
+Markdown==3.5.1
+MarkupSafe==2.1.2
+mpmath==1.3.0
+msgpack==1.0.7
+multidict==6.0.4
+multiprocess==0.70.15
+mypy-extensions==1.0.0
+networkx==3.0
+numpy==1.26.1
+oauthlib==3.2.2
+packaging==23.2
+pandas==2.1.2
+pathspec==0.11.2
+pathtools==0.1.2
+Pillow==9.3.0
+platformdirs==3.11.0
+protobuf==4.23.4
+psutil==5.9.6
+pyarrow==14.0.0
+pyasn1==0.5.0
+pyasn1-modules==0.3.0
+pynvim==0.4.3
+python-dateutil==2.8.2
+pytz==2023.3.post1
+PyYAML==6.0.1
+regex==2023.10.3
+requests==2.28.1
+requests-oauthlib==1.3.1
+rsa==4.9
+safetensors==0.4.0
+sentry-sdk==1.34.0
+setproctitle==1.3.3
+six==1.16.0
+smmap==5.0.1
+sympy==1.12
+tensorboard==2.15.1
+tensorboard-data-server==0.7.2
+tiktoken==0.5.1
+tokenizers==0.14.1
+torch==2.1.0+cpu
+torchaudio==2.1.0+cpu
+torchvision==0.16.0+cpu
+tqdm==4.66.1
+transformers==4.35.0
+typing_extensions==4.4.0
+tzdata==2023.3
+urllib3==1.26.13
+wandb==0.15.12
+Werkzeug==3.0.1
+xxhash==3.4.1
+yarl==1.9.2
diff --git a/sample.py b/sample.py
index 78d8c4e88f..8432ed91fa 100644
--- a/sample.py
+++ b/sample.py
@@ -7,17 +7,31 @@
 import torch
 import tiktoken
 from model import GPTConfig, GPT
+import argparse
+
+def parseargs():
+    """Parse the -d/--device and -o/--out_dir options; defaults match the previously hard-coded values."""
+    parser = argparse.ArgumentParser(description='')
+    parser.add_argument("-d",
+                        "--device",
+                        type=str, default='cuda', help="device to run inference, e.g. 'cpu' or 'cuda' or 'cuda:0', 'cuda:1', etc...")
+    parser.add_argument("-o",
+                        "--out_dir",
+                        type=str, default='out', help="directory to load checkpoint from")
+
+    return parser.parse_args()
 
 # -----------------------------------------------------------------------------
+args = parseargs()
 init_from = 'resume' # either 'resume' (from an out_dir) or a gpt2 variant (e.g. 'gpt2-xl')
-out_dir = 'out' # ignored if init_from is not 'resume'
+out_dir = args.out_dir # ignored if init_from is not 'resume'
 start = "\n" # or "<|endoftext|>" or etc. Can also specify a file, use as: "FILE:prompt.txt"
 num_samples = 10 # number of samples to draw
 max_new_tokens = 500 # number of tokens generated in each sample
 temperature = 0.8 # 1.0 = no change, < 1.0 = less random, > 1.0 = more random, in predictions
 top_k = 200 # retain only the top_k most likely tokens, clamp others to have 0 probability
 seed = 1337
-device = 'cuda' # examples: 'cpu', 'cuda', 'cuda:0', 'cuda:1', etc.
+device = args.device
 dtype = 'bfloat16' if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else 'float16' # 'float32' or 'bfloat16' or 'float16'
 compile = False # use PyTorch 2.0 to compile the model to be faster
 # -----------------------------------------------------------------------------
diff --git a/train.py b/train.py
index a123e5bef7..a45e98c5c8 100644
--- a/train.py
+++ b/train.py
@@ -4,6 +4,7 @@
 from datetime import datetime
 import math
 import pickle
+from contextlib import nullcontext
 
 import numpy as np
 import torch