
Commit dc65d8c: Merge branch 'main' into asonawane/mpt

apsonawane committed Aug 3, 2023
2 parents f116952 + e7ca300
Showing 14 changed files with 122 additions and 9 deletions.
8 changes: 5 additions & 3 deletions test.py
@@ -20,10 +20,10 @@
 # Some of the models have very heavyweight setup, so we have to set a very
 # generous limit. That said, we don't want the entire test suite to hang if
 # a single test encounters an extreme failure, so we give up after a test is
-# unresponsive to 5 minutes. (Note: this does not require that the entire
-# test case completes in 5 minutes. It requires that if the worker is
+# unresponsive for 5 minutes by default. (Note: this does not require that the
+# entire test case completes in 5 minutes. It requires that if the worker is
 # unresponsive for 5 minutes the parent will presume it dead / incapacitated.)
-TIMEOUT = 300  # Seconds
+TIMEOUT = int(os.getenv("TIMEOUT", 300))  # Seconds

 class TestBenchmark(unittest.TestCase):
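Reading the limit from the environment lets heavyweight models get more headroom without editing the file. A minimal sketch of the pattern, standard library only (the 900-second value is just an example):

```python
import os

# Unset -> int(300) == 300; set, e.g. `TIMEOUT=900 python test.py` -> int("900") == 900.
TIMEOUT = int(os.getenv("TIMEOUT", 300))  # Seconds
print(f"per-test timeout: {TIMEOUT}s")
```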

@@ -123,6 +123,8 @@ def _load_tests():
         devices.append('cuda')
     if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
         devices.append('mps')
+    if device := os.getenv('ACCELERATOR'):
+        devices.append(device)

     for path in _list_model_paths():
         # TODO: skipping quantized tests for now due to BC-breaking changes for prepare
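This makes the device matrix extensible from the environment: any accelerator string, not just the built-in cpu/cuda/mps probes, can be appended. A self-contained sketch ('xpu' is only an illustrative name):

```python
import os

devices = ['cpu']  # stand-in for the cuda/mps detection above
if device := os.getenv('ACCELERATOR'):
    devices.append(device)

# ACCELERATOR=xpu python test.py  ->  ['cpu', 'xpu']
print(devices)
```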
2 changes: 2 additions & 0 deletions torchbenchmark/__init__.py
@@ -295,6 +295,8 @@ def _maybe_import_model(package: str, model_path: str) -> Dict[str, Any]:
     diagnostic_msg = ""
     try:
         module = importlib.import_module(f'.models.{model_name}', package=package)
+        if accelerator_backend := os.getenv("ACCELERATOR_BACKEND"):
+            setattr(module, accelerator_backend, importlib.import_module(accelerator_backend))
         Model = getattr(module, 'Model', None)
         if Model is None:
             diagnostic_msg = f"Warning: {module} does not define attribute Model, skip it"
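The walrus-guarded `setattr` effectively runs `import <backend>` inside each freshly imported model module, so model code can reference the backend by its bare name. A sketch under the assumption that ACCELERATOR_BACKEND names an importable package (`json` stands in for a real backend here):

```python
import importlib
import os
import types

module = types.ModuleType("example_model")  # stand-in for a model module
os.environ["ACCELERATOR_BACKEND"] = "json"  # illustrative value only

if accelerator_backend := os.getenv("ACCELERATOR_BACKEND"):
    # Equivalent to `import json` executed in the module's namespace.
    setattr(module, accelerator_backend, importlib.import_module(accelerator_backend))

assert module.json.dumps({"ok": True}) == '{"ok": true}'
```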
15 changes: 15 additions & 0 deletions torchbenchmark/canary_models/llama_v2_13b/__init__.py
@@ -0,0 +1,15 @@
+from torchbenchmark.tasks import NLP
+from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel, HuggingFaceAuthMixin
+
+class Model(HuggingFaceModel, HuggingFaceAuthMixin):
+    task = NLP.LANGUAGE_MODELING
+    DEFAULT_TRAIN_BSIZE = 1
+    DEFAULT_EVAL_BSIZE = 1
+    DEEPCOPY = False
+
+    def __init__(self, test, device, batch_size=None, extra_args=[]):
+        HuggingFaceAuthMixin.__init__(self)
+        super().__init__(name="llama_v2_13b", test=test, device=device, batch_size=batch_size, extra_args=extra_args)
+
+    def train(self):
+        raise NotImplementedError("FSDP should implement a training loop")
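The 70b and 7b variants below differ only in the checkpoint name. A hypothetical usage sketch; it assumes the gated Llama 2 weights are accessible (e.g. after `huggingface-cli login`) and that a CUDA device is available:

```python
from torchbenchmark.canary_models.llama_v2_13b import Model

# test/device values are examples; the signature matches __init__ above.
m = Model(test="eval", device="cuda")
m.train()  # raises NotImplementedError until an FSDP training loop exists
```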
9 changes: 9 additions & 0 deletions torchbenchmark/canary_models/llama_v2_13b/install.py
@@ -0,0 +1,9 @@
+
+import subprocess
+import sys
+import os
+from torchbenchmark.util.framework.huggingface.patch_hf import patch_transformers, cache_model
+
+if __name__ == '__main__':
+    model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
+    cache_model(model_name)
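Because install.py derives the model name from its own directory, the identical nine-line script is reused for every canary model. The derivation in isolation (the path is hypothetical):

```python
import os

# With install.py at .../canary_models/llama_v2_13b/install.py, the model
# name is the containing directory.
path = "/repo/torchbenchmark/canary_models/llama_v2_13b/install.py"
model_name = os.path.basename(os.path.dirname(os.path.abspath(path)))
assert model_name == "llama_v2_13b"
```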
12 changes: 12 additions & 0 deletions torchbenchmark/canary_models/llama_v2_13b/metadata.yaml
@@ -0,0 +1,12 @@
+devices:
+  NVIDIA A100-SXM4-40GB:
+    eval_batch_size: 1
+eval_benchmark: false
+eval_deterministic: false
+eval_nograd: true
+not_implemented:
+- device: cpu
+- device: cuda
+  test: train
+train_benchmark: false
+train_deterministic: false
16 changes: 16 additions & 0 deletions torchbenchmark/canary_models/llama_v2_70b/__init__.py
@@ -0,0 +1,16 @@
+from torchbenchmark.tasks import NLP
+from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel, HuggingFaceAuthMixin
+
+class Model(HuggingFaceModel, HuggingFaceAuthMixin):
+    task = NLP.LANGUAGE_MODELING
+    DEFAULT_TRAIN_BSIZE = 1
+    DEFAULT_EVAL_BSIZE = 1
+    DEEPCOPY = False
+
+    def __init__(self, test, device, batch_size=None, extra_args=[]):
+        HuggingFaceAuthMixin.__init__(self)
+        super().__init__(name="llama_v2_70b", test=test, device=device, batch_size=batch_size, extra_args=extra_args)
+
+
+    def train(self):
+        raise NotImplementedError("FSDP should implement a training loop")
9 changes: 9 additions & 0 deletions torchbenchmark/canary_models/llama_v2_70b/install.py
@@ -0,0 +1,9 @@
+
+import subprocess
+import sys
+import os
+from torchbenchmark.util.framework.huggingface.patch_hf import patch_transformers, cache_model
+
+if __name__ == '__main__':
+    model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
+    cache_model(model_name)
12 changes: 12 additions & 0 deletions torchbenchmark/canary_models/llama_v2_70b/metadata.yaml
@@ -0,0 +1,12 @@
+devices:
+  NVIDIA A100-SXM4-40GB:
+    eval_batch_size: 1
+eval_benchmark: false
+eval_deterministic: false
+eval_nograd: true
+not_implemented:
+- device: cpu
+- device: cuda
+  test: train
+train_benchmark: false
+train_deterministic: false
16 changes: 16 additions & 0 deletions torchbenchmark/canary_models/llama_v2_7b/__init__.py
@@ -0,0 +1,16 @@
+from torchbenchmark.tasks import NLP
+from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel, HuggingFaceAuthMixin
+
+class Model(HuggingFaceModel, HuggingFaceAuthMixin):
+    task = NLP.LANGUAGE_MODELING
+    DEFAULT_TRAIN_BSIZE = 1
+    DEFAULT_EVAL_BSIZE = 1
+    DEEPCOPY = False
+
+    def __init__(self, test, device, batch_size=None, extra_args=[]):
+        HuggingFaceAuthMixin.__init__(self)
+        super().__init__(name="llama_v2_7b", test=test, device=device, batch_size=batch_size, extra_args=extra_args)
+
+
+    def train(self):
+        raise NotImplementedError("FSDP should implement a training loop")
9 changes: 9 additions & 0 deletions torchbenchmark/canary_models/llama_v2_7b/install.py
@@ -0,0 +1,9 @@
+
+import subprocess
+import sys
+import os
+from torchbenchmark.util.framework.huggingface.patch_hf import patch_transformers, cache_model
+
+if __name__ == '__main__':
+    model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
+    cache_model(model_name)
12 changes: 12 additions & 0 deletions torchbenchmark/canary_models/llama_v2_7b/metadata.yaml
@@ -0,0 +1,12 @@
+devices:
+  NVIDIA A100-SXM4-40GB:
+    eval_batch_size: 1
+eval_benchmark: false
+eval_deterministic: false
+eval_nograd: true
+not_implemented:
+- device: cpu
+- device: cuda
+  test: train
+train_benchmark: false
+train_deterministic: false
2 changes: 0 additions & 2 deletions torchbenchmark/models/hf_MPT_7b_instruct/metadata.yaml
@@ -4,7 +4,5 @@ devices:
 eval_benchmark: false
 eval_deterministic: false
 eval_nograd: true
-not_implemented:
-- jit: true
 train_benchmark: false
 train_deterministic: false
3 changes: 3 additions & 0 deletions torchbenchmark/util/framework/huggingface/model_factory.py
@@ -31,6 +31,9 @@
     # default num_hidden_layers=32 but that OOMs, feel free to change this config to something more real
     'llama_v2_7b_16h' : (512, 512, 'LlamaConfig(num_hidden_layers=16)', 'AutoModelForCausalLM'),
     'hf_MPT_7b_instruct': (512, 512, 'AutoConfig.from_pretrained("mosaicml/mpt-7b-instruct", trust_remote_code=True)', 'AutoModelForCausalLM'),
+    'llama_v2_7b' : (512, 512, 'AutoConfig.from_pretrained("meta-llama/Llama-2-7b-hf")', 'AutoModelForCausalLM'),
+    'llama_v2_13b' : (512, 512, 'AutoConfig.from_pretrained("meta-llama/Llama-2-13b-hf")', 'AutoModelForCausalLM'),
+    'llama_v2_70b' : (512, 512, 'AutoConfig.from_pretrained("meta-llama/Llama-2-70b-hf")', 'AutoModelForCausalLM'),
 }

 cpu_input_slice = {
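Each entry bundles two sequence-length settings with strings naming a config expression and a transformers class, to be evaluated when the benchmark builds the model. A hedged sketch of consuming such a tuple; the field meanings are inferred from the values above, and GPT-2 replaces the gated Llama 2 checkpoint so the snippet runs without authentication:

```python
import transformers
from transformers import AutoConfig, AutoModelForCausalLM  # noqa: F401 (referenced via eval)

entry = (512, 512, 'AutoConfig.from_pretrained("gpt2")', 'AutoModelForCausalLM')
max_length, max_new_tokens, config_expr, model_cls_name = entry

config = eval(config_expr)  # build the Hugging Face config object
model = getattr(transformers, model_cls_name).from_config(config)  # random init, no weight download
```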
6 changes: 2 additions & 4 deletions torchbenchmark/util/model.py
@@ -162,10 +162,8 @@ def determine_batch_size(self, batch_size=None):
             assert current_device_name, f"torch.cuda.get_device_name() returns None when device is set to cuda, please double check."
             if current_device_name in SPECIAL_DEVICE_MAPPING:
                 current_device_name = SPECIAL_DEVICE_MAPPING[current_device_name]
-        elif self.device == "cpu":
-            current_device_name = "cpu"
-        elif self.device == "mps":
-            current_device_name = "mps"
+        else:
+            current_device_name = str(self.device)
         # use the device suggestion on CUDA inference tests, key should be either eval_batch_size or train_batch_size
         device_batch_size_key = f"{self.test}_batch_size"
         if self.metadata and "devices" in self.metadata and current_device_name in self.metadata["devices"] \
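Collapsing the cpu/mps branches into `str(self.device)` means any device string, including one injected via ACCELERATOR, can match a `devices:` key in a model's metadata.yaml. A self-contained sketch of the lookup that follows, with a dict literal standing in for the parsed YAML shown earlier:

```python
metadata = {"devices": {"NVIDIA A100-SXM4-40GB": {"eval_batch_size": 1}}}

test = "eval"
current_device_name = "NVIDIA A100-SXM4-40GB"  # e.g. torch.cuda.get_device_name()

device_batch_size_key = f"{test}_batch_size"
if "devices" in metadata and current_device_name in metadata["devices"] \
        and device_batch_size_key in metadata["devices"][current_device_name]:
    batch_size = metadata["devices"][current_device_name][device_batch_size_key]

print(batch_size)  # -> 1
```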
