diff --git a/torchbenchmark/canary_models/llama_v2_13b/__init__.py b/torchbenchmark/canary_models/llama_v2_13b/__init__.py
new file mode 100644
index 0000000000..bf307dfedf
--- /dev/null
+++ b/torchbenchmark/canary_models/llama_v2_13b/__init__.py
@@ -0,0 +1,15 @@
+from torchbenchmark.tasks import NLP
+from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel, HuggingFaceAuthMixin
+
+class Model(HuggingFaceModel, HuggingFaceAuthMixin):
+    task = NLP.LANGUAGE_MODELING
+    DEFAULT_TRAIN_BSIZE = 1
+    DEFAULT_EVAL_BSIZE = 1
+    DEEPCOPY = False
+
+    def __init__(self, test, device, batch_size=None, extra_args=[]):
+        HuggingFaceAuthMixin.__init__(self)
+        super().__init__(name="llama_v2_13b", test=test, device=device, batch_size=batch_size, extra_args=extra_args)
+
+    def train(self):
+        raise NotImplementedError("FSDP should implement a training loop")
\ No newline at end of file
diff --git a/torchbenchmark/canary_models/llama_v2_13b/install.py b/torchbenchmark/canary_models/llama_v2_13b/install.py
new file mode 100644
index 0000000000..cc27b6e7cf
--- /dev/null
+++ b/torchbenchmark/canary_models/llama_v2_13b/install.py
@@ -0,0 +1,6 @@
+import os
+from torchbenchmark.util.framework.huggingface.patch_hf import cache_model
+
+if __name__ == '__main__':
+    model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
+    cache_model(model_name)
\ No newline at end of file
diff --git a/torchbenchmark/canary_models/llama_v2_13b/metadata.yaml b/torchbenchmark/canary_models/llama_v2_13b/metadata.yaml
new file mode 100644
index 0000000000..db5866b059
--- /dev/null
+++ b/torchbenchmark/canary_models/llama_v2_13b/metadata.yaml
@@ -0,0 +1,12 @@
+devices:
+  NVIDIA A100-SXM4-40GB:
+    eval_batch_size: 1
+eval_benchmark: false
+eval_deterministic: false
+eval_nograd: true
+not_implemented:
+- device: cpu
+- device: cuda
+  test: train
+train_benchmark: false
+train_deterministic: false
\ No newline at end of file
diff --git a/torchbenchmark/canary_models/llama_v2_70b/__init__.py b/torchbenchmark/canary_models/llama_v2_70b/__init__.py
new file mode 100644
index 0000000000..6878b26c9d
--- /dev/null
+++ b/torchbenchmark/canary_models/llama_v2_70b/__init__.py
@@ -0,0 +1,15 @@
+from torchbenchmark.tasks import NLP
+from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel, HuggingFaceAuthMixin
+
+class Model(HuggingFaceModel, HuggingFaceAuthMixin):
+    task = NLP.LANGUAGE_MODELING
+    DEFAULT_TRAIN_BSIZE = 1
+    DEFAULT_EVAL_BSIZE = 1
+    DEEPCOPY = False
+
+    def __init__(self, test, device, batch_size=None, extra_args=[]):
+        HuggingFaceAuthMixin.__init__(self)
+        super().__init__(name="llama_v2_70b", test=test, device=device, batch_size=batch_size, extra_args=extra_args)
+
+    def train(self):
+        raise NotImplementedError("FSDP should implement a training loop")
diff --git a/torchbenchmark/canary_models/llama_v2_70b/install.py b/torchbenchmark/canary_models/llama_v2_70b/install.py
new file mode 100644
index 0000000000..cc27b6e7cf
--- /dev/null
+++ b/torchbenchmark/canary_models/llama_v2_70b/install.py
@@ -0,0 +1,6 @@
+import os
+from torchbenchmark.util.framework.huggingface.patch_hf import cache_model
+
+if __name__ == '__main__':
+    model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
+    cache_model(model_name)
\ No newline at end of file
diff --git a/torchbenchmark/canary_models/llama_v2_70b/metadata.yaml b/torchbenchmark/canary_models/llama_v2_70b/metadata.yaml
new file mode 100644
index 0000000000..db5866b059
--- /dev/null
+++ b/torchbenchmark/canary_models/llama_v2_70b/metadata.yaml
@@ -0,0 +1,12 @@
+devices:
+  NVIDIA A100-SXM4-40GB:
+    eval_batch_size: 1
+eval_benchmark: false
+eval_deterministic: false
+eval_nograd: true
+not_implemented:
+- device: cpu
+- device: cuda
+  test: train
+train_benchmark: false
+train_deterministic: false
\ No newline at end of file
diff --git a/torchbenchmark/canary_models/llama_v2_7b/__init__.py b/torchbenchmark/canary_models/llama_v2_7b/__init__.py
new file mode 100644
index 0000000000..326a47ef4e
--- /dev/null
+++ b/torchbenchmark/canary_models/llama_v2_7b/__init__.py
@@ -0,0 +1,15 @@
+from torchbenchmark.tasks import NLP
+from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel, HuggingFaceAuthMixin
+
+class Model(HuggingFaceModel, HuggingFaceAuthMixin):
+    task = NLP.LANGUAGE_MODELING
+    DEFAULT_TRAIN_BSIZE = 1
+    DEFAULT_EVAL_BSIZE = 1
+    DEEPCOPY = False
+
+    def __init__(self, test, device, batch_size=None, extra_args=[]):
+        HuggingFaceAuthMixin.__init__(self)
+        super().__init__(name="llama_v2_7b", test=test, device=device, batch_size=batch_size, extra_args=extra_args)
+
+    def train(self):
+        raise NotImplementedError("FSDP should implement a training loop")
diff --git a/torchbenchmark/canary_models/llama_v2_7b/install.py b/torchbenchmark/canary_models/llama_v2_7b/install.py
new file mode 100644
index 0000000000..cc27b6e7cf
--- /dev/null
+++ b/torchbenchmark/canary_models/llama_v2_7b/install.py
@@ -0,0 +1,6 @@
+import os
+from torchbenchmark.util.framework.huggingface.patch_hf import cache_model
+
+if __name__ == '__main__':
+    model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
+    cache_model(model_name)
\ No newline at end of file
diff --git a/torchbenchmark/canary_models/llama_v2_7b/metadata.yaml b/torchbenchmark/canary_models/llama_v2_7b/metadata.yaml
new file mode 100644
index 0000000000..db5866b059
--- /dev/null
+++ b/torchbenchmark/canary_models/llama_v2_7b/metadata.yaml
@@ -0,0 +1,12 @@
+devices:
+  NVIDIA A100-SXM4-40GB:
+    eval_batch_size: 1
+eval_benchmark: false
+eval_deterministic: false
+eval_nograd: true
+not_implemented:
+- device: cpu
+- device: cuda
+  test: train
+train_benchmark: false
+train_deterministic: false
\ No newline at end of file
diff --git a/torchbenchmark/util/framework/huggingface/model_factory.py b/torchbenchmark/util/framework/huggingface/model_factory.py
index 3327ad72db..d317483a67 100644
--- a/torchbenchmark/util/framework/huggingface/model_factory.py
+++ b/torchbenchmark/util/framework/huggingface/model_factory.py
@@ -30,6 +30,9 @@
     'hf_Whisper': (1024, 1024, 'WhisperConfig()', 'AutoModelForAudioClassification'),
     # default num_hidden_layers=32 but that OOMs, feel free to change this config to something more real
     'llama_v2_7b_16h' : (512,512, 'LlamaConfig(num_hidden_layers=16)', 'AutoModelForCausalLM'),
+    'llama_v2_7b' : (512, 512, 'AutoConfig.from_pretrained("meta-llama/Llama-2-7b-hf")', 'AutoModelForCausalLM'),
+    'llama_v2_13b' : (512, 512, 'AutoConfig.from_pretrained("meta-llama/Llama-2-13b-hf")', 'AutoModelForCausalLM'),
+    'llama_v2_70b' : (512, 512, 'AutoConfig.from_pretrained("meta-llama/Llama-2-70b-hf")', 'AutoModelForCausalLM'),
 }
 
 cpu_input_slice = {
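Note for reviewers: each `class_models` row stores the Hugging Face config as a *string expression* and the model class by *name*. Below is a minimal sketch of how such a row can be materialized; the variable names are illustrative and this is not the factory's actual code path. Fetching the gated `meta-llama` config assumes Hugging Face authentication is set up.

```python
# Illustrative sketch only -- not model_factory's real internals.
# Assumes `transformers` is installed and HF auth covers the gated
# meta-llama repos.
import transformers

entry = (512, 512,
         'AutoConfig.from_pretrained("meta-llama/Llama-2-7b-hf")',
         'AutoModelForCausalLM')
_, _, config_expr, cls_name = entry

# The config column is a string expression, so the names it references
# must be in scope when it is evaluated.
config = eval(config_expr, {"AutoConfig": transformers.AutoConfig})

# Resolve the model class by name and build it from the config alone
# (randomly initialized weights, no checkpoint download).
model = getattr(transformers, cls_name).from_config(config)
```

This is also why the `AutoModelForMaskedLM` typo in the original 70b row mattered: `transformers` has no masked-LM head for Llama, so resolving that name against a Llama config would fail at construction time.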
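Once `install.py` has cached the checkpoint, the new canary models should follow the standard TorchBench harness. A hedged usage sketch, assuming a CUDA device with enough memory and that `invoke()` behaves as elsewhere in the suite:

```python
# Hedged usage sketch for the new eval-only canary models. Assumes HF
# authentication is configured (HuggingFaceAuthMixin presumably checks
# for it) and the device can hold the checkpoint.
from torchbenchmark.canary_models.llama_v2_7b import Model

m = Model(test="eval", device="cuda")
m.invoke()  # one eval iteration through the benchmark harness
```

`train()` is deliberately a stub: the metadata.yaml files mark `train` as not_implemented on cuda, and an FSDP-based training loop is left as future work.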