diff --git a/torchbenchmark/models/llama_v2_7b_8h/__init__.py b/torchbenchmark/models/llama_v2_7b_8h/__init__.py
deleted file mode 100644
index fe4505bab2..0000000000
--- a/torchbenchmark/models/llama_v2_7b_8h/__init__.py
+++ /dev/null
@@ -1,15 +0,0 @@
-from torchbenchmark.tasks import NLP
-from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel, HuggingFaceAuthMixin
-
-class Model(HuggingFaceModel, HuggingFaceAuthMixin):
-    task = NLP.LANGUAGE_MODELING
-    DEFAULT_TRAIN_BSIZE = 1
-    DEFAULT_EVAL_BSIZE = 1
-    DEEPCOPY = False
-
-    def __init__(self, test, device, batch_size=None, extra_args=[]):
-        HuggingFaceAuthMixin.__init__(self)
-        super().__init__(name="llama_v2_7b_8h", test=test, device=device, batch_size=batch_size, extra_args=extra_args)
-
-    def train(self):
-        return NotImplementedError("7b LLAMA model will OOM on CI GPU machines")
diff --git a/torchbenchmark/models/llama_v2_7b_8h/install.py b/torchbenchmark/models/llama_v2_7b_8h/install.py
deleted file mode 100644
index e22f9518b2..0000000000
--- a/torchbenchmark/models/llama_v2_7b_8h/install.py
+++ /dev/null
@@ -1,9 +0,0 @@
-
-import subprocess
-import sys
-import os
-from torchbenchmark.util.framework.huggingface.patch_hf import patch_transformers, cache_model
-
-if __name__ == '__main__':
-    model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
-    cache_model(model_name)
diff --git a/torchbenchmark/models/llama_v2_7b_8h/metadata.yaml b/torchbenchmark/models/llama_v2_7b_8h/metadata.yaml
deleted file mode 100644
index 81a62e29bb..0000000000
--- a/torchbenchmark/models/llama_v2_7b_8h/metadata.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-devices:
-  NVIDIA A100-SXM4-40GB:
-    eval_batch_size: 1
-eval_benchmark: false
-eval_deterministic: false
-eval_nograd: true
-not_implemented:
-- device: cpu
-- device: cuda
-  test: train
-train_benchmark: false
-train_deterministic: false
diff --git a/torchbenchmark/util/framework/huggingface/model_factory.py b/torchbenchmark/util/framework/huggingface/model_factory.py
index 8052eed322..3327ad72db 100644
--- a/torchbenchmark/util/framework/huggingface/model_factory.py
+++ b/torchbenchmark/util/framework/huggingface/model_factory.py
@@ -30,8 +30,6 @@
     'hf_Whisper': (1024, 1024, 'WhisperConfig()', 'AutoModelForAudioClassification'),
     # default num_hidden_layers=32 but that OOMs, feel free to change this config to something more real
     'llama_v2_7b_16h' : (512,512, 'LlamaConfig(num_hidden_layers=16)', 'AutoModelForCausalLM'),
-    # an even smaller model since 16h OOMs for our optimizer benchmarks
-    'llama_v2_7b_8h' : (512,512, 'LlamaConfig(num_hidden_layers=8)', 'AutoModelForCausalLM'),
 }
 
 cpu_input_slice = {
diff --git a/userbenchmark/optim/run.py b/userbenchmark/optim/run.py
index 3dd982ff82..cad321e3ef 100644
--- a/userbenchmark/optim/run.py
+++ b/userbenchmark/optim/run.py
@@ -249,6 +249,10 @@ def get_unstable_models() -> Set[str]:
     # Skip models deemed unstable by torch-nightly
     {'model': m} for m in unstable_models
 ] + [
+    # 16h currently OOMs, but once it supports train, we should remove this line
+    # See tracker https://github.com/pytorch/benchmark/issues/1793
+    {'model': 'llama_v2_7b_16h'}
+] + [
     # SparseAdam does not support dense gradients
     {'optim': 'SparseAdam', 'model': m} for m in DENSE_MODELS
 ] + [
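
For reviewers, a minimal sketch of the exclusion semantics the userbenchmark/optim/run.py change relies on: entries such as {'model': 'llama_v2_7b_16h'} or {'optim': 'SparseAdam', 'model': m} are dicts that each benchmark configuration is checked against, and a run is skipped when every key/value pair of some entry matches it. The names EXCLUSIONS and is_excluded below are hypothetical, for illustration only:

from typing import Dict, List

EXCLUSIONS: List[Dict[str, str]] = [
    {'model': 'llama_v2_7b_16h'},                         # skip regardless of optimizer
    {'optim': 'SparseAdam', 'model': 'llama_v2_7b_16h'},  # skip one specific combination
]

def is_excluded(run_cfg: Dict[str, str], exclusions: List[Dict[str, str]]) -> bool:
    # A run is excluded when all key/value pairs of some entry match it (subset match).
    return any(all(run_cfg.get(k) == v for k, v in entry.items()) for entry in exclusions)

print(is_excluded({'model': 'llama_v2_7b_16h', 'optim': 'Adam'}, EXCLUSIONS))  # True
print(is_excluded({'model': 'hf_Bert', 'optim': 'Adam'}, EXCLUSIONS))          # False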
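
Likewise, a rough sketch, assumed and simplified rather than the factory's exact code, of how a class_models entry like the retained 'llama_v2_7b_16h' tuple (train/eval sequence lengths, a config constructor string, a transformers Auto class name) can be materialized into a model with random weights:

import transformers
from transformers import LlamaConfig  # noqa: F401 -- referenced by the eval'd string below

# (train_max_length, eval_max_length, config constructor, Auto class name)
entry = (512, 512, 'LlamaConfig(num_hidden_layers=16)', 'AutoModelForCausalLM')
train_len, eval_len, config_expr, auto_class_name = entry

config = eval(config_expr)              # builds LlamaConfig(num_hidden_layers=16)
auto_class = getattr(transformers, auto_class_name)
model = auto_class.from_config(config)  # random init; no checkpoint download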