all the llamas in canary (#1803)
Summary:
All of these models OOM on a single device, but we want to make them all available for the distributed tests H-Huang is working on.

Pull Request resolved: #1803

Reviewed By: H-Huang, xuzhao9

Differential Revision: D47994029

Pulled By: msaroufim

fbshipit-source-id: ad335348b911ddb2379a87a4ec9ef3b0dcc91ea0
msaroufim authored and facebook-github-bot committed Aug 2, 2023
1 parent 8dbc4ee commit e7ca300
Showing 10 changed files with 113 additions and 0 deletions.
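
As a quick orientation, here is a minimal sketch of how one of these canary models could be driven once installed. This is hypothetical usage, not part of the commit: the invoke() entry point is assumed from torchbenchmark's usual BenchmarkModel API, and a Hugging Face token with Llama 2 access is assumed to be configured, as HuggingFaceAuthMixin requires.

    # Hypothetical usage sketch; assumes HF auth is set up for the gated Llama 2 repos.
    from torchbenchmark.canary_models.llama_v2_7b import Model

    m = Model(test="eval", device="cuda", batch_size=1)
    m.invoke()  # one eval iteration; train() deliberately raises NotImplementedError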
15 changes: 15 additions & 0 deletions torchbenchmark/canary_models/llama_v2_13b/__init__.py
@@ -0,0 +1,15 @@
from torchbenchmark.tasks import NLP
from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel, HuggingFaceAuthMixin

class Model(HuggingFaceModel, HuggingFaceAuthMixin):
    task = NLP.LANGUAGE_MODELING
    DEFAULT_TRAIN_BSIZE = 1
    DEFAULT_EVAL_BSIZE = 1
    DEEPCOPY = False

    def __init__(self, test, device, batch_size=None, extra_args=[]):
        HuggingFaceAuthMixin.__init__(self)
        super().__init__(name="llama_v2_13b", test=test, device=device, batch_size=batch_size, extra_args=extra_args)

    def train(self):
        raise NotImplementedError("FSDP should implement a training loop")
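
The NotImplementedError above (the same stub appears in the 70b and 7b models below) points at the distributed path these models are being staged for. A rough sketch of the kind of FSDP training loop it alludes to, illustrative only and not part of this commit: loader is a placeholder DataLoader, and launch via torchrun is assumed.

    import torch
    import torch.distributed as dist
    from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
    from transformers import AutoModelForCausalLM

    dist.init_process_group("nccl")  # torchrun supplies rank/world size via env vars
    torch.cuda.set_device(dist.get_rank() % torch.cuda.device_count())

    model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
    model = FSDP(model.cuda())  # shard parameters across ranks to dodge the single-device OOM
    optim = torch.optim.AdamW(model.parameters(), lr=1e-5)

    for input_ids, labels in loader:  # placeholder data loader
        loss = model(input_ids=input_ids.cuda(), labels=labels.cuda()).loss
        loss.backward()
        optim.step()
        optim.zero_grad()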
9 changes: 9 additions & 0 deletions torchbenchmark/canary_models/llama_v2_13b/install.py
@@ -0,0 +1,9 @@

import subprocess
import sys
import os
from torchbenchmark.util.framework.huggingface.patch_hf import patch_transformers, cache_model

if __name__ == '__main__':
    model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    cache_model(model_name)
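
cache_model's actual behavior lives in patch_hf.py; a hedged approximation of its effect, given that the factory table below builds these models from AutoConfig rather than pretrained weights, is pre-fetching the gated config and tokenizer into the local HF cache:

    # Approximation only -- not cache_model's real implementation.
    from transformers import AutoConfig, AutoTokenizer

    name = "meta-llama/Llama-2-13b-hf"  # the hub id that llama_v2_13b maps to in the factory table
    AutoConfig.from_pretrained(name)
    AutoTokenizer.from_pretrained(name)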
12 changes: 12 additions & 0 deletions torchbenchmark/canary_models/llama_v2_13b/metadata.yaml
@@ -0,0 +1,12 @@
devices:
  NVIDIA A100-SXM4-40GB:
    eval_batch_size: 1
eval_benchmark: false
eval_deterministic: false
eval_nograd: true
not_implemented:
- device: cpu
- device: cuda
  test: train
train_benchmark: false
train_deterministic: false
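
For readers unfamiliar with these metadata files, an illustrative reading of the not_implemented block (this is not torchbenchmark's actual harness code): an entry with only a device key blocks that device outright, while device plus test blocks only that combination.

    import yaml

    with open("torchbenchmark/canary_models/llama_v2_13b/metadata.yaml") as f:
        meta = yaml.safe_load(f)

    def is_supported(device: str, test: str) -> bool:
        for rule in meta.get("not_implemented", []):
            # a rule without a "test" key blocks every test on that device
            if rule.get("device") == device and rule.get("test", test) == test:
                return False
        return True

    print(is_supported("cuda", "train"))  # False: single-device training OOMs
    print(is_supported("cuda", "eval"))   # True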
16 changes: 16 additions & 0 deletions torchbenchmark/canary_models/llama_v2_70b/__init__.py
@@ -0,0 +1,16 @@
from torchbenchmark.tasks import NLP
from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel, HuggingFaceAuthMixin

class Model(HuggingFaceModel, HuggingFaceAuthMixin):
    task = NLP.LANGUAGE_MODELING
    DEFAULT_TRAIN_BSIZE = 1
    DEFAULT_EVAL_BSIZE = 1
    DEEPCOPY = False

    def __init__(self, test, device, batch_size=None, extra_args=[]):
        HuggingFaceAuthMixin.__init__(self)
        super().__init__(name="llama_v2_70b", test=test, device=device, batch_size=batch_size, extra_args=extra_args)


    def train(self):
        raise NotImplementedError("FSDP should implement a training loop")
9 changes: 9 additions & 0 deletions torchbenchmark/canary_models/llama_v2_70b/install.py
@@ -0,0 +1,9 @@

import subprocess
import sys
import os
from torchbenchmark.util.framework.huggingface.patch_hf import patch_transformers, cache_model

if __name__ == '__main__':
    model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    cache_model(model_name)
12 changes: 12 additions & 0 deletions torchbenchmark/canary_models/llama_v2_70b/metadata.yaml
@@ -0,0 +1,12 @@
devices:
  NVIDIA A100-SXM4-40GB:
    eval_batch_size: 1
eval_benchmark: false
eval_deterministic: false
eval_nograd: true
not_implemented:
- device: cpu
- device: cuda
  test: train
train_benchmark: false
train_deterministic: false
16 changes: 16 additions & 0 deletions torchbenchmark/canary_models/llama_v2_7b/__init__.py
@@ -0,0 +1,16 @@
from torchbenchmark.tasks import NLP
from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel, HuggingFaceAuthMixin

class Model(HuggingFaceModel, HuggingFaceAuthMixin):
    task = NLP.LANGUAGE_MODELING
    DEFAULT_TRAIN_BSIZE = 1
    DEFAULT_EVAL_BSIZE = 1
    DEEPCOPY = False

    def __init__(self, test, device, batch_size=None, extra_args=[]):
        HuggingFaceAuthMixin.__init__(self)
        super().__init__(name="llama_v2_7b", test=test, device=device, batch_size=batch_size, extra_args=extra_args)


    def train(self):
        raise NotImplementedError("FSDP should implement a training loop")
9 changes: 9 additions & 0 deletions torchbenchmark/canary_models/llama_v2_7b/install.py
@@ -0,0 +1,9 @@

import subprocess
import sys
import os
from torchbenchmark.util.framework.huggingface.patch_hf import patch_transformers, cache_model

if __name__ == '__main__':
    model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    cache_model(model_name)
12 changes: 12 additions & 0 deletions torchbenchmark/canary_models/llama_v2_7b/metadata.yaml
@@ -0,0 +1,12 @@
devices:
  NVIDIA A100-SXM4-40GB:
    eval_batch_size: 1
eval_benchmark: false
eval_deterministic: false
eval_nograd: true
not_implemented:
- device: cpu
- device: cuda
  test: train
train_benchmark: false
train_deterministic: false
3 changes: 3 additions & 0 deletions torchbenchmark/util/framework/huggingface/model_factory.py
@@ -30,6 +30,9 @@
    'hf_Whisper': (1024, 1024, 'WhisperConfig()', 'AutoModelForAudioClassification'),
    # default num_hidden_layers=32 but that OOMs; feel free to change this config to something more realistic
    'llama_v2_7b_16h' : (512, 512, 'LlamaConfig(num_hidden_layers=16)', 'AutoModelForCausalLM'),
    'llama_v2_7b' : (512, 512, 'AutoConfig.from_pretrained("meta-llama/Llama-2-7b-hf")', 'AutoModelForCausalLM'),
    'llama_v2_13b' : (512, 512, 'AutoConfig.from_pretrained("meta-llama/Llama-2-13b-hf")', 'AutoModelForCausalLM'),
    'llama_v2_70b' : (512, 512, 'AutoConfig.from_pretrained("meta-llama/Llama-2-70b-hf")', 'AutoModelForCausalLM'),
}

cpu_input_slice = {
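
A hedged sketch of how an entry in this table is presumably consumed; the field names below are guesses from the tuple's shape, and the real logic lives elsewhere in model_factory.py. The config column is stored as a code string, which lets the table mix from-scratch LlamaConfig builds with hub AutoConfig lookups:

    import transformers

    entry = (512, 512, 'AutoConfig.from_pretrained("meta-llama/Llama-2-7b-hf")', 'AutoModelForCausalLM')
    train_len, eval_len, config_src, cls_name = entry  # field names guessed

    config = eval(config_src, {"AutoConfig": transformers.AutoConfig,
                               "LlamaConfig": transformers.LlamaConfig})
    model = getattr(transformers, cls_name).from_config(config)  # random init from config; no weight download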
