diff --git a/torchbenchmark/models/hf_MPT_7b_instruct/__init__.py b/torchbenchmark/models/hf_MPT_7b_instruct/__init__.py
new file mode 100644
index 0000000000..59f314bdd3
--- /dev/null
+++ b/torchbenchmark/models/hf_MPT_7b_instruct/__init__.py
@@ -0,0 +1,14 @@
+from torchbenchmark.tasks import NLP
+from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel
+
+class Model(HuggingFaceModel):
+    task = NLP.LANGUAGE_MODELING
+    # https://huggingface.co/mosaicml/mpt-7b
+    DEFAULT_TRAIN_BSIZE = 4
+    DEFAULT_EVAL_BSIZE = 1
+
+    def __init__(self, test, device, batch_size=None, extra_args=[]):
+        super().__init__(name="hf_MPT_7b_instruct", test=test, device=device, batch_size=batch_size, extra_args=extra_args)
+
+    def eval(self):
+        super().eval()
\ No newline at end of file
diff --git a/torchbenchmark/models/hf_MPT_7b_instruct/install.py b/torchbenchmark/models/hf_MPT_7b_instruct/install.py
new file mode 100644
index 0000000000..152b955c4c
--- /dev/null
+++ b/torchbenchmark/models/hf_MPT_7b_instruct/install.py
@@ -0,0 +1,7 @@
+import os
+from torchbenchmark.util.framework.huggingface.patch_hf import patch_transformers, cache_model
+
+if __name__ == '__main__':
+    patch_transformers()
+    model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
+    cache_model(model_name, trust_remote_code=True)
\ No newline at end of file
diff --git a/torchbenchmark/models/hf_MPT_7b_instruct/metadata.yaml b/torchbenchmark/models/hf_MPT_7b_instruct/metadata.yaml
new file mode 100644
index 0000000000..c525275e5d
--- /dev/null
+++ b/torchbenchmark/models/hf_MPT_7b_instruct/metadata.yaml
@@ -0,0 +1,8 @@
+devices:
+  NVIDIA A100-SXM4-40GB:
+    eval_batch_size: 1
+eval_benchmark: false
+eval_deterministic: false
+eval_nograd: true
+train_benchmark: false
+train_deterministic: false
\ No newline at end of file
diff --git a/torchbenchmark/util/framework/huggingface/model_factory.py b/torchbenchmark/util/framework/huggingface/model_factory.py
index d317483a67..2f4c202078 100644
--- a/torchbenchmark/util/framework/huggingface/model_factory.py
+++ b/torchbenchmark/util/framework/huggingface/model_factory.py
@@ -30,6 +30,7 @@
     'hf_Whisper': (1024, 1024, 'WhisperConfig()', 'AutoModelForAudioClassification'),
     # default num_hidden_layers=32 but that OOMs, feel free to change this config to something more real
     'llama_v2_7b_16h' : (512,512, 'LlamaConfig(num_hidden_layers=16)', 'AutoModelForCausalLM'),
+    'hf_MPT_7b_instruct': (512, 512, 'AutoConfig.from_pretrained("mosaicml/mpt-7b-instruct", trust_remote_code=True)', 'AutoModelForCausalLM'),
     'llama_v2_7b' : (512,512, 'AutoConfig.from_pretrained("meta-llama/Llama-2-7b-hf")', 'AutoModelForCausalLM'),
     'llama_v2_13b' : (512,512, 'AutoConfig.from_pretrained("meta-llama/Llama-2-13b-hf")', 'AutoModelForCausalLM'),
     'llama_v2_70b' : (512, 512, 'AutoConfig.from_pretrained("meta-llama/Llama-2-70b-hf")', 'AutoModelForMaskedLM'),
@@ -83,7 +84,10 @@ def __init__(self, name, test, device, batch_size=None, extra_args=[]):
             # silence "config.num_buckets is not set. Setting config.num_buckets to 128"
             config.num_buckets = 128
         class_ctor = getattr(transformers, class_models[name][3])
-        self.model = class_ctor.from_config(config).to(device)
+        kwargs = {}
+        if name == "hf_Falcon_7b" or name == "hf_MPT_7b_instruct":
+            kwargs["trust_remote_code"] = True
+        self.model = class_ctor.from_config(config, **kwargs).to(device)
         self.optimizer = optim.Adam(
             self.model.parameters(),
             lr=0.001,
diff --git a/torchbenchmark/util/framework/huggingface/patch_hf.py b/torchbenchmark/util/framework/huggingface/patch_hf.py
index 24de3f1eb3..f23055be64 100644
--- a/torchbenchmark/util/framework/huggingface/patch_hf.py
+++ b/torchbenchmark/util/framework/huggingface/patch_hf.py
@@ -10,11 +10,11 @@
 
 PATCH_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "patches")
 
-def cache_model(name: str):
+def cache_model(name: str, **kwargs):
     import transformers
     model_config = eval(class_models[name][2])
     model_ctor = getattr(transformers, class_models[name][3])
-    model_ctor.from_config(model_config)
+    model_ctor.from_config(model_config, **kwargs)
 
 def patch_transformers():
     import transformers
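
Note: the trust_remote_code=True plumbing in this diff is needed because MPT's modeling code is hosted in the mosaicml/mpt-7b-instruct Hub repo rather than shipped with transformers. The following is a minimal standalone sketch of the code path the diff enables; it is illustrative only and not part of the patch, and it assumes transformers is installed, the Hub is reachable, and a CUDA device with enough memory is available.

# Illustrative sketch -- not part of the diff. It mirrors what the benchmark
# factory does for hf_MPT_7b_instruct: fetch the config from the Hub, then
# instantiate the model from that config with trust_remote_code=True (random
# weights; from_config does not download the pretrained checkpoint).
import torch
from transformers import AutoConfig, AutoModelForCausalLM

config = AutoConfig.from_pretrained("mosaicml/mpt-7b-instruct", trust_remote_code=True)
model = AutoModelForCausalLM.from_config(config, trust_remote_code=True)
model = model.to("cuda").eval()

# The class_models entry above uses sequence length 512 and eval batch size 1.
with torch.no_grad():
    input_ids = torch.randint(0, config.vocab_size, (1, 512), device="cuda")
    outputs = model(input_ids)
    print(outputs.logits.shape)  # expected: (1, 512, vocab_size)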