pytorch · petermcaughan · Jun 30, 2023 · Jun 30, 2023 · Jul 10, 2023 · Jul 10, 2023
diff --git a/torchbenchmark/models/hf_Falcon_7b/__init__.py b/torchbenchmark/models/hf_Falcon_7b/__init__.py
@@ -0,0 +1,28 @@
+from torchbenchmark.tasks import NLP
+from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel
+
+class Model(HuggingFaceModel):
+    """TorchBench wrapper for the Hugging Face Falcon-7B causal language model."""
+
+    task = NLP.LANGUAGE_MODELING
+    # Published training batch size per GPU is 6: see https://huggingface.co/tiiuae/falcon-7b/blob/main/README.md#:~:text=Batch%20size,tokens%20ramp%2Dup
+    DEFAULT_TRAIN_BSIZE = 6
+    DEFAULT_EVAL_BSIZE = 1
+
+    def __init__(self, test, device, jit=False, batch_size=None, extra_args=[]):
+        # extra_args is only forwarded, never mutated here, so the default list mirrors the base-class signature.
+        super().__init__(name="hf_Falcon_7b", test=test, device=device, jit=jit, batch_size=batch_size, extra_args=extra_args)
+
+    def eval(self):
+        """Run the evaluation step, refusing to run on CPU.
+
+        Returns:
+            The value returned by the parent class's ``eval``.
+
+        Raises:
+            NotImplementedError: If ``self.device`` is ``"cpu"``.
+        """
+        if self.device == "cpu":
+            raise NotImplementedError("Falcon model is too slow on CPU - skip CPU test.")
+        # Hand the parent's result back to the caller instead of silently returning None.
+        return super().eval()
diff --git a/torchbenchmark/models/hf_Falcon_7b/install.py b/torchbenchmark/models/hf_Falcon_7b/install.py
@@ -0,0 +1,23 @@
+
+import subprocess
+import sys
+import os
+from torchbenchmark.util.framework.huggingface.patch_hf import patch_transformers, cache_model
+
+def pip_install_requirements():
+    """Install this model's pip requirements using the current interpreter.
+
+    Raises:
+        subprocess.CalledProcessError: If pip exits with a non-zero status.
+    """
+    # Resolve requirements.txt next to this script so the install does not depend on the caller's working directory.
+    requirements = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'requirements.txt')
+    subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q', '-r', requirements])
+
+if __name__ == '__main__':
+    pip_install_requirements()
+    patch_transformers()
+    # The model name is the name of the directory that contains this script.
+    model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
+    # trust_remote_code=True is forwarded by cache_model to the model constructor's from_config call.
+    cache_model(model_name, trust_remote_code=True)
diff --git a/torchbenchmark/models/hf_Falcon_7b/metadata.yaml b/torchbenchmark/models/hf_Falcon_7b/metadata.yaml
@@ -0,0 +1,10 @@
+devices:
+  NVIDIA A100-SXM4-40GB:
+    eval_batch_size: 16
+eval_benchmark: false
+eval_deterministic: false
+eval_nograd: true
+not_implemented:
+- jit: true
+train_benchmark: false
+train_deterministic: false
diff --git a/torchbenchmark/models/hf_Falcon_7b/requirements.txt b/torchbenchmark/models/hf_Falcon_7b/requirements.txt
@@ -0,0 +1,2 @@
+sentencepiece
+datasets
diff --git a/torchbenchmark/util/framework/huggingface/model_factory.py b/torchbenchmark/util/framework/huggingface/model_factory.py
@@ -27,6 +27,7 @@
     'hf_Bert': (512, 512, 'BertConfig()', 'AutoModelForMaskedLM'),
     # see https://huggingface.co/bert-large-cased
     'hf_Bert_large': (512, 512, 'BertConfig(hidden_size=1024, num_hidden_layers=24, num_attention_heads=16)', 'AutoModelForMaskedLM'),
+    'hf_Falcon_7b' : (512, 512, 'AutoConfig.from_pretrained("tiiuae/falcon-7b", trust_remote_code=True)', 'AutoModelForCausalLM'),
 }
 
 cpu_input_slice = {
@@ -77,7 +78,10 @@ def __init__(self, name, test, device, jit=False, batch_size=None, extra_args=[]
             # silence "config.num_buckets is not set. Setting config.num_buckets to 128"
             config.num_buckets = 128
         class_ctor = getattr(transformers, class_models[name][3])
-        self.model = class_ctor.from_config(config).to(device)
+        kwargs = {}
+        if name == "hf_Falcon_7b":
+            kwargs["trust_remote_code"] = True
+        self.model = class_ctor.from_config(config, **kwargs).to(device)
         self.optimizer = optim.Adam(
             self.model.parameters(),
             lr=0.001,

diff --git a/torchbenchmark/util/framework/huggingface/patch_hf.py b/torchbenchmark/util/framework/huggingface/patch_hf.py
@@ -9,11 +9,11 @@
 
 PATCH_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "patches")
 
-def cache_model(name: str):
+def cache_model(name: str, **kwargs):  # extra kwargs are forwarded verbatim to from_config (e.g. trust_remote_code=True)
     import transformers
     model_config = eval(class_models[name][2])
     model_ctor = getattr(transformers, class_models[name][3])
-    model_ctor.from_config(model_config)
+    model_ctor.from_config(model_config, **kwargs)  # result is discarded; presumably called only to populate the local cache - confirm
 
 def patch_transformers():
     import transformers