diff --git a/torchbenchmark/models/llama_v2_7b_8h/__init__.py b/torchbenchmark/models/llama_v2_7b_8h/__init__.py
deleted file mode 100644
index fe4505bab2..0000000000
--- a/torchbenchmark/models/llama_v2_7b_8h/__init__.py
+++ /dev/null
@@ -1,15 +0,0 @@
-from torchbenchmark.tasks import NLP
-from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel, HuggingFaceAuthMixin
-
-class Model(HuggingFaceModel, HuggingFaceAuthMixin):
-    task = NLP.LANGUAGE_MODELING
-    DEFAULT_TRAIN_BSIZE = 1
-    DEFAULT_EVAL_BSIZE = 1
-    DEEPCOPY = False
-
-    def __init__(self, test, device, batch_size=None, extra_args=[]):
-        HuggingFaceAuthMixin.__init__(self)
-        super().__init__(name="llama_v2_7b_8h", test=test, device=device, batch_size=batch_size, extra_args=extra_args)
-
-    def train(self):
-        return NotImplementedError("7b LLAMA model will OOM on CI GPU machines")
diff --git a/torchbenchmark/models/llama_v2_7b_8h/install.py b/torchbenchmark/models/llama_v2_7b_8h/install.py
deleted file mode 100644
index e22f9518b2..0000000000
--- a/torchbenchmark/models/llama_v2_7b_8h/install.py
+++ /dev/null
@@ -1,9 +0,0 @@
-
-import subprocess
-import sys
-import os
-from torchbenchmark.util.framework.huggingface.patch_hf import patch_transformers, cache_model
-
-if __name__ == '__main__':
-    model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
-    cache_model(model_name)
diff --git a/torchbenchmark/models/llama_v2_7b_8h/metadata.yaml b/torchbenchmark/models/llama_v2_7b_8h/metadata.yaml
deleted file mode 100644
index 81a62e29bb..0000000000
--- a/torchbenchmark/models/llama_v2_7b_8h/metadata.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-devices:
-  NVIDIA A100-SXM4-40GB:
-    eval_batch_size: 1
-eval_benchmark: false
-eval_deterministic: false
-eval_nograd: true
-not_implemented:
-- device: cpu
-- device: cuda
-  test: train
-train_benchmark: false
-train_deterministic: false
diff --git a/torchbenchmark/util/framework/huggingface/model_factory.py b/torchbenchmark/util/framework/huggingface/model_factory.py
index 8052eed322..3327ad72db 100644
--- a/torchbenchmark/util/framework/huggingface/model_factory.py
+++ b/torchbenchmark/util/framework/huggingface/model_factory.py
@@ -30,8 +30,6 @@
     'hf_Whisper': (1024, 1024, 'WhisperConfig()', 'AutoModelForAudioClassification'),
     # default num_hidden_layers=32 but that OOMs, feel free to change this config to something more real
     'llama_v2_7b_16h' : (512,512, 'LlamaConfig(num_hidden_layers=16)', 'AutoModelForCausalLM'),
-    # an even smaller model since 16h OOMs for our optimizer benchmarks
-    'llama_v2_7b_8h' : (512,512, 'LlamaConfig(num_hidden_layers=8)', 'AutoModelForCausalLM'),
 }
 
 cpu_input_slice = {
diff --git a/userbenchmark/optim/run.py b/userbenchmark/optim/run.py
index 3dd982ff82..cad321e3ef 100644
--- a/userbenchmark/optim/run.py
+++ b/userbenchmark/optim/run.py
@@ -249,6 +249,10 @@ def get_unstable_models() -> Set[str]:
     # Skip models deemed unstable by torch-nightly
     {'model': m} for m in unstable_models
 ] + [
+    # 16h currently OOMs, but once it supports train, we should remove this line
+    # See tracker https://github.com/pytorch/benchmark/issues/1793
+    {'model': 'llama_v2_7b_16h'}
+] + [
     # SparseAdam does not support dense gradients
     {'optim': 'SparseAdam', 'model': m} for m in DENSE_MODELS
 ] + [
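
For reviewers, a minimal sketch of the exclusion semantics the userbenchmark/optim/run.py change relies on: entries such as {'model': 'llama_v2_7b_16h'} or {'optim': 'SparseAdam', 'model': m} are dicts that each benchmark configuration is checked against, and a run is skipped when every key/value pair of some entry matches it. The names EXCLUSIONS and is_excluded below are hypothetical, for illustration only:

from typing import Dict, List

EXCLUSIONS: List[Dict[str, str]] = [
    {'model': 'llama_v2_7b_16h'},                         # skip regardless of optimizer
    {'optim': 'SparseAdam', 'model': 'llama_v2_7b_16h'},  # skip one specific combination
]

def is_excluded(run_cfg: Dict[str, str], exclusions: List[Dict[str, str]]) -> bool:
    # A run is excluded when all key/value pairs of some entry match it (subset match).
    return any(all(run_cfg.get(k) == v for k, v in entry.items()) for entry in exclusions)

print(is_excluded({'model': 'llama_v2_7b_16h', 'optim': 'Adam'}, EXCLUSIONS))  # True
print(is_excluded({'model': 'hf_Bert', 'optim': 'Adam'}, EXCLUSIONS))          # False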
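
Likewise, a rough sketch, assumed and simplified rather than the factory's exact code, of how a class_models entry like the retained 'llama_v2_7b_16h' tuple (train/eval sequence lengths, a config constructor string, a transformers Auto class name) can be materialized into a model with random weights:

import transformers
from transformers import LlamaConfig  # noqa: F401 -- referenced by the eval'd string below

# (train_max_length, eval_max_length, config constructor, Auto class name)
entry = (512, 512, 'LlamaConfig(num_hidden_layers=16)', 'AutoModelForCausalLM')
train_len, eval_len, config_expr, auto_class_name = entry

config = eval(config_expr)              # builds LlamaConfig(num_hidden_layers=16)
auto_class = getattr(transformers, auto_class_name)
model = auto_class.from_config(config)  # random init; no checkpoint download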