Add means to not load gpt2 wpe
This will allow fine-tuning of other position embeddings for GPT-2 variations.
gkielian committed Sep 17, 2024
1 parent f0890e1 · commit beff902
Showing 1 changed file with 4 additions and 0 deletions.
model.py: 4 additions, 0 deletions

@@ -890,6 +890,10 @@ def from_pretrained(cls, config, model_type):
             if key == "lm_head.weight":
                 continue
 
+            if not config.use_abs_pos_embeddings:
+                if key == "transformer.wpe.weight":
+                    continue
+
             assert sd_hf[key].shape == sd[key].shape
             with torch.no_grad():
                 print(key)
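For context, the added guard sits inside the weight-copy loop of from_pretrained. Below is a minimal, self-contained sketch of how such a loop could look with this commit's check in place; the wrapper function load_gpt2_weights, the use of Hugging Face's GPT2LMHeadModel as the weight source, and the final copy_ call are illustrative assumptions, and the sketch omits the transposed Conv1D weight handling a real GPT-2 loader also needs.

    import torch
    from transformers import GPT2LMHeadModel

    def load_gpt2_weights(model, config, model_type="gpt2"):
        # Copy Hugging Face GPT-2 weights into `model`, optionally skipping wpe.
        sd = model.state_dict()
        sd_hf = GPT2LMHeadModel.from_pretrained(model_type).state_dict()

        for key in sd_hf:
            # The tied output head does not need a separate copy.
            if key == "lm_head.weight":
                continue

            # This commit's addition: when absolute position embeddings are
            # disabled, skip transformer.wpe.weight so another position-embedding
            # scheme can be fine-tuned instead of GPT-2's learned positions.
            if not config.use_abs_pos_embeddings and key == "transformer.wpe.weight":
                continue

            assert sd_hf[key].shape == sd[key].shape, f"shape mismatch for {key}"
            with torch.no_grad():
                sd[key].copy_(sd_hf[key])

        return model

With config.use_abs_pos_embeddings set to False, transformer.wpe.weight keeps whatever initialization the model variant gives it, so an alternative position-embedding scheme can be trained during fine-tuning rather than starting from GPT-2's learned absolute positions.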
