From 3d1225ecf067bc67efc940296bb9682e066675cf Mon Sep 17 00:00:00 2001
From: MaximumEntropy
Date: Tue, 4 Oct 2022 15:03:49 -0700
Subject: [PATCH 1/3] Fix special tokens

Signed-off-by: MaximumEntropy
---
 .../language_modeling/megatron_t5_prompt_learning_model.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py b/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py
index 182c44bd4642..78f6d3206eae 100644
--- a/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py
+++ b/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py
@@ -424,10 +424,11 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: int = 0) -> A
                 idx = pred.index(self.tokenizer.eos_id)
                 pred = pred[:idx]
 
+            special_token_ids = self.tokenizer.special_token_to_id.values() if hasattr(self.tokenizer, 'special_token_to_id') else self.tokenizer.tokenizer.additional_special_tokens_ids
             pred = [
                 id
                 for id in pred
-                if id not in self.tokenizer.tokenizer.additional_special_tokens_ids
+                if id not in special_token_ids
                 and id not in self.tokenizer.text_to_ids(T5Sentinel.FIRST.value)
             ]  # delete the sentinel token at the beginning of prediction
 
@@ -445,7 +446,7 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: int = 0) -> A
             label = [
                 id
                 for id in label
-                if id not in self.tokenizer.tokenizer.additional_special_tokens_ids
+                if id not in special_token_ids
                 and id not in self.tokenizer.text_to_ids(T5Sentinel.FIRST.value)
             ]  # delete the sentinel token at the beginning of label
 

From ff064266c0dd70c366131ea9387299a09a2d9f97 Mon Sep 17 00:00:00 2001
From: MaximumEntropy
Date: Tue, 4 Oct 2022 15:04:25 -0700
Subject: [PATCH 2/3] Fix

Signed-off-by: MaximumEntropy
---
 .../megatron_t5_prompt_learning_model.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py b/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py
index 78f6d3206eae..2e7b52a19339 100644
--- a/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py
+++ b/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py
@@ -424,12 +424,15 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: int = 0) -> A
                 idx = pred.index(self.tokenizer.eos_id)
                 pred = pred[:idx]
 
-            special_token_ids = self.tokenizer.special_token_to_id.values() if hasattr(self.tokenizer, 'special_token_to_id') else self.tokenizer.tokenizer.additional_special_tokens_ids
+            special_token_ids = (
+                self.tokenizer.special_token_to_id.values()
+                if hasattr(self.tokenizer, 'special_token_to_id')
+                else self.tokenizer.tokenizer.additional_special_tokens_ids
+            )
             pred = [
                 id
                 for id in pred
-                if id not in special_token_ids
-                and id not in self.tokenizer.text_to_ids(T5Sentinel.FIRST.value)
+                if id not in special_token_ids and id not in self.tokenizer.text_to_ids(T5Sentinel.FIRST.value)
             ]  # delete the sentinel token at the beginning of prediction
 
             pred = self.tokenizer.ids_to_text(pred)
@@ -446,8 +449,7 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: int = 0) -> A
             label = [
                 id
                 for id in label
-                if id not in special_token_ids
-                and id not in self.tokenizer.text_to_ids(T5Sentinel.FIRST.value)
+                if id not in special_token_ids and id not in self.tokenizer.text_to_ids(T5Sentinel.FIRST.value)
             ]  # delete the sentinel token at the beginning of label
 
             label = self.tokenizer.ids_to_text(label)

From b6b1ef43735fb26d66e6da5a7a063fbe7e4a43e9 Mon Sep 17 00:00:00 2001
From: MaximumEntropy
Date: Wed, 5 Oct 2022 10:53:12 -0700
Subject: [PATCH 3/3] Empty

Signed-off-by: MaximumEntropy
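
Taken together, the series makes the special-token stripping in predict_step tokenizer-agnostic: it prefers a native special_token_to_id map when the tokenizer has one and falls back to the wrapped HuggingFace tokenizer's additional_special_tokens_ids otherwise. Below is a minimal standalone sketch of that logic outside the diff context; it is an illustration, not NeMo API. The name strip_special_tokens and the sentinel_ids parameter are hypothetical, while the attribute shapes are taken directly from the diff above.

    def strip_special_tokens(token_ids, tokenizer, sentinel_ids):
        """Drop special-token ids and T5 sentinel ids before detokenizing."""
        # The hasattr branch from PATCH 1/3: SentencePiece-style NeMo tokenizers
        # carry a special_token_to_id dict, while HuggingFace-wrapped tokenizers
        # expose the special ids on the inner tokenizer object instead.
        if hasattr(tokenizer, 'special_token_to_id'):
            special_token_ids = tokenizer.special_token_to_id.values()
        else:
            special_token_ids = tokenizer.tokenizer.additional_special_tokens_ids
        # Same filter as the list comprehensions in the diff; sentinel_ids
        # stands in for self.tokenizer.text_to_ids(T5Sentinel.FIRST.value).
        return [
            id_
            for id_ in token_ids
            if id_ not in special_token_ids and id_ not in sentinel_ids
        ]

Applied to both pred and label, this is the comprehension the final diff leaves in place, with the sentinel check keeping the leading T5 sentinel ids out of the decoded text.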