Arize-ai · anticorrelator · Jan 28, 2025 · Jan 29, 2025 · mikeldking · Jan 29, 2025
diff --git a/packages/phoenix-evals/src/phoenix/evals/templates.py b/packages/phoenix-evals/src/phoenix/evals/templates.py
@@ -146,11 +146,17 @@ def __init__(
         self.rails = rails
         self.template = self._normalize_template(template)
         self.explanation_template: Optional[List[PromptPartTemplate]]
+
         if explanation_template:
             self.explanation_template = self._normalize_template(explanation_template)
+            self.explanation_parser = parse_label_from_chain_of_thought_response
         else:
-            self.explanation_template = None
-        self.explanation_label_parser = explanation_label_parser
+            self.explanation_template = patched_explanation_template(self.template)
+            self.explanation_parser = parse_label_from_patched_explanation_response
+
+        if explanation_label_parser:
+            self.explanation_label_parser = explanation_label_parser
+
         self._start_delim, self._end_delim = delimiters
         self.variables: List[str] = []
         for _template in [self.template, self.explanation_template]:
@@ -174,9 +180,8 @@ def prompt(self, options: Optional[PromptOptions] = None) -> List[PromptPartTemp
             return self.template
 
     def extract_label_from_explanation(self, raw_string: str) -> str:
-        if parser := self.explanation_label_parser:
-            return parser(raw_string)
-        return parse_label_from_chain_of_thought_response(raw_string)
+        custom = getattr(self, "explanation_label_parser", None)  # only set when supplied
+        return (custom or self.explanation_parser)(raw_string)
 
     def score(self, rail: str) -> float:
         if self._scores is None:
@@ -195,6 +200,14 @@ def parse_label_from_chain_of_thought_response(raw_string: str) -> str:
     return NOT_PARSABLE
 
 
+def parse_label_from_patched_explanation_response(raw_string: str) -> str:
+    """Return the label text before the first EXPLANATION marker, else NOT_PARSABLE."""
+    explanation_delimiter = r"\W*EXPLANATION\W*"
+    parts = re.split(explanation_delimiter, raw_string, maxsplit=1, flags=re.IGNORECASE)
+    # re.split never returns an empty list, so check the label text rather than `parts`.
+    return parts[0].strip() or NOT_PARSABLE
+
+
 def normalize_classification_template(
     rails: List[str], template: Union[PromptTemplate, ClassificationTemplate, str]
 ) -> ClassificationTemplate:
@@ -240,6 +253,27 @@ def normalize_prompt_template(template: Union[PromptTemplate, str]) -> PromptTem
     )
 
 
+def patched_explanation_template(
+    template_parts: List[PromptPartTemplate],
+) -> List[PromptPartTemplate]:
+    """
+    Build an explanation variant of a template by appending one extra text part.
+
+    The given parts are copied in order into a new list; the appended part asks for a
+    paragraph starting with `EXPLANATION: ` after the original instructions are followed.
+    """
+    explanation_request = PromptPartTemplate(
+        content_type=PromptPartContentType.TEXT,
+        template=(
+            "*****\n\n"
+            "After following the previous instructions, add a paragraph that starts with "
+            "`EXPLANATION: ` and then provide a concise explanation of your reasoning."
+        ),
+    )
+    # Unpack into a fresh list so the caller's `template_parts` is not mutated.
+    return [*template_parts, explanation_request]
+
+
 def map_template(
     dataframe: pd.DataFrame,
     template: PromptTemplate,

diff --git a/packages/phoenix-evals/tests/phoenix/evals/templates/test_template.py b/packages/phoenix-evals/tests/phoenix/evals/templates/test_template.py
@@ -33,10 +33,14 @@ def test_classification_template_can_beinstantiated_with_no_explanation_template
     template = ClassificationTemplate(
         rails=["relevant", "irrelevant"], template="is this irrelevant?"
     )
-    assert template.explanation_template is None
+    assert template.explanation_template is not None
+    assert len(template.explanation_template) == 2
 
     explanation_options = PromptOptions(provide_explanation=True)
     assert template.prompt(options=explanation_options)[0].template == "is this irrelevant?"
+    assert (
+        "provide a concise explanation" in template.prompt(options=explanation_options)[1].template
+    )
 
 
 def test_template_with_default_delimiters_uses_python_string_formatting():