refuel-ai · rajasbansal · Jan 29, 2025 · Jan 28, 2025
diff --git a/src/autolabel/tasks/attribute_extraction.py b/src/autolabel/tasks/attribute_extraction.py
@@ -2,6 +2,7 @@
 import json
 import logging
 import pickle
+import re
 from collections import defaultdict
 from typing import Callable, Dict, List, Optional, Tuple, Union
 
@@ -32,14 +33,14 @@


 class AttributeExtractionTask(BaseTask):
    NULL_LABEL = {}
    DEFAULT_TASK_GUIDELINES = "You are an expert at extracting attributes from text. Given a piece of text, extract the required attributes."
    DEFAULT_OUTPUT_GUIDELINES = "You will return the extracted attributes as a json with the following keys:\n{attribute_json}. \n Do not include keys in the final JSON that don't have any valid value extracted."
    LABEL_FORMAT_IN_EXPLANATION = (
        " The explanation should end with - 'so, the answer is <label>.'"
    )
    EXCLUDE_LABEL_IN_EXPLANATION = " Do not repeat the output of the task - simply provide an explanation for the provided output. The provided label was generated by you in a previous step and your job now is to only provided an explanation for the output. Your job is not verify the output but instead explain why it might have been generated, even if it is incorrect. If you think the provided output is incorrect, give an explanation of why it might have been generated anyway but don't say that the output may be incorrect or incorrectly generated.'"
    GENERATE_EXPLANATION_PROMPT = "You are an expert at providing a well reasoned explanation for the output of a given task. \n\nBEGIN TASK DESCRIPTION\n{task_guidelines}\nEND TASK DESCRIPTION\nYou will be given an input example and the output for one of the attributes. Your job is to provide an explanation for why the output for that attribute is correct for the task above.\nYour explanation should be at most two sentences.{label_format}\n{labeled_example}\nCurrent Attribute:{attribute}.\nExplanation: "
    OUTPUT_DICT_KEY = "output_dict"

    def __init__(self, config: AutolabelConfig) -> None:
@@ -54,9 +55,9 @@
        if self.config.confidence():
            self.metrics.append(AUROCMetric())

    def _construct_attribute_json(
        self,
        selected_labels_map: Dict[str, List[str]] = None,
        selected_labels_desc_map: Dict[str, Dict[str, str]] = None,
    ) -> Tuple[str, Dict]:
        """
@@ -365,9 +366,14 @@
             )
             try:
                 json_start, json_end = response.text.find("{"), response.text.rfind("}")
+                json_str = re.sub(  # escape raw newlines that occur inside quoted JSON strings
+                    r'"(?:[^"\\]|\\.)*"',  # escape-aware: a \" inside a string does not end the match
+                    lambda m: m.group().replace("\n", "\\n"),
+                    response.text[json_start : json_end + 1],
+                )
                 llm_label = {}
                 for k, v in json5.loads(
-                    response.text[json_start : json_end + 1],
+                    json_str,
                 ).items():
                     if isinstance(v, list) or isinstance(v, dict):
                         llm_label[k] = v