camel-ai · Wendong-Fan · Dec 2, 2024 · Nov 19, 2024 · Nov 19, 2024 · Nov 21, 2024
diff --git a/camel/messages/__init__.py b/camel/messages/__init__.py
@@ -23,7 +23,7 @@
     HermesFunctionFormatter,
     ShareGPTMessage,
 )
-from .conversion.models import (
+from .conversion.conversation_models import (
     ShareGPTConversation,
 )
 from .conversion.sharegpt.function_call_formatter import (

diff --git a/camel/messages/conversion/__init__.py b/camel/messages/conversion/__init__.py
@@ -12,7 +12,8 @@
 # limitations under the License.
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
 
-from .models import (
+from .alpaca import AlpacaItem
+from .conversation_models import (
     ShareGPTConversation,
     ShareGPTMessage,
     ToolCall,
@@ -24,6 +25,7 @@
     'ShareGPTMessage',
     'ShareGPTConversation',
     'HermesFunctionFormatter',
+    'AlpacaItem',
     'ToolCall',
     'ToolResponse',
 ]
diff --git a/camel/messages/conversion/alpaca.py b/camel/messages/conversion/alpaca.py
@@ -0,0 +1,115 @@
+# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
+# Licensed under the Apache License, Version 2.0 (the “License”);
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an “AS IS” BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
+import re
+
+from pydantic import BaseModel, Field, field_validator
+
+
+class AlpacaItem(BaseModel):
+    r"""Represents an instruction-response item in the Alpaca format.
+
+    Appropriate for both cases where the input field is empty or populated.
+    Provides parsing from string format using the class method from_string().
+
+    Args:
+        instruction (str): The instruction/question/prompt
+        input (str): Input context or examples (put empty string if none)
+        output (str): The response/answer to the instruction
+    """
+
+    instruction: str = Field(description="The instruction/question/prompt")
+    input: str = Field(
+        description="Optional context or input for the task."
+        " For example, when the instruction is \"Summarize the "
+        "following article\", the input is the article."
+    )
+    output: str = Field(description="The response/answer to the instruction")
+
+    @field_validator('instruction', 'output')
+    @classmethod
+    def no_section_markers(cls, value: str) -> str:
+        r"""Rejects section markers like '### Response:'; strips the value."""
+        markers = ('### Response', '### Instruction', '### Input')
+        if any(marker in value for marker in markers):
+            raise ValueError("Field cannot contain section markers")
+        return value.strip()
+
+    @classmethod
+    def from_string(cls, text: str) -> "AlpacaItem":
+        r"""Creates an AlpacaItem from a formatted string.
+
+        Args:
+            text: String in either of these formats:
+                 With input:
+                 ### Instruction:
+                 {instruction}
+                 ### Input:
+                 {input}
+                 ### Response:
+                 {response}
+
+                 Without input:
+                 ### Instruction:
+                 {instruction}
+                 ### Response:
+                 {response}
+
+        Returns:
+            AlpacaItem: Parsed instance
+
+        Raises:
+            ValueError: text doesn't match expected format or sections missing
+        """
+        # Strip and standardize newlines
+        text = text.strip().replace('\r\n', '\n')
+
+        def section(name: str) -> str:
+            # Header must end its line and the body may be empty, so an empty
+            # section (e.g. a blank Input) cannot swallow the ones after it.
+            pattern = rf'###\s*{name}:[^\S\n]*(?=\n|\Z)(.*?)(?=\n###|\Z)'
+            found = re.search(pattern, text, re.DOTALL)
+            return found.group(1).strip() if found else ""
+
+        instruction = section('Instruction')
+        response = section('Response')
+        if not instruction or not response:
+            raise ValueError(
+                "Text must contain '### Instruction:'"
+                " and '### Response:' sections"
+            )
+
+        return cls(
+            instruction=instruction,
+            input=section('Input'),
+            output=response,
+        )
+
+    def to_string(self) -> str:
+        r"""Converts the AlpacaItem to its string representation.
+
+        Returns:
+            str: Formatted string representation with section markers
+        """
+        return "\n".join(
+            [
+                "### Instruction:",
+                self.instruction,
+                "",
+                "### Input:",
+                self.input,
+                "",
+                "### Response:",
+                self.output,
+            ]
+        )
diff --git a/camel/messages/conversion/models.py → ...essages/conversion/conversation_models.py b/camel/messages/conversion/models.py → ...essages/conversion/conversation_models.py