From 2d85c7dce5ca80a7ab5be69e85263129998ae3d1 Mon Sep 17 00:00:00 2001
From: Sarang-Nambiar <101126190+Sarang-Nambiar@users.noreply.github.com>
Date: Thu, 16 Jan 2025 13:47:47 +0800
Subject: [PATCH 1/6] Initial commit

---
 app/features/text_rewriter/__init__.py        |   0
 app/features/text_rewriter/core.py            |  32 +++
 app/features/text_rewriter/metadata.json      |  19 ++
 .../prompt/text-rewriter-prompt.txt           |  10 +
 app/features/text_rewriter/tests/__init__.py  |   0
 app/features/text_rewriter/tests/test_core.py | 222 ++++++++++++++++++
 app/features/text_rewriter/tools.py           |  84 +++++++
 7 files changed, 367 insertions(+)
 create mode 100644 app/features/text_rewriter/__init__.py
 create mode 100644 app/features/text_rewriter/core.py
 create mode 100644 app/features/text_rewriter/metadata.json
 create mode 100644 app/features/text_rewriter/prompt/text-rewriter-prompt.txt
 create mode 100644 app/features/text_rewriter/tests/__init__.py
 create mode 100644 app/features/text_rewriter/tests/test_core.py
 create mode 100644 app/features/text_rewriter/tools.py

diff --git a/app/features/text_rewriter/__init__.py b/app/features/text_rewriter/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/app/features/text_rewriter/core.py b/app/features/text_rewriter/core.py
new file mode 100644
index 00000000..ad14039f
--- /dev/null
+++ b/app/features/text_rewriter/core.py
@@ -0,0 +1,32 @@
+from app.services.logger import setup_logger
+from app.utils.document_loaders import get_docs
+from app.features.text_rewriter.tools import TextRewriter
+from app.api.error_utilities import ToolExecutorError
+
+logger = setup_logger()
+
+def executor(
+             instructions: str,
+             file_url: str,
+             file_type: str, 
+             verbose=False):
+    try:
+        if verbose:
+            logger.info(f"File URL loaded: {file_url}")
+        
+        if file_type and file_url:
+            docs = get_docs(file_url, file_type, verbose=True)
+        else:
+            docs = None
+            raise ToolExecutorError("File URL and file type must be provided")
+        
+        # TODO: IMPLEMENT CLASS HERE
+        output = TextRewriter(instructions, verbose=verbose).rewrite(docs)
+
+    except Exception as e:
+        error_message = f"Error in executor: {e}"
+        logger.error(error_message)
+        raise ValueError(error_message)
+    
+    return output
+
diff --git a/app/features/text_rewriter/metadata.json b/app/features/text_rewriter/metadata.json
new file mode 100644
index 00000000..56c6fbde
--- /dev/null
+++ b/app/features/text_rewriter/metadata.json
@@ -0,0 +1,19 @@
+{
+    "inputs": [
+        {
+            "label": "Instructions",
+            "name": "instructions",
+            "type": "text"
+        },
+        {
+            "label": "File URL",
+            "name": "file_url",
+            "type": "text"
+        },
+        {
+            "label": "File Type",
+            "name": "file_type",
+            "type": "text"
+        }
+    ]
+}
\ No newline at end of file
diff --git a/app/features/text_rewriter/prompt/text-rewriter-prompt.txt b/app/features/text_rewriter/prompt/text-rewriter-prompt.txt
new file mode 100644
index 00000000..48266ce7
--- /dev/null
+++ b/app/features/text_rewriter/prompt/text-rewriter-prompt.txt
@@ -0,0 +1,10 @@
+You are a helpful AI assistant thatt helps users rewrite text from documents.
+
+You are given instructions on how to rewrite a given text from the context provided:
+{instructions}
+
+You must respond as a JSON object:
+{format_instructions}
+
+Context: 
+{context}
\ No newline at end of file
diff --git a/app/features/text_rewriter/tests/__init__.py b/app/features/text_rewriter/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/app/features/text_rewriter/tests/test_core.py b/app/features/text_rewriter/tests/test_core.py
new file mode 100644
index 00000000..17f2fa3c
--- /dev/null
+++ b/app/features/text_rewriter/tests/test_core.py
@@ -0,0 +1,222 @@
+import pytest
+
+from app.tools.ai_resistant_assignment_generator.core import executor
+
+# Base attributes reused across all tests
+base_attributes = {
+    "assignment": "Math Homework",
+    "grade_level": "university",
+    "lang": "en"
+}
+
+# PDF Tests
+def test_executor_pdf_url_valid():
+    ai_resistant_assignment = executor(
+        **base_attributes,
+        file_url="https://filesamples.com/samples/document/pdf/sample1.pdf",
+        file_type="pdf"
+    )
+    assert isinstance(ai_resistant_assignment, dict)
+
+def test_executor_pdf_url_invalid():
+    with pytest.raises(ValueError) as exc_info:
+        executor(
+            **base_attributes,
+            file_url="https://filesamples.com/samples/document/pdf/sample1.pdf",
+            file_type=1
+        )
+    assert isinstance(exc_info.value, ValueError)
+
+# CSV Tests
+def test_executor_csv_url_valid():
+    ai_resistant_assignment = executor(
+        **base_attributes,
+        file_url="https://filesamples.com/samples/document/csv/sample1.csv",
+        file_type="csv"
+    )
+    assert isinstance(ai_resistant_assignment, dict)
+
+def test_executor_csv_url_invalid():
+    with pytest.raises(ValueError) as exc_info:
+        executor(
+            **base_attributes,
+            file_url="https://filesamples.com/samples/document/csv/sample1.csv",
+            file_type=1
+        )
+    assert isinstance(exc_info.value, ValueError)
+
+# TXT Tests
+def test_executor_txt_url_valid():
+    ai_resistant_assignment = executor(
+        **base_attributes,
+        file_url="https://filesamples.com/samples/document/txt/sample1.txt",
+        file_type="txt"
+    )
+    assert isinstance(ai_resistant_assignment, dict)
+
+def test_executor_txt_url_invalid():
+    with pytest.raises(ValueError) as exc_info:
+        executor(
+            **base_attributes,
+            file_url="https://filesamples.com/samples/document/txt/sample1.txt",
+            file_type=1
+        )
+    assert isinstance(exc_info.value, ValueError)
+
+# MD Tests
+def test_executor_md_url_valid():
+    ai_resistant_assignment = executor(
+        **base_attributes,
+        file_url="https://github.com/radicalxdev/kai-ai-backend/blob/main/README.md",
+        file_type="md"
+    )
+    assert isinstance(ai_resistant_assignment, dict)
+
+def test_executor_md_url_invalid():
+    with pytest.raises(ValueError) as exc_info:
+        executor(
+            **base_attributes,
+            file_url="https://github.com/radicalxdev/kai-ai-backend/blob/main/README.md",
+            file_type=1
+        )
+    assert isinstance(exc_info.value, ValueError)
+
+# PPTX Tests
+def test_executor_pptx_url_valid():
+    ai_resistant_assignment = executor(
+        **base_attributes,
+        file_url="https://scholar.harvard.edu/files/torman_personal/files/samplepptx.pptx",
+        file_type="pptx"
+    )
+    assert isinstance(ai_resistant_assignment, dict)
+
+def test_executor_pptx_url_invalid():
+    with pytest.raises(ValueError) as exc_info:
+        executor(
+            **base_attributes,
+            file_url="https://scholar.harvard.edu/files/torman_personal/files/samplepptx.pptx",
+            file_type=1
+        )
+    assert isinstance(exc_info.value, ValueError)
+
+# DOCX Tests
+def test_executor_docx_url_valid():
+    ai_resistant_assignment = executor(
+        **base_attributes,
+        file_url="https://filesamples.com/samples/document/docx/sample1.docx",
+        file_type="docx"
+    )
+    assert isinstance(ai_resistant_assignment, dict)
+
+def test_executor_docx_url_invalid():
+    with pytest.raises(ValueError) as exc_info:
+        executor(
+            **base_attributes,
+            file_url="https://filesamples.com/samples/document/docx/sample1.docx",
+            file_type=1
+        )
+    assert isinstance(exc_info.value, ValueError)
+
+# XLS Tests
+def test_executor_xls_url_valid():
+    ai_resistant_assignment = executor(
+        **base_attributes,
+        file_url="https://filesamples.com/samples/document/xls/sample1.xls",
+        file_type="xls"
+    )
+    assert isinstance(ai_resistant_assignment, dict)
+
+def test_executor_xls_url_invalid():
+    with pytest.raises(ValueError) as exc_info:
+        executor(
+            **base_attributes,
+            file_url="https://filesamples.com/samples/document/xls/sample1.xls",
+            file_type=1
+        )
+    assert isinstance(exc_info.value, ValueError)
+
+# XLSX Tests
+def test_executor_xlsx_url_valid():
+    ai_resistant_assignment = executor(
+        **base_attributes,
+        file_url="https://filesamples.com/samples/document/xlsx/sample1.xlsx",
+        file_type="xlsx"
+    )
+    assert isinstance(ai_resistant_assignment, dict)
+
+def test_executor_xlsx_url_invalid():
+    with pytest.raises(ValueError) as exc_info:
+        executor(
+            **base_attributes,
+            file_url="https://filesamples.com/samples/document/xlsx/sample1.xlsx",
+            file_type=1
+        )
+    assert isinstance(exc_info.value, ValueError)
+
+# XML Tests
+def test_executor_xml_url_invalid():
+    with pytest.raises(ValueError) as exc_info:
+        executor(
+            **base_attributes,
+            file_url="https://filesampleshub.com/download/code/xml/dummy.xml",
+            file_type=1
+        )
+    assert isinstance(exc_info.value, ValueError)
+
+# GDocs Tests
+def test_executor_gdocs_url_invalid():
+    with pytest.raises(ValueError) as exc_info:
+        executor(
+            **base_attributes,
+            file_url="https://docs.google.com/document/d/1OWQfO9LX6psGipJu9LabzNE22us1Ct/edit",
+            file_type=1
+        )
+    assert isinstance(exc_info.value, ValueError)
+
+# GSheets Tests
+def test_executor_gsheets_url_invalid():
+    with pytest.raises(ValueError) as exc_info:
+        executor(
+            **base_attributes,
+            file_url="https://docs.google.com/spreadsheets/d/16OPtLLSfU/edit",
+            file_type=1
+        )
+    assert isinstance(exc_info.value, ValueError)
+
+# GSlides Tests
+def test_executor_gslides_url_invalid():
+    with pytest.raises(ValueError) as exc_info:
+        executor(
+            **base_attributes,
+            file_url="https://docs.google.com/spreadsheets/d/16OPtLLSfU/edit",
+            file_type=1
+        )
+    assert isinstance(exc_info.value, ValueError)
+
+# GPDFs Tests
+def test_executor_gpdfs_url_valid():
+    ai_resistant_assignment = executor(
+        **base_attributes,
+        file_url="https://drive.google.com/file/d/1fUj1uWIMh6QZsPkt0Vs7mEd2VEqz3O8l/view",
+        file_type="gpdf"
+    )
+    assert isinstance(ai_resistant_assignment, dict)
+
+def test_executor_gpdfs_url_invalid():
+    with pytest.raises(ValueError) as exc_info:
+        executor(
+            **base_attributes,
+            file_url="https://drive.google.com/file/d/1fUj1uWIMh6QZsPkt0Vs7mEd2VEqz3O8l/view",
+            file_type=1
+        )
+    assert isinstance(exc_info.value, ValueError)
+
+# MP3 Tests
+def test_executor_mp3_url_invalid():
+    with pytest.raises(ValueError) as exc_info:
+        executor(
+            **base_attributes,
+            file_url="https://raw.githubusercontent.com/asleem/uploaded_files/main/dummy.mp3",
+            file_type=1
+        )
+    assert isinstance(exc_info.value, ValueError)
diff --git a/app/features/text_rewriter/tools.py b/app/features/text_rewriter/tools.py
new file mode 100644
index 00000000..10015cac
--- /dev/null
+++ b/app/features/text_rewriter/tools.py
@@ -0,0 +1,84 @@
+from pydantic import BaseModel, Field
+from typing import List, Dict, Optional
+import os
+from langchain_core.documents import Document
+from langchain_chroma import Chroma
+from langchain_core.prompts import PromptTemplate
+from langchain_core.runnables import RunnablePassthrough, RunnableParallel
+from langchain_core.output_parsers import JsonOutputParser
+from langchain_google_genai import GoogleGenerativeAI
+from langchain_google_genai import GoogleGenerativeAIEmbeddings
+
+from app.services.logger import setup_logger
+
+logger = setup_logger(__name__)
+
+def read_text_file(file_path):
+    # Get the directory containing the script file
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+
+    # Combine the script directory with the relative file path
+    absolute_file_path = os.path.join(script_dir, file_path)
+    
+    with open(absolute_file_path, 'r') as file:
+        return file.read()
+    
+class TextRewriter:
+    def __init__(self, instructions, vectorstore_class=Chroma, prompt=None, embedding_model=None, model=None, parser=None, verbose=False):
+        default_config = {
+            "model": GoogleGenerativeAI(model="gemini-1.5-flash"),
+            "embedding_model": GoogleGenerativeAIEmbeddings(model='models/embedding-001'),
+            "parser": JsonOutputParser(pydantic_object=RewrittenOutput),
+            "prompt": read_text_file("prompt/ai-resistant-prompt.txt"),
+            "prompt_without_context": read_text_file("prompt/ai-resistant-without-context-prompt.txt"),
+            "vectorstore_class": Chroma
+        }
+
+        self.prompt = prompt or default_config["prompt"]
+        self.prompt_without_context = default_config["prompt_without_context"]
+        self.model = model or default_config["model"]
+        self.parser = parser or default_config["parser"]
+        self.embedding_model = embedding_model or default_config["embedding_model"]
+
+        self.vectorstore_class = vectorstore_class or default_config["vectorstore_class"]
+        self.vectorstore, self.retriever, self.runner = None, None, None
+        self.instructions = instructions
+        self.verbose = verbose
+
+        if vectorstore_class is None: raise ValueError("Vectorstore must be provided")
+    
+    def compile_with_docs(self, documents: List[Document]):
+        # Return the chain
+        prompt = PromptTemplate(
+            template=self.prompt,
+            input_variables=["instructions"],
+            partial_variables={"format_instructions": self.parser.get_format_instructions()}
+        )
+
+        if self.runner is None:
+            logger.info(f"Creating vectorstore from {len(documents)} documents") if self.verbose else None
+            self.vectorstore = self.vectorstore_class.from_documents(documents, self.embedding_model)
+            logger.info(f"Vectorstore created") if self.verbose else None
+
+            self.retriever = self.vectorstore.as_retriever()
+            logger.info(f"Retriever created successfully") if self.verbose else None
+
+            self.runner = RunnableParallel(
+                {"context": self.retriever,
+                "instructions": RunnablePassthrough()
+                }
+            )
+
+        chain = self.runner | prompt | self.model | self.parser
+
+        logger.info(f"Chain compilation complete")
+
+        return chain
+
+    def rewrite(self, documents: List[Document]):
+        chain = self.compile_with_docs(documents)
+        output = chain.run()
+        return output
+    
+class RewrittenOutput(BaseModel):
+    rewritten_text: str = Field(description="The rewritten text")
\ No newline at end of file

From 86be7391d3fe3591665598a9d176fb5c7286e32c Mon Sep 17 00:00:00 2001
From: Sarang-Nambiar <101126190+Sarang-Nambiar@users.noreply.github.com>
Date: Thu, 16 Jan 2025 23:03:43 +0800
Subject: [PATCH 2/6] chore: essential functions added in tools.py

---
 app/features/text_rewriter/core.py            |  3 +-
 .../prompt/text-rewriter-prompt.txt           |  2 +-
 app/features/text_rewriter/tools.py           | 68 +++++++++----------
 3 files changed, 36 insertions(+), 37 deletions(-)

diff --git a/app/features/text_rewriter/core.py b/app/features/text_rewriter/core.py
index ad14039f..80c351a3 100644
--- a/app/features/text_rewriter/core.py
+++ b/app/features/text_rewriter/core.py
@@ -10,17 +10,18 @@ def executor(
              file_url: str,
              file_type: str, 
              verbose=False):
+    
     try:
         if verbose:
             logger.info(f"File URL loaded: {file_url}")
         
         if file_type and file_url:
+            logger.info(f"Generating docs. from {file_url} with type {file_type}")
             docs = get_docs(file_url, file_type, verbose=True)
         else:
             docs = None
             raise ToolExecutorError("File URL and file type must be provided")
         
-        # TODO: IMPLEMENT CLASS HERE
         output = TextRewriter(instructions, verbose=verbose).rewrite(docs)
 
     except Exception as e:
diff --git a/app/features/text_rewriter/prompt/text-rewriter-prompt.txt b/app/features/text_rewriter/prompt/text-rewriter-prompt.txt
index 48266ce7..a686e3d7 100644
--- a/app/features/text_rewriter/prompt/text-rewriter-prompt.txt
+++ b/app/features/text_rewriter/prompt/text-rewriter-prompt.txt
@@ -3,7 +3,7 @@ You are a helpful AI assistant thatt helps users rewrite text from documents.
 You are given instructions on how to rewrite a given text from the context provided:
 {instructions}
 
-You must respond as a JSON object:
+You must respond as a JSON object with the below format:
 {format_instructions}
 
 Context: 
diff --git a/app/features/text_rewriter/tools.py b/app/features/text_rewriter/tools.py
index 10015cac..25f5c2b0 100644
--- a/app/features/text_rewriter/tools.py
+++ b/app/features/text_rewriter/tools.py
@@ -1,13 +1,10 @@
 from pydantic import BaseModel, Field
-from typing import List, Dict, Optional
+from typing import List, Dict
 import os
 from langchain_core.documents import Document
-from langchain_chroma import Chroma
 from langchain_core.prompts import PromptTemplate
-from langchain_core.runnables import RunnablePassthrough, RunnableParallel
 from langchain_core.output_parsers import JsonOutputParser
 from langchain_google_genai import GoogleGenerativeAI
-from langchain_google_genai import GoogleGenerativeAIEmbeddings
 
 from app.services.logger import setup_logger
 
@@ -24,61 +21,62 @@ def read_text_file(file_path):
         return file.read()
     
 class TextRewriter:
-    def __init__(self, instructions, vectorstore_class=Chroma, prompt=None, embedding_model=None, model=None, parser=None, verbose=False):
+    def __init__(self, instructions, prompt=None, model=None, parser=None, verbose=False):
         default_config = {
             "model": GoogleGenerativeAI(model="gemini-1.5-flash"),
-            "embedding_model": GoogleGenerativeAIEmbeddings(model='models/embedding-001'),
             "parser": JsonOutputParser(pydantic_object=RewrittenOutput),
-            "prompt": read_text_file("prompt/ai-resistant-prompt.txt"),
-            "prompt_without_context": read_text_file("prompt/ai-resistant-without-context-prompt.txt"),
-            "vectorstore_class": Chroma
+            "prompt": read_text_file("prompt/text-rewriter-prompt.txt"),
         }
 
         self.prompt = prompt or default_config["prompt"]
-        self.prompt_without_context = default_config["prompt_without_context"]
         self.model = model or default_config["model"]
         self.parser = parser or default_config["parser"]
-        self.embedding_model = embedding_model or default_config["embedding_model"]
 
-        self.vectorstore_class = vectorstore_class or default_config["vectorstore_class"]
-        self.vectorstore, self.retriever, self.runner = None, None, None
         self.instructions = instructions
         self.verbose = verbose
-
-        if vectorstore_class is None: raise ValueError("Vectorstore must be provided")
     
-    def compile_with_docs(self, documents: List[Document]):
+    def compile(self):
         # Return the chain
         prompt = PromptTemplate(
             template=self.prompt,
-            input_variables=["instructions"],
+            input_variables=["instructions", "context"],
             partial_variables={"format_instructions": self.parser.get_format_instructions()}
         )
 
-        if self.runner is None:
-            logger.info(f"Creating vectorstore from {len(documents)} documents") if self.verbose else None
-            self.vectorstore = self.vectorstore_class.from_documents(documents, self.embedding_model)
-            logger.info(f"Vectorstore created") if self.verbose else None
+        chain = prompt | self.model | self.parser
 
-            self.retriever = self.vectorstore.as_retriever()
-            logger.info(f"Retriever created successfully") if self.verbose else None
+        return chain
+    
+    def validate_output(self, response: Dict) -> bool:
+        # TODO: implement a response validator here
+        # might need to add more checks
+        if 'rewritten_text' in response:
+            return True
+        return False
 
-            self.runner = RunnableParallel(
-                {"context": self.retriever,
-                "instructions": RunnablePassthrough()
-                }
-            )
+    def rewrite(self, documents: List[Document]):
+        chain = self.compile()
+        doc_content = "\n".join([doc.page_content for doc in documents])
 
-        chain = self.runner | prompt | self.model | self.parser
+        attempts = 0
+        max_attempts = 5
 
-        logger.info(f"Chain compilation complete")
+        while attempts < max_attempts:
+            response = chain.invoke(
+                instructions=self.instructions,
+                context=doc_content
+            )
 
-        return chain
+            if self.verbose:
+                logger.info(f"Generated response attempt {attempts + 1}: {response}")
 
-    def rewrite(self, documents: List[Document]):
-        chain = self.compile_with_docs(documents)
-        output = chain.run()
-        return output
+            # validate response
+            if self.validate_output(response):
+                break
+    
+            # if response is invalid, retry
+            attempts += 1
+        return response
     
 class RewrittenOutput(BaseModel):
     rewritten_text: str = Field(description="The rewritten text")
\ No newline at end of file

From ae2515da36e93ab6c42c0cf41c0446c07d07f773 Mon Sep 17 00:00:00 2001
From: Sarang-Nambiar <101126190+Sarang-Nambiar@users.noreply.github.com>
Date: Sat, 18 Jan 2025 17:31:59 +0800
Subject: [PATCH 3/6] feat: rewriter functionality complete

---
 app/features/text_rewriter/core.py            | 13 ++++----
 app/features/text_rewriter/metadata.json      |  5 ++++
 .../prompt/text-rewriter-prompt.txt           |  4 +--
 app/features/text_rewriter/tools.py           | 30 +++++++++++--------
 app/tools/utils/tools_config.json             |  4 +++
 5 files changed, 37 insertions(+), 19 deletions(-)

diff --git a/app/features/text_rewriter/core.py b/app/features/text_rewriter/core.py
index 80c351a3..d4f11c8f 100644
--- a/app/features/text_rewriter/core.py
+++ b/app/features/text_rewriter/core.py
@@ -5,24 +5,27 @@
 
 logger = setup_logger()
 
+ALLOWED_FILE_TYPES = {"pptx", "pdf", "docx", "txt", "csv", "youtube_url", "url", "gsheet"}
+
 def executor(
+             raw_text: str,
              instructions: str,
              file_url: str,
              file_type: str, 
              verbose=False):
     
     try:
-        if verbose:
-            logger.info(f"File URL loaded: {file_url}")
+        if verbose: logger.info(f"File URL loaded: {file_url}")
         
-        if file_type and file_url:
+        if file_type and file_url and file_type in ALLOWED_FILE_TYPES:
             logger.info(f"Generating docs. from {file_url} with type {file_type}")
             docs = get_docs(file_url, file_type, verbose=True)
-        else:
+        elif raw_text:
             docs = None
+        else:
             raise ToolExecutorError("File URL and file type must be provided")
         
-        output = TextRewriter(instructions, verbose=verbose).rewrite(docs)
+        output = TextRewriter(instructions, verbose=verbose).rewrite(raw_text, docs)
 
     except Exception as e:
         error_message = f"Error in executor: {e}"
diff --git a/app/features/text_rewriter/metadata.json b/app/features/text_rewriter/metadata.json
index 56c6fbde..90d0be49 100644
--- a/app/features/text_rewriter/metadata.json
+++ b/app/features/text_rewriter/metadata.json
@@ -5,6 +5,11 @@
             "name": "instructions",
             "type": "text"
         },
+        {
+            "label": "Text to Rewrite",
+            "name": "raw_text",
+            "type": "text"
+        },
         {
             "label": "File URL",
             "name": "file_url",
diff --git a/app/features/text_rewriter/prompt/text-rewriter-prompt.txt b/app/features/text_rewriter/prompt/text-rewriter-prompt.txt
index a686e3d7..70e85eb5 100644
--- a/app/features/text_rewriter/prompt/text-rewriter-prompt.txt
+++ b/app/features/text_rewriter/prompt/text-rewriter-prompt.txt
@@ -1,10 +1,10 @@
 You are a helpful AI assistant thatt helps users rewrite text from documents.
 
-You are given instructions on how to rewrite a given text from the context provided:
+You must follow the instructions given below on how the text should be rewritten:
 {instructions}
 
 You must respond as a JSON object with the below format:
 {format_instructions}
 
-Context: 
+Text to rewrite: 
 {context}
\ No newline at end of file
diff --git a/app/features/text_rewriter/tools.py b/app/features/text_rewriter/tools.py
index 25f5c2b0..e66b3605 100644
--- a/app/features/text_rewriter/tools.py
+++ b/app/features/text_rewriter/tools.py
@@ -36,7 +36,6 @@ def __init__(self, instructions, prompt=None, model=None, parser=None, verbose=F
         self.verbose = verbose
     
     def compile(self):
-        # Return the chain
         prompt = PromptTemplate(
             template=self.prompt,
             input_variables=["instructions", "context"],
@@ -45,37 +44,44 @@ def compile(self):
 
         chain = prompt | self.model | self.parser
 
+        if self.verbose: logger.info(f"Chain compiled: {chain}")
+
         return chain
     
     def validate_output(self, response: Dict) -> bool:
-        # TODO: implement a response validator here
-        # might need to add more checks
         if 'rewritten_text' in response:
             return True
         return False
 
-    def rewrite(self, documents: List[Document]):
+    def rewrite(self, raw_text: str, documents: List[Document]):
         chain = self.compile()
-        doc_content = "\n".join([doc.page_content for doc in documents])
-
+        if documents:
+            doc_content = "\n".join([doc.page_content for doc in documents])
+        else:
+            doc_content = raw_text
+            
         attempts = 0
         max_attempts = 5
 
         while attempts < max_attempts:
-            response = chain.invoke(
-                instructions=self.instructions,
-                context=doc_content
-            )
+            response = chain.invoke({
+                "instructions": self.instructions,
+                "context": doc_content
+            })
 
             if self.verbose:
                 logger.info(f"Generated response attempt {attempts + 1}: {response}")
 
-            # validate response
+            # validate response incase of LLM hallucinations
             if self.validate_output(response):
                 break
-    
+            
+            if self.verbose: logger.warning(f"Invalid response generated, retrying...")
             # if response is invalid, retry
             attempts += 1
+        
+        if self.verbose: logger.info(f"Final response generated: {response}")
+        
         return response
     
 class RewrittenOutput(BaseModel):
diff --git a/app/tools/utils/tools_config.json b/app/tools/utils/tools_config.json
index 08c2e241..525b8c2e 100644
--- a/app/tools/utils/tools_config.json
+++ b/app/tools/utils/tools_config.json
@@ -38,5 +38,9 @@
     "writing-feedback-generator": {
         "path": "tools.writing_feedback_generator.core",
         "metadata_file": "metadata.json"
+    },
+    "text-rewriter": {
+        "path": "features.text_rewriter.core",
+        "metadata_file": "metadata.json"
     }
 }

From 03fe2f97a232140ff136ac6c34c0f48010c2345c Mon Sep 17 00:00:00 2001
From: Sarang-Nambiar <101126190+Sarang-Nambiar@users.noreply.github.com>
Date: Sat, 18 Jan 2025 22:02:16 +0800
Subject: [PATCH 4/6] feat: Added unit test cases + bug fixes

---
 app/features/text_rewriter/core.py            |   2 +
 app/features/text_rewriter/tests/test_core.py | 165 ++++++++----------
 2 files changed, 70 insertions(+), 97 deletions(-)

diff --git a/app/features/text_rewriter/core.py b/app/features/text_rewriter/core.py
index d4f11c8f..80f7ee04 100644
--- a/app/features/text_rewriter/core.py
+++ b/app/features/text_rewriter/core.py
@@ -23,6 +23,8 @@ def executor(
         elif raw_text:
             docs = None
         else:
+            if file_type not in ALLOWED_FILE_TYPES:
+                raise ToolExecutorError(f"File type {file_type} not supported")
             raise ToolExecutorError("File URL and file type must be provided")
         
         output = TextRewriter(instructions, verbose=verbose).rewrite(raw_text, docs)
diff --git a/app/features/text_rewriter/tests/test_core.py b/app/features/text_rewriter/tests/test_core.py
index 17f2fa3c..9f189eaa 100644
--- a/app/features/text_rewriter/tests/test_core.py
+++ b/app/features/text_rewriter/tests/test_core.py
@@ -1,22 +1,26 @@
 import pytest
 
-from app.tools.ai_resistant_assignment_generator.core import executor
+from app.features.text_rewriter.core import executor
+from app.api.error_utilities import InputValidationError
 
 # Base attributes reused across all tests
 base_attributes = {
-    "assignment": "Math Homework",
-    "grade_level": "university",
-    "lang": "en"
+    "instructions": "Rewrite the text in a more formal tone.",
+    "raw_text": "",
+}
+
+base_attributes_without_raw_text = {
+    "instructions": "Rewrite the text in a more formal tone.",
 }
 
 # PDF Tests
 def test_executor_pdf_url_valid():
-    ai_resistant_assignment = executor(
+    rewritten_text = executor(
         **base_attributes,
         file_url="https://filesamples.com/samples/document/pdf/sample1.pdf",
         file_type="pdf"
     )
-    assert isinstance(ai_resistant_assignment, dict)
+    assert isinstance(rewritten_text, dict)
 
 def test_executor_pdf_url_invalid():
     with pytest.raises(ValueError) as exc_info:
@@ -29,12 +33,12 @@ def test_executor_pdf_url_invalid():
 
 # CSV Tests
 def test_executor_csv_url_valid():
-    ai_resistant_assignment = executor(
+    rewritten_text = executor(
         **base_attributes,
         file_url="https://filesamples.com/samples/document/csv/sample1.csv",
         file_type="csv"
     )
-    assert isinstance(ai_resistant_assignment, dict)
+    assert isinstance(rewritten_text, dict)
 
 def test_executor_csv_url_invalid():
     with pytest.raises(ValueError) as exc_info:
@@ -47,12 +51,12 @@ def test_executor_csv_url_invalid():
 
 # TXT Tests
 def test_executor_txt_url_valid():
-    ai_resistant_assignment = executor(
+    rewritten_text = executor(
         **base_attributes,
         file_url="https://filesamples.com/samples/document/txt/sample1.txt",
         file_type="txt"
     )
-    assert isinstance(ai_resistant_assignment, dict)
+    assert isinstance(rewritten_text, dict)
 
 def test_executor_txt_url_invalid():
     with pytest.raises(ValueError) as exc_info:
@@ -63,32 +67,14 @@ def test_executor_txt_url_invalid():
         )
     assert isinstance(exc_info.value, ValueError)
 
-# MD Tests
-def test_executor_md_url_valid():
-    ai_resistant_assignment = executor(
-        **base_attributes,
-        file_url="https://github.com/radicalxdev/kai-ai-backend/blob/main/README.md",
-        file_type="md"
-    )
-    assert isinstance(ai_resistant_assignment, dict)
-
-def test_executor_md_url_invalid():
-    with pytest.raises(ValueError) as exc_info:
-        executor(
-            **base_attributes,
-            file_url="https://github.com/radicalxdev/kai-ai-backend/blob/main/README.md",
-            file_type=1
-        )
-    assert isinstance(exc_info.value, ValueError)
-
 # PPTX Tests
 def test_executor_pptx_url_valid():
-    ai_resistant_assignment = executor(
+    rewritten_text = executor(
         **base_attributes,
         file_url="https://scholar.harvard.edu/files/torman_personal/files/samplepptx.pptx",
         file_type="pptx"
     )
-    assert isinstance(ai_resistant_assignment, dict)
+    assert isinstance(rewritten_text, dict)
 
 def test_executor_pptx_url_invalid():
     with pytest.raises(ValueError) as exc_info:
@@ -101,12 +87,12 @@ def test_executor_pptx_url_invalid():
 
 # DOCX Tests
 def test_executor_docx_url_valid():
-    ai_resistant_assignment = executor(
+    rewritten_text = executor(
         **base_attributes,
         file_url="https://filesamples.com/samples/document/docx/sample1.docx",
         file_type="docx"
     )
-    assert isinstance(ai_resistant_assignment, dict)
+    assert isinstance(rewritten_text, dict)
 
 def test_executor_docx_url_invalid():
     with pytest.raises(ValueError) as exc_info:
@@ -117,106 +103,91 @@ def test_executor_docx_url_invalid():
         )
     assert isinstance(exc_info.value, ValueError)
 
-# XLS Tests
-def test_executor_xls_url_valid():
-    ai_resistant_assignment = executor(
-        **base_attributes,
-        file_url="https://filesamples.com/samples/document/xls/sample1.xls",
-        file_type="xls"
-    )
-    assert isinstance(ai_resistant_assignment, dict)
-
-def test_executor_xls_url_invalid():
-    with pytest.raises(ValueError) as exc_info:
-        executor(
-            **base_attributes,
-            file_url="https://filesamples.com/samples/document/xls/sample1.xls",
-            file_type=1
-        )
-    assert isinstance(exc_info.value, ValueError)
-
-# XLSX Tests
-def test_executor_xlsx_url_valid():
-    ai_resistant_assignment = executor(
-        **base_attributes,
-        file_url="https://filesamples.com/samples/document/xlsx/sample1.xlsx",
-        file_type="xlsx"
-    )
-    assert isinstance(ai_resistant_assignment, dict)
-
-def test_executor_xlsx_url_invalid():
+# Invalid file type test
+def test_executor_invalid_file_type():
     with pytest.raises(ValueError) as exc_info:
         executor(
             **base_attributes,
             file_url="https://filesamples.com/samples/document/xlsx/sample1.xlsx",
-            file_type=1
+            file_type="xlsx"
         )
     assert isinstance(exc_info.value, ValueError)
 
-# XML Tests
-def test_executor_xml_url_invalid():
-    with pytest.raises(ValueError) as exc_info:
-        executor(
+# GSheets Tests
+def test_executor_gsheets_url_valid():
+    rewritten_text = executor(
             **base_attributes,
-            file_url="https://filesampleshub.com/download/code/xml/dummy.xml",
-            file_type=1
+            file_url="https://docs.google.com/spreadsheets/d/1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgvE2upms/edit?gid=0#gid=0",
+            file_type="gsheet"
         )
-    assert isinstance(exc_info.value, ValueError)
+    assert isinstance(rewritten_text, dict)
 
-# GDocs Tests
-def test_executor_gdocs_url_invalid():
+def test_executor_gsheets_url_invalid():
     with pytest.raises(ValueError) as exc_info:
         executor(
             **base_attributes,
-            file_url="https://docs.google.com/document/d/1OWQfO9LX6psGipJu9LabzNE22us1Ct/edit",
+            file_url="https://docs.google.com/spreadsheets/d/1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgvE2upms/edit?gid=0#gid=0",
             file_type=1
         )
     assert isinstance(exc_info.value, ValueError)
 
-# GSheets Tests
-def test_executor_gsheets_url_invalid():
-    with pytest.raises(ValueError) as exc_info:
-        executor(
+# YouTube URL Tests
+def test_executor_youtube_url_valid():
+    rewritten_text = executor(
             **base_attributes,
-            file_url="https://docs.google.com/spreadsheets/d/16OPtLLSfU/edit",
-            file_type=1
+            file_url="https://www.youtube.com/watch?v=HgBpFaATdoA",
+            file_type="youtube_url"
         )
-    assert isinstance(exc_info.value, ValueError)
+    assert isinstance(rewritten_text, dict)
 
-# GSlides Tests
-def test_executor_gslides_url_invalid():
+def test_executor_youtube_url_invalid():
     with pytest.raises(ValueError) as exc_info:
         executor(
             **base_attributes,
-            file_url="https://docs.google.com/spreadsheets/d/16OPtLLSfU/edit",
+            file_url="https://www.youtube.com/watch?v=HgBpFaATdoA",
             file_type=1
         )
     assert isinstance(exc_info.value, ValueError)
 
-# GPDFs Tests
-def test_executor_gpdfs_url_valid():
-    ai_resistant_assignment = executor(
+# PPTX Tests
+def test_executor_pptx_url_valid():
+    rewritten_text = executor(
         **base_attributes,
-        file_url="https://drive.google.com/file/d/1fUj1uWIMh6QZsPkt0Vs7mEd2VEqz3O8l/view",
-        file_type="gpdf"
+        file_url = "https://getsamplefiles.com/download/pptx/sample-1.pptx",
+        file_type = "pptx",
     )
-    assert isinstance(ai_resistant_assignment, dict)
 
-def test_executor_gpdfs_url_invalid():
+    assert isinstance(rewritten_text, dict)
+
+def test_executor_pptx_url_invalid():
+
     with pytest.raises(ValueError) as exc_info:
         executor(
             **base_attributes,
-            file_url="https://drive.google.com/file/d/1fUj1uWIMh6QZsPkt0Vs7mEd2VEqz3O8l/view",
-            file_type=1
+            file_url = "https://getsamplefiles.com/download/pptx/sample-1.pptx",
+            file_type = "pptx",
         )
+
     assert isinstance(exc_info.value, ValueError)
 
-# MP3 Tests
-def test_executor_mp3_url_invalid():
-    with pytest.raises(ValueError) as exc_info:
+# Plain Text Tests (text supplied via raw_text instead of a file URL)
+def test_executor_plain_text_valid():
+    rewritten_text = executor(
+        **base_attributes_without_raw_text,
+        raw_text="The quick brown fox jumps over the lazy dog.",
+        file_url="",
+        file_type="",
+    )
+
+    assert isinstance(rewritten_text, dict)
+
+def test_executor_plain_text_invalid():
+    with pytest.raises(InputValidationError) as exc_info:
         executor(
-            **base_attributes,
-            file_url="https://raw.githubusercontent.com/asleem/uploaded_files/main/dummy.mp3",
-            file_type=1
+            **base_attributes_without_raw_text,
+            raw_text=1,
+            file_url="",
+            file_type="",
         )
-    assert isinstance(exc_info.value, ValueError)
+
+    assert isinstance(exc_info.value, InputValidationError)
\ No newline at end of file

From 0bad428f2babd8e8814a8beb8b517ec02ea3047b Mon Sep 17 00:00:00 2001
From: Sarang-Nambiar <101126190+Sarang-Nambiar@users.noreply.github.com>
Date: Sat, 18 Jan 2025 22:23:58 +0800
Subject: [PATCH 5/6] fix: drop PPTX valid-URL tests and correct plain-text invalid test

---
 app/features/text_rewriter/tests/test_core.py | 26 +++----------------
 1 file changed, 4 insertions(+), 22 deletions(-)

diff --git a/app/features/text_rewriter/tests/test_core.py b/app/features/text_rewriter/tests/test_core.py
index 9f189eaa..edcca330 100644
--- a/app/features/text_rewriter/tests/test_core.py
+++ b/app/features/text_rewriter/tests/test_core.py
@@ -68,14 +68,6 @@ def test_executor_txt_url_invalid():
     assert isinstance(exc_info.value, ValueError)
 
 # PPTX Tests
-def test_executor_pptx_url_valid():
-    rewritten_text = executor(
-        **base_attributes,
-        file_url="https://scholar.harvard.edu/files/torman_personal/files/samplepptx.pptx",
-        file_type="pptx"
-    )
-    assert isinstance(rewritten_text, dict)
-
 def test_executor_pptx_url_invalid():
     with pytest.raises(ValueError) as exc_info:
         executor(
@@ -149,16 +141,6 @@ def test_executor_youtube_url_invalid():
         )
     assert isinstance(exc_info.value, ValueError)
 
-# PPTX Tests
-def test_executor_pptx_url_valid():
-    rewritten_text = executor(
-        **base_attributes,
-        file_url = "https://getsamplefiles.com/download/pptx/sample-1.pptx",
-        file_type = "pptx",
-    )
-
-    assert isinstance(rewritten_text, dict)
-
 def test_executor_pptx_url_invalid():
 
     with pytest.raises(ValueError) as exc_info:
@@ -182,12 +164,12 @@ def test_executor_plain_text_valid():
     assert isinstance(rewritten_text, dict)
 
 def test_executor_plain_text_invalid():
-    with pytest.raises(InputValidationError) as exc_info:
+    with pytest.raises(ValueError) as exc_info:
         executor(
             **base_attributes_without_raw_text,
-            raw_text=1,
+            raw_text="",
             file_url="",
-            file_type="",
+            file_type=1,
         )
 
-    assert isinstance(exc_info.value, InputValidationError)
\ No newline at end of file
+    assert isinstance(exc_info.value, ValueError)
\ No newline at end of file

From f59a1beaa022b155f13419adc2b74eab82d8a7f4 Mon Sep 17 00:00:00 2001
From: Sarang-Nambiar <101126190+Sarang-Nambiar@users.noreply.github.com>
Date: Thu, 23 Jan 2025 16:34:52 +0800
Subject: [PATCH 6/6] chore: imported langchain in core.py + reverted
 tools_config.json

---
 app/features/text_rewriter/core.py | 1 +
 app/tools/utils/tools_config.json  | 4 ----
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/app/features/text_rewriter/core.py b/app/features/text_rewriter/core.py
index 80f7ee04..515c470a 100644
--- a/app/features/text_rewriter/core.py
+++ b/app/features/text_rewriter/core.py
@@ -2,6 +2,7 @@
 from app.utils.document_loaders import get_docs
 from app.features.text_rewriter.tools import TextRewriter
 from app.api.error_utilities import ToolExecutorError
+import langchain
 
 logger = setup_logger()
 
diff --git a/app/tools/utils/tools_config.json b/app/tools/utils/tools_config.json
index 525b8c2e..08c2e241 100644
--- a/app/tools/utils/tools_config.json
+++ b/app/tools/utils/tools_config.json
@@ -38,9 +38,5 @@
     "writing-feedback-generator": {
         "path": "tools.writing_feedback_generator.core",
         "metadata_file": "metadata.json"
-    },
-    "text-rewriter": {
-        "path": "features.text_rewriter.core",
-        "metadata_file": "metadata.json"
     }
 }