diff --git a/app/features/__init__.py b/app/features/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/app/features/text_rewriter/.gitignore b/app/features/text_rewriter/.gitignore new file mode 100644 index 00000000..bfe061f7 --- /dev/null +++ b/app/features/text_rewriter/.gitignore @@ -0,0 +1,40 @@ +# Environment files +.env +text_rewriter.env + +# System files +.DS_Store +Thumbs.db + +# Python-related files +__pycache__/ +*.py[cod] +*.pyo +*.pyd + + +# Logs +*.log +*.sqlite3 + +# Compiled files +*.pyc +*.pyo +*.pyd + +# Ignore virtual environment +Ai/ +env/ +Lab/ + +# VS Code configuration +.vscode/ + +# Output files +output.txt + +# Ignore additional Ai/ and output.txt files +Ai/ +output.txt +.vscode/ +.env diff --git a/app/features/text_rewriter/Dockerfile b/app/features/text_rewriter/Dockerfile new file mode 100644 index 00000000..734d016a --- /dev/null +++ b/app/features/text_rewriter/Dockerfile @@ -0,0 +1,39 @@ +# Use Python 3.10 slim image +FROM python:3.10-slim + +# Set environment variables for non-interactive installs +ENV PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 + +# Set working directory inside the container +WORKDIR /code/app + +# Install system dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + libssl-dev \ + libffi-dev \ + curl \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Copy requirements.txt and install dependencies +COPY requirements.txt . +RUN pip install --no-cache-dir --upgrade pip \ + && pip install --no-cache-dir -r requirements.txt \ + && rm -rf /root/.cache + +# Copy the application source code into the container +COPY . 
/code/app
+
+# Copy the .env file into the container
+COPY app/features/text_rewriter/text_rewriter.env /code/app/features/text_rewriter/
+
+# Set the virtual environment as the default Python environment
+ENV PATH="/venv/bin:$PATH"
+
+# Expose the application port
+EXPOSE 8000
+
+# Use uvicorn to run the FastAPI app from test_core.py (where the /rewrite-text route is defined)
+ENTRYPOINT ["uvicorn", "app.features.text_rewriter.test_core:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]
\ No newline at end of file
diff --git a/app/features/text_rewriter/__init__.py b/app/features/text_rewriter/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/app/features/text_rewriter/core.py b/app/features/text_rewriter/core.py
new file mode 100644
index 00000000..9c3ea7aa
--- /dev/null
+++ b/app/features/text_rewriter/core.py
@@ -0,0 +1,42 @@
+import logging
+from app.features.text_rewriter.tools import (
+    load_metadata,
+    create_input_schema,
+    create_output_schema,
+    rewrite_tool_handler,
+    get_few_shot_examples
+)
+from typing import Dict
+# HTTPException is raised below; without this import every error path
+# would fail with NameError instead of producing an HTTP error response.
+from fastapi import HTTPException
+from app.services.logger import setup_logger
+
+logger = setup_logger("executor")
+
+def execute_text_rewriter(text: str, instructions: str) -> Dict[str, str]:
+    """
+    Rewrite ``text`` according to ``instructions`` using the few-shot rewrite tool.
+
+    Args:
+        text (str): The input text to rewrite.
+        instructions (str): The instructions for rewriting.
+
+    Returns:
+        Dict[str, str]: The result returned by ``rewrite_tool_handler``.
+
+    Raises:
+        HTTPException: Status 400 when loading the few-shot examples or the
+            rewrite handler raises ``ValueError``; status 500 for any other
+            exception. The original exception is attached as the cause.
+    """
+    logger.info("Starting text rewriting task.")
+    try:
+        # Few-shot examples are read from disk and handed to the handler
+        # together with the user-supplied inputs.
+        few_shot_examples = get_few_shot_examples()
+
+        inputs = {
+            "text": text,
+            "instructions": instructions,
+        }
+
+        return rewrite_tool_handler(inputs, few_shot_examples)
+    except ValueError as ve:
+        logger.error(f"Text rewriting rejected: {ve}")
+        raise HTTPException(status_code=400, detail=str(ve)) from ve
+    except Exception as e:
+        logger.error(f"Text rewriting failed: {e}")
+        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}") from e
\ No newline at end of file
diff --git a/app/features/text_rewriter/local-start.sh b/app/features/text_rewriter/local-start.sh
new file mode 100755
index 00000000..81d6aa48
--- /dev/null
+++ b/app/features/text_rewriter/local-start.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+source env/bin/activate # Activate virtual environment
+uvicorn text_rewriter:app --reload # Start the FastAPI server
diff --git a/app/features/text_rewriter/metadata.json b/app/features/text_rewriter/metadata.json
new file mode 100644
index 00000000..e87dc7c8
--- /dev/null
+++ b/app/features/text_rewriter/metadata.json
@@ -0,0 +1,23 @@
+{
+    "tool_id": "text_rewriter",
+    "name": "Text Rewriter Tool",
+    "description": "A tool for rewriting text based on specific instructions.",
+    "inputs": {
+        "text": {
+            "type": "string",
+            "required": true,
+            "description": "The text to be rewritten."
+        },
+        "instructions": {
+            "type": "string",
+            "required": true,
+            "description": "The instructions for rewriting the text."
+        }
+    },
+    "outputs": {
+        "rewritten_text": {
+            "type": "string",
+            "description": "The rewritten text."
+ } + } +} \ No newline at end of file diff --git a/app/features/text_rewriter/prompt/few_shot_examples.txt b/app/features/text_rewriter/prompt/few_shot_examples.txt new file mode 100644 index 00000000..6f707f39 --- /dev/null +++ b/app/features/text_rewriter/prompt/few_shot_examples.txt @@ -0,0 +1,24 @@ +Example 1: +Text: 'The causes of the American Civil War were complex, including economic differences, the issue of slavery, and the election of Abraham Lincoln.' +Instructions: 'Rewrite the text in simpler terms.' +Rewritten Text: 'The American Civil War happened because of many reasons, such as different economies in the North and South, slavery, and the election of Abraham Lincoln.' + +Example 2: +Text: 'Photosynthesis is the process by which plants use sunlight to synthesize foods from carbon dioxide and water.' +Instructions: 'Summarize the key points of the text.' +Rewritten Text: 'Photosynthesis is how plants make food from sunlight, carbon dioxide, and water.' + +Example 3: +Text: 'In the history of mathematics, there are several key figures such as Euclid, Isaac Newton, and Carl Friedrich Gauss, who contributed to the foundations of geometry and calculus.' +Instructions: 'Make the text shorter and focus on the key points.' +Rewritten Text: 'Important mathematicians like Euclid, Newton, and Gauss helped develop geometry and calculus.' + +Example 4: +Text: 'The mitochondria are often called the powerhouses of the cell because they generate most of the cell's energy.' +Instructions: 'Explain the concept in simple terms.' +Rewritten Text: 'Mitochondria are the parts of cells that produce energy.' + +Example 5: +Text: 'A balanced diet is crucial for maintaining health, and it should include a variety of foods such as vegetables, fruits, proteins, and carbohydrates.' +Instructions: 'Rewrite the text in a more concise manner.' +Rewritten Text: 'A healthy diet includes a mix of vegetables, fruits, proteins, and carbs.' 
\ No newline at end of file
diff --git a/app/features/text_rewriter/requirements.txt b/app/features/text_rewriter/requirements.txt
new file mode 100644
index 00000000..29606a6e
--- /dev/null
+++ b/app/features/text_rewriter/requirements.txt
@@ -0,0 +1,38 @@
+fastapi
+fastapi[standard]
+uvicorn[standard]
+langchain
+langchain-core
+langchain-google-genai
+langchain-chroma
+langchain-community
+langgraph
+google-cloud-secret-manager
+google-cloud-logging
+google-auth
+google-cloud-storage
+firebase-admin
+chroma
+pypdf
+fpdf
+youtube-transcript-api
+pytube
+unstructured
+python-pptx
+docx2txt
+networkx
+pandas
+xlrd
+openpyxl
+gdown
+pytest
+PyPDF2
+python-dotenv
+psutil
+pydub
+ffmpeg-python
+speechrecognition
+google-cloud-speech
+python-docx
+google-cloud-aiplatform
+vertexai # No need for vertexai.generative_models, it's already included in vertexai
diff --git a/app/features/text_rewriter/router.py b/app/features/text_rewriter/router.py
new file mode 100644
index 00000000..9313cbb0
--- /dev/null
+++ b/app/features/text_rewriter/router.py
@@ -0,0 +1,67 @@
+import os
+from fastapi import APIRouter, Depends, HTTPException
+from fastapi.encoders import jsonable_encoder
+from fastapi.responses import JSONResponse
+from typing import Union
+from app.assistants.utils.assistants_utilities import execute_assistant, finalize_inputs_assistants, load_assistant_metadata
+from app.services.schemas import GenericAssistantRequest, ToolRequest, ChatRequest, Message, ChatResponse, ToolResponse
+from app.utils.auth import key_check
+from app.services.logger import setup_logger
+from app.api.error_utilities import InputValidationError, ErrorResponse
+from app.tools.utils.tool_utilities import load_tool_metadata, execute_tool, finalize_inputs
+from fastapi.responses import FileResponse
+from starlette.background import BackgroundTask
+
+logger = setup_logger(__name__)
+router = APIRouter()
+
+@router.get("/")
+def read_root():
+    """Return the fixed greeting payload served at the router root."""
+    greeting = {"Hello": "World"}
+    return greeting
+
+@router.post("/submit-tool", response_model=Union[ToolResponse, ErrorResponse])
+async def submit_tool( data: ToolRequest, _ = Depends(key_check)):
+    """
+    Run the tool named in the request and return its result.
+
+    The tool metadata is loaded by ``tool_id``, the request inputs are
+    finalized against the metadata's ``inputs`` entry, and the tool is executed.
+
+    Returns:
+        ToolResponse wrapping the tool result on success. On
+        ``InputValidationError`` a JSON ``ErrorResponse`` with status 400; on
+        ``HTTPException`` a JSON ``ErrorResponse`` carrying that exception's
+        status code and detail.
+    """
+    try:
+        # The tool payload is nested under ``tool_data`` on the request model.
+        tool_data = data.tool_data
+        tool_metadata = load_tool_metadata(tool_data.tool_id)
+        finalized_inputs = finalize_inputs(tool_data.inputs, tool_metadata['inputs'])
+        return ToolResponse(data=execute_tool(tool_data.tool_id, finalized_inputs))
+
+    except InputValidationError as e:
+        logger.error(f"InputValidationError: {e}")
+        error_body = ErrorResponse(status=400, message=e.message)
+        return JSONResponse(status_code=400, content=jsonable_encoder(error_body))
+
+    except HTTPException as e:
+        logger.error(f"HTTPException: {e}")
+        error_body = ErrorResponse(status=e.status_code, message=e.detail)
+        return JSONResponse(status_code=e.status_code, content=jsonable_encoder(error_body))
+
+@router.post("/assistant-chat", response_model=ChatResponse)
+async def assistants( request: GenericAssistantRequest, _ = Depends(key_check) ):
+    """
+    Run the requested assistant and wrap its reply in a chat response.
+
+    The assistant is selected by group and name, its inputs are finalized
+    against the loaded metadata, and the result is returned as a single
+    "ai" text message.
+    """
+    assistant_inputs = request.assistant_inputs
+    group = assistant_inputs.assistant_group
+    name = assistant_inputs.assistant_name
+
+    assistant_metadata = load_assistant_metadata(group, name)
+    finalized_inputs = finalize_inputs_assistants(assistant_inputs.inputs, assistant_metadata['inputs'])
+    reply_text = execute_assistant(group, name, finalized_inputs)
+
+    reply = Message(role="ai", type="text", payload={"text": reply_text})
+    return ChatResponse(data=[reply])
\ No newline at end of file
diff --git a/app/features/text_rewriter/test_core.py b/app/features/text_rewriter/test_core.py
new file mode 100644
index 00000000..523069d9
--- /dev/null
+++ b/app/features/text_rewriter/test_core.py
@@ -0,0 +1,61 @@
+import sys
+import os
+# Put the project root (three directories up) on sys.path so the absolute
+# ``app.`` imports below resolve when this file is run directly.
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../")))
+
+import uvicorn
+from fastapi import FastAPI, HTTPException
+from app.features.text_rewriter.router import router
+from app.features.text_rewriter.core import logger
+from app.features.text_rewriter.tools import (
+    load_metadata,
+    create_input_schema,
+    create_output_schema,
+    rewrite_tool_handler,
+    get_few_shot_examples
+)
+
+# Tool description read from metadata.json
+metadata = load_metadata()
+
+# Request/response models generated from the metadata's inputs and outputs
+InputModel = create_input_schema(metadata)
+OutputModel = create_output_schema(metadata)
+
+# FastAPI application object served by uvicorn
+app = FastAPI(
+    title="Text Rewriter API",
+    description="FastAPI application for text rewriting.",
+    version="2.0.0"
+)
+
+# Mount the routes declared in router.py on this app
+app.include_router(router)
+
+@app.post("/rewrite-text", response_model=OutputModel)
+async def rewrite_text(data: InputModel):
+    """
+    Rewrite the submitted text using the few-shot rewrite tool.
+
+    ValueError from the helpers is reported as HTTP 400; any other exception
+    as HTTP 500.
+    """
+    try:
+        # Plain-dict view of the validated request body
+        payload = data.dict()
+
+        examples = get_few_shot_examples()
+
+        # Logged to help debug the prompt that is being assembled
+        logger.info(f"Few-shot examples: {examples}")
+
+        return rewrite_tool_handler(payload, examples)
+    except ValueError as ve:
+        raise HTTPException(status_code=400, detail=str(ve))
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
+
+# Start a development server when executed as a script
+if __name__ == "__main__":
+    uvicorn.run("app.features.text_rewriter.test_core:app", host="0.0.0.0", port=8000, reload=True)
\ No newline at end of file
diff --git a/app/features/text_rewriter/tools.py b/app/features/text_rewriter/tools.py
new file mode 100644
index 00000000..5ab2f725
--- /dev/null
+++ b/app/features/text_rewriter/tools.py
@@ -0,0 +1,140 @@
+from pydantic import BaseModel, create_model
+import os
+import json
+from langchain_google_genai import GoogleGenerativeAI
+from langchain_core.prompts import PromptTemplate
+from langchain_core.output_parsers import JsonOutputParser
+from dotenv import load_dotenv, find_dotenv
+
+dotenv_path = find_dotenv("text_rewriter.env")  # Looks for the .env file by name
+
+if dotenv_path:
+    print(f"Found .env file at: {dotenv_path}")
+    load_dotenv(dotenv_path)  # Load the environment variables from the found .env file
+else:
+    print("No .env file found!")
+
+api_key = os.getenv("GOOGLE_API_KEY")
+project_id = os.getenv("PROJECT_ID")
+
+# Both values are required at import time; within this module only
+# ``api_key`` is used (it is passed to the model in rewrite_tool_handler).
+if not api_key or not project_id:
+    raise ValueError("API key or project ID is missing in environment variables.")
+
+def create_input_schema(metadata: dict):
+    """
+    Dynamically creates a Pydantic model for input validation based on metadata.json.
+
+    Every input is typed as ``str``. Inputs flagged ``"required": true`` get no
+    default; all others (including entries without a ``"required"`` key)
+    default to ``None``.
+
+    Args:
+        metadata (dict): Metadata with a ``"name"`` string and an ``"inputs"`` mapping.
+
+    Returns:
+        BaseModel: A Pydantic model class named ``<name>InputSchema``.
+    """
+    fields = {
+        # A missing "required" flag is treated as optional instead of raising KeyError.
+        input_name: (str, ...) if input_spec.get("required", False) else (str, None)
+        for input_name, input_spec in metadata["inputs"].items()
+    }
+    return create_model(metadata["name"] + "InputSchema", **fields)
+
+def create_output_schema(metadata: dict):
+    """
+    Dynamically creates a Pydantic model for output validation based on metadata.json.
+
+    Every output is a required ``str`` field; the per-output spec is not consulted.
+
+    Args:
+        metadata (dict): Metadata with a ``"name"`` string and an ``"outputs"`` mapping.
+
+    Returns:
+        BaseModel: A Pydantic model class named ``<name>OutputSchema``.
+    """
+    fields = {output_name: (str, ...) for output_name in metadata["outputs"]}
+    return create_model(metadata["name"] + "OutputSchema", **fields)
+
+def get_few_shot_examples() -> str:
+    """
+    Returns the few-shot examples used for text rewriting prompts.
+
+    The text is read from ``prompt/few_shot_examples.txt`` next to this module.
+
+    Raises:
+        ValueError: If the examples file does not exist.
+    """
+    prompt_file_path = os.path.join(os.path.dirname(__file__), "prompt", "few_shot_examples.txt")
+
+    try:
+        # Explicit encoding so the prompt reads the same on every platform.
+        with open(prompt_file_path, "r", encoding="utf-8") as file:
+            return file.read()
+    except FileNotFoundError as exc:
+        raise ValueError(f"Few-shot examples file not found at {prompt_file_path}") from exc
+
+def rewrite_tool_handler(inputs: dict, few_shot_examples: str):
+    """
+    Handles the text rewriting tool request by validating inputs and executing the model.
+
+    Args:
+        inputs (dict): Request values; validated against metadata.json. The
+            dict is not modified.
+        few_shot_examples (str): Example block placed at the top of the prompt.
+
+    Returns:
+        dict: ``{"rewritten_text": ...}``; the value falls back to
+        ``"No rewritten text found"`` when the model's JSON lacks that key.
+
+    Raises:
+        ValueError: For any failure (validation, model call, output parsing).
+            The original exception is attached as the cause.
+    """
+    try:
+        metadata = load_metadata()
+        validate_inputs(inputs, metadata)
+
+        model = GoogleGenerativeAI(
+            model="gemini-1.5-pro",
+            temperature=0.7,
+            max_output_tokens=1024,
+            api_key=api_key  # Using the globally loaded API key
+        )
+
+        # Few-shot examples first, then the task and text, then the output contract.
+        prompt_template = PromptTemplate(
+            template=(
+                "{few_shot_examples}\n"
+                "Task: {instructions}\n\n"
+                "Text: {text}\n\n"
+                "Respond with a JSON object containing only the key 'rewritten_text' and its value."
+            ),
+            input_variables=["instructions", "text", "few_shot_examples"]
+        )
+
+        output_parser = JsonOutputParser(pydantic_object=dict)
+
+        pipeline = prompt_template | model | output_parser
+
+        # Build a new dict so the caller's ``inputs`` is left untouched.
+        pipeline_inputs = {**inputs, "few_shot_examples": few_shot_examples}
+
+        result = pipeline.invoke(pipeline_inputs)
+
+        return {"rewritten_text": result.get("rewritten_text", "No rewritten text found")}
+
+    except Exception as e:
+        # NOTE(review): every failure surfaces as ValueError, which the HTTP
+        # layers in this package map to status 400 - confirm that is intended
+        # for model/service errors as well.
+        raise ValueError(f"Error in rewrite_tool_handler: {str(e)}") from e
+
+def load_metadata():
+    """
+    Loads metadata from the metadata.json file located next to this module.
+
+    Returns:
+        dict: The metadata for the text rewriter tool.
+    """
+    metadata_path = os.path.join(os.path.dirname(__file__), "metadata.json")
+    with open(metadata_path, "r", encoding="utf-8") as f:
+        return json.load(f)
+
+def validate_inputs(inputs: dict, metadata: dict):
+    """
+    Validates the inputs using the provided metadata.
+
+    Each input declared in ``metadata["inputs"]`` must be present when it is
+    flagged as required, and must be a ``str`` whenever it is present.
+
+    Args:
+        inputs (dict): The input data to validate.
+        metadata (dict): The metadata that defines the valid inputs.
+
+    Raises:
+        ValueError: If a required input is missing or a supplied input is not a string.
+    """
+    for input_key, input_spec in metadata["inputs"].items():
+        # A missing "required" flag is treated as optional instead of raising KeyError.
+        if input_spec.get("required", False) and input_key not in inputs:
+            raise ValueError(f"Missing required input: {input_key}")
+        if input_key in inputs and not isinstance(inputs[input_key], str):
+            raise ValueError(f"Invalid type for input '{input_key}'. Expected: {input_spec['type']}")
\ No newline at end of file