# server.py

import asyncio
import json
import traceback
import uuid
from typing import Literal, TypedDict

from fastapi import FastAPI, HTTPException, Response
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

from model.faiss import Node
from model.rag import GuidedRagTopicAndNodes, IterativeRagResult
from services.rag import guided_rag, iterative_rag, naive_rag

# Application object; the route handlers in this module register on it via
# the @app.post decorators.
app = FastAPI()

# CORS is configured fully open: every origin, method and header is allowed.
# NOTE(review): FastAPI's CORS documentation states that wildcard
# origins/methods/headers should not be combined with allow_credentials=True.
# Confirm whether credentialed requests are actually needed; if so, list the
# allowed origins explicitly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allows all origins
    allow_credentials=True,
    allow_methods=["*"],  # Allows all methods (GET, POST, etc.)
    allow_headers=["*"],  # Allows all headers
)


class IterativeRagRequest(BaseModel):
    # Request body accepted by POST /iterative_rag. The three fields are
    # forwarded, in this order, as the positional arguments of iterative_rag.

    # The user's question.
    question: str
    # Second argument to iterative_rag; presumably caps the number of
    # iterations the pipeline may run — confirm in services.rag.
    max_steps: int
    # Third argument to iterative_rag; presumably the number of nodes
    # retrieved per step — confirm in services.rag.
    top_K: int


class IterativeRagResponse(IterativeRagResult):
    # Type of the JSON payload returned by POST /iterative_rag: every key of
    # IterativeRagResult plus the two keys below.
    # NOTE(review): assumes IterativeRagResult is a TypedDict (the handler
    # builds this value as a dict literal) — confirm in model.rag.

    # Fresh UUID4 string generated by the handler for each response.
    id: str
    # Constant tag identifying which RAG variant produced the payload.
    type: Literal["iterative"]


# Shared lock acquired by every RAG handler in this module around its call
# into services.rag, so at most one of those calls is in flight at a time.
# Presumably the underlying LLM cannot serve concurrent requests — confirm.
LLM_LOCK = asyncio.Lock()


@app.post("/iterative_rag")
async def iterative_rag_handler(request: IterativeRagRequest):
    """Answer a question with the iterative RAG pipeline.

    Calls ``iterative_rag`` with the request's ``question``, ``max_steps``
    and ``top_K`` while holding ``LLM_LOCK``, merges its result with a fresh
    UUID4 ``id`` and ``type="iterative"``, and returns the merged mapping as
    a JSON response.

    Args:
        request: Parsed request body.

    Returns:
        A ``Response`` with media type ``application/json``.

    Raises:
        HTTPException: With status 500 if the pipeline (or merging its
            result) raises an ``Exception``. The traceback is printed to
            stdout before the error is raised.
    """
    # ``async with`` guarantees the lock is released on every exit path, so
    # there is no release() call to forget in any branch.
    async with LLM_LOCK:
        try:
            # NOTE(review): iterative_rag is called synchronously (no await),
            # so if it blocks, the event loop is stalled for its duration.
            response: IterativeRagResponse = {
                "id": str(uuid.uuid4()),
                "type": "iterative",
                **iterative_rag(request.question, request.max_steps, request.top_K),
            }
        except Exception as exc:
            # Catch Exception rather than using a bare ``except:`` so that
            # KeyboardInterrupt and SystemExit propagate instead of being
            # converted into an HTTP 500.
            print(traceback.format_exc())
            raise HTTPException(
                status_code=500,
                detail="Internal Server Error. Please try another question or adjusting parameters (they may be set to high).",
            ) from exc
    return Response(json.dumps(response), media_type="application/json")


class GuidedRagRequest(BaseModel):
    # Request body accepted by POST /guided_rag. The three fields are
    # forwarded, in this order, as the positional arguments of guided_rag.

    # The user's question; also echoed back in the response.
    question: str
    # Second argument to guided_rag; presumably caps how many topics the
    # pipeline may explore — confirm in services.rag.
    max_topics: int
    # Third argument to guided_rag; presumably the number of nodes retrieved
    # for each topic — confirm in services.rag.
    top_K_per_topic: int


class GuidedRagResponse(TypedDict):
    # Type of the JSON payload returned by POST /guided_rag.

    # Fresh UUID4 string generated by the handler for each response.
    id: str
    # First element of the tuple returned by guided_rag.
    answer: str
    # The question from the request, echoed back unchanged.
    question: str
    # Second element of the tuple returned by guided_rag.
    topics_and_nodes: list[GuidedRagTopicAndNodes]
    # Constant tag identifying which RAG variant produced the payload.
    type: Literal["guided"]


@app.post("/guided_rag")
async def guided_rag_handler(request: GuidedRagRequest):
    """Answer a question with the guided RAG pipeline.

    Calls ``guided_rag`` with the request's ``question``, ``max_topics`` and
    ``top_K_per_topic`` while holding ``LLM_LOCK``, then returns the answer,
    the echoed question, the topics/nodes, a fresh UUID4 ``id`` and
    ``type="guided"`` as a JSON response.

    Args:
        request: Parsed request body.

    Returns:
        A ``Response`` with media type ``application/json``.

    Raises:
        HTTPException: With status 500 if the pipeline raises an
            ``Exception``. The traceback is printed to stdout before the
            error is raised.
    """
    # ``async with`` guarantees the lock is released on every exit path, so
    # there is no release() call to forget in any branch.
    async with LLM_LOCK:
        try:
            # NOTE(review): guided_rag is called synchronously (no await), so
            # if it blocks, the event loop is stalled for its duration.
            answer, topics_and_nodes = guided_rag(
                request.question, request.max_topics, request.top_K_per_topic
            )
        except Exception as exc:
            # Catch Exception rather than using a bare ``except:`` so that
            # KeyboardInterrupt and SystemExit propagate instead of being
            # converted into an HTTP 500.
            print(traceback.format_exc())
            raise HTTPException(
                status_code=500,
                detail="Internal Server Error. Please try another question or adjusting parameters (they may be set to high).",
            ) from exc
    # The payload is assembled after the lock is released; key order is kept
    # so the serialized JSON is unchanged.
    response: GuidedRagResponse = {
        "answer": answer,
        "question": request.question,
        "topics_and_nodes": topics_and_nodes,
        "id": str(uuid.uuid4()),
        "type": "guided",
    }
    return Response(json.dumps(response), media_type="application/json")


class NaiveRagRequest(BaseModel):
    # Request body accepted by POST /naive_rag. Both fields are forwarded, in
    # this order, as the positional arguments of naive_rag.

    # The user's question; also echoed back in the response.
    question: str
    # Second argument to naive_rag; presumably the number of nodes to
    # retrieve — confirm in services.rag.
    top_K: int


class NaiveRagResponse(TypedDict):
    # Type of the JSON payload returned by POST /naive_rag.

    # First element of the tuple returned by naive_rag.
    answer: str
    # The question from the request, echoed back unchanged.
    question: str
    # Second element of the tuple returned by naive_rag.
    sources: list[Node]
    # Fresh UUID4 string generated by the handler for each response.
    id: str
    # Constant tag identifying which RAG variant produced the payload.
    type: Literal["naive"]


@app.post("/naive_rag")
async def naive_rag_handler(request: NaiveRagRequest):
    """Answer a question with the naive RAG pipeline.

    Calls ``naive_rag`` with the request's ``question`` and ``top_K`` while
    holding ``LLM_LOCK``, then returns the answer, the echoed question, the
    source nodes, a fresh UUID4 ``id`` and ``type="naive"`` as a JSON
    response.

    Args:
        request: Parsed request body.

    Returns:
        A ``Response`` with media type ``application/json``.

    Raises:
        HTTPException: With status 500 if the pipeline raises an
            ``Exception``. The traceback is printed to stdout before the
            error is raised.
    """
    # ``async with`` guarantees the lock is released on every exit path, so
    # there is no release() call to forget in any branch.
    async with LLM_LOCK:
        try:
            # NOTE(review): naive_rag is called synchronously (no await), so
            # if it blocks, the event loop is stalled for its duration.
            answer, nodes = naive_rag(request.question, request.top_K)
        except Exception as exc:
            # Catch Exception rather than using a bare ``except:`` so that
            # KeyboardInterrupt and SystemExit propagate instead of being
            # converted into an HTTP 500.
            print(traceback.format_exc())
            raise HTTPException(
                status_code=500,
                detail="Internal Server Error. Please try another question or adjusting parameters (they may be set to high).",
            ) from exc
    # The lock only needs to cover the pipeline call, so the payload is
    # assembled after release; key order is kept so the serialized JSON is
    # unchanged.
    response: NaiveRagResponse = {
        "answer": answer,
        "question": request.question,
        "sources": nodes,
        "id": str(uuid.uuid4()),
        "type": "naive",
    }
    return Response(json.dumps(response), media_type="application/json")