Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: assistants #2421

Merged
merged 7 commits into from
Apr 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 19 additions & 46 deletions backend/modules/assistant/controller/assistant_routes.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from typing import List
from uuid import UUID

from fastapi import APIRouter, Depends, File, Query, UploadFile
from fastapi import APIRouter, Depends, HTTPException, UploadFile
from logger import get_logger
from middlewares.auth import AuthBearer, get_current_user
from modules.assistant.dto.inputs import InputAssistant
from modules.assistant.dto.outputs import AssistantOutput
from modules.assistant.ito.audio_transcript import AudioTranscriptAssistant
from modules.assistant.ito.crawler import CrawlerAssistant
from modules.assistant.ito.audio_transcript import audio_transcript_inputs
from modules.assistant.ito.crawler import crawler_inputs
from modules.assistant.ito.summary import SummaryAssistant, summary_inputs
from modules.assistant.service.assistant import Assistant
from modules.user.entity.user_identity import UserIdentity
Expand All @@ -28,55 +28,28 @@ async def list_assistants(
"""

summary = summary_inputs()
return [summary]
crawler = crawler_inputs()
audio_transcript = audio_transcript_inputs()
return [summary, crawler, audio_transcript]


@assistant_router.post(
"/assistant/{ingestion_id}/process",
"/assistant/process",
dependencies=[Depends(AuthBearer())],
tags=["Assistant"],
)
async def process_assistant(
ingestion_id: UUID,
file_1: UploadFile = File(None),
input: InputAssistant,
files: List[UploadFile] = None,
current_user: UserIdentity = Depends(get_current_user),
brain_id: UUID = Query(None, description="The ID of the brain"),
send_file_email: bool = Query(False, description="Send the file by email"),
url: str = Query(None, description="The URL to process"),
):
if ingestion_id is None:
raise ValueError("Ingestion ID is required")

assistant = assistant_service.get_assistant_by_id(ingestion_id)

if assistant.name == "summary":
summary = SummaryAssistant(
uploadFile=file_1,
current_user=current_user,
brain_id=brain_id,
send_file_email=send_file_email,
url=url,
)
return await summary.process_assistant()

if assistant.name == "audio_transcript":
audio_summary = AudioTranscriptAssistant(
uploadFile=file_1,
current_user=current_user,
brain_id=brain_id,
send_file_email=send_file_email,
url=url,
if input.name == "summary":
summary_assistant = SummaryAssistant(
input=input, files=files, current_user=current_user
)
return await audio_summary.process_assistant()

if assistant.name == "crawler":
crawler = CrawlerAssistant(
uploadFile=file_1,
current_user=current_user,
brain_id=brain_id,
send_file_email=send_file_email,
url=url,
)
return await crawler.process_assistant()

return {"message": "Not found"}
try:
summary_assistant.check_input()
return await summary_assistant.process_assistant()
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
return {"message": "Assistant not found"}
64 changes: 29 additions & 35 deletions backend/modules/assistant/dto/inputs.py
Original file line number Diff line number Diff line change
@@ -1,57 +1,51 @@
from typing import List
import json
from typing import List, Optional
from uuid import UUID

from pydantic import BaseModel
from pydantic import BaseModel, model_validator


class InputFile(BaseModel):
allowed_extensions: List[str]
required: bool
description: str
class EmailInput(BaseModel):
activated: bool


class InputUrl(BaseModel):
required: bool
description: bool
class BrainInput(BaseModel):
activated: bool
value: UUID


class InputText(BaseModel):
required: bool
description: bool
class FileInput(BaseModel):
key: str
value: str


class Inputs(BaseModel):
files: List[InputFile]
urls: List[InputUrl]
texts: List[InputText]


class OutputEmail(BaseModel):
required: bool
description: str
type: str
class UrlInput(BaseModel):
key: str
value: str


class OutputBrain(BaseModel):
required: bool
description: str
type: UUID
class TextInput(BaseModel):
key: str
value: str


class Outputs(BaseModel):
emails: OutputEmail
brains: OutputBrain
class Inputs(BaseModel):
files: Optional[List[FileInput]] = None
urls: Optional[List[UrlInput]] = None
texts: Optional[List[TextInput]] = None


class Outputs(BaseModel):
files: List[InputFile]
urls: List[InputUrl]
texts: List[InputText]
email: Optional[EmailInput] = None
brain: Optional[BrainInput] = None


class AssistantOutput(BaseModel):
class InputAssistant(BaseModel):
name: str
input_description: str
output_description: str
inputs: Inputs
outputs: Outputs

@model_validator(mode="before")
@classmethod
def to_py_dict(cls, data):
    """Decode a JSON-encoded payload before field validation runs.

    This is a ``mode="before"`` validator, so ``data`` is the raw value
    handed to the model and is not guaranteed to be text.

    Args:
        data: The raw input. A ``str``, ``bytes`` or ``bytearray`` is
            treated as a JSON document and decoded; any other value
            (for example an already-built ``dict``) is returned unchanged.
            NOTE(review): the text form presumably arrives as a form field
            sent alongside uploaded files -- confirm against the route.

    Returns:
        The decoded JSON value for textual input, otherwise ``data`` as is.

    Raises:
        json.JSONDecodeError: If textual input is not valid JSON.
    """
    # json.loads only accepts str/bytes/bytearray; calling it on a dict
    # raises TypeError, so non-textual input must bypass the decode step.
    if isinstance(data, (str, bytes, bytearray)):
        return json.loads(data)
    return data
1 change: 1 addition & 0 deletions backend/modules/assistant/dto/outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,4 @@ class AssistantOutput(BaseModel):
output_description: str
inputs: Inputs
outputs: Outputs
icon_url: Optional[str] = None
40 changes: 40 additions & 0 deletions backend/modules/assistant/ito/audio_transcript.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@
from tempfile import NamedTemporaryFile

from logger import get_logger
from modules.assistant.dto.outputs import (
AssistantOutput,
InputFile,
Inputs,
OutputBrain,
OutputEmail,
Outputs,
)
from modules.assistant.ito.ito import ITO
from openai import OpenAI

Expand Down Expand Up @@ -45,3 +53,35 @@ async def process_assistant(self):
return await self.create_and_upload_processed_file(
transcription, self.uploadFile.filename, "Audio Transcript"
)


def audio_transcript_inputs():
    """Describe the "Audio Transcript" assistant as an ``AssistantOutput``.

    The descriptor declares a single required file input keyed
    ``audio_file`` (mp3, wav, ogg or m4a) and two outputs, a brain and an
    email, both flagged as required.

    Returns:
        The populated ``AssistantOutput`` instance.
    """
    # Input side: exactly one audio file.
    audio_file = InputFile(
        key="audio_file",
        allowed_extensions=["mp3", "wav", "ogg", "m4a"],
        required=True,
        description="The audio file to transcribe",
    )
    accepted_inputs = Inputs(files=[audio_file])

    # Output side: where the generated document can be delivered.
    brain_target = OutputBrain(
        required=True,
        description="The brain to which to upload the document",
        type="uuid",
    )
    email_target = OutputEmail(
        required=True,
        description="Send the document by email",
        type="str",
    )
    produced_outputs = Outputs(brain=brain_target, email=email_target)

    return AssistantOutput(
        name="Audio Transcript",
        description="Transcribes an audio file",
        input_description="One audio file to transcribe",
        output_description="Transcription of the audio file",
        inputs=accepted_inputs,
        outputs=produced_outputs,
    )
39 changes: 39 additions & 0 deletions backend/modules/assistant/ito/crawler.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
from bs4 import BeautifulSoup as Soup
from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader
from logger import get_logger
from modules.assistant.dto.outputs import (
AssistantOutput,
Inputs,
InputUrl,
OutputBrain,
OutputEmail,
Outputs,
)
from modules.assistant.ito.ito import ITO

logger = get_logger(__name__)
Expand Down Expand Up @@ -31,3 +39,34 @@ async def process_assistant(self):
await self.create_and_upload_processed_file(
docs.page_content, nice_url, "Crawler"
)


def crawler_inputs():
    """Describe the "Crawler" assistant as an ``AssistantOutput``.

    The descriptor declares a single required URL input keyed ``url`` and
    two outputs, a brain and an email, both flagged as required.

    Returns:
        The populated ``AssistantOutput`` instance.
    """
    # Input side: exactly one URL to start crawling from.
    start_url = InputUrl(
        key="url",
        required=True,
        description="The URL to crawl",
    )
    accepted_inputs = Inputs(urls=[start_url])

    # Output side: where the extracted text can be delivered.
    brain_target = OutputBrain(
        required=True,
        description="The brain to which upload the document",
        type="uuid",
    )
    email_target = OutputEmail(
        required=True,
        description="Send the document by email",
        type="str",
    )
    produced_outputs = Outputs(brain=brain_target, email=email_target)

    return AssistantOutput(
        name="Crawler",
        description="Crawls a website and extracts the text from the pages",
        input_description="One URL to crawl",
        output_description="Text extracted from the pages",
        inputs=accepted_inputs,
        outputs=produced_outputs,
    )
47 changes: 13 additions & 34 deletions backend/modules/assistant/ito/ito.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
from abc import abstractmethod
from io import BytesIO
from tempfile import NamedTemporaryFile
from uuid import UUID
from typing import List

from fastapi import UploadFile
from logger import get_logger
from modules.assistant.dto.outputs import AssistantOutput
from modules.assistant.dto.inputs import InputAssistant
from modules.contact_support.controller.settings import ContactsSettings
from modules.upload.controller.upload_routes import upload_file
from modules.user.entity.user_identity import UserIdentity
Expand All @@ -17,36 +17,14 @@


class ITO(BaseModel):
uploadFile: UploadFile | None = None
current_user: UserIdentity = None
brain_id: UUID | None = None
send_file_email: bool = False
url: str | None = None

def __init__(
self,
uploadFile: UploadFile,
current_user: UserIdentity,
brain_id: UUID,
send_file_email: bool = False,
url: str = None,
):
super().__init__(
uploadFile=uploadFile,
current_user=current_user,
brain_id=brain_id,
send_file_email=send_file_email,
url=url,
)
input: InputAssistant
files: List[UploadFile]
current_user: UserIdentity

@abstractmethod
async def process_assistant(self):
pass

@abstractmethod
def assistant_inputs(self) -> AssistantOutput:
pass

async def send_output_by_email(
self, file: UploadFile, name: str, custom_message: str = None
):
Expand Down Expand Up @@ -100,7 +78,7 @@ async def create_and_upload_processed_file(
headers={"content-type": "text/plain"},
)

if self.send_file_email:
if self.input.outputs.email.activated:
await self.send_output_by_email(
file_to_upload,
new_filename,
Expand All @@ -109,11 +87,12 @@ async def create_and_upload_processed_file(

# Reset to start of file before upload
file_to_upload.file.seek(0)
await upload_file(
uploadFile=file_to_upload,
brain_id=self.brain_id,
current_user=self.current_user,
chat_id=None,
)
if self.input.outputs.brain.activated:
await upload_file(
uploadFile=file_to_upload,
brain_id=self.input.outputs.brain.value,
current_user=self.current_user,
chat_id=None,
)

return {"message": f"{file_description} generated successfully"}
Loading