Commit: Improve chat example (#4)
* Add PDF processing

* Remove unused code and document environment variables

* Fix spelling

* Bump version and enable automatic PyPI publishing

* Move GitHub workflow to correct folder
stefan-berkner-tilotech authored Oct 2, 2024
1 parent 0fee16d commit fbe31b0
Showing 6 changed files with 122 additions and 8 deletions.
30 changes: 30 additions & 0 deletions .github/workflows/publish.yml
@@ -0,0 +1,30 @@
+name: Upload Python Package to PyPI when a Release is Created
+
+on:
+  release:
+    types: [created]
+
+jobs:
+  pypi-publish:
+    name: Publish release to PyPI
+    runs-on: ubuntu-latest
+    environment:
+      name: pypi
+      url: https://pypi.org/p/langchain-tilores
+    permissions:
+      id-token: write
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.x"
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install build
+      - name: Build package
+        run: |
+          python -m build
+      - name: Publish package distributions to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
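The workflow above relies on PyPI trusted publishing: the `id-token: write` permission lets `pypa/gh-action-pypi-publish` authenticate via OIDC, so no API token secret has to be stored in the repository. As a local sanity check before cutting a release, the build step can be mirrored by hand — a minimal sketch, assuming the `build` package is installed and the script runs from the project root:

```python
# Minimal sketch mirroring the workflow's build step (assumes `pip install build`
# has been run; the artifact name in the comment is illustrative).
import pathlib
import subprocess

subprocess.run(["python", "-m", "build"], check=True)  # writes sdist + wheel to dist/
for artifact in sorted(pathlib.Path("dist").iterdir()):
    print(artifact.name)  # e.g. langchain_tilores-0.2.0-py3-none-any.whl
```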
29 changes: 28 additions & 1 deletion examples/chat/README.md
@@ -16,7 +16,7 @@ Tilores entity resolution system.
 It automatically adapts to any Tilores instance schema and supports any number of tools.
 
 ## Prerequisites
-To start the demo, you'll need to input your OpenAI API key and the connection information for a Tilores instance as environment variables. For your convenience, the Tilores connection details for the demo instance have already been pre-filled.
+By default, the demo uses OpenAI's GPT-4o mini as the model. To start the demo, you'll need to input your OpenAI API key and the connection information for a Tilores instance as environment variables. For your convenience, the Tilores connection details for the demo instance have already been pre-filled.
 
 ```
 export OPENAI_API_KEY='your key'
@@ -26,6 +26,27 @@ export TILORES_CLIENT_ID='3l3i0ifjurnr58u4lgf0eaeqa3'
 export TILORES_CLIENT_SECRET='1c0g3v0u7pf1bvb7v65pauqt6s0h3vkkcf9u232u92ov3lm4aun2'
 ```
 
+## Switching OpenAI Model or Using Bedrock
+
+Set the following environment variable if you would like to switch to another
+OpenAI model:
+
+```
+export OPENAI_MODEL_NAME='gpt-4o'
+```
+
+If you would rather switch to one of the models provided by AWS Bedrock, you
+can use the following environment variables instead. Note that you must have
+valid credentials available for authenticating against the AWS services you
+want to use.
+
+```
+export LLM_PROVIDER=Bedrock
+export BEDROCK_CREDENTIALS_PROFILE_NAME=my-aws-profile
+export BEDROCK_REGION=us-east-1
+export BEDROCK_MODEL_ID=anthropic.claude-3-5-sonnet-20240620-v1:0
+```
+
 ## Demo
 
 The demo can be used with a preconfigured Tilores instance that is already loaded with some sample data.
@@ -36,6 +57,12 @@ $ pip install -r requirements.txt
 $ chainlit run chat.py -w
 ```
 
+If you want to test the automatic lookup from PDFs, you must also have poppler-utils installed:
+
+```
+sudo apt-get install poppler-utils
+```
+
 ## Example Data
 ```
 Firstname: Sophia
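A note on the poppler requirement documented above: `UnstructuredPDFLoader` rasterizes PDFs through poppler's command-line tools, which is why the demo needs the system package. A minimal pre-flight check — assuming, as is typical for the pdf2image path, that the `pdfinfo`/`pdftoppm` binaries from poppler-utils are what gets invoked:

```python
# Pre-flight check for the PDF toolchain; pdfinfo and pdftoppm come from
# poppler-utils (assumption: the loader's PDF path shells out to them).
import shutil

for tool in ("pdfinfo", "pdftoppm"):
    if shutil.which(tool) is None:
        raise SystemExit(f"{tool} not found - install poppler-utils first")
print("poppler toolchain available")
```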
53 changes: 49 additions & 4 deletions examples/chat/chat.py
@@ -1,14 +1,20 @@
 import operator
 from typing import *
+import requests
+import tempfile
+import os
 
 # LangChain
 from langchain.tools import BaseTool
 from langchain_core.messages import AnyMessage, HumanMessage
 from langchain_openai import ChatOpenAI
+from langchain_aws import ChatBedrock
 
 # LangGraph
 from langgraph.checkpoint.memory import MemorySaver
 from langgraph.prebuilt import create_react_agent
+from langchain_community.document_loaders import UnstructuredPDFLoader
+from langchain.tools import Tool
 
 # Tilores
 from tilores import TiloresAPI
@@ -44,16 +50,29 @@ async def _arun(self, query: str, run_manager=None) -> str:
 class ChatState(TypedDict):
     messages: Annotated[Sequence[AnyMessage], operator.add]
 
 @cl.on_chat_start
 def start():
-    llm = ChatOpenAI(temperature=0, streaming=True, model_name="gpt-4o")
+    if os.environ.get("LLM_PROVIDER") == "Bedrock":
+        llm = ChatBedrock(
+            credentials_profile_name=os.environ["BEDROCK_CREDENTIALS_PROFILE_NAME"],
+            region_name=os.environ["BEDROCK_REGION"],
+            model_id=os.environ["BEDROCK_MODEL_ID"],
+            streaming=True,
+            model_kwargs={"temperature": 0},
+        )
+    else:
+        model_name = "gpt-4o-mini"
+        if os.environ.get("OPENAI_MODEL_NAME"):
+            model_name = os.environ.get("OPENAI_MODEL_NAME")
+        llm = ChatOpenAI(temperature=0, streaming=True, model_name=model_name)
 
     # Setup a connection to the Tilores instance and provide it as a tool
     tilores = TiloresAPI.from_environ()
     tilores_tools = TiloresTools(tilores)
     tools = [
         HumanInputChainlit(),
         tilores_tools.search_tool(),
+        pdf_tool,
     ]
     # Use MemorySaver to retain the full conversation history
     memory = MemorySaver()
@@ -65,7 +84,6 @@ def start():
     cl.user_session.set("runnable", agent)
     cl.user_session.set("state", state)
 
-
 @cl.on_message
 async def main(message: cl.Message):
     # Retrieve the runnable and state from the user session
@@ -80,7 +98,34 @@ async def main(message: cl.Message):
     await ui_message.send()
     async for event in runnable.astream_events(state, version="v1", config={'configurable': {'thread_id': 'thread-1'}}):
         if event["event"] == "on_chat_model_stream":
-            content = event["data"]["chunk"].content or ""
+            c = event["data"]["chunk"].content
+            # Bedrock (Anthropic) chunks arrive as a list of typed blocks; OpenAI chunks as plain strings
+            if c and len(c) > 0 and isinstance(c[0], dict) and c[0]["type"] == "text":
+                content = c[0]["text"]
+            elif isinstance(c, str):
+                content = c
+            else:
+                content = ""
             await ui_message.stream_token(token=content)
 
     await ui_message.update()
+
+def load_pdf_from_url(url: str):
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
+    }
+    response = requests.get(url, headers=headers)
+    if response.status_code == 200:
+        # Keep the file on disk (delete=False) so the loader can read it after the with block
+        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_pdf:
+            temp_pdf.write(response.content)
+
+        loader = UnstructuredPDFLoader(temp_pdf.name)
+        documents = loader.load()
+        return documents
+    else:
+        raise Exception(f"Failed to download PDF from {url}. Status code: {response.status_code}")
+
+pdf_tool = Tool(
+    name="load_pdf",
+    func=load_pdf_from_url,
+    description="useful for when you need to download and process a PDF file from a given URL"
+)
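The new stream handler in `main` has to cope with two chunk shapes: OpenAI models emit `content` as a plain string, while Anthropic models on Bedrock emit a list of typed content blocks. A standalone sketch of that normalization (`chunk_text` is an illustrative helper, not part of the example code):

```python
# Illustrative normalization of the two chunk shapes handled in main() above.
def chunk_text(content) -> str:
    if isinstance(content, str):  # OpenAI-style chunks carry a plain string
        return content
    if isinstance(content, list) and content and isinstance(content[0], dict):
        block = content[0]  # Bedrock/Anthropic-style typed content blocks
        return block.get("text", "") if block.get("type") == "text" else ""
    return ""

assert chunk_text("Hello") == "Hello"
assert chunk_text([{"type": "text", "text": "Hello"}]) == "Hello"
assert chunk_text(None) == ""
```

Note that `load_pdf_from_url` passes `delete=False` so the temporary file still exists when `UnstructuredPDFLoader` reads it after the `with` block; the file is never cleaned up, which is acceptable for a demo but worth knowing.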
6 changes: 6 additions & 0 deletions examples/chat/requirements.txt
@@ -1,4 +1,10 @@
 langchain-tilores[all]>=0.1.0
 langgraph
 langchain_openai
+langchain_aws
+langchain-community
+langchain-unstructured
 chainlit
+unstructured
+pdfminer.six
+unstructured[pdf]
10 changes: 8 additions & 2 deletions langchain_tilores/tilores_tools.py
@@ -21,15 +21,21 @@ def references(self):
 
     def all(self):
         return [
-            # self.record_fields_tool,
             self.search_tool
         ]
 
     def search_tool(self):
         return StructuredTool.from_function(**{
             'name': 'tilores_search',
-            'description': 'useful for when you need to search entities on variable parameters',
+            'description': 'useful for when you need to search one or more entities; each entity is a list of records with varying information which refer to the same real world entity',
             'args_schema': self.references['SearchParams'],
             'return_direct': True,
             'func': self.tilores_api.search
         })
+
+
+def static_value(val):
+    def wrapper():
+        print("requested values")
+        return val
+    return wrapper
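For orientation, a hypothetical direct invocation of the search tool constructed above. The argument schema (`SearchParams`) is generated from the connected Tilores instance, so the field name below is a placeholder, and the `langchain_tilores` import path is assumed from the example's usage in chat.py:

```python
# Hypothetical usage sketch; "firstname" is a placeholder - the real parameter
# names come from the SearchParams schema of the connected Tilores instance.
from tilores import TiloresAPI
from langchain_tilores import TiloresTools  # assumed export path

tilores = TiloresAPI.from_environ()
search = TiloresTools(tilores).search_tool()
print(search.invoke({"firstname": "Sophia"}))
```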
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "langchain-tilores"
-version = "0.1.4"
+version = "0.2.0"
 authors = [
     { name="Lukas Rieder", email="lukas@parlant.co" },
 ]