From eb1aab1fc7dc38bbcdfc0c3a7b640c4ab6e2846a Mon Sep 17 00:00:00 2001 From: Sarah Wooders Date: Thu, 10 Oct 2024 14:07:45 -0700 Subject: [PATCH 1/6] chore: fix branch (#1865) --- letta/cli/cli.py | 27 +++++++++++++++++++++++++-- letta/llm_api/openai.py | 7 +++++-- letta/providers.py | 21 ++++++++++++++++----- letta/server/server.py | 5 +++-- letta/settings.py | 2 +- 5 files changed, 50 insertions(+), 12 deletions(-) diff --git a/letta/cli/cli.py b/letta/cli/cli.py index 31a567e1de..160615b7b6 100644 --- a/letta/cli/cli.py +++ b/letta/cli/cli.py @@ -14,7 +14,9 @@ from letta.local_llm.constants import ASSISTANT_MESSAGE_CLI_SYMBOL from letta.log import get_logger from letta.metadata import MetadataStore +from letta.schemas.embedding_config import EmbeddingConfig from letta.schemas.enums import OptionState +from letta.schemas.llm_config import LLMConfig from letta.schemas.memory import ChatMemory, Memory from letta.server.server import logger as server_logger @@ -233,25 +235,46 @@ def run( # choose from list of llm_configs llm_configs = client.list_llm_configs() llm_options = [llm_config.model for llm_config in llm_configs] + + # TODO move into LLMConfig as a class method? + def prettify_llm_config(llm_config: LLMConfig) -> str: + return f"{llm_config.model}" + f" ({llm_config.model_endpoint})" if llm_config.model_endpoint else "" + + llm_choices = [questionary.Choice(title=prettify_llm_config(llm_config), value=llm_config) for llm_config in llm_configs] + # select model if len(llm_options) == 0: raise ValueError("No LLM models found. Please enable a provider.") elif len(llm_options) == 1: llm_model_name = llm_options[0] else: - llm_model_name = questionary.select("Select LLM model:", choices=llm_options).ask() + llm_model_name = questionary.select("Select LLM model:", choices=llm_choices).ask().model llm_config = [llm_config for llm_config in llm_configs if llm_config.model == llm_model_name][0] # choose form list of embedding configs embedding_configs = client.list_embedding_configs() embedding_options = [embedding_config.embedding_model for embedding_config in embedding_configs] + + # TODO move into EmbeddingConfig as a class method? + def prettify_embed_config(embedding_config: EmbeddingConfig) -> str: + return ( + f"{embedding_config.embedding_model}" + f" ({embedding_config.embedding_endpoint})" + if embedding_config.embedding_endpoint + else "" + ) + + embedding_choices = [ + questionary.Choice(title=prettify_embed_config(embedding_config), value=embedding_config) + for embedding_config in embedding_configs + ] + # select model if len(embedding_options) == 0: raise ValueError("No embedding models found. 
Please enable a provider.") elif len(embedding_options) == 1: embedding_model_name = embedding_options[0] else: - embedding_model_name = questionary.select("Select embedding model:", choices=embedding_options).ask() + embedding_model_name = questionary.select("Select embedding model:", choices=embedding_choices).ask().embedding_model embedding_config = [ embedding_config for embedding_config in embedding_configs if embedding_config.embedding_model == embedding_model_name ][0] diff --git a/letta/llm_api/openai.py b/letta/llm_api/openai.py index 69a50fc264..f60150ee2a 100644 --- a/letta/llm_api/openai.py +++ b/letta/llm_api/openai.py @@ -41,7 +41,9 @@ OPENAI_SSE_DONE = "[DONE]" -def openai_get_model_list(url: str, api_key: Union[str, None], fix_url: Optional[bool] = False) -> dict: +def openai_get_model_list( + url: str, api_key: Union[str, None], fix_url: Optional[bool] = False, extra_params: Optional[dict] = None +) -> dict: """https://platform.openai.com/docs/api-reference/models/list""" from letta.utils import printd @@ -60,7 +62,8 @@ def openai_get_model_list(url: str, api_key: Union[str, None], fix_url: Optional printd(f"Sending request to {url}") try: - response = requests.get(url, headers=headers) + # TODO add query param "tool" to be true + response = requests.get(url, headers=headers, params=extra_params) response.raise_for_status() # Raises HTTPError for 4XX/5XX status response = response.json() # convert to dict from string printd(f"response = {response}") diff --git a/letta/providers.py b/letta/providers.py index 361d1728db..761fcd7ee7 100644 --- a/letta/providers.py +++ b/letta/providers.py @@ -53,17 +53,28 @@ def list_embedding_models(self): class OpenAIProvider(Provider): name: str = "openai" api_key: str = Field(..., description="API key for the OpenAI API.") - base_url: str = "https://api.openai.com/v1" + base_url: str = Field(..., description="Base URL for the OpenAI API.") def list_llm_models(self) -> List[LLMConfig]: from letta.llm_api.openai import openai_get_model_list - response = openai_get_model_list(self.base_url, api_key=self.api_key) - model_options = [obj["id"] for obj in response["data"]] + # Some hardcoded support for OpenRouter (so that we only get models with tool calling support)... 
+ # See: https://openrouter.ai/docs/requests + extra_params = {"supported_parameters": "tools"} if "openrouter.ai" in self.base_url else None + response = openai_get_model_list(self.base_url, api_key=self.api_key, extra_params=extra_params) + + assert "data" in response, f"OpenAI model query response missing 'data' field: {response}" configs = [] - for model_name in model_options: - context_window_size = self.get_model_context_window_size(model_name) + for model in response["data"]: + assert "id" in model, f"OpenAI model missing 'id' field: {model}" + model_name = model["id"] + + if "context_length" in model: + # Context length is returned in OpenRouter as "context_length" + context_window_size = model["context_length"] + else: + context_window_size = self.get_model_context_window_size(model_name) if not context_window_size: continue diff --git a/letta/server/server.py b/letta/server/server.py index 5088e9effc..b37ec867d4 100644 --- a/letta/server/server.py +++ b/letta/server/server.py @@ -50,6 +50,7 @@ LettaProvider, OllamaProvider, OpenAIProvider, + Provider, VLLMProvider, ) from letta.schemas.agent import AgentState, AgentType, CreateAgent, UpdateAgentState @@ -261,9 +262,9 @@ def __init__( self.add_default_tools(module_name="base") # collect providers (always has Letta as a default) - self._enabled_providers = [LettaProvider()] + self._enabled_providers: List[Provider] = [LettaProvider()] if model_settings.openai_api_key: - self._enabled_providers.append(OpenAIProvider(api_key=model_settings.openai_api_key)) + self._enabled_providers.append(OpenAIProvider(api_key=model_settings.openai_api_key, base_url=model_settings.openai_api_base)) if model_settings.anthropic_api_key: self._enabled_providers.append(AnthropicProvider(api_key=model_settings.anthropic_api_key)) if model_settings.ollama_base_url: diff --git a/letta/settings.py b/letta/settings.py index 8b7fee277f..12d425670a 100644 --- a/letta/settings.py +++ b/letta/settings.py @@ -11,7 +11,7 @@ class ModelSettings(BaseSettings): # openai openai_api_key: Optional[str] = None - # TODO: provide overriding BASE_URL? 
+ openai_api_base: Optional[str] = "https://api.openai.com/v1" # groq groq_api_key: Optional[str] = None From 2df5f23f91be5501da21c49b8a5c87a499cf6f91 Mon Sep 17 00:00:00 2001 From: Matthew Zhou Date: Thu, 10 Oct 2024 17:43:14 -0700 Subject: [PATCH 2/6] chore: add e2e tests for Groq to CI (#1868) Co-authored-by: Matt Zhou --- .github/workflows/test_groq.yml | 104 ++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 .github/workflows/test_groq.yml diff --git a/.github/workflows/test_groq.yml b/.github/workflows/test_groq.yml new file mode 100644 index 0000000000..f14da94a82 --- /dev/null +++ b/.github/workflows/test_groq.yml @@ -0,0 +1,104 @@ +name: Groq Llama 3.1 70b Capabilities Test + +env: + GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + test: + runs-on: ubuntu-latest + timeout-minutes: 15 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: "Setup Python, Poetry and Dependencies" + uses: packetcoders/action-setup-cache-python-poetry@main + with: + python-version: "3.12" + poetry-version: "1.8.2" + install-args: "-E dev -E external-tools" + + - name: Test first message contains expected function call and inner monologue + id: test_first_message + env: + GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} + run: | + poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_returns_valid_first_message + echo "TEST_FIRST_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV + continue-on-error: true + + - name: Test model sends message with keyword + id: test_keyword_message + env: + GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} + run: | + poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_returns_keyword + echo "TEST_KEYWORD_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV + continue-on-error: true + + - name: Test model uses external tool correctly + id: test_external_tool + env: + GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} + run: | + poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_uses_external_tool + echo "TEST_EXTERNAL_TOOL_EXIT_CODE=$?" >> $GITHUB_ENV + continue-on-error: true + + - name: Test model recalls chat memory + id: test_chat_memory + env: + GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} + run: | + poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_recall_chat_memory + echo "TEST_CHAT_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV + continue-on-error: true + + - name: Test model uses 'archival_memory_search' to find secret + id: test_archival_memory + env: + GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} + run: | + poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_archival_memory_retrieval + echo "TEST_ARCHIVAL_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV + continue-on-error: true + + - name: Test model can edit core memories + id: test_core_memory + env: + GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} + run: | + poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_edit_core_memory + echo "TEST_CORE_MEMORY_EXIT_CODE=$?" 
>> $GITHUB_ENV + continue-on-error: true + + - name: Summarize test results + if: always() + run: | + echo "Test Results Summary:" + + # If the exit code is empty, treat it as a failure (❌) + echo "Test first message: $([[ -z $TEST_FIRST_MESSAGE_EXIT_CODE || $TEST_FIRST_MESSAGE_EXIT_CODE -ne 0 ]] && echo ❌ || echo ✅)" + echo "Test model sends message with keyword: $([[ -z $TEST_KEYWORD_MESSAGE_EXIT_CODE || $TEST_KEYWORD_MESSAGE_EXIT_CODE -ne 0 ]] && echo ❌ || echo ✅)" + echo "Test model uses external tool: $([[ -z $TEST_EXTERNAL_TOOL_EXIT_CODE || $TEST_EXTERNAL_TOOL_EXIT_CODE -ne 0 ]] && echo ❌ || echo ✅)" + echo "Test model recalls chat memory: $([[ -z $TEST_CHAT_MEMORY_EXIT_CODE || $TEST_CHAT_MEMORY_EXIT_CODE -ne 0 ]] && echo ❌ || echo ✅)" + echo "Test model uses 'archival_memory_search' to find secret: $([[ -z $TEST_ARCHIVAL_MEMORY_EXIT_CODE || $TEST_ARCHIVAL_MEMORY_EXIT_CODE -ne 0 ]] && echo ❌ || echo ✅)" + echo "Test model can edit core memories: $([[ -z $TEST_CORE_MEMORY_EXIT_CODE || $TEST_CORE_MEMORY_EXIT_CODE -ne 0 ]] && echo ❌ || echo ✅)" + + # Check if any test failed (either non-zero or unset exit code) + if [[ -z $TEST_FIRST_MESSAGE_EXIT_CODE || $TEST_FIRST_MESSAGE_EXIT_CODE -ne 0 || \ + -z $TEST_KEYWORD_MESSAGE_EXIT_CODE || $TEST_KEYWORD_MESSAGE_EXIT_CODE -ne 0 || \ + -z $TEST_EXTERNAL_TOOL_EXIT_CODE || $TEST_EXTERNAL_TOOL_EXIT_CODE -ne 0 || \ + -z $TEST_CHAT_MEMORY_EXIT_CODE || $TEST_CHAT_MEMORY_EXIT_CODE -ne 0 || \ + -z $TEST_ARCHIVAL_MEMORY_EXIT_CODE || $TEST_ARCHIVAL_MEMORY_EXIT_CODE -ne 0 || \ + -z $TEST_CORE_MEMORY_EXIT_CODE || $TEST_CORE_MEMORY_EXIT_CODE -ne 0 ]]; then + echo "Some tests failed." + exit 78 + fi + continue-on-error: true From d7340eaa4d51c1074b8917840608eed9626c3621 Mon Sep 17 00:00:00 2001 From: Matthew Zhou Date: Fri, 11 Oct 2024 15:12:45 -0700 Subject: [PATCH 3/6] test: Fix Azure tests and write CI tests (#1871) Co-authored-by: Matt Zhou --- .github/workflows/test_azure.yml | 111 +++++++++++++++++++++++++++++++ letta/llm_api/azure_openai.py | 2 +- letta/llm_api/helpers.py | 1 + letta/settings.py | 5 +- 4 files changed, 117 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/test_azure.yml diff --git a/.github/workflows/test_azure.yml b/.github/workflows/test_azure.yml new file mode 100644 index 0000000000..e18f512d42 --- /dev/null +++ b/.github/workflows/test_azure.yml @@ -0,0 +1,111 @@ +name: Azure OpenAI GPT-4o Mini Capabilities Test + +env: + AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }} + AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }} + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + test: + runs-on: ubuntu-latest + timeout-minutes: 15 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: "Setup Python, Poetry and Dependencies" + uses: packetcoders/action-setup-cache-python-poetry@main + with: + python-version: "3.12" + poetry-version: "1.8.2" + install-args: "-E dev -E external-tools" + + - name: Test first message contains expected function call and inner monologue + id: test_first_message + env: + AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }} + AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }} + run: | + poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_returns_valid_first_message + echo "TEST_FIRST_MESSAGE_EXIT_CODE=$?" 
>> $GITHUB_ENV + continue-on-error: true + + - name: Test model sends message with keyword + id: test_keyword_message + env: + AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }} + AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }} + run: | + poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_returns_keyword + echo "TEST_KEYWORD_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV + continue-on-error: true + + - name: Test model uses external tool correctly + id: test_external_tool + env: + AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }} + AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }} + run: | + poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_uses_external_tool + echo "TEST_EXTERNAL_TOOL_EXIT_CODE=$?" >> $GITHUB_ENV + continue-on-error: true + + - name: Test model recalls chat memory + id: test_chat_memory + env: + AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }} + AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }} + run: | + poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_recall_chat_memory + echo "TEST_CHAT_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV + continue-on-error: true + + - name: Test model uses 'archival_memory_search' to find secret + id: test_archival_memory + env: + AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }} + AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }} + run: | + poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_archival_memory_retrieval + echo "TEST_ARCHIVAL_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV + continue-on-error: true + + - name: Test model can edit core memories + id: test_core_memory + env: + AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }} + AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }} + run: | + poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_edit_core_memory + echo "TEST_CORE_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV + continue-on-error: true + + - name: Summarize test results + if: always() + run: | + echo "Test Results Summary:" + + # If the exit code is empty, treat it as a failure (❌) + echo "Test first message: $([[ -z $TEST_FIRST_MESSAGE_EXIT_CODE || $TEST_FIRST_MESSAGE_EXIT_CODE -ne 0 ]] && echo ❌ || echo ✅)" + echo "Test model sends message with keyword: $([[ -z $TEST_KEYWORD_MESSAGE_EXIT_CODE || $TEST_KEYWORD_MESSAGE_EXIT_CODE -ne 0 ]] && echo ❌ || echo ✅)" + echo "Test model uses external tool: $([[ -z $TEST_EXTERNAL_TOOL_EXIT_CODE || $TEST_EXTERNAL_TOOL_EXIT_CODE -ne 0 ]] && echo ❌ || echo ✅)" + echo "Test model recalls chat memory: $([[ -z $TEST_CHAT_MEMORY_EXIT_CODE || $TEST_CHAT_MEMORY_EXIT_CODE -ne 0 ]] && echo ❌ || echo ✅)" + echo "Test model uses 'archival_memory_search' to find secret: $([[ -z $TEST_ARCHIVAL_MEMORY_EXIT_CODE || $TEST_ARCHIVAL_MEMORY_EXIT_CODE -ne 0 ]] && echo ❌ || echo ✅)" + echo "Test model can edit core memories: $([[ -z $TEST_CORE_MEMORY_EXIT_CODE || $TEST_CORE_MEMORY_EXIT_CODE -ne 0 ]] && echo ❌ || echo ✅)" + + # Check if any test failed (either non-zero or unset exit code) + if [[ -z $TEST_FIRST_MESSAGE_EXIT_CODE || $TEST_FIRST_MESSAGE_EXIT_CODE -ne 0 || \ + -z $TEST_KEYWORD_MESSAGE_EXIT_CODE || $TEST_KEYWORD_MESSAGE_EXIT_CODE -ne 0 || \ + -z $TEST_EXTERNAL_TOOL_EXIT_CODE || $TEST_EXTERNAL_TOOL_EXIT_CODE -ne 0 || \ + -z $TEST_CHAT_MEMORY_EXIT_CODE || $TEST_CHAT_MEMORY_EXIT_CODE -ne 0 || \ + -z $TEST_ARCHIVAL_MEMORY_EXIT_CODE || $TEST_ARCHIVAL_MEMORY_EXIT_CODE -ne 0 || \ + -z $TEST_CORE_MEMORY_EXIT_CODE || $TEST_CORE_MEMORY_EXIT_CODE -ne 0 ]]; then + echo "Some tests failed." 
+ exit 78 + fi + continue-on-error: true diff --git a/letta/llm_api/azure_openai.py b/letta/llm_api/azure_openai.py index 41ca4fbe21..932570472f 100644 --- a/letta/llm_api/azure_openai.py +++ b/letta/llm_api/azure_openai.py @@ -79,7 +79,7 @@ def azure_openai_chat_completions_request( data.pop("tools") data.pop("tool_choice", None) # extra safe, should exist always (default="auto") - url = get_azure_chat_completions_endpoint(model_settings.azure_base_url, llm_config.model, model_settings.api_version) + url = get_azure_chat_completions_endpoint(model_settings.azure_base_url, llm_config.model, model_settings.azure_api_version) response_json = make_post_request(url, headers, data) # NOTE: azure openai does not include "content" in the response when it is None, so we need to add it if "content" not in response_json["choices"][0].get("message"): diff --git a/letta/llm_api/helpers.py b/letta/llm_api/helpers.py index a5aa43b2e3..05b36f3b7f 100644 --- a/letta/llm_api/helpers.py +++ b/letta/llm_api/helpers.py @@ -153,6 +153,7 @@ def unpack_inner_thoughts_from_kwargs(choice: Choice, inner_thoughts_key: str) - return new_choice else: warnings.warn(f"Did not find inner thoughts in tool call: {str(tool_call)}") + return choice except json.JSONDecodeError as e: warnings.warn(f"Failed to strip inner thoughts from kwargs: {e}") diff --git a/letta/settings.py b/letta/settings.py index 12d425670a..75a55bd9ab 100644 --- a/letta/settings.py +++ b/letta/settings.py @@ -25,7 +25,10 @@ class ModelSettings(BaseSettings): # azure azure_api_key: Optional[str] = None azure_base_url: Optional[str] = None - azure_api_version: Optional[str] = None + # We provide a default here, since usually people will want to be on the latest API version. + azure_api_version: Optional[str] = ( + "2024-09-01-preview" # https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation + ) # google ai gemini_api_key: Optional[str] = None From 8fc8c55f14af5353ec4179735e5771b47fec1c9d Mon Sep 17 00:00:00 2001 From: Shubham Naik Date: Fri, 11 Oct 2024 15:51:14 -0700 Subject: [PATCH 4/6] chore: support alembic (#1867) Co-authored-by: Shubham Naik Co-authored-by: Sarah Wooders --- .github/workflows/migration-test.yml | 34 +++++ CONTRIBUTING.md | 16 +++ alembic.ini | 116 ++++++++++++++++++ alembic/README | 1 + alembic/env.py | 84 +++++++++++++ alembic/script.py.mako | 26 ++++ ...505cc7eca9_create_a_baseline_migrations.py | 27 ++++ letta/agent_store/db.py | 10 +- letta/base.py | 3 + letta/metadata.py | 7 +- letta/server/server.py | 40 ++---- poetry.lock | 12 +- pyproject.toml | 1 + tests/test_client.py | 2 +- tests/test_new_client.py | 67 ---------- 15 files changed, 334 insertions(+), 112 deletions(-) create mode 100644 .github/workflows/migration-test.yml create mode 100644 alembic.ini create mode 100644 alembic/README create mode 100644 alembic/env.py create mode 100644 alembic/script.py.mako create mode 100644 alembic/versions/9a505cc7eca9_create_a_baseline_migrations.py create mode 100644 letta/base.py diff --git a/.github/workflows/migration-test.yml b/.github/workflows/migration-test.yml new file mode 100644 index 0000000000..9f04b6d2d5 --- /dev/null +++ b/.github/workflows/migration-test.yml @@ -0,0 +1,34 @@ +name: Alembic Migration Tester +on: + pull_request: + paths: + - '**.py' + workflow_dispatch: +jobs: + test: + runs-on: ubuntu-latest + timeout-minutes: 15 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Build and run container + run: bash db/run_postgres.sh + + - name: "Setup Python, Poetry and 
Dependencies" + uses: packetcoders/action-setup-cache-python-poetry@main + with: + python-version: "3.12" + poetry-version: "1.8.2" + install-args: "--all-extras" + - name: Test alembic migration + env: + LETTA_PG_PORT: 8888 + LETTA_PG_USER: letta + LETTA_PG_PASSWORD: letta + LETTA_PG_DB: letta + LETTA_PG_HOST: localhost + LETTA_SERVER_PASS: test_server_token + run: | + poetry run alembic upgrade head + poetry run alembic check diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 709ca15b80..c8b8e3989c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -65,6 +65,7 @@ $ . venv/bin/activate If you are having dependency issues using `pip`, we recommend you install the package using Poetry. Installing Letta from source using Poetry will ensure that you are using exact package versions that have been tested for the production build. + #### (Optional) Installing pre-commit We recommend installing pre-commit to ensure proper formatting during development: ``` @@ -86,6 +87,21 @@ git checkout -b feature/your-feature Now, the world is your oyster! Go ahead and craft your fabulous changes. 🎨 + +#### Handling Database Migrations +If you are running Letta for the first time, your database will be automatically be setup. If you are updating Letta, you may need to run migrations. To run migrations, use the following command: +```shell +poetry run alembic upgrade head +``` + +#### Creating a new Database Migration +If you have made changes to the database models, you will need to create a new migration. To create a new migration, use the following command: +```shell +poetry run alembic revision --autogenerate -m "Your migration message here" +``` + +Visit the [Alembic documentation](https://alembic.sqlalchemy.org/en/latest/tutorial.html) for more information on creating and running migrations. + ## 3. ✅ Testing Before we hit the 'Wow, I'm Done' button, let's make sure everything works as expected. Run tests and make sure the existing ones don't throw a fit. And if needed, create new tests. 🕵️ diff --git a/alembic.ini b/alembic.ini new file mode 100644 index 0000000000..72cc69904f --- /dev/null +++ b/alembic.ini @@ -0,0 +1,116 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts +# Use forward slashes (/) also on windows to provide an os agnostic path +script_location = alembic + +# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s +# Uncomment the line below if you want the files to be prepended with date and time +# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file +# for all available tokens +# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s + +# sys.path path, will be prepended to sys.path if present. +# defaults to the current working directory. +prepend_sys_path = . + +# timezone to use when rendering the date within the migration file +# as well as the filename. +# If specified, requires the python>=3.9 or backports.zoneinfo library. 
+# Any required deps can installed by adding `alembic[tz]` to the pip requirements +# string value is passed to ZoneInfo() +# leave blank for localtime +# timezone = + +# max length of characters to apply to the "slug" field +# truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; This defaults +# to alembic/versions. When using multiple version +# directories, initial revisions must be specified with --version-path. +# The path separator used here should be the separator specified by "version_path_separator" below. +# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions + +# version path separator; As mentioned above, this is the character used to split +# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep. +# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas. +# Valid values for version_path_separator are: +# +# version_path_separator = : +# version_path_separator = ; +# version_path_separator = space +version_path_separator = os # Use os.pathsep. Default configuration used for new projects. + +# set to 'true' to search source files recursively +# in each "version_locations" directory +# new in Alembic version 1.10 +# recursive_version_locations = false + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +sqlalchemy.url = driver://user:pass@localhost/dbname + + +[post_write_hooks] +# post_write_hooks defines scripts or Python functions that are run +# on newly generated revision scripts. See the documentation for further +# detail and examples + +# format using "black" - use the console_scripts runner, against the "black" entrypoint +# hooks = black +# black.type = console_scripts +# black.entrypoint = black +# black.options = -l 79 REVISION_SCRIPT_FILENAME + +# lint with attempts to fix using "ruff" - use the exec runner, execute a binary +# hooks = ruff +# ruff.type = exec +# ruff.executable = %(here)s/.venv/bin/ruff +# ruff.options = --fix REVISION_SCRIPT_FILENAME + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/alembic/README b/alembic/README new file mode 100644 index 0000000000..2500aa1bcf --- /dev/null +++ b/alembic/README @@ -0,0 +1 @@ +Generic single-database configuration. diff --git a/alembic/env.py b/alembic/env.py new file mode 100644 index 0000000000..69e3a60b55 --- /dev/null +++ b/alembic/env.py @@ -0,0 +1,84 @@ +import os +from logging.config import fileConfig + +from sqlalchemy import engine_from_config, pool + +from alembic import context +from letta.base import Base +from letta.settings import settings + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. 
+config = context.config + +print(settings.letta_pg_uri_no_default) +if settings.letta_pg_uri_no_default: + config.set_main_option("sqlalchemy.url", settings.letta_pg_uri) +else: + config.set_main_option("sqlalchemy.url", "sqlite:///" + os.path.join(config.recall_storage_path, "sqlite.db")) + +# Interpret the config file for Python logging. +# This line sets up loggers basically. +if config.config_file_name is not None: + fileConfig(config.config_file_name) + +# add your model's MetaData object here +# for 'autogenerate' support +# from myapp import mymodel +# target_metadata = mymodel.Base.metadata +target_metadata = Base.metadata + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. + + +def run_migrations_offline() -> None: + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. + + """ + url = config.get_main_option("sqlalchemy.url") + context.configure( + url=url, + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + ) + + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online() -> None: + """Run migrations in 'online' mode. + + In this scenario we need to create an Engine + and associate a connection with the context. + + """ + connectable = engine_from_config( + config.get_section(config.config_ini_section, {}), + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + + with connectable.connect() as connection: + context.configure(connection=connection, target_metadata=target_metadata, include_schemas=True) + + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/alembic/script.py.mako b/alembic/script.py.mako new file mode 100644 index 0000000000..fbc4b07dce --- /dev/null +++ b/alembic/script.py.mako @@ -0,0 +1,26 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. +revision: str = ${repr(up_revision)} +down_revision: Union[str, None] = ${repr(down_revision)} +branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} +depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} + + +def upgrade() -> None: + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + ${downgrades if downgrades else "pass"} diff --git a/alembic/versions/9a505cc7eca9_create_a_baseline_migrations.py b/alembic/versions/9a505cc7eca9_create_a_baseline_migrations.py new file mode 100644 index 0000000000..d1ee25e1d7 --- /dev/null +++ b/alembic/versions/9a505cc7eca9_create_a_baseline_migrations.py @@ -0,0 +1,27 @@ +"""Create a baseline migrations + +Revision ID: 9a505cc7eca9 +Revises: +Create Date: 2024-10-11 14:19:19.875656 + +""" + +from typing import Sequence, Union + +# revision identifiers, used by Alembic. 
+revision: str = "9a505cc7eca9" +down_revision: Union[str, None] = None +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + pass + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + pass + # ### end Alembic commands ### diff --git a/letta/agent_store/db.py b/letta/agent_store/db.py index 585de6edee..ff22af8c4d 100644 --- a/letta/agent_store/db.py +++ b/letta/agent_store/db.py @@ -18,13 +18,14 @@ select, text, ) -from sqlalchemy.orm import declarative_base, mapped_column +from sqlalchemy.orm import mapped_column from sqlalchemy.orm.session import close_all_sessions from sqlalchemy.sql import func from sqlalchemy_json import MutableJson from tqdm import tqdm from letta.agent_store.storage import StorageConnector, TableType +from letta.base import Base from letta.config import LettaConfig from letta.constants import MAX_EMBEDDING_DIM from letta.metadata import EmbeddingConfigColumn, ToolCallColumn @@ -35,7 +36,6 @@ from letta.schemas.passage import Passage from letta.settings import settings -Base = declarative_base() config = LettaConfig() @@ -560,3 +560,9 @@ def update(self, record): # Commit the changes to the database session.commit() + + +def attach_base(): + # This should be invoked in server.py to make sure Base gets initialized properly + # DO NOT REMOVE + print("Initializing database...") diff --git a/letta/base.py b/letta/base.py new file mode 100644 index 0000000000..860e54258a --- /dev/null +++ b/letta/base.py @@ -0,0 +1,3 @@ +from sqlalchemy.ext.declarative import declarative_base + +Base = declarative_base() diff --git a/letta/metadata.py b/letta/metadata.py index 3e56fddbe3..c8f206f349 100644 --- a/letta/metadata.py +++ b/letta/metadata.py @@ -14,11 +14,10 @@ String, TypeDecorator, desc, - func, ) -from sqlalchemy.orm import declarative_base from sqlalchemy.sql import func +from letta.base import Base from letta.config import LettaConfig from letta.schemas.agent import AgentState from letta.schemas.api_key import APIKey @@ -28,6 +27,8 @@ from letta.schemas.job import Job from letta.schemas.llm_config import LLMConfig from letta.schemas.memory import Memory + +# from letta.schemas.message import Message, Passage, Record, RecordType, ToolCall from letta.schemas.openai.chat_completions import ToolCall, ToolCallFunction from letta.schemas.organization import Organization from letta.schemas.source import Source @@ -36,8 +37,6 @@ from letta.settings import settings from letta.utils import enforce_types, get_utc_time, printd -Base = declarative_base() - class LLMConfigColumn(TypeDecorator): """Custom type for storing LLMConfig as JSON""" diff --git a/letta/server/server.py b/letta/server/server.py index b37ec867d4..efd16a784b 100644 --- a/letta/server/server.py +++ b/letta/server/server.py @@ -14,8 +14,8 @@ import letta.server.utils as server_utils import letta.system as system from letta.agent import Agent, save_agent +from letta.agent_store.db import attach_base from letta.agent_store.storage import StorageConnector, TableType -from letta.config import LettaConfig from letta.credentials import LettaCredentials from letta.data_sources.connectors import DataConnector, load_data @@ -41,7 +41,7 @@ from letta.interface import CLIInterface # for printing to terminal from letta.log import get_logger from letta.memory import get_memory_functions -from letta.metadata 
import MetadataStore +from letta.metadata import Base, MetadataStore from letta.prompts import gpt_system from letta.providers import ( AnthropicProvider, @@ -150,23 +150,11 @@ def run_command(self, user_id: str, agent_id: str, command: str) -> Union[str, N from sqlalchemy import create_engine -from sqlalchemy.orm import declarative_base, sessionmaker +from sqlalchemy.orm import sessionmaker -from letta.agent_store.db import MessageModel, PassageModel from letta.config import LettaConfig # NOTE: hack to see if single session management works -from letta.metadata import ( - AgentModel, - AgentSourceMappingModel, - APIKeyModel, - BlockModel, - JobModel, - OrganizationModel, - SourceModel, - ToolModel, - UserModel, -) from letta.settings import model_settings, settings config = LettaConfig.load() @@ -183,24 +171,12 @@ def run_command(self, user_id: str, agent_id: str, command: str) -> Union[str, N # TODO: don't rely on config storage engine = create_engine("sqlite:///" + os.path.join(config.recall_storage_path, "sqlite.db")) -Base = declarative_base() + SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) -Base.metadata.create_all( - engine, - tables=[ - UserModel.__table__, - AgentModel.__table__, - SourceModel.__table__, - AgentSourceMappingModel.__table__, - APIKeyModel.__table__, - BlockModel.__table__, - ToolModel.__table__, - JobModel.__table__, - PassageModel.__table__, - MessageModel.__table__, - OrganizationModel.__table__, - ], -) + +attach_base() + +Base.metadata.create_all(bind=engine) # Dependency diff --git a/poetry.lock b/poetry.lock index ae52f96686..011c932b38 100644 --- a/poetry.lock +++ b/poetry.lock @@ -139,13 +139,13 @@ frozenlist = ">=1.1.0" [[package]] name = "alembic" -version = "1.13.2" +version = "1.13.3" description = "A database migration tool for SQLAlchemy." -optional = true +optional = false python-versions = ">=3.8" files = [ - {file = "alembic-1.13.2-py3-none-any.whl", hash = "sha256:6b8733129a6224a9a711e17c99b08462dbf7cc9670ba8f2e2ae9af860ceb1953"}, - {file = "alembic-1.13.2.tar.gz", hash = "sha256:1ff0ae32975f4fd96028c39ed9bb3c867fe3af956bd7bb37343b54c9fe7445ef"}, + {file = "alembic-1.13.3-py3-none-any.whl", hash = "sha256:908e905976d15235fae59c9ac42c4c5b75cfcefe3d27c0fbf7ae15a37715d80e"}, + {file = "alembic-1.13.3.tar.gz", hash = "sha256:203503117415561e203aa14541740643a611f641517f0209fcae63e9fa09f1a2"}, ] [package.dependencies] @@ -3814,7 +3814,7 @@ Werkzeug = ">=2.0.0" name = "mako" version = "1.3.5" description = "A super-fast templating language that borrows the best ideas from the existing templating languages." 
-optional = true +optional = false python-versions = ">=3.8" files = [ {file = "Mako-1.3.5-py3-none-any.whl", hash = "sha256:260f1dbc3a519453a9c856dedfe4beb4e50bd5a26d96386cb6c80856556bb91a"}, @@ -8354,4 +8354,4 @@ tests = ["wikipedia"] [metadata] lock-version = "2.0" python-versions = "<3.13,>=3.10" -content-hash = "aa0bbf5825741bdc9c06388e7e27c1d9a2d85d517abb7f51cca71cc8349d1170" +content-hash = "2302d430ae353f5453bbf4223e9e00be38fcca45259de2924b38b14e36ab8024" diff --git a/pyproject.toml b/pyproject.toml index 38114897ad..124ce3e923 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -76,6 +76,7 @@ langchain = {version = "^0.2.16", optional = true} langchain-community = {version = "^0.2.17", optional = true} composio-langchain = "^0.5.28" composio-core = "^0.5.28" +alembic = "^1.13.3" [tool.poetry.extras] #local = ["llama-index-embeddings-huggingface"] diff --git a/tests/test_client.py b/tests/test_client.py index 58c7775ec6..fe3e581544 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -385,7 +385,7 @@ def test_sources(client: Union[LocalClient, RESTClient], agent: AgentState): # list archival memory archival_memories = client.get_archival_memory(agent_id=agent.id) # print(archival_memories) - assert len(archival_memories) == created_passages + assert len(archival_memories) == created_passages, f"Mismatched length {len(archival_memories)} vs. {created_passages}" # check number of passages sources = client.list_sources() diff --git a/tests/test_new_client.py b/tests/test_new_client.py index 4a436d7fe5..395b6020e4 100644 --- a/tests/test_new_client.py +++ b/tests/test_new_client.py @@ -405,70 +405,3 @@ def test_tool_creation_langchain_missing_imports(client): # Intentionally missing {"langchain_community.utilities": "WikipediaAPIWrapper"} with pytest.raises(RuntimeError): Tool.from_langchain(langchain_tool) - - -def test_sources(client, agent): - # list sources (empty) - sources = client.list_sources() - assert len(sources) == 0 - - # create a source - test_source_name = "test_source" - source = client.create_source(name=test_source_name) - - # list sources - sources = client.list_sources() - assert len(sources) == 1 - assert sources[0].metadata_["num_passages"] == 0 - assert sources[0].metadata_["num_documents"] == 0 - - # update the source - original_id = source.id - original_name = source.name - new_name = original_name + "_new" - client.update_source(source_id=source.id, name=new_name) - - # get the source name (check that it's been updated) - source = client.get_source(source_id=source.id) - assert source.name == new_name - assert source.id == original_id - - # get the source id (make sure that it's the same) - assert str(original_id) == client.get_source_id(source_name=new_name) - - # check agent archival memory size - archival_memories = client.get_archival_memory(agent_id=agent.id) - print(archival_memories) - assert len(archival_memories) == 0 - - # load a file into a source - filename = "CONTRIBUTING.md" - upload_job = client.load_file_into_source(filename=filename, source_id=source.id) - print("Upload job", upload_job, upload_job.status, upload_job.metadata_) - - # TODO: make sure things run in the right order - archival_memories = client.get_archival_memory(agent_id=agent.id) - assert len(archival_memories) == 0 - - # attach a source - client.attach_source_to_agent(source_id=source.id, agent_id=agent.id) - - # list archival memory - archival_memories = client.get_archival_memory(agent_id=agent.id) - # print(archival_memories) - assert len(archival_memories) == 20 
or len(archival_memories) == 21 - - # check number of passages - sources = client.list_sources() - - # TODO: do we want to add this metadata back? - # assert sources[0].metadata_["num_passages"] > 0 - # assert sources[0].metadata_["num_documents"] == 0 # TODO: fix this once document store added - print(sources) - - # detach the source - # TODO: add when implemented - # client.detach_source(source.name, agent.id) - - # delete the source - client.delete_source(source.id) From 30ff27473a59a264c53dedf967fa77c893bb3c31 Mon Sep 17 00:00:00 2001 From: Kevin Lin Date: Fri, 11 Oct 2024 15:51:23 -0700 Subject: [PATCH 5/6] fix: fix typo (#1870) Co-authored-by: Kevin Lin --- letta/agent.py | 2 +- letta/llm_api/llm_api_tools.py | 26 +++++++++++++------------- letta/llm_api/openai.py | 28 ++++++++++++++-------------- 3 files changed, 28 insertions(+), 28 deletions(-) diff --git a/letta/agent.py b/letta/agent.py index b8cc51f259..ee5bc01921 100644 --- a/letta/agent.py +++ b/letta/agent.py @@ -481,7 +481,7 @@ def _get_ai_reply( first_message=first_message, # streaming stream=stream, - stream_inferface=self.interface, + stream_interface=self.interface, # putting inner thoughts in func args or not inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option, ) diff --git a/letta/llm_api/llm_api_tools.py b/letta/llm_api/llm_api_tools.py index 6327d1cb75..50166a1ceb 100644 --- a/letta/llm_api/llm_api_tools.py +++ b/letta/llm_api/llm_api_tools.py @@ -115,7 +115,7 @@ def create( use_tool_naming: bool = True, # streaming? stream: bool = False, - stream_inferface: Optional[Union[AgentRefreshStreamingInterface, AgentChunkStreamingInterface]] = None, + stream_interface: Optional[Union[AgentRefreshStreamingInterface, AgentChunkStreamingInterface]] = None, # TODO move to llm_config? 
# if unspecified (None), default to something we've tested inner_thoughts_in_kwargs_option: OptionState = OptionState.DEFAULT, @@ -149,19 +149,19 @@ def create( if stream: # Client requested token streaming data.stream = True - assert isinstance(stream_inferface, AgentChunkStreamingInterface) or isinstance( - stream_inferface, AgentRefreshStreamingInterface - ), type(stream_inferface) + assert isinstance(stream_interface, AgentChunkStreamingInterface) or isinstance( + stream_interface, AgentRefreshStreamingInterface + ), type(stream_interface) response = openai_chat_completions_process_stream( url=llm_config.model_endpoint, # https://api.openai.com/v1 -> https://api.openai.com/v1/chat/completions api_key=model_settings.openai_api_key, chat_completion_request=data, - stream_inferface=stream_inferface, + stream_interface=stream_interface, ) else: # Client did not request token streaming (expect a blocking backend response) data.stream = False - if isinstance(stream_inferface, AgentChunkStreamingInterface): - stream_inferface.stream_start() + if isinstance(stream_interface, AgentChunkStreamingInterface): + stream_interface.stream_start() try: response = openai_chat_completions_request( url=llm_config.model_endpoint, # https://api.openai.com/v1 -> https://api.openai.com/v1/chat/completions @@ -169,8 +169,8 @@ def create( chat_completion_request=data, ) finally: - if isinstance(stream_inferface, AgentChunkStreamingInterface): - stream_inferface.stream_end() + if isinstance(stream_interface, AgentChunkStreamingInterface): + stream_interface.stream_end() if inner_thoughts_in_kwargs: response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG) @@ -317,8 +317,8 @@ def create( # They mention that none of the messages can have names, but it seems to not error out (for now) data.stream = False - if isinstance(stream_inferface, AgentChunkStreamingInterface): - stream_inferface.stream_start() + if isinstance(stream_interface, AgentChunkStreamingInterface): + stream_interface.stream_start() try: # groq uses the openai chat completions API, so this component should be reusable assert model_settings.groq_api_key is not None, "Groq key is missing" @@ -328,8 +328,8 @@ def create( chat_completion_request=data, ) finally: - if isinstance(stream_inferface, AgentChunkStreamingInterface): - stream_inferface.stream_end() + if isinstance(stream_interface, AgentChunkStreamingInterface): + stream_interface.stream_end() if inner_thoughts_in_kwargs: response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG) diff --git a/letta/llm_api/openai.py b/letta/llm_api/openai.py index f60150ee2a..753e7c22aa 100644 --- a/letta/llm_api/openai.py +++ b/letta/llm_api/openai.py @@ -157,7 +157,7 @@ def openai_chat_completions_process_stream( url: str, api_key: str, chat_completion_request: ChatCompletionRequest, - stream_inferface: Optional[Union[AgentChunkStreamingInterface, AgentRefreshStreamingInterface]] = None, + stream_interface: Optional[Union[AgentChunkStreamingInterface, AgentRefreshStreamingInterface]] = None, create_message_id: bool = True, create_message_datetime: bool = True, ) -> ChatCompletionResponse: @@ -167,7 +167,7 @@ def openai_chat_completions_process_stream( on the chunks received from the OpenAI-compatible server POST SSE response. 
""" assert chat_completion_request.stream == True - assert stream_inferface is not None, "Required" + assert stream_interface is not None, "Required" # Count the prompt tokens # TODO move to post-request? @@ -220,8 +220,8 @@ def openai_chat_completions_process_stream( ), ) - if stream_inferface: - stream_inferface.stream_start() + if stream_interface: + stream_interface.stream_start() n_chunks = 0 # approx == n_tokens try: @@ -230,17 +230,17 @@ def openai_chat_completions_process_stream( ): assert isinstance(chat_completion_chunk, ChatCompletionChunkResponse), type(chat_completion_chunk) - if stream_inferface: - if isinstance(stream_inferface, AgentChunkStreamingInterface): - stream_inferface.process_chunk( + if stream_interface: + if isinstance(stream_interface, AgentChunkStreamingInterface): + stream_interface.process_chunk( chat_completion_chunk, message_id=chat_completion_response.id if create_message_id else chat_completion_chunk.id, message_date=chat_completion_response.created if create_message_datetime else chat_completion_chunk.created, ) - elif isinstance(stream_inferface, AgentRefreshStreamingInterface): - stream_inferface.process_refresh(chat_completion_response) + elif isinstance(stream_interface, AgentRefreshStreamingInterface): + stream_interface.process_refresh(chat_completion_response) else: - raise TypeError(stream_inferface) + raise TypeError(stream_interface) if chunk_idx == 0: # initialize the choice objects which we will increment with the deltas @@ -314,13 +314,13 @@ def openai_chat_completions_process_stream( n_chunks += 1 except Exception as e: - if stream_inferface: - stream_inferface.stream_end() + if stream_interface: + stream_interface.stream_end() print(f"Parsing ChatCompletion stream failed with error:\n{str(e)}") raise e finally: - if stream_inferface: - stream_inferface.stream_end() + if stream_interface: + stream_interface.stream_end() # make sure we didn't leave temp stuff in assert all([c.finish_reason != TEMP_STREAM_FINISH_REASON for c in chat_completion_response.choices]) From 32fbd71467ed6dc82647d0045feeb930a636c70a Mon Sep 17 00:00:00 2001 From: Sarah Wooders Date: Fri, 11 Oct 2024 15:58:12 -0700 Subject: [PATCH 6/6] feat: add `VLLMProvider` (#1866) Co-authored-by: cpacker --- letta/cli/cli.py | 20 +------- letta/llm_api/llm_api_tools.py | 4 ++ letta/llm_api/openai.py | 10 ++-- letta/local_llm/vllm/api.py | 2 +- letta/providers.py | 81 +++++++++++++++++++++++++++---- letta/schemas/embedding_config.py | 7 +++ letta/schemas/llm_config.py | 7 +++ letta/server/server.py | 20 ++++++-- letta/settings.py | 10 +++- tests/test_providers.py | 8 +++ 10 files changed, 132 insertions(+), 37 deletions(-) diff --git a/letta/cli/cli.py b/letta/cli/cli.py index 160615b7b6..04dbf359a4 100644 --- a/letta/cli/cli.py +++ b/letta/cli/cli.py @@ -14,9 +14,7 @@ from letta.local_llm.constants import ASSISTANT_MESSAGE_CLI_SYMBOL from letta.log import get_logger from letta.metadata import MetadataStore -from letta.schemas.embedding_config import EmbeddingConfig from letta.schemas.enums import OptionState -from letta.schemas.llm_config import LLMConfig from letta.schemas.memory import ChatMemory, Memory from letta.server.server import logger as server_logger @@ -235,12 +233,7 @@ def run( # choose from list of llm_configs llm_configs = client.list_llm_configs() llm_options = [llm_config.model for llm_config in llm_configs] - - # TODO move into LLMConfig as a class method? 
- def prettify_llm_config(llm_config: LLMConfig) -> str: - return f"{llm_config.model}" + f" ({llm_config.model_endpoint})" if llm_config.model_endpoint else "" - - llm_choices = [questionary.Choice(title=prettify_llm_config(llm_config), value=llm_config) for llm_config in llm_configs] + llm_choices = [questionary.Choice(title=llm_config.pretty_print(), value=llm_config) for llm_config in llm_configs] # select model if len(llm_options) == 0: @@ -255,17 +248,8 @@ def prettify_llm_config(llm_config: LLMConfig) -> str: embedding_configs = client.list_embedding_configs() embedding_options = [embedding_config.embedding_model for embedding_config in embedding_configs] - # TODO move into EmbeddingConfig as a class method? - def prettify_embed_config(embedding_config: EmbeddingConfig) -> str: - return ( - f"{embedding_config.embedding_model}" + f" ({embedding_config.embedding_endpoint})" - if embedding_config.embedding_endpoint - else "" - ) - embedding_choices = [ - questionary.Choice(title=prettify_embed_config(embedding_config), value=embedding_config) - for embedding_config in embedding_configs + questionary.Choice(title=embedding_config.pretty_print(), value=embedding_config) for embedding_config in embedding_configs ] # select model diff --git a/letta/llm_api/llm_api_tools.py b/letta/llm_api/llm_api_tools.py index 50166a1ceb..9864fafe18 100644 --- a/letta/llm_api/llm_api_tools.py +++ b/letta/llm_api/llm_api_tools.py @@ -70,6 +70,10 @@ def wrapper(*args, **kwargs): return func(*args, **kwargs) except requests.exceptions.HTTPError as http_err: + + if not hasattr(http_err, "response") or not http_err.response: + raise + # Retry on specified errors if http_err.response.status_code in error_codes: # Increment retries diff --git a/letta/llm_api/openai.py b/letta/llm_api/openai.py index 753e7c22aa..3d203fe2c6 100644 --- a/letta/llm_api/openai.py +++ b/letta/llm_api/openai.py @@ -61,6 +61,7 @@ def openai_get_model_list( headers["Authorization"] = f"Bearer {api_key}" printd(f"Sending request to {url}") + response = None try: # TODO add query param "tool" to be true response = requests.get(url, headers=headers, params=extra_params) @@ -71,7 +72,8 @@ def openai_get_model_list( except requests.exceptions.HTTPError as http_err: # Handle HTTP errors (e.g., response 4XX, 5XX) try: - response = response.json() + if response: + response = response.json() except: pass printd(f"Got HTTPError, exception={http_err}, response={response}") @@ -79,7 +81,8 @@ def openai_get_model_list( except requests.exceptions.RequestException as req_err: # Handle other requests-related errors (e.g., connection error) try: - response = response.json() + if response: + response = response.json() except: pass printd(f"Got RequestException, exception={req_err}, response={response}") @@ -87,7 +90,8 @@ def openai_get_model_list( except Exception as e: # Handle other potential errors try: - response = response.json() + if response: + response = response.json() except: pass printd(f"Got unknown Exception, exception={e}, response={response}") diff --git a/letta/local_llm/vllm/api.py b/letta/local_llm/vllm/api.py index 102b9606d1..48c48b3260 100644 --- a/letta/local_llm/vllm/api.py +++ b/letta/local_llm/vllm/api.py @@ -3,7 +3,7 @@ from letta.local_llm.settings.settings import get_completions_settings from letta.local_llm.utils import count_tokens, post_json_auth_request -WEBUI_API_SUFFIX = "/v1/completions" +WEBUI_API_SUFFIX = "/completions" def get_vllm_completion(endpoint, auth_type, auth_key, model, prompt, context_window, user, 
grammar=None): diff --git a/letta/providers.py b/letta/providers.py index 761fcd7ee7..fa54570846 100644 --- a/letta/providers.py +++ b/letta/providers.py @@ -14,14 +14,18 @@ class Provider(BaseModel): - def list_llm_models(self): + def list_llm_models(self) -> List[LLMConfig]: return [] - def list_embedding_models(self): + def list_embedding_models(self) -> List[EmbeddingConfig]: return [] - def get_model_context_window(self, model_name: str): - pass + def get_model_context_window(self, model_name: str) -> Optional[int]: + raise NotImplementedError + + def provider_tag(self) -> str: + """String representation of the provider for display purposes""" + raise NotImplementedError class LettaProvider(Provider): @@ -162,7 +166,7 @@ def list_llm_models(self) -> List[LLMConfig]: ) return configs - def get_model_context_window(self, model_name: str): + def get_model_context_window(self, model_name: str) -> Optional[int]: import requests @@ -310,7 +314,7 @@ def list_embedding_models(self): ) return configs - def get_model_context_window(self, model_name: str): + def get_model_context_window(self, model_name: str) -> Optional[int]: from letta.llm_api.google_ai import google_ai_get_model_context_window return google_ai_get_model_context_window(self.base_url, self.api_key, model_name) @@ -371,16 +375,75 @@ def list_embedding_models(self) -> List[EmbeddingConfig]: ) return configs - def get_model_context_window(self, model_name: str): + def get_model_context_window(self, model_name: str) -> Optional[int]: """ This is hardcoded for now, since there is no API endpoints to retrieve metadata for a model. """ return AZURE_MODEL_TO_CONTEXT_LENGTH.get(model_name, 4096) -class VLLMProvider(OpenAIProvider): +class VLLMChatCompletionsProvider(Provider): + """vLLM provider that treats vLLM as an OpenAI /chat/completions proxy""" + # NOTE: vLLM only serves one model at a time (so could configure that through env variables) - pass + name: str = "vllm" + base_url: str = Field(..., description="Base URL for the vLLM API.") + + def list_llm_models(self) -> List[LLMConfig]: + # not supported with vLLM + from letta.llm_api.openai import openai_get_model_list + + assert self.base_url, "base_url is required for vLLM provider" + response = openai_get_model_list(self.base_url, api_key=None) + + configs = [] + print(response) + for model in response["data"]: + configs.append( + LLMConfig( + model=model["id"], + model_endpoint_type="openai", + model_endpoint=self.base_url, + context_window=model["max_model_len"], + ) + ) + return configs + + def list_embedding_models(self) -> List[EmbeddingConfig]: + # not supported with vLLM + return [] + + +class VLLMCompletionsProvider(Provider): + """This uses /completions API as the backend, not /chat/completions, so we need to specify a model wrapper""" + + # NOTE: vLLM only serves one model at a time (so could configure that through env variables) + name: str = "vllm" + base_url: str = Field(..., description="Base URL for the vLLM API.") + default_prompt_formatter: str = Field(..., description="Default prompt formatter (aka model wrapper)to use on vLLM /completions API.") + + def list_llm_models(self) -> List[LLMConfig]: + # not supported with vLLM + from letta.llm_api.openai import openai_get_model_list + + response = openai_get_model_list(self.base_url, api_key=None) + + configs = [] + for model in response["data"]: + configs.append( + LLMConfig( + model=model["id"], + model_endpoint_type="vllm", + model_endpoint=self.base_url, + model_wrapper=self.default_prompt_formatter, + 
context_window=model["max_model_len"], + ) + ) + return configs + + def list_embedding_models(self) -> List[EmbeddingConfig]: + # not supported with vLLM + return [] class CohereProvider(OpenAIProvider): diff --git a/letta/schemas/embedding_config.py b/letta/schemas/embedding_config.py index e56b2f8272..31f7ee8da3 100644 --- a/letta/schemas/embedding_config.py +++ b/letta/schemas/embedding_config.py @@ -52,3 +52,10 @@ def default_config(cls, model_name: Optional[str] = None, provider: Optional[str ) else: raise ValueError(f"Model {model_name} not supported.") + + def pretty_print(self) -> str: + return ( + f"{self.embedding_model}" + + (f" [type={self.embedding_endpoint_type}]" if self.embedding_endpoint_type else "") + + (f" [ip={self.embedding_endpoint}]" if self.embedding_endpoint else "") + ) diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py index 412e6483ee..b3d7f02f0a 100644 --- a/letta/schemas/llm_config.py +++ b/letta/schemas/llm_config.py @@ -68,3 +68,10 @@ def default_config(cls, model_name: str): ) else: raise ValueError(f"Model {model_name} not supported.") + + def pretty_print(self) -> str: + return ( + f"{self.model}" + + (f" [type={self.model_endpoint_type}]" if self.model_endpoint_type else "") + + (f" [ip={self.model_endpoint}]" if self.model_endpoint else "") + ) diff --git a/letta/server/server.py b/letta/server/server.py index efd16a784b..08050ac080 100644 --- a/letta/server/server.py +++ b/letta/server/server.py @@ -51,7 +51,8 @@ OllamaProvider, OpenAIProvider, Provider, - VLLMProvider, + VLLMChatCompletionsProvider, + VLLMCompletionsProvider, ) from letta.schemas.agent import AgentState, AgentType, CreateAgent, UpdateAgentState from letta.schemas.api_key import APIKey, APIKeyCreate @@ -244,12 +245,11 @@ def __init__( if model_settings.anthropic_api_key: self._enabled_providers.append(AnthropicProvider(api_key=model_settings.anthropic_api_key)) if model_settings.ollama_base_url: - self._enabled_providers.append(OllamaProvider(base_url=model_settings.ollama_base_url)) - if model_settings.vllm_base_url: - self._enabled_providers.append(VLLMProvider(base_url=model_settings.vllm_base_url)) + self._enabled_providers.append(OllamaProvider(base_url=model_settings.ollama_base_url, api_key=None)) if model_settings.gemini_api_key: self._enabled_providers.append(GoogleAIProvider(api_key=model_settings.gemini_api_key)) if model_settings.azure_api_key and model_settings.azure_base_url: + assert model_settings.azure_api_version, "AZURE_API_VERSION is required" self._enabled_providers.append( AzureProvider( api_key=model_settings.azure_api_key, @@ -257,6 +257,18 @@ def __init__( api_version=model_settings.azure_api_version, ) ) + if model_settings.vllm_api_base: + # vLLM exposes both a /chat/completions and a /completions endpoint + self._enabled_providers.append( + VLLMCompletionsProvider( + base_url=model_settings.vllm_api_base, + default_prompt_formatter=model_settings.default_prompt_formatter, + ) + ) + # NOTE: to use the /chat/completions endpoint, you need to specify extra flags on vLLM startup + # see: https://docs.vllm.ai/en/latest/getting_started/examples/openai_chat_completion_client_with_tools.html + # e.g. "... 
--enable-auto-tool-choice --tool-call-parser hermes" + self._enabled_providers.append(VLLMChatCompletionsProvider(base_url=model_settings.vllm_api_base)) def save_agents(self): """Saves all the agents that are in the in-memory object store""" diff --git a/letta/settings.py b/letta/settings.py index 75a55bd9ab..91c7add526 100644 --- a/letta/settings.py +++ b/letta/settings.py @@ -4,14 +4,20 @@ from pydantic import Field from pydantic_settings import BaseSettings, SettingsConfigDict +from letta.local_llm.constants import DEFAULT_WRAPPER_NAME + class ModelSettings(BaseSettings): # env_prefix='my_prefix_' + # when we use /completions APIs (instead of /chat/completions), we need to specify a model wrapper + # the "model wrapper" is responsible for prompt formatting and function calling parsing + default_prompt_formatter: str = DEFAULT_WRAPPER_NAME + # openai openai_api_key: Optional[str] = None - openai_api_base: Optional[str] = "https://api.openai.com/v1" + openai_api_base: str = "https://api.openai.com/v1" # groq groq_api_key: Optional[str] = None @@ -34,7 +40,7 @@ class ModelSettings(BaseSettings): gemini_api_key: Optional[str] = None # vLLM - vllm_base_url: Optional[str] = None + vllm_api_base: Optional[str] = None # openllm openllm_auth_type: Optional[str] = None diff --git a/tests/test_providers.py b/tests/test_providers.py index 684fed5fbd..01bb8d4115 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -52,3 +52,11 @@ def test_googleai(): print(models) provider.list_embedding_models() + + +# def test_vllm(): +# provider = VLLMProvider(base_url=os.getenv("VLLM_API_BASE")) +# models = provider.list_llm_models() +# print(models) +# +# provider.list_embedding_models()