Skip to content

Commit

Permalink
feat: update llama-index + dependencies (#2092)
Browse files (browse the repository at this point in the history)
* chore: update libraries

* fix: mypy

* chore: more updates

* fix: mypy/black

* chore: fix docker warnings

* fix: mypy

* fix: black
  • Loading branch information
jaluma authored Sep 26, 2024
1 parent 5fbb402 commit 5851b02
Show file tree
Hide file tree
Showing 16 changed files with 2,782 additions and 2,429 deletions.
6 changes: 5 additions & 1 deletion docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ services:
ollama:
image: traefik:v2.10
ports:
- "11434:11434"
- "8080:8080"
command:
- "--providers.file.filename=/etc/router.yml"
- "--log.level=ERROR"
Expand All @@ -83,6 +83,8 @@ services:
# Ollama service for the CPU mode
ollama-cpu:
image: ollama/ollama:latest
ports:
- "11434:11434"
volumes:
- ./models:/root/.ollama
profiles:
Expand All @@ -92,6 +94,8 @@ services:
# Ollama service for the CUDA mode
ollama-cuda:
image: ollama/ollama:latest
ports:
- "11434:11434"
volumes:
- ./models:/root/.ollama
deploy:
Expand Down
5,015 changes: 2,680 additions & 2,335 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion private_gpt/components/ingest/ingest_component.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,7 @@ def _doc_to_node_worker(self, file_name: str, documents: list[Document]) -> None
self.transformations,
show_progress=self.show_progress,
)
self.node_q.put(("process", file_name, documents, nodes))
self.node_q.put(("process", file_name, documents, list(nodes)))
finally:
self.doc_semaphore.release()
self.doc_q.task_done() # unblock Q joins
Expand Down
9 changes: 4 additions & 5 deletions private_gpt/components/llm/llm_component.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,6 @@ def __init__(self, settings: Settings) -> None:
api_version="",
temperature=settings.llm.temperature,
context_window=settings.llm.context_window,
max_new_tokens=settings.llm.max_new_tokens,
messages_to_prompt=prompt_style.messages_to_prompt,
completion_to_prompt=prompt_style.completion_to_prompt,
tokenizer=settings.llm.tokenizer,
Expand Down Expand Up @@ -184,10 +183,10 @@ def wrapper(*args: Any, **kwargs: Any) -> Any:

return wrapper

Ollama.chat = add_keep_alive(Ollama.chat)
Ollama.stream_chat = add_keep_alive(Ollama.stream_chat)
Ollama.complete = add_keep_alive(Ollama.complete)
Ollama.stream_complete = add_keep_alive(Ollama.stream_complete)
Ollama.chat = add_keep_alive(Ollama.chat) # type: ignore
Ollama.stream_chat = add_keep_alive(Ollama.stream_chat) # type: ignore
Ollama.complete = add_keep_alive(Ollama.complete) # type: ignore
Ollama.stream_complete = add_keep_alive(Ollama.stream_complete) # type: ignore

self.llm = llm

Expand Down
8 changes: 5 additions & 3 deletions private_gpt/components/llm/prompt_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ def messages_to_prompt(self, messages: Sequence[ChatMessage]) -> str:
logger.debug("Got for messages='%s' the prompt='%s'", messages, prompt)
return prompt

def completion_to_prompt(self, completion: str) -> str:
def completion_to_prompt(self, prompt: str) -> str:
completion = prompt # Fix: llama-index requires this parameter to be named `prompt`
prompt = self._completion_to_prompt(completion)
logger.debug("Got for completion='%s' the prompt='%s'", completion, prompt)
return prompt
Expand Down Expand Up @@ -285,8 +286,9 @@ def _completion_to_prompt(self, completion: str) -> str:


def get_prompt_style(
prompt_style: Literal["default", "llama2", "llama3", "tag", "mistral", "chatml"]
| None
prompt_style: (
Literal["default", "llama2", "llama3", "tag", "mistral", "chatml"] | None
)
) -> AbstractPromptStyle:
"""Get the prompt style to use from the given string.
Expand Down
5 changes: 3 additions & 2 deletions private_gpt/components/node_store/node_store_component.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,10 @@ def __init__(self, settings: Settings) -> None:

case "postgres":
try:
from llama_index.core.storage.docstore.postgres_docstore import (
from llama_index.storage.docstore.postgres import ( # type: ignore
PostgresDocumentStore,
)
from llama_index.core.storage.index_store.postgres_index_store import (
from llama_index.storage.index_store.postgres import ( # type: ignore
PostgresIndexStore,
)
except ImportError:
Expand All @@ -55,6 +55,7 @@ def __init__(self, settings: Settings) -> None:
self.index_store = PostgresIndexStore.from_params(
**settings.postgres.model_dump(exclude_none=True)
)

self.doc_store = PostgresDocumentStore.from_params(
**settings.postgres.model_dump(exclude_none=True)
)
Expand Down
17 changes: 10 additions & 7 deletions private_gpt/components/vector_store/batched_chroma.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
from collections.abc import Generator
from typing import Any
from collections.abc import Generator, Sequence
from typing import TYPE_CHECKING, Any

from llama_index.core.schema import BaseNode, MetadataMode
from llama_index.core.vector_stores.utils import node_to_metadata_dict
from llama_index.vector_stores.chroma import ChromaVectorStore # type: ignore

if TYPE_CHECKING:
from collections.abc import Mapping


def chunk_list(
lst: list[BaseNode], max_chunk_size: int
) -> Generator[list[BaseNode], None, None]:
lst: Sequence[BaseNode], max_chunk_size: int
) -> Generator[Sequence[BaseNode], None, None]:
"""Yield successive max_chunk_size-sized chunks from lst.
Args:
Expand Down Expand Up @@ -60,7 +63,7 @@ def __init__(
)
self.chroma_client = chroma_client

def add(self, nodes: list[BaseNode], **add_kwargs: Any) -> list[str]:
def add(self, nodes: Sequence[BaseNode], **add_kwargs: Any) -> list[str]:
"""Add nodes to index, batching the insertion to avoid issues.
Args:
Expand All @@ -78,8 +81,8 @@ def add(self, nodes: list[BaseNode], **add_kwargs: Any) -> list[str]:

all_ids = []
for node_chunk in node_chunks:
embeddings = []
metadatas = []
embeddings: list[Sequence[float]] = []
metadatas: list[Mapping[str, Any]] = []
ids = []
documents = []
for node in node_chunk:
Expand Down
15 changes: 11 additions & 4 deletions private_gpt/server/chat/chat_service.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING

from injector import inject, singleton
from llama_index.core.chat_engine import ContextChatEngine, SimpleChatEngine
Expand Down Expand Up @@ -26,6 +27,9 @@
from private_gpt.server.chunks.chunks_service import Chunk
from private_gpt.settings.settings import Settings

if TYPE_CHECKING:
from llama_index.core.postprocessor.types import BaseNodePostprocessor


class Completion(BaseModel):
response: str
Expand Down Expand Up @@ -114,12 +118,15 @@ def _chat_engine(
context_filter=context_filter,
similarity_top_k=self.settings.rag.similarity_top_k,
)
node_postprocessors = [
node_postprocessors: list[BaseNodePostprocessor] = [
MetadataReplacementPostProcessor(target_metadata_key="window"),
SimilarityPostprocessor(
similarity_cutoff=settings.rag.similarity_value
),
]
if settings.rag.similarity_value:
node_postprocessors.append(
SimilarityPostprocessor(
similarity_cutoff=settings.rag.similarity_value
)
)

if settings.rag.rerank.enabled:
rerank_postprocessor = SentenceTransformerRerank(
Expand Down
6 changes: 3 additions & 3 deletions private_gpt/server/recipes/summarize/summarize_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,9 @@ def _summarize(
# Add context documents to summarize
if use_context:
# 1. Recover all ref docs
ref_docs: dict[
str, RefDocInfo
] | None = self.storage_context.docstore.get_all_ref_doc_info()
ref_docs: dict[str, RefDocInfo] | None = (
self.storage_context.docstore.get_all_ref_doc_info()
)
if ref_docs is None:
raise ValueError("No documents have been ingested yet.")

Expand Down
26 changes: 13 additions & 13 deletions private_gpt/settings/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,19 +136,19 @@ class LLMSettings(BaseModel):
0.1,
description="The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual.",
)
prompt_style: Literal[
"default", "llama2", "llama3", "tag", "mistral", "chatml"
] = Field(
"llama2",
description=(
"The prompt style to use for the chat engine. "
"If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
"If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
"If `llama3` - use the llama3 prompt style from the llama_index."
"If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`. \n"
"If `mistral` - use the `mistral prompt style. It shoudl look like <s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]"
"`llama2` is the historic behaviour. `default` might work better with your custom models."
),
prompt_style: Literal["default", "llama2", "llama3", "tag", "mistral", "chatml"] = (
Field(
"llama2",
description=(
"The prompt style to use for the chat engine. "
"If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
"If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
"If `llama3` - use the llama3 prompt style from the llama_index."
"If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`. \n"
"If `mistral` - use the `mistral prompt style. It shoudl look like <s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]"
"`llama2` is the historic behaviour. `default` might work better with your custom models."
),
)
)


Expand Down
1 change: 1 addition & 0 deletions private_gpt/ui/ui.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""This file should be imported if and only if you want to run the UI locally."""

import base64
import logging
import time
Expand Down
93 changes: 42 additions & 51 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,63 +7,54 @@ authors = ["Zylon <hi@zylon.ai>"]
[tool.poetry.dependencies]
python = ">=3.11,<3.12"
# PrivateGPT
fastapi = { extras = ["all"], version = "^0.111.0" }
python-multipart = "^0.0.9"
injector = "^0.21.0"
pyyaml = "^6.0.1"
fastapi = { extras = ["all"], version = "^0.115.0" }
python-multipart = "^0.0.10"
injector = "^0.22.0"
pyyaml = "^6.0.2"
watchdog = "^4.0.1"
transformers = "^4.42.3"
transformers = "^4.44.2"
docx2txt = "^0.8"
cryptography = "^3.1"
# LlamaIndex core libs
llama-index-core = "^0.10.52"
llama-index-readers-file = "^0.1.27"
llama-index-core = ">=0.11.2,<0.12.0"
llama-index-readers-file = "*"
# Optional LlamaIndex integration libs
llama-index-llms-llama-cpp = {version = "^0.1.4", optional = true}
llama-index-llms-openai = {version = "^0.1.25", optional = true}
llama-index-llms-openai-like = {version ="^0.1.3", optional = true}
llama-index-llms-ollama = {version ="^0.2.2", optional = true}
llama-index-llms-azure-openai = {version ="^0.1.8", optional = true}
llama-index-llms-gemini = {version ="^0.1.11", optional = true}
llama-index-embeddings-ollama = {version ="^0.1.2", optional = true}
llama-index-embeddings-huggingface = {version ="^0.2.2", optional = true}
llama-index-embeddings-openai = {version ="^0.1.10", optional = true}
llama-index-embeddings-azure-openai = {version ="^0.1.10", optional = true}
llama-index-embeddings-gemini = {version ="^0.1.8", optional = true}
llama-index-embeddings-mistralai = {version ="^0.1.6", optional = true}
llama-index-vector-stores-qdrant = {version ="^0.2.10", optional = true}
llama-index-vector-stores-milvus = {version ="^0.1.20", optional = true}
llama-index-vector-stores-chroma = {version ="^0.1.10", optional = true}
llama-index-vector-stores-postgres = {version ="^0.1.11", optional = true}
llama-index-vector-stores-clickhouse = {version ="^0.1.3", optional = true}
llama-index-storage-docstore-postgres = {version ="^0.1.3", optional = true}
llama-index-storage-index-store-postgres = {version ="^0.1.4", optional = true}
llama-index-llms-llama-cpp = {version = "*", optional = true}
llama-index-llms-openai = {version ="*", optional = true}
llama-index-llms-openai-like = {version ="*", optional = true}
llama-index-llms-ollama = {version ="*", optional = true}
llama-index-llms-azure-openai = {version ="*", optional = true}
llama-index-llms-gemini = {version ="*", optional = true}
llama-index-embeddings-ollama = {version ="*", optional = true}
llama-index-embeddings-huggingface = {version ="*", optional = true}
llama-index-embeddings-openai = {version ="*", optional = true}
llama-index-embeddings-azure-openai = {version ="*", optional = true}
llama-index-embeddings-gemini = {version ="*", optional = true}
llama-index-embeddings-mistralai = {version ="*", optional = true}
llama-index-vector-stores-qdrant = {version ="*", optional = true}
llama-index-vector-stores-milvus = {version ="*", optional = true}
llama-index-vector-stores-chroma = {version ="*", optional = true}
llama-index-vector-stores-postgres = {version ="*", optional = true}
llama-index-vector-stores-clickhouse = {version ="*", optional = true}
llama-index-storage-docstore-postgres = {version ="*", optional = true}
llama-index-storage-index-store-postgres = {version ="*", optional = true}
# Postgres
psycopg2-binary = {version ="^2.9.9", optional = true}
asyncpg = {version="^0.29.0", optional = true}

# ClickHouse
clickhouse-connect = {version = "^0.7.15", optional = true}
clickhouse-connect = {version = "^0.7.19", optional = true}

# Optional Sagemaker dependency
boto3 = {version ="^1.34.139", optional = true}

# Optional Qdrant client
qdrant-client = {version ="^1.9.0", optional = true}
boto3 = {version ="^1.35.26", optional = true}

# Optional Reranker dependencies
torch = {version ="^2.3.1", optional = true}
sentence-transformers = {version ="^3.0.1", optional = true}
torch = {version ="^2.4.1", optional = true}
sentence-transformers = {version ="^3.1.1", optional = true}

# Optional UI
gradio = {version ="^4.37.2", optional = true}
ffmpy = "0.4.0"

# Optional Google Gemini dependency
google-generativeai = {version ="^0.5.4", optional = true}

# Optional Ollama client
ollama = {version ="^0.3.0", optional = true}
gradio = {version ="^4.44.0", optional = true}
ffmpy = {version ="^0.4.0", optional = true}

# Optional HF Transformers
einops = {version = "^0.8.0", optional = true}
Expand All @@ -74,11 +65,11 @@ ui = ["gradio", "ffmpy"]
llms-llama-cpp = ["llama-index-llms-llama-cpp"]
llms-openai = ["llama-index-llms-openai"]
llms-openai-like = ["llama-index-llms-openai-like"]
llms-ollama = ["llama-index-llms-ollama", "ollama"]
llms-ollama = ["llama-index-llms-ollama"]
llms-sagemaker = ["boto3"]
llms-azopenai = ["llama-index-llms-azure-openai"]
llms-gemini = ["llama-index-llms-gemini", "google-generativeai"]
embeddings-ollama = ["llama-index-embeddings-ollama", "ollama"]
llms-gemini = ["llama-index-llms-gemini"]
embeddings-ollama = ["llama-index-embeddings-ollama"]
embeddings-huggingface = ["llama-index-embeddings-huggingface", "einops"]
embeddings-openai = ["llama-index-embeddings-openai"]
embeddings-sagemaker = ["boto3"]
Expand All @@ -94,14 +85,14 @@ storage-nodestore-postgres = ["llama-index-storage-docstore-postgres","llama-ind
rerank-sentence-transformers = ["torch", "sentence-transformers"]

[tool.poetry.group.dev.dependencies]
black = "^22"
mypy = "^1.2"
pre-commit = "^2"
pytest = "^7"
pytest-cov = "^3"
black = "^24"
mypy = "^1.11"
pre-commit = "^3"
pytest = "^8"
pytest-cov = "^5"
ruff = "^0"
pytest-asyncio = "^0.21.1"
types-pyyaml = "^6.0.12.12"
pytest-asyncio = "^0.24.0"
types-pyyaml = "^6.0.12.20240917"

[build-system]
requires = ["poetry-core>=1.0.0"]
Expand Down
2 changes: 1 addition & 1 deletion tests/fixtures/fast_api_test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from tests.fixtures.mock_injector import MockInjector


@pytest.fixture()
@pytest.fixture
def test_client(request: pytest.FixtureRequest, injector: MockInjector) -> TestClient:
if request is not None and hasattr(request, "param"):
injector.bind_settings(request.param or {})
Expand Down
2 changes: 1 addition & 1 deletion tests/fixtures/ingest_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,6 @@ def ingest_file(self, path: Path) -> IngestResponse:
return ingest_result


@pytest.fixture()
@pytest.fixture
def ingest_helper(test_client: TestClient) -> IngestHelper:
return IngestHelper(test_client)
2 changes: 1 addition & 1 deletion tests/fixtures/mock_injector.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,6 @@ def get(self, interface: type[T]) -> T:
return self.test_injector.get(interface)


@pytest.fixture()
@pytest.fixture
def injector() -> MockInjector:
return MockInjector()
2 changes: 1 addition & 1 deletion tests/server/ingest/test_local_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from fastapi.testclient import TestClient


@pytest.fixture()
@pytest.fixture
def file_path() -> str:
return "test.txt"

Expand Down

0 comments on commit 5851b02

Please sign in to comment.