diff --git a/README.md b/README.md
index 1ebc389..f66861c 100644
--- a/README.md
+++ b/README.md
@@ -45,6 +45,13 @@ for older docker without GUI use `casalioy:latest` might deprecate soon
> Fetch the default models
+```
+cd models
+wget https://huggingface.co/Pi3141/alpaca-native-7B-ggml/resolve/397e872bf4c83f4c642317a5bf65ce84a105786e/ggml-model-q4_0.bin &&
+wget https://huggingface.co/eachadea/ggml-vicuna-7b-1.1/resolve/main/ggml-vic7b-q5_1.bin
+cd ../
+```
+
> All set! Proceed with ingesting your [dataset](#ingesting-your-own-dataset)
### Build it from source
@@ -67,12 +74,20 @@ pip uninstall -y llama-cpp-python
CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install --force llama-cpp-python
```
+> Download the 2 models and place them in a folder called `./models`:
+
+- LLM: default
+ is [ggml-vic7b-q5_1](https://huggingface.co/eachadea/ggml-vicuna-7b-1.1/resolve/main/ggml-vic7b-q5_1.bin)
+- Embedding: default
+ is [ggml-model-q4_0](https://huggingface.co/Pi3141/alpaca-native-7B-ggml/resolve/397e872bf4c83f4c642317a5bf65ce84a105786e/ggml-model-q4_0.bin).
+
> > Edit the example.env to fit your models and rename it to .env
```env
# Generic
+# Generic
MODEL_N_CTX=1024
-TEXT_EMBEDDINGS_MODEL=sentence-transformers/all-MiniLM-L6-v2
+TEXT_EMBEDDINGS_MODEL=all-MiniLM-L6-v2
TEXT_EMBEDDINGS_MODEL_TYPE=HF # LlamaCpp or HF
USE_MLOCK=true
@@ -84,12 +99,10 @@ INGEST_CHUNK_OVERLAP=50
# Generation
MODEL_TYPE=LlamaCpp # GPT4All or LlamaCpp
-MODEL_PATH=eachadea/ggml-vicuna-7b-1.1/ggml-vic7b-q5_1.bin
+MODEL_PATH=models/ggml-vic7b-q5_1.bin
MODEL_TEMP=0.8
MODEL_STOP=[STOP]
CHAIN_TYPE=stuff
-N_RETRIEVE_DOCUMENTS=100 # How many documents to retrieve from the db
-N_FORWARD_DOCUMENTS=6 # How many documents to forward to the LLM, chosen among those retrieved
```
This should look like this
@@ -98,13 +111,14 @@ This should look like this
└── repo
├── startLLM.py
├── casalioy
- │ └── ingest.py, load_env.py, startLLM.py, gui.py, ...
+ │ └── ingest.py, load_env.py, startLLM.py, gui.py, __init__.py
├── source_documents
│ └── sample.csv
- │ └── ...
+ │ └── shor.pdf
+ │ └── state_of_the_union.txt
├── models
│ ├── ggml-vic7b-q5_1.bin
- │ └── ...
+ │ └── ggml-model-q4_0.bin
└── .env, convert.py, Dockerfile
```
@@ -167,6 +181,7 @@ streamlit run casalioy/gui.py
| Model | BoolQ | PIQA | HellaSwag | WinoGrande | ARC-e | ARC-c | OBQA | Avg. |
|:-------------------------------------------------------------------------------------------------------------------------------------------------|:-----:|:----:|:---------:|:----------:|:-----:|:-----:|:----:|:----:|
+| [ggml-vic-7b-uncensored](https://huggingface.co/datasets/dnato/ggjt-v1-vic7b-uncensored-q4_0.bin/resolve/main/ggjt-v1-vic7b-uncensored-q4_0.bin) | 73.4 | 74.8 | 63.4 | 64.7 | 54.9 | 36.0 | 40.2 | 58.2 |
| [GPT4All-13b-snoozy q5](https://huggingface.co/TheBloke/GPT4All-13B-snoozy-GGML/blob/main/GPT4All-13B-snoozy.ggml.q5_1.bin) | 83.3 | 79.2 | 75.0 | 71.3 | 60.9 | 44.2 | 43.4 | 65.3 |
### models inside of the GPT-J ecosphere
@@ -209,7 +224,6 @@ leaving your environment, and with reasonable performance.
-
# Disclaimer
The contents of this repository are provided "as is" and without warranties of any kind, whether express or implied. We
diff --git a/casalioy/ask_libgen.py b/casalioy/ask_libgen.py
index 3d634aa..17088d0 100644
--- a/casalioy/ask_libgen.py
+++ b/casalioy/ask_libgen.py
@@ -20,12 +20,13 @@
model_temp,
n_gpu_layers,
persist_directory,
+ print_HTML,
+ prompt_HTML,
use_mlock,
)
from casalioy.startLLM import QASystem
-from casalioy.utils import print_HTML, prompt_HTML
-max_doc_size_mb = 5
+max_doc_size_mb = 10
out_path = Path("source_documents/libgen")
logging.getLogger().setLevel(logging.WARNING) # because libgenesis changes it
@@ -37,10 +38,9 @@
def load_documents(keyword: str, n: int = 3) -> None:
"""load random documents from LG using keyword"""
- lg = Libgen(result_limit=100)
+ lg = Libgen()
result = asyncio.run(lg.search(keyword))
dl_N = 0
- print_HTML(f"Searching for interesting documents (max {n})")
with ProgressBar() as pb:
for item_id in pb(result):
if dl_N >= n:
diff --git a/casalioy/ingest.py b/casalioy/ingest.py
index 358229b..49543ae 100644
--- a/casalioy/ingest.py
+++ b/casalioy/ingest.py
@@ -20,13 +20,11 @@
UnstructuredPowerPointLoader,
)
from langchain.text_splitter import RecursiveCharacterTextSplitter
-from load_env import chunk_overlap, chunk_size, documents_directory, get_embedding_model, persist_directory
+from load_env import chunk_overlap, chunk_size, documents_directory, get_embedding_model, persist_directory, print_HTML, prompt_HTML
from prompt_toolkit import PromptSession
from prompt_toolkit.shortcuts import ProgressBar
from qdrant_client import QdrantClient, models
-from casalioy.utils import print_HTML, prompt_HTML
-
class Ingester:
"""ingest documents"""
diff --git a/casalioy/load_env.py b/casalioy/load_env.py
index 8accde1..88021e2 100644
--- a/casalioy/load_env.py
+++ b/casalioy/load_env.py
@@ -5,8 +5,9 @@
from dotenv import load_dotenv
from langchain.embeddings import HuggingFaceEmbeddings, LlamaCppEmbeddings
from langchain.prompts import PromptTemplate
-
-from casalioy.utils import download_if_repo
+from prompt_toolkit import HTML, PromptSession, print_formatted_text
+from prompt_toolkit.styles import Style
+from pyexpat import ExpatError
load_dotenv()
@@ -29,13 +30,8 @@
model_stop = os.environ.get("MODEL_STOP", "")
model_stop = model_stop.split(",") if model_stop else []
chain_type = os.environ.get("CHAIN_TYPE", "refine")
-n_retrieve_documents = int(os.environ.get("N_RETRIEVE_DOCUMENTS", 25))
-n_forward_documents = int(os.environ.get("N_FORWARD_DOCUMENTS", 3))
n_gpu_layers = int(os.environ.get("N_GPU_LAYERS", 0))
-text_embeddings_model = download_if_repo(text_embeddings_model)
-model_path = download_if_repo(model_path)
-
def get_embedding_model() -> tuple[HuggingFaceEmbeddings | LlamaCppEmbeddings, Callable]:
"""get the text embedding model
@@ -90,3 +86,33 @@ def get_prompt_template_kwargs() -> dict[str, PromptTemplate]:
}
case _:
return {}
+
+
+style = Style.from_dict(  # shared by print_HTML and prompt_HTML via style=style
+ {
+ "r": "italic gray", # remark
+ "w": "italic yellow", # warning
+ "d": "bold red", # danger
+ "b": "bold",
+ "i": "italic",
+ "question": "ansicyan",
+ "answer": "ansigreen",
+ "source": "ansimagenta",
+ }
+)
+
+
+def print_HTML(text: str, **kwargs) -> None:
+ """print text as prompt_toolkit HTML with kwargs formatted in; on ExpatError/IndexError print the raw text instead"""
+ try:
+ print_formatted_text(HTML(text).format(**kwargs), style=style)
+ except (ExpatError, IndexError):
+ print(text)
+
+
+def prompt_HTML(session: PromptSession, prompt: str, **kwargs) -> str:
+    """prompt the user with formatted HTML text and return the answer; on ExpatError/IndexError fall back to plain input()"""
+    try:
+        return session.prompt(HTML(prompt).format(**kwargs), style=style)
+    except (ExpatError, IndexError):
+        return input(prompt)  # still read and return an answer instead of printing and returning None
diff --git a/casalioy/startLLM.py b/casalioy/startLLM.py
index daf439e..38ab3b7 100644
--- a/casalioy/startLLM.py
+++ b/casalioy/startLLM.py
@@ -18,13 +18,12 @@
model_stop,
model_temp,
model_type,
- n_forward_documents,
n_gpu_layers,
- n_retrieve_documents,
persist_directory,
+ print_HTML,
+ prompt_HTML,
use_mlock,
)
-from casalioy.utils import print_HTML, prompt_HTML
class QASystem:
@@ -87,7 +86,6 @@ def __init__(
return_source_documents=True,
chain_type_kwargs=get_prompt_template_kwargs(),
)
- self.qa.retriever.search_kwargs = {**self.qa.retriever.search_kwargs, "k": n_forward_documents, "fetch_k": n_retrieve_documents}
def prompt_once(self, query: str) -> tuple[str, str]:
"""run a prompt"""
diff --git a/casalioy/utils.py b/casalioy/utils.py
deleted file mode 100644
index 39ac155..0000000
--- a/casalioy/utils.py
+++ /dev/null
@@ -1,84 +0,0 @@
-"""some useful functions"""
-from pathlib import Path
-
-from huggingface_hub import snapshot_download
-from huggingface_hub.utils import HFValidationError, validate_repo_id
-from prompt_toolkit import HTML, PromptSession, print_formatted_text
-from prompt_toolkit.styles import Style
-from pyexpat import ExpatError
-from requests import HTTPError
-
-style = Style.from_dict(
- {
- "r": "italic gray", # remark
- "w": "italic yellow", # warning
- "d": "bold red", # danger
- "b": "bold",
- "i": "italic",
- "question": "ansicyan",
- "answer": "ansigreen",
- "source": "ansimagenta",
- }
-)
-
-
-def print_HTML(text: str, **kwargs) -> None:
- """print formatted HTML text"""
- try:
- for k, v in kwargs.items(): # necessary
- kwargs[k] = str(v).replace("\f", "")
- text = text.replace("\f", "")
- print_formatted_text(HTML(text).format(**kwargs), style=style)
- except ExpatError:
- print(text)
-
-
-def prompt_HTML(session: PromptSession, prompt: str, **kwargs) -> str:
- """print formatted HTML text"""
- try:
- for k, v in kwargs.items(): # necessary
- kwargs[k] = str(v).replace("\f", "")
- prompt = prompt.replace("\f", "")
- return session.prompt(HTML(prompt).format(**kwargs), style=style)
- except ExpatError:
- return input(prompt)
-
-
-def download_if_repo(path: str, file: str = None, allow_patterns: str | list[str] = None) -> str:
- """download model from HF if not local"""
- if allow_patterns is None:
- allow_patterns = ["*.bin", "*.json"]
-
- p = Path(path)
- if p.is_file() or p.is_dir():
- return str(p)
- try:
- split = path.split("/")
- is_dataset = split[0] == "datasets"
- if is_dataset:
- split = split[1:]
- path = "/".join(split)
-
- if path.endswith(".bin"):
- path, file = "/".join(split[: 3 if is_dataset else 2]), split[-1]
- validate_repo_id(path)
- print_HTML("Downloading {model} from HF", model=path)
- new_path = Path(
- snapshot_download(
- repo_id=path,
- allow_patterns=file or allow_patterns,
- local_dir=f"models/{path}",
- repo_type="dataset" if is_dataset else None,
- local_dir_use_symlinks=False,
- )
- )
- if file is not None:
- files = [f for f in new_path.iterdir() if f.is_file() and f.name.endswith(".bin")]
- if len(files) > 1:
- names = "\n".join([f" - {f.name}" for f in files])
- raise ValueError(f"Multiple model files found: \n\n{names}\n\n")
- new_path = files[0]
- return str(new_path.resolve())
-
- except (HFValidationError, HTTPError) as e:
- print_HTML("Could not download model {model} from HF: {e}", model=path, e=e)
diff --git a/example.env b/example.env
index 9aa6465..5a7d07a 100644
--- a/example.env
+++ b/example.env
@@ -1,6 +1,6 @@
# Generic
MODEL_N_CTX=1024
-TEXT_EMBEDDINGS_MODEL=sentence-transformers/all-MiniLM-L6-v2
+TEXT_EMBEDDINGS_MODEL=all-MiniLM-L6-v2
TEXT_EMBEDDINGS_MODEL_TYPE=HF # LlamaCpp or HF
USE_MLOCK=true
@@ -12,10 +12,8 @@ INGEST_CHUNK_OVERLAP=50
# Generation
MODEL_TYPE=LlamaCpp # GPT4All or LlamaCpp
-MODEL_PATH=eachadea/ggml-vicuna-7b-1.1/ggml-vic7b-q5_1.bin
+MODEL_PATH=models/ggml-vic7b-q5_1.bin
MODEL_TEMP=0.8
MODEL_STOP=[STOP]
CHAIN_TYPE=stuff
-N_RETRIEVE_DOCUMENTS=100 # How many documents to retrieve from the db
-N_FORWARD_DOCUMENTS=6 # How many documents to forward to the LLM, chosen among those retrieved
-N_GPU_LAYERS=4
+N_GPU_LAYERS=1
diff --git a/meta.json b/meta.json
new file mode 100644
index 0000000..5af20a4
--- /dev/null
+++ b/meta.json
@@ -0,0 +1 @@
+{"collections": {"db": {"vectors": {"size": 4096, "distance": "Cosine", "hnsw_config": null, "quantization_config": null}, "shard_number": null, "replication_factor": null, "write_consistency_factor": null, "on_disk_payload": null, "hnsw_config": null, "wal_config": null, "optimizers_config": null, "init_from": null, "quantization_config": null}}, "aliases": {}}