Revert "models from HF" until checks #64

Merged · 1 commit · May 16, 2023
30 changes: 22 additions & 8 deletions README.md
@@ -45,6 +45,13 @@ for older docker without GUI use `casalioy:latest` might deprecate soon

> Fetch the default models

```
cd models
wget https://huggingface.co/Pi3141/alpaca-native-7B-ggml/resolve/397e872bf4c83f4c642317a5bf65ce84a105786e/ggml-model-q4_0.bin &&
wget https://huggingface.co/eachadea/ggml-vicuna-7b-1.1/resolve/main/ggml-vic7b-q5_1.bin
cd ../
```

> All set! Proceed with ingesting your [dataset](#ingesting-your-own-dataset)

### Build it from source
@@ -67,12 +74,20 @@ pip uninstall -y llama-cpp-python
CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install --force llama-cpp-python
```
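
To check that the rebuilt wheel actually offloads work to the GPU, here is a minimal, hedged sketch (it assumes `llama-cpp-python` was installed as above and the default model file has already been fetched into `./models`):

```python
# Hedged check: load the default model with a few layers offloaded; if the cuBLAS
# build took effect, the llama.cpp load log should report BLAS/GPU support.
from llama_cpp import Llama

llm = Llama(model_path="models/ggml-vic7b-q5_1.bin", n_ctx=1024, n_gpu_layers=4)
print(llm("Hello", max_tokens=8))
```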

> Download the 2 models and place them in a folder called `./models`:

- LLM: defaults to [ggml-vic7b-q5_1](https://huggingface.co/eachadea/ggml-vicuna-7b-1.1/resolve/main/ggml-vic7b-q5_1.bin)
- Embedding: defaults to [ggml-model-q4_0](https://huggingface.co/Pi3141/alpaca-native-7B-ggml/resolve/397e872bf4c83f4c642317a5bf65ce84a105786e/ggml-model-q4_0.bin)

> Edit the example.env to fit your models and rename it to .env

```env
# Generic
MODEL_N_CTX=1024
TEXT_EMBEDDINGS_MODEL=sentence-transformers/all-MiniLM-L6-v2
TEXT_EMBEDDINGS_MODEL=all-MiniLM-L6-v2
TEXT_EMBEDDINGS_MODEL_TYPE=HF # LlamaCpp or HF
USE_MLOCK=true

@@ -84,12 +99,10 @@ INGEST_CHUNK_OVERLAP=50

# Generation
MODEL_TYPE=LlamaCpp # GPT4All or LlamaCpp
MODEL_PATH=eachadea/ggml-vicuna-7b-1.1/ggml-vic7b-q5_1.bin
MODEL_PATH=models/ggml-vic7b-q5_1.bin
MODEL_TEMP=0.8
MODEL_STOP=[STOP]
CHAIN_TYPE=stuff
N_RETRIEVE_DOCUMENTS=100 # How many documents to retrieve from the db
N_FORWARD_DOCUMENTS=6 # How many documents to forward to the LLM, chosen among those retrieved
```

This should look like this
@@ -98,13 +111,14 @@ This should look like this
└── repo
├── startLLM.py
├── casalioy
│ └── ingest.py, load_env.py, startLLM.py, gui.py, ...
│ └── ingest.py, load_env.py, startLLM.py, gui.py, __init__.py
├── source_documents
│ └── sample.csv
│ └── ...
│   └── shor.pdf
│ └── state_of_the_union.txt
├── models
│ ├── ggml-vic7b-q5_1.bin
│ └── ...
│ └── ggml-model-q4_0.bin
└── .env, convert.py, Dockerfile
```

@@ -167,6 +181,7 @@ streamlit run casalioy/gui.py

| Model | BoolQ | PIQA | HellaSwag | WinoGrande | ARC-e | ARC-c | OBQA | Avg. |
|:-------------------------------------------------------------------------------------------------------------------------------------------------|:-----:|:----:|:---------:|:----------:|:-----:|:-----:|:----:|:----:|
| [ggml-vic-7b-uncensored](https://huggingface.co/datasets/dnato/ggjt-v1-vic7b-uncensored-q4_0.bin/resolve/main/ggjt-v1-vic7b-uncensored-q4_0.bin) | 73.4 | 74.8 | 63.4 | 64.7 | 54.9 | 36.0 | 40.2 | 58.2 |
| [GPT4All-13b-snoozy q5](https://huggingface.co/TheBloke/GPT4All-13B-snoozy-GGML/blob/main/GPT4All-13B-snoozy.ggml.q5_1.bin) | 83.3 | 79.2 | 75.0 | 71.3 | 60.9 | 44.2 | 43.4 | 65.3 |

### Models inside the GPT-J ecosphere
@@ -209,7 +224,6 @@ leaving your environment, and with reasonable performance.

<br><br>


# Disclaimer

The contents of this repository are provided "as is" and without warranties of any kind, whether express or implied. We
8 changes: 4 additions & 4 deletions casalioy/ask_libgen.py
@@ -20,12 +20,13 @@
model_temp,
n_gpu_layers,
persist_directory,
print_HTML,
prompt_HTML,
use_mlock,
)
from casalioy.startLLM import QASystem
from casalioy.utils import print_HTML, prompt_HTML

max_doc_size_mb = 5
max_doc_size_mb = 10
out_path = Path("source_documents/libgen")

logging.getLogger().setLevel(logging.WARNING) # because libgenesis changes it
@@ -37,10 +38,9 @@

def load_documents(keyword: str, n: int = 3) -> None:
    """load random documents from LG using keyword"""
    lg = Libgen(result_limit=100)
    lg = Libgen()
    result = asyncio.run(lg.search(keyword))
    dl_N = 0
    print_HTML(f"<r>Searching for interesting documents (max {n})</r>")
    with ProgressBar() as pb:
        for item_id in pb(result):
            if dl_N >= n:
4 changes: 1 addition & 3 deletions casalioy/ingest.py
@@ -20,13 +20,11 @@
UnstructuredPowerPointLoader,
)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from load_env import chunk_overlap, chunk_size, documents_directory, get_embedding_model, persist_directory
from load_env import chunk_overlap, chunk_size, documents_directory, get_embedding_model, persist_directory, print_HTML, prompt_HTML
from prompt_toolkit import PromptSession
from prompt_toolkit.shortcuts import ProgressBar
from qdrant_client import QdrantClient, models

from casalioy.utils import print_HTML, prompt_HTML


class Ingester:
"""ingest documents"""
40 changes: 33 additions & 7 deletions casalioy/load_env.py
@@ -5,8 +5,9 @@
from dotenv import load_dotenv
from langchain.embeddings import HuggingFaceEmbeddings, LlamaCppEmbeddings
from langchain.prompts import PromptTemplate

from casalioy.utils import download_if_repo
from prompt_toolkit import HTML, PromptSession, print_formatted_text
from prompt_toolkit.styles import Style
from pyexpat import ExpatError

load_dotenv()

@@ -29,13 +30,8 @@
model_stop = os.environ.get("MODEL_STOP", "")
model_stop = model_stop.split(",") if model_stop else []
chain_type = os.environ.get("CHAIN_TYPE", "refine")
n_retrieve_documents = int(os.environ.get("N_RETRIEVE_DOCUMENTS", 25))
n_forward_documents = int(os.environ.get("N_FORWARD_DOCUMENTS", 3))
n_gpu_layers = int(os.environ.get("N_GPU_LAYERS", 0))

text_embeddings_model = download_if_repo(text_embeddings_model)
model_path = download_if_repo(model_path)


def get_embedding_model() -> tuple[HuggingFaceEmbeddings | LlamaCppEmbeddings, Callable]:
"""get the text embedding model
@@ -90,3 +86,33 @@ def get_prompt_template_kwargs() -> dict[str, PromptTemplate]:
}
case _:
return {}


style = Style.from_dict(
    {
        "r": "italic gray",  # remark
        "w": "italic yellow",  # warning
        "d": "bold red",  # danger
        "b": "bold",
        "i": "italic",
        "question": "ansicyan",
        "answer": "ansigreen",
        "source": "ansimagenta",
    }
)


def print_HTML(text: str, **kwargs) -> None:
    """print formatted HTML text"""
    try:
        print_formatted_text(HTML(text).format(**kwargs), style=style)
    except (ExpatError, IndexError):
        print(text)


def prompt_HTML(session: PromptSession, prompt: str, **kwargs) -> str:
    """prompt the user with formatted HTML text"""
    try:
        return session.prompt(HTML(prompt).format(**kwargs), style=style)
    except (ExpatError, IndexError):
        print(prompt)
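
For orientation, here is a hedged usage sketch of the helpers relocated into `load_env.py` above. The call shapes mirror the definitions in this diff; the import path and the concrete strings are illustrative, not taken from a specific call site.

```python
# Illustrative only: the import path (load_env vs. casalioy.load_env) depends on how
# the package is run, and the markup tags ("r", "b", ...) map to the style dict above.
from prompt_toolkit import PromptSession

from load_env import print_HTML, prompt_HTML

print_HTML("<r>Searching for interesting documents (max {n})</r>", n=3)
query = prompt_HTML(PromptSession(), "<b>Enter a query:</b> ")
```
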
6 changes: 2 additions & 4 deletions casalioy/startLLM.py
@@ -18,13 +18,12 @@
model_stop,
model_temp,
model_type,
n_forward_documents,
n_gpu_layers,
n_retrieve_documents,
persist_directory,
print_HTML,
prompt_HTML,
use_mlock,
)
from casalioy.utils import print_HTML, prompt_HTML


class QASystem:
@@ -87,7 +86,6 @@ def __init__(
return_source_documents=True,
chain_type_kwargs=get_prompt_template_kwargs(),
)
self.qa.retriever.search_kwargs = {**self.qa.retriever.search_kwargs, "k": n_forward_documents, "fetch_k": n_retrieve_documents}

def prompt_once(self, query: str) -> tuple[str, str]:
"""run a prompt"""
84 changes: 0 additions & 84 deletions casalioy/utils.py

This file was deleted.

8 changes: 3 additions & 5 deletions example.env
@@ -1,6 +1,6 @@
# Generic
MODEL_N_CTX=1024
TEXT_EMBEDDINGS_MODEL=sentence-transformers/all-MiniLM-L6-v2
TEXT_EMBEDDINGS_MODEL=all-MiniLM-L6-v2
TEXT_EMBEDDINGS_MODEL_TYPE=HF # LlamaCpp or HF
USE_MLOCK=true

@@ -12,10 +12,8 @@ INGEST_CHUNK_OVERLAP=50

# Generation
MODEL_TYPE=LlamaCpp # GPT4All or LlamaCpp
MODEL_PATH=eachadea/ggml-vicuna-7b-1.1/ggml-vic7b-q5_1.bin
MODEL_PATH=models/ggml-vic7b-q5_1.bin
MODEL_TEMP=0.8
MODEL_STOP=[STOP]
CHAIN_TYPE=stuff
N_RETRIEVE_DOCUMENTS=100 # How many documents to retrieve from the db
N_FORWARD_DOCUMENTS=6 # How many documents to forward to the LLM, chosen among those retrieved
N_GPU_LAYERS=4
N_GPU_LAYERS=1
1 change: 1 addition & 0 deletions meta.json
@@ -0,0 +1 @@
{"collections": {"db": {"vectors": {"size": 4096, "distance": "Cosine", "hnsw_config": null, "quantization_config": null}, "shard_number": null, "replication_factor": null, "write_consistency_factor": null, "on_disk_payload": null, "hnsw_config": null, "wal_config": null, "optimizers_config": null, "init_from": null, "quantization_config": null}}, "aliases": {}}
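
For reference, a minimal sketch of how a Qdrant collection matching this metadata could be created with `qdrant-client` (the collection name `db` and the vector parameters are read off the JSON above; the local storage path is an assumption):

```python
# Hedged sketch: recreate the collection described by meta.json.
from qdrant_client import QdrantClient, models

client = QdrantClient(path="db")  # local on-disk storage; the path is an assumption
client.recreate_collection(
    collection_name="db",
    vectors_config=models.VectorParams(size=4096, distance=models.Distance.COSINE),
)
```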