Skip to content

Commit

Permalink
Rework g4f, update Nodejs (#1218)
Browse files Browse the repository at this point in the history
* fixing browser

* fix syntax

* try other container as base

* use node 20

* use layers

* fix chromium

* add chromium sandbox and env vars

* add chromium flags

* clean up

* revert to a previous version of g4f

* use client

* use asyncio

* revert to previous version

* switching retry method

* use str

* change defaults

* more changes to default models

* improve rotation

* improve logic

* select model

* fix err

* fix file path

* fix defaults

* improve logic

* add logging, remove provider

* skip if provider models is empty

* remove provider
  • Loading branch information
Josh-XT authored Jun 25, 2024
1 parent 1aa8fce commit 24cb5ba
Show file tree
Hide file tree
Showing 8 changed files with 159 additions and 46 deletions.
20 changes: 14 additions & 6 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,22 @@ ENV PYTHONUNBUFFERED=1 \
HNSWLIB_NO_NATIVE=1 \
PATH="/usr/local/bin:$PATH" \
LD_PRELOAD=libgomp.so.1 \
LD_LIBRARY_PATH="/usr/local/lib64/:$LD_LIBRARY_PATH"
LD_LIBRARY_PATH="/usr/local/lib64/:$LD_LIBRARY_PATH" \
DEBIAN_FRONTEND=noninteractive \
CHROME_BIN=/usr/bin/chromium \
CHROMIUM_PATH=/usr/bin/chromium \
CHROMIUM_FLAGS="--no-sandbox"

# Install system packages
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt-get update --fix-missing ; \
apt-get upgrade -y ; \
curl -sL https://deb.nodesource.com/setup_14.x | bash - ; \
apt-get install -y --fix-missing --no-install-recommends git build-essential gcc g++ sqlite3 libsqlite3-dev wget libgomp1 ffmpeg python3 python3-pip python3-dev curl postgresql-client libnss3 libnspr4 libatk1.0-0 libatk-bridge2.0-0 libcups2 libatspi2.0-0 libxcomposite1 nodejs libportaudio2 libasound-dev libreoffice unoconv poppler-utils && \
apt-get update --fix-missing && \
apt-get upgrade -y && \
curl -sL https://deb.nodesource.com/setup_20.x | bash - && \
apt-get install -y --fix-missing --no-install-recommends \
git build-essential gcc g++ sqlite3 libsqlite3-dev wget libgomp1 ffmpeg \
python3 python3-pip python3-dev curl postgresql-client libnss3 libnspr4 \
libatk1.0-0 libatk-bridge2.0-0 libcups2 libatspi2.0-0 libxcomposite1 nodejs \
libportaudio2 libasound-dev libreoffice unoconv poppler-utils chromium chromium-sandbox && \
apt-get install -y gcc-10 g++-10 && \
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 10 && \
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 10 && \
Expand Down Expand Up @@ -64,4 +72,4 @@ COPY . .

WORKDIR /agixt
EXPOSE 7437
ENTRYPOINT ["sh", "-c", "./launch-backend.sh"]
ENTRYPOINT ["sh", "-c", "./launch-backend.sh"]
2 changes: 1 addition & 1 deletion agixt/Globals.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def get_tokens(text: str) -> int:
"translation_provider": "default",
"image_provider": "None",
"vision_provider": "None",
"AI_MODEL": "gemini-pro",
"AI_MODEL": "gpt-3.5-turbo",
"AI_TEMPERATURE": "0.7",
"AI_TOP_P": "1",
"MAX_TOKENS": "4096",
Expand Down
2 changes: 1 addition & 1 deletion agixt/Interactions.py
Original file line number Diff line number Diff line change
Expand Up @@ -628,7 +628,7 @@ async def run(
try:
search_suggestions = json.loads(search_strings)
except:
keywords = extract_keywords(text=search_string, limit=5)
keywords = extract_keywords(text=str(search_strings), limit=5)
if keywords:
search_string = " ".join(keywords)
# add month and year to the end of the search string
Expand Down
2 changes: 0 additions & 2 deletions agixt/Memories.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,8 +345,6 @@ async def write_text_to_memory(
documents=chunk,
)
except:
logging.warning(f"Error writing to memory: {chunk}")
# Try again 5 times before giving up
self.failures += 1
for i in range(5):
try:
Expand Down
9 changes: 5 additions & 4 deletions agixt/XT.py
Original file line number Diff line number Diff line change
Expand Up @@ -847,8 +847,9 @@ async def learn_from_file(
)
else:
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
if os.path.normpath(file_path).startswith(self.agent_workspace):
with open(file_path, "r") as f:
fp = os.path.normpath(file_path)
if fp.startswith(self.agent_workspace):
with open(fp, "r") as f:
file_content = f.read()
# Check how many lines are in the file content
lines = file_content.split("\n")
Expand All @@ -857,13 +858,13 @@ async def learn_from_file(
await file_reader.write_text_to_memory(
user_input=user_input,
text=f"Content from file uploaded named `{file_name}` at {timestamp} on line number {line_number + 1}:\n{line}",
external_source=f"file {file_path}",
external_source=f"file {fp}",
)
else:
await file_reader.write_text_to_memory(
user_input=user_input,
text=f"Content from file uploaded named `{file_name}` at {timestamp}:\n{file_content}",
external_source=f"file {file_path}",
external_source=f"file {fp}",
)
response = f"Read [{file_name}]({file_url}) into memory."
else:
Expand Down
2 changes: 1 addition & 1 deletion agixt/agents/gpt4free/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"embeddings_provider": "default",
"image_provider": "None",
"vision_provider": "None",
"AI_MODEL": "gemini-pro",
"AI_MODEL": "gpt-3.5-turbo",
"AI_TEMPERATURE": "0.4",
"MAX_TOKENS": "32000",
"embedder": "default",
Expand Down
157 changes: 132 additions & 25 deletions agixt/providers/gpt4free.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,146 @@
from g4f.client import AsyncClient
import logging
import asyncio
import random
from g4f.Provider import (
HuggingChat,
ChatgptAi,
DeepInfra,
ChatBase,
Liaobots,
FreeGpt,
GptGo,
Gpt6,
)


class Gpt4freeProvider:
def __init__(self, AI_MODEL: str = "gemini-pro", **kwargs):
self.requirements = ["g4f"]
self.AI_MODEL = AI_MODEL if AI_MODEL else "gemini-pro"
def __init__(self, AI_MODEL: str = "gpt-3.5-turbo", **kwargs):
self.requirements = ["g4f"] # Breaking changes were made after g4f v0.2.6.2
self.AI_MODEL = AI_MODEL if AI_MODEL else "gpt-3.5-turbo"
self.provider = ChatgptAi
self.provider_name = "ChatgptAi"
self.providers = [
{
"name": "HuggingChat",
"class": HuggingChat,
"models": [
"mistralai/Mixtral-8x7B-Instruct-v0.1",
"mistralai/Mistral-7B-Instruct-v0.1",
"openchat/openchat_3.5",
"meta-llama/Llama-2-70b-chat-hf",
],
},
{
"name": "ChatgptAi",
"class": ChatgptAi,
"models": [
"gpt-3.5-turbo",
],
},
{
"name": "DeepInfra",
"class": DeepInfra,
"models": [
"meta-llama/Meta-Llama-3-70B-Instruct",
"Qwen/Qwen2-72B-Instruct",
],
},
{
"name": "ChatBase",
"class": ChatBase,
"models": [
"gpt-3.5-turbo",
],
},
{
"name": "Liaobots",
"class": Liaobots,
"models": [
"gpt-4",
],
},
{
"name": "FreeGpt",
"class": FreeGpt,
"models": [
"gpt-3.5-turbo",
],
},
{
"name": "GptGo",
"class": GptGo,
"models": [
"gpt-3.5-turbo",
],
},
{
"name": "Gpt6",
"class": Gpt6,
"models": [
"gpt-3.5-turbo",
],
},
]
self.failures = []

@staticmethod
def services():
return ["llm"]

async def inference(self, prompt, tokens: int = 0, images: list = []):
asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
client = AsyncClient()
models = ["gemini-pro", "gpt-4-turbo", "gpt-4", "mixtral-8x7b", "mistral-7b"]
logging.info(
f"[Gpt4Free] Using provider: {self.provider_name} with model: {self.AI_MODEL}"
)
try:
response = await client.chat.completions.create(
model=self.AI_MODEL,
messages=[{"role": "user", "content": prompt}],
stream=False,
)
return str(response.choices[0].message.content)
except Exception as e:
logging.warning(f"gpt4free API Error: {e}")
for model in models:
try:
response = await client.chat.completions.create(
model=model,
return (
await asyncio.gather(
self.provider.create_async(
model=self.AI_MODEL,
messages=[{"role": "user", "content": prompt}],
stream=False,
)
return str(response.choices[0].message.content)
except Exception as e:
logging.warning(f"gpt4free API Error: {e}")
continue
return "Unable to retrieve a response from the gpt4free provider."
)
)[0]
except Exception as e:
logging.error(f"[Gpt4Free] {e}")
self.failures.append(
{"provider": self.provider_name, "model": self.AI_MODEL}
)
if len(self.failures) < len(self.providers):
available_providers = self.get_available_providers()
if available_providers:
provider = random.choice(available_providers)
self.provider = provider["class"]
self.provider_name = provider["name"]
self.AI_MODEL = random.choice(provider["models"])
logging.info(
f"[Gpt4Free] Switching to provider: {self.provider_name} with model: {self.AI_MODEL}"
)
return await self.inference(
prompt=prompt, tokens=tokens, images=images
)
else:
return "No available providers. Unable to retrieve response."
else:
return "All providers exhausted. Unable to retrieve response."

def get_available_providers(self):
available_providers = []
for provider in self.providers:
provider_models = provider["models"]
if not isinstance(provider_models, list):
provider_models = [provider_models]
# Remove any models that have failed
available_models = [
model
for model in provider_models
if not any(
failure["provider"] == provider["name"]
and failure["model"] == model
for failure in self.failures
)
]
if available_models:
provider_copy = provider.copy()
provider_copy["models"] = available_models
available_providers.append(provider_copy)
return available_providers
11 changes: 5 additions & 6 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,11 @@ google-api-python-client==2.125.0
google-auth-oauthlib
python-multipart==0.0.9
nest_asyncio
g4f==0.3.2.0
g4f==0.2.6.2
pyotp
pytz
openpyxl==3.1.4
nodriver
undetected-chromedriver
selenium-wire
pywebview
plyer
curl_cffi
PyExecJS
undetected_chromedriver
platformdirs

0 comments on commit 24cb5ba

Please sign in to comment.