From 9c63b79b3558f11154a004aea6f463d499a46ddf Mon Sep 17 00:00:00 2001 From: eliranwong Date: Sat, 28 Dec 2024 15:07:07 +0000 Subject: [PATCH] support custom Azure model names --- setup.py | 2 +- uniquebible/__init__.py | 23 ++++++++++++++++++++++- uniquebible/gui/WorkSpace.py | 7 +++---- uniquebible/latest_changes.txt | 6 ++++++ uniquebible/plugins/menu/Bible Chat.py | 8 +++++++- uniquebible/util/ConfigUtil.py | 3 +++ uniquebible/util/LocalCliHandler.py | 9 ++++----- uniquebible/util/checkup.py | 8 ++++---- uniquebible/util/terminal_text_editor.py | 5 ++--- uniquebible/util/text_editor_checkup.py | 6 +++--- 10 files changed, 55 insertions(+), 22 deletions(-) diff --git a/setup.py b/setup.py index 2f6baa23e5..8d5fbdae92 100644 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ # https://packaging.python.org/en/latest/guides/distributing-packages-using-setuptools/ setup( name=package, - version="0.2.15", + version="0.2.16", python_requires=">=3.8, <3.13", description=f"UniqueBible App is a cross-platform & offline bible application, integrated with high-quality resources and unique features. 
Developers: Eliran Wong and Oliver Tseng", long_description=long_description, diff --git a/uniquebible/__init__.py b/uniquebible/__init__.py index fdc8f6ee33..23f4ac247c 100644 --- a/uniquebible/__init__.py +++ b/uniquebible/__init__.py @@ -135,7 +135,7 @@ def isServerAlive(ip, port): # check latest version of azure api at https://learn.microsoft.com/en-us/azure/ai-services/openai/reference config.azure_api_version = "2024-10-21" -def is_CJK(self, text): +def is_CJK(text): for char in text: if 'CJK' in unicodedata.name(char): return True @@ -214,6 +214,27 @@ def getMistralApi_key() -> str: else: return "" +def getOpenAIClient(): + # priority in order: azure > github > openai + if config.azureApi_key: + return AzureOpenAI(azure_endpoint=re.sub("/models[/]*$", "", config.azureBaseUrl),api_version=config.azure_api_version,api_key=config.azureApi_key) + if config.githubApi_key: + return OpenAI(api_key=getGithubApi_key(),base_url="https://models.inference.ai.azure.com") + return OpenAI() + +def extract_text(filepath): + try: + from markitdown import MarkItDown + filepath = filepath.rstrip() + if os.path.isfile(filepath): + if re.search("(\.jpg|\.jpeg|\.png)$", filepath.lower()): + md = MarkItDown(llm_client=getOpenAIClient(), llm_model="gpt-4o") + else: + md = MarkItDown() + return md.convert(filepath) + except: + return "Install markitdown first!" 
+ def getChatResponse(backend, chatMessages) -> Optional[str]: if not isLLMReady(backend) or not backend in config.llm_backends: return None diff --git a/uniquebible/gui/WorkSpace.py b/uniquebible/gui/WorkSpace.py index 142574fd8f..f7d940e2a5 100755 --- a/uniquebible/gui/WorkSpace.py +++ b/uniquebible/gui/WorkSpace.py @@ -1,4 +1,4 @@ -from uniquebible import config +from uniquebible import config, extract_text import re, os, base64, glob, webbrowser, markdown from datetime import datetime import uniquebible.shortcut as sc @@ -197,11 +197,10 @@ def extractTextFromDocument(self, editable=False): html = config.mainWindow.htmlWrapper(html, True, html=False if fileName.lower().endswith(".md") else True) else: try: - import textract - html = textract.process(fileName).decode() + html = extract_text(fileName) html = config.mainWindow.htmlWrapper(html, True, html=False) except: - self.parent.displayMessage("Optional package 'textract' is not installed!") + self.parent.displayMessage("Optional package 'markitdown' is not installed!") self.addHtmlContent(html, editable, os.path.basename(fileName)) def fixNoteFont(self, note): diff --git a/uniquebible/latest_changes.txt b/uniquebible/latest_changes.txt index f7a8a37f5a..a15a0e709c 100755 --- a/uniquebible/latest_changes.txt +++ b/uniquebible/latest_changes.txt @@ -1,5 +1,11 @@ PIP package: +0.2.16 + +* replace textract package with markitdown + +* support custom model names deployed via Azure service + 0.2.7-0.2.11 * added support of using FREE Github API key diff --git a/uniquebible/plugins/menu/Bible Chat.py b/uniquebible/plugins/menu/Bible Chat.py index 61819d9dfd..5ea6c490fe 100644 --- a/uniquebible/plugins/menu/Bible Chat.py +++ b/uniquebible/plugins/menu/Bible Chat.py @@ -78,12 +78,18 @@ def __init__(self, parent=None): self.apiModelBox = QComboBox() initialIndex = 0 index = 0 - if config.llm_backend in ("openai", "github", "azure"): + if config.llm_backend in ("openai", "github"): for key in ("gpt-4o", 
"gpt-4o-mini"): self.apiModelBox.addItem(key) if key == config.openaiApi_chat_model: initialIndex = index index += 1 + elif config.llm_backend == "azure": + for key in config.azureOpenAIModels: # users can manually change config.azureOpenAIModels to match custom deployed model names + self.apiModelBox.addItem(key) + if key == config.openaiApi_chat_model: + initialIndex = index + index += 1 elif config.llm_backend == "google": for key in ("gemini-2.0-flash-exp", "gemini-1.5-flash", "gemini-1.5-flash-8b", "gemini-1.5-pro"): self.apiModelBox.addItem(key) diff --git a/uniquebible/util/ConfigUtil.py b/uniquebible/util/ConfigUtil.py index c140fe0ffd..49adf8b3b7 100644 --- a/uniquebible/util/ConfigUtil.py +++ b/uniquebible/util/ConfigUtil.py @@ -332,6 +332,9 @@ def updateModules(module, isInstalled): setConfig("azureApi_key", """ # Azure API Key""", "") + setConfig("azureOpenAIModels", """ + # users can manually change config.azureOpenAIModels to match custom model names deployed via Azure service""", + ["gpt-4o", "gpt-4o-mini"]) setConfig("azureBaseUrl", """ # Github API inference endpoint""", "") diff --git a/uniquebible/util/LocalCliHandler.py b/uniquebible/util/LocalCliHandler.py index 6d90b9e964..da8d9778ac 100644 --- a/uniquebible/util/LocalCliHandler.py +++ b/uniquebible/util/LocalCliHandler.py @@ -1,6 +1,6 @@ # flake8: noqa import re, pprint, os, requests, platform, pydoc, markdown, sys, subprocess, json, shutil, webbrowser, traceback, textwrap, wcwidth, unicodedata -from uniquebible import config +from uniquebible import config, extract_text import threading, time #from duckduckgo_search import ddg from functools import partial @@ -2228,12 +2228,11 @@ def downloadyoutube(self): return "" def opentext(self, editMode=False): - if ("Textract" in config.enabled): + if ("Markitdown" in config.enabled): self.print(self.divider) userInput = self.getPath.getFilePath(check_isfile=True, empty_to_cancel=True) if userInput: - import textract - content = 
textract.process(userInput).decode() + content = extract_text(userInput) if editMode: self.multilineEditor(content) return "" @@ -2241,7 +2240,7 @@ def opentext(self, editMode=False): return content else: return self.cancelAction() - self.printToolNotFound("textract") + self.printToolNotFound("markitdown") return "" def printToolNotFound(self, tool): diff --git a/uniquebible/util/checkup.py b/uniquebible/util/checkup.py index 8ed93b58ed..2408295243 100644 --- a/uniquebible/util/checkup.py +++ b/uniquebible/util/checkup.py @@ -238,9 +238,9 @@ def isTranslateInstalled(): except: return False -def isTextractInstalled(): +def isMarkitdownInstalled(): try: - import textract + from markitdown import MarkItDown return True except: return False @@ -743,7 +743,7 @@ def runTerminalMode(): ("word-forms", "Generate English Word Forms", isWordformsInstalled), ("lemmagen3", "Lemmatizer", isLemmagen3Installed), ("chinese-english-lookup", "Chinese-to-English word definition", isChineseEnglishLookupInstalled), - #("textract", "Extract text from document", isTextractInstalled), + ("markitdown", "Extract text from document", isMarkitdownInstalled), ("tabulate", "Pretty-print tabular data", isTabulateInstalled), #("apsw", "Another Python SQLite Wrapper", isApswInstalled), ("pyluach", "Hebrew (Jewish) calendar dates", isPyluachInstalled), @@ -799,7 +799,7 @@ def runTerminalMode(): ("word-forms", "Generate English Word Forms", isWordformsInstalled), ("lemmagen3", "Lemmatizer", isLemmagen3Installed), ("chinese-english-lookup", "Chinese-to-English word definition", isChineseEnglishLookupInstalled), - #("textract", "Extract text from document", isTextractInstalled), + ("markitdown", "Extract text from document", isMarkitdownInstalled), ("tabulate", "Pretty-print tabular data", isTabulateInstalled), #("apsw", "Another Python SQLite Wrapper", isApswInstalled), ("pyluach", "Hebrew (Jewish) calendar dates", isPyluachInstalled), diff --git a/uniquebible/util/terminal_text_editor.py 
b/uniquebible/util/terminal_text_editor.py index 2fd8ee0ebe..a1b2103583 100644 --- a/uniquebible/util/terminal_text_editor.py +++ b/uniquebible/util/terminal_text_editor.py @@ -1,4 +1,4 @@ -from uniquebible import config +from uniquebible import config, extract_text import re, os from uniquebible.util.TextUtil import TextUtil from prompt_toolkit.formatted_text import HTML @@ -685,8 +685,7 @@ def extractFile(self, filepath="", getTextOnly=False): def extractFileText(self, filepath): if os.path.isfile(filepath): - import textract - text = text = textract.process(filepath).decode() + text = extract_text(filepath) # to prevent corrupting original file, users need to specify a file path for saving #self.filepath = filepath self.savedText = text diff --git a/uniquebible/util/text_editor_checkup.py b/uniquebible/util/text_editor_checkup.py index f6a96d2a05..f30ec69746 100644 --- a/uniquebible/util/text_editor_checkup.py +++ b/uniquebible/util/text_editor_checkup.py @@ -72,9 +72,9 @@ def isTranslateInstalled(): return False # optional -def isTextractInstalled(): +def isMarkitdownInstalled(): try: - import textract + from markitdown import MarkItDown return True except: return False @@ -234,7 +234,7 @@ def updateModules(module, isInstalled): ("pyperclip", "Cross-platform clipboard utilities", isPyperclipInstalled), ("Pygments", "Syntax highlighting package", isPygmentsInstalled), ("translate", "Google Translate", isTranslateInstalled), - #("textract", "Extract text from document", isTextractInstalled), + ("markitdown", "Extract text from document", isMarkitdownInstalled), ] for module, feature, isInstalled in optional: checkModule = re.sub("-|_", "", module)