Skip to content

Commit

Permalink
support custom Azure model names
Browse files Browse the repository at this point in the history
  • Loading branch information
eliranwong committed Dec 28, 2024
1 parent fed9745 commit 9c63b79
Show file tree
Hide file tree
Showing 10 changed files with 55 additions and 22 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
# https://packaging.python.org/en/latest/guides/distributing-packages-using-setuptools/
setup(
name=package,
version="0.2.15",
version="0.2.16",
python_requires=">=3.8, <3.13",
description=f"UniqueBible App is a cross-platform & offline bible application, integrated with high-quality resources and unique features. Developers: Eliran Wong and Oliver Tseng",
long_description=long_description,
Expand Down
23 changes: 22 additions & 1 deletion uniquebible/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def isServerAlive(ip, port):
# check latest version of azure api at https://learn.microsoft.com/en-us/azure/ai-services/openai/reference
config.azure_api_version = "2024-10-21"

def is_CJK(self, text):
def is_CJK(text):
for char in text:
if 'CJK' in unicodedata.name(char):
return True
Expand Down Expand Up @@ -214,6 +214,27 @@ def getMistralApi_key() -> str:
else:
return ""

def getOpenAIClient():
    """Return an OpenAI-compatible client for the first configured provider.

    Providers are checked in priority order: Azure, then GitHub, and
    finally a plain ``OpenAI`` client constructed with no arguments.
    """
    azure_key = config.azureApi_key
    if azure_key:
        # The Azure endpoint is the configured base URL without a trailing "/models" segment.
        endpoint = re.sub("/models[/]*$", "", config.azureBaseUrl)
        return AzureOpenAI(
            azure_endpoint=endpoint,
            api_version=config.azure_api_version,
            api_key=azure_key,
        )
    if not config.githubApi_key:
        # Neither Azure nor GitHub is configured.
        return OpenAI()
    return OpenAI(
        api_key=getGithubApi_key(),
        base_url="https://models.inference.ai.azure.com",
    )

def extract_text(filepath):
    """Extract the text content of a document with the optional ``markitdown`` package.

    Image files (``.jpg``, ``.jpeg``, ``.png``) are converted with an LLM
    client obtained from ``getOpenAIClient()``; every other file type uses
    a plain ``MarkItDown`` converter.

    Args:
        filepath: Path of the file to convert. Trailing whitespace is
            stripped before the path is checked.

    Returns:
        The extracted text as a string. If ``markitdown`` cannot be
        imported, the string ``"Install markitdown first!"`` is returned.
        If the conversion itself fails, a string describing the failure is
        returned. ``None`` is returned when ``filepath`` is not an existing
        file.
    """
    try:
        from markitdown import MarkItDown
    except ImportError:
        # Only a missing package should produce the "install" hint.
        return "Install markitdown first!"
    try:
        filepath = filepath.rstrip()
        if not os.path.isfile(filepath):
            return None
        # str.endswith with a tuple avoids the non-raw regex escapes ("\.")
        # that trigger a SyntaxWarning on newer Python versions.
        if filepath.lower().endswith((".jpg", ".jpeg", ".png")):
            md = MarkItDown(llm_client=getOpenAIClient(), llm_model="gpt-4o")
        else:
            md = MarkItDown()
        # convert() returns a result object; callers expect the plain text.
        return md.convert(filepath).text_content
    except Exception as e:
        # Best-effort: report the real failure instead of blaming a missing package.
        return f"Failed to extract text from {filepath!r}: {e}"

def getChatResponse(backend, chatMessages) -> Optional[str]:
if not isLLMReady(backend) or not backend in config.llm_backends:
return None
Expand Down
7 changes: 3 additions & 4 deletions uniquebible/gui/WorkSpace.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from uniquebible import config
from uniquebible import config, extract_text
import re, os, base64, glob, webbrowser, markdown
from datetime import datetime
import uniquebible.shortcut as sc
Expand Down Expand Up @@ -197,11 +197,10 @@ def extractTextFromDocument(self, editable=False):
html = config.mainWindow.htmlWrapper(html, True, html=False if fileName.lower().endswith(".md") else True)
else:
try:
import textract
html = textract.process(fileName).decode()
html = extract_text(fileName)
html = config.mainWindow.htmlWrapper(html, True, html=False)
except:
self.parent.displayMessage("Optional package 'textract' is not installed!")
self.parent.displayMessage("Optional package 'markitdown' is not installed!")
self.addHtmlContent(html, editable, os.path.basename(fileName))

def fixNoteFont(self, note):
Expand Down
6 changes: 6 additions & 0 deletions uniquebible/latest_changes.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
PIP package:

0.2.16

* replace textract package with markitdown

* support custom model names deployed via Azure service

0.2.7-0.2.11

* added support of using FREE Github API key
Expand Down
8 changes: 7 additions & 1 deletion uniquebible/plugins/menu/Bible Chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,18 @@ def __init__(self, parent=None):
self.apiModelBox = QComboBox()
initialIndex = 0
index = 0
if config.llm_backend in ("openai", "github", "azure"):
if config.llm_backend in ("openai", "github"):
for key in ("gpt-4o", "gpt-4o-mini"):
self.apiModelBox.addItem(key)
if key == config.openaiApi_chat_model:
initialIndex = index
index += 1
elif config.llm_backend == "azure":
for key in config.azureOpenAIModels: # users can manually change config.azureOpenAIModels to match custom deployed model names
self.apiModelBox.addItem(key)
if key == config.openaiApi_chat_model:
initialIndex = index
index += 1
elif config.llm_backend == "google":
for key in ("gemini-2.0-flash-exp", "gemini-1.5-flash", "gemini-1.5-flash-8b", "gemini-1.5-pro"):
self.apiModelBox.addItem(key)
Expand Down
3 changes: 3 additions & 0 deletions uniquebible/util/ConfigUtil.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,9 @@ def updateModules(module, isInstalled):
setConfig("azureApi_key", """
# Azure API Key""",
"")
setConfig("azureOpenAIModels", """
# users can manually change config.azureOpenAIModels to match custom model names deployed via Azure service""",
["gpt-4o", "gpt-4o-mini"])
setConfig("azureBaseUrl", """
# Github API inference endpoint""",
"")
Expand Down
9 changes: 4 additions & 5 deletions uniquebible/util/LocalCliHandler.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# flake8: noqa
import re, pprint, os, requests, platform, pydoc, markdown, sys, subprocess, json, shutil, webbrowser, traceback, textwrap, wcwidth, unicodedata
from uniquebible import config
from uniquebible import config, extract_text
import threading, time
#from duckduckgo_search import ddg
from functools import partial
Expand Down Expand Up @@ -2228,20 +2228,19 @@ def downloadyoutube(self):
return ""

def opentext(self, editMode=False):
if ("Textract" in config.enabled):
if ("Markitdown" in config.enabled):
self.print(self.divider)
userInput = self.getPath.getFilePath(check_isfile=True, empty_to_cancel=True)
if userInput:
import textract
content = textract.process(userInput).decode()
content = extract_text(userInput)
if editMode:
self.multilineEditor(content)
return ""
else:
return content
else:
return self.cancelAction()
self.printToolNotFound("textract")
self.printToolNotFound("markitdown")
return ""

def printToolNotFound(self, tool):
Expand Down
8 changes: 4 additions & 4 deletions uniquebible/util/checkup.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,9 +238,9 @@ def isTranslateInstalled():
except:
return False

def isTextractInstalled():
def isMarkitdownInstalled():
try:
import textract
from markitdown import MarkItDown
return True
except:
return False
Expand Down Expand Up @@ -743,7 +743,7 @@ def runTerminalMode():
("word-forms", "Generate English Word Forms", isWordformsInstalled),
("lemmagen3", "Lemmatizer", isLemmagen3Installed),
("chinese-english-lookup", "Chinese-to-English word definition", isChineseEnglishLookupInstalled),
#("textract", "Extract text from document", isTextractInstalled),
("markitdown", "Extract text from document", isMarkitdownInstalled),
("tabulate", "Pretty-print tabular data", isTabulateInstalled),
#("apsw", "Another Python SQLite Wrapper", isApswInstalled),
("pyluach", "Hebrew (Jewish) calendar dates", isPyluachInstalled),
Expand Down Expand Up @@ -799,7 +799,7 @@ def runTerminalMode():
("word-forms", "Generate English Word Forms", isWordformsInstalled),
("lemmagen3", "Lemmatizer", isLemmagen3Installed),
("chinese-english-lookup", "Chinese-to-English word definition", isChineseEnglishLookupInstalled),
#("textract", "Extract text from document", isTextractInstalled),
("markitdown", "Extract text from document", isMarkitdownInstalled),
("tabulate", "Pretty-print tabular data", isTabulateInstalled),
#("apsw", "Another Python SQLite Wrapper", isApswInstalled),
("pyluach", "Hebrew (Jewish) calendar dates", isPyluachInstalled),
Expand Down
5 changes: 2 additions & 3 deletions uniquebible/util/terminal_text_editor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from uniquebible import config
from uniquebible import config, extract_text
import re, os
from uniquebible.util.TextUtil import TextUtil
from prompt_toolkit.formatted_text import HTML
Expand Down Expand Up @@ -685,8 +685,7 @@ def extractFile(self, filepath="", getTextOnly=False):

def extractFileText(self, filepath):
if os.path.isfile(filepath):
import textract
text = text = textract.process(filepath).decode()
text = extract_text(filepath)
# to prevent corrupting original file, users need to specify a file path for saving
#self.filepath = filepath
self.savedText = text
Expand Down
6 changes: 3 additions & 3 deletions uniquebible/util/text_editor_checkup.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,9 @@ def isTranslateInstalled():
return False

# optional
def isTextractInstalled():
def isMarkitdownInstalled():
try:
import textract
from markitdown import MarkItDown
return True
except:
return False
Expand Down Expand Up @@ -234,7 +234,7 @@ def updateModules(module, isInstalled):
("pyperclip", "Cross-platform clipboard utilities", isPyperclipInstalled),
("Pygments", "Syntax highlighting package", isPygmentsInstalled),
("translate", "Google Translate", isTranslateInstalled),
#("textract", "Extract text from document", isTextractInstalled),
("markitdown", "Extract text from document", isMarkitdownInstalled),
]
for module, feature, isInstalled in optional:
checkModule = re.sub("-|_", "", module)
Expand Down

0 comments on commit 9c63b79

Please sign in to comment.