From 1efac6a3fe19e4d62325e2c2915cd84ea277f04f Mon Sep 17 00:00:00 2001 From: Otto L Date: Fri, 15 Mar 2024 16:49:50 +0100 Subject: [PATCH] feat(llm - embed): Add support for Azure OpenAI (#1698) * Add support for Azure OpenAI * fix: wrong default api_version Should be dashes instead of underscores. see: https://learn.microsoft.com/en-us/azure/ai-services/openai/reference * fix: code styling applied "make check" changes * refactor: extend documentation * mention azopenai as available option and extras * add recommended section * include settings-azopenai.yaml configuration file * fix: documentation --- fern/docs/pages/installation/installation.mdx | 29 +- fern/docs/pages/manual/llms.mdx | 37 +++ poetry.lock | 263 +++++++++++++++++- .../embedding/embedding_component.py | 18 ++ private_gpt/components/llm/llm_component.py | 18 ++ private_gpt/settings/settings.py | 26 +- pyproject.toml | 4 + settings-azopenai.yaml | 17 ++ settings.yaml | 9 + 9 files changed, 415 insertions(+), 6 deletions(-) create mode 100644 settings-azopenai.yaml diff --git a/fern/docs/pages/installation/installation.mdx b/fern/docs/pages/installation/installation.mdx index 67431ebdb..b47e2b2b7 100644 --- a/fern/docs/pages/installation/installation.mdx +++ b/fern/docs/pages/installation/installation.mdx @@ -30,8 +30,8 @@ pyenv local 3.11 PrivateGPT allows to customize the setup -from fully local to cloud based- by deciding the modules to use. Here are the different options available: -- LLM: "llama-cpp", "ollama", "sagemaker", "openai", "openailike" -- Embeddings: "huggingface", "openai", "sagemaker" +- LLM: "llama-cpp", "ollama", "sagemaker", "openai", "openailike", "azopenai" +- Embeddings: "huggingface", "openai", "sagemaker", "azopenai" - Vector stores: "qdrant", "chroma", "postgres" - UI: whether or not to enable UI (Gradio) or just go with the API @@ -49,10 +49,12 @@ Where `` can be any of the following: - llms-sagemaker: adds support for Amazon Sagemaker LLM, requires Sagemaker inference endpoints - llms-openai: adds support for OpenAI LLM, requires OpenAI API key - llms-openai-like: adds support for 3rd party LLM providers that are compatible with OpenAI's API +- llms-azopenai: adds support for Azure OpenAI LLM, requires Azure OpenAI inference endpoints - embeddings-ollama: adds support for Ollama Embeddings, requires Ollama running locally - embeddings-huggingface: adds support for local Embeddings using HuggingFace - embeddings-sagemaker: adds support for Amazon Sagemaker Embeddings, requires Sagemaker inference endpoints - embeddings-openai = adds support for OpenAI Embeddings, requires OpenAI API key +- embeddings-azopenai = adds support for Azure OpenAI Embeddings, requires Azure OpenAI inference endpoints - vector-stores-qdrant: adds support for Qdrant vector store - vector-stores-chroma: adds support for Chroma DB vector store - vector-stores-postgres: adds support for Postgres vector store @@ -160,6 +162,29 @@ PrivateGPT will use the already existing `settings-openai.yaml` settings file, w The UI will be available at http://localhost:8001 +### Non-Private, Azure OpenAI-powered test setup + +If you want to test PrivateGPT with Azure OpenAI's LLM and Embeddings -taking into account your data is going to Azure OpenAI!- you can run the following command: + +You need to have access to Azure OpenAI inference endpoints for the LLM and / or the embeddings, and have Azure OpenAI credentials properly configured. + +Edit the `settings-azopenai.yaml` file to include the correct Azure OpenAI endpoints. + +Then, install PrivateGPT with the following command: +```bash +poetry install --extras "ui llms-azopenai embeddings-azopenai vector-stores-qdrant" +``` + +Once installed, you can run PrivateGPT. + +```bash +PGPT_PROFILES=azopenai make run +``` + +PrivateGPT will use the already existing `settings-azopenai.yaml` settings file, which is already configured to use Azure OpenAI LLM and Embeddings endpoints, and Qdrant. + +The UI will be available at http://localhost:8001 + ### Local, Llama-CPP powered setup If you want to run PrivateGPT fully locally without relying on Ollama, you can run the following command: diff --git a/fern/docs/pages/manual/llms.mdx b/fern/docs/pages/manual/llms.mdx index 3013a0e62..8d9077eaa 100644 --- a/fern/docs/pages/manual/llms.mdx +++ b/fern/docs/pages/manual/llms.mdx @@ -98,6 +98,43 @@ to run an OpenAI compatible server. Then, you can run PrivateGPT using the `sett `PGPT_PROFILES=vllm make run` +### Using Azure OpenAI + +If you cannot run a local model (because you don't have a GPU, for example) or for testing purposes, you may +decide to run PrivateGPT using Azure OpenAI as the LLM and Embeddings model. + +In order to do so, create a profile `settings-azopenai.yaml` with the following contents: + +```yaml +llm: + mode: azopenai + +embedding: + mode: azopenai + +azopenai: + api_key: # You could skip this configuration and use the AZ_OPENAI_API_KEY env var instead + azure_endpoint: # You could skip this configuration and use the AZ_OPENAI_ENDPOINT env var instead + api_version: # The API version to use. Default is "2023_05_15" + embedding_deployment_name: # You could skip this configuration and use the AZ_OPENAI_EMBEDDING_DEPLOYMENT_NAME env var instead + embedding_model: # Optional model to use. Default is "text-embedding-ada-002" + llm_deployment_name: # You could skip this configuration and use the AZ_OPENAI_LLM_DEPLOYMENT_NAME env var instead + llm_model: # Optional model to use. Default is "gpt-35-turbo" +``` + +And run PrivateGPT loading that profile you just created: + +`PGPT_PROFILES=azopenai make run` + +or + +`PGPT_PROFILES=azopenai poetry run python -m private_gpt` + +When the server is started it will print a log *Application startup complete*. +Navigate to http://localhost:8001 to use the Gradio UI or to http://localhost:8001/docs (API section) to try the API. +You'll notice the speed and quality of response is higher, given you are using Azure OpenAI's servers for the heavy +computations. + ### Using AWS Sagemaker For a fully private & performant setup, you can choose to have both your LLM and Embeddings model deployed using Sagemaker. diff --git a/poetry.lock b/poetry.lock index 6da365eb1..38c2374c0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. [[package]] name = "aiofiles" @@ -274,6 +274,42 @@ docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib- tests = ["attrs[tests-no-zope]", "zope-interface"] tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +[[package]] +name = "azure-core" +version = "1.30.1" +description = "Microsoft Azure Core Library for Python" +optional = true +python-versions = ">=3.7" +files = [ + {file = "azure-core-1.30.1.tar.gz", hash = "sha256:26273a254131f84269e8ea4464f3560c731f29c0c1f69ac99010845f239c1a8f"}, + {file = "azure_core-1.30.1-py3-none-any.whl", hash = "sha256:7c5ee397e48f281ec4dd773d67a0a47a0962ed6fa833036057f9ea067f688e74"}, +] + +[package.dependencies] +requests = ">=2.21.0" +six = ">=1.11.0" +typing-extensions = ">=4.6.0" + +[package.extras] +aio = ["aiohttp (>=3.0)"] + +[[package]] +name = "azure-identity" +version = "1.15.0" +description = "Microsoft Azure Identity Library for Python" +optional = true +python-versions = ">=3.7" +files = [ + {file = "azure-identity-1.15.0.tar.gz", hash = "sha256:4c28fc246b7f9265610eb5261d65931183d019a23d4b0e99357facb2e6c227c8"}, + {file = "azure_identity-1.15.0-py3-none-any.whl", hash = "sha256:a14b1f01c7036f11f148f22cd8c16e05035293d714458d6b44ddf534d93eb912"}, +] + +[package.dependencies] +azure-core = ">=1.23.0,<2.0.0" +cryptography = ">=2.5" +msal = ">=1.24.0,<2.0.0" +msal-extensions = ">=0.3.0,<2.0.0" + [[package]] name = "backoff" version = "2.2.1" @@ -475,6 +511,70 @@ files = [ {file = "certifi-2023.11.17.tar.gz", hash = "sha256:9b469f3a900bf28dc19b8cfbf8019bf47f7fdd1a65a1d4ffb98fc14166beb4d1"}, ] +[[package]] +name = "cffi" +version = "1.16.0" +description = "Foreign Function Interface for Python calling C code." +optional = true +python-versions = ">=3.8" +files = [ + {file = "cffi-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088"}, + {file = "cffi-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ac0f5edd2360eea2f1daa9e26a41db02dd4b0451b48f7c318e217ee092a213e9"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7e61e3e4fa664a8588aa25c883eab612a188c725755afff6289454d6362b9673"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a72e8961a86d19bdb45851d8f1f08b041ea37d2bd8d4fd19903bc3083d80c896"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5b50bf3f55561dac5438f8e70bfcdfd74543fd60df5fa5f62d94e5867deca684"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7651c50c8c5ef7bdb41108b7b8c5a83013bfaa8a935590c5d74627c047a583c7"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4108df7fe9b707191e55f33efbcb2d81928e10cea45527879a4749cbe472614"}, + {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:32c68ef735dbe5857c810328cb2481e24722a59a2003018885514d4c09af9743"}, + {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:673739cb539f8cdaa07d92d02efa93c9ccf87e345b9a0b556e3ecc666718468d"}, + {file = "cffi-1.16.0-cp310-cp310-win32.whl", hash = "sha256:9f90389693731ff1f659e55c7d1640e2ec43ff725cc61b04b2f9c6d8d017df6a"}, + {file = "cffi-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:e6024675e67af929088fda399b2094574609396b1decb609c55fa58b028a32a1"}, + {file = "cffi-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b84834d0cf97e7d27dd5b7f3aca7b6e9263c56308ab9dc8aae9784abb774d404"}, + {file = "cffi-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b8ebc27c014c59692bb2664c7d13ce7a6e9a629be20e54e7271fa696ff2b417"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ee07e47c12890ef248766a6e55bd38ebfb2bb8edd4142d56db91b21ea68b7627"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8a9d3ebe49f084ad71f9269834ceccbf398253c9fac910c4fd7053ff1386936"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e70f54f1796669ef691ca07d046cd81a29cb4deb1e5f942003f401c0c4a2695d"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5bf44d66cdf9e893637896c7faa22298baebcd18d1ddb6d2626a6e39793a1d56"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7b78010e7b97fef4bee1e896df8a4bbb6712b7f05b7ef630f9d1da00f6444d2e"}, + {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c6a164aa47843fb1b01e941d385aab7215563bb8816d80ff3a363a9f8448a8dc"}, + {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e09f3ff613345df5e8c3667da1d918f9149bd623cd9070c983c013792a9a62eb"}, + {file = "cffi-1.16.0-cp311-cp311-win32.whl", hash = "sha256:2c56b361916f390cd758a57f2e16233eb4f64bcbeee88a4881ea90fca14dc6ab"}, + {file = "cffi-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:db8e577c19c0fda0beb7e0d4e09e0ba74b1e4c092e0e40bfa12fe05b6f6d75ba"}, + {file = "cffi-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fa3a0128b152627161ce47201262d3140edb5a5c3da88d73a1b790a959126956"}, + {file = "cffi-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:68e7c44931cc171c54ccb702482e9fc723192e88d25a0e133edd7aff8fcd1f6e"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abd808f9c129ba2beda4cfc53bde801e5bcf9d6e0f22f095e45327c038bfe68e"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88e2b3c14bdb32e440be531ade29d3c50a1a59cd4e51b1dd8b0865c54ea5d2e2"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcc8eb6d5902bb1cf6dc4f187ee3ea80a1eba0a89aba40a5cb20a5087d961357"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b7be2d771cdba2942e13215c4e340bfd76398e9227ad10402a8767ab1865d2e6"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e715596e683d2ce000574bae5d07bd522c781a822866c20495e52520564f0969"}, + {file = "cffi-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2d92b25dbf6cae33f65005baf472d2c245c050b1ce709cc4588cdcdd5495b520"}, + {file = "cffi-1.16.0-cp312-cp312-win32.whl", hash = "sha256:b2ca4e77f9f47c55c194982e10f058db063937845bb2b7a86c84a6cfe0aefa8b"}, + {file = "cffi-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:68678abf380b42ce21a5f2abde8efee05c114c2fdb2e9eef2efdb0257fba1235"}, + {file = "cffi-1.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0c9ef6ff37e974b73c25eecc13952c55bceed9112be2d9d938ded8e856138bcc"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a09582f178759ee8128d9270cd1344154fd473bb77d94ce0aeb2a93ebf0feaf0"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e760191dd42581e023a68b758769e2da259b5d52e3103c6060ddc02c9edb8d7b"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:80876338e19c951fdfed6198e70bc88f1c9758b94578d5a7c4c91a87af3cf31c"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a6a14b17d7e17fa0d207ac08642c8820f84f25ce17a442fd15e27ea18d67c59b"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6602bc8dc6f3a9e02b6c22c4fc1e47aa50f8f8e6d3f78a5e16ac33ef5fefa324"}, + {file = "cffi-1.16.0-cp38-cp38-win32.whl", hash = "sha256:131fd094d1065b19540c3d72594260f118b231090295d8c34e19a7bbcf2e860a"}, + {file = "cffi-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:31d13b0f99e0836b7ff893d37af07366ebc90b678b6664c955b54561fc36ef36"}, + {file = "cffi-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:582215a0e9adbe0e379761260553ba11c58943e4bbe9c36430c4ca6ac74b15ed"}, + {file = "cffi-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b29ebffcf550f9da55bec9e02ad430c992a87e5f512cd63388abb76f1036d8d2"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dc9b18bf40cc75f66f40a7379f6a9513244fe33c0e8aa72e2d56b0196a7ef872"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cb4a35b3642fc5c005a6755a5d17c6c8b6bcb6981baf81cea8bfbc8903e8ba8"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b86851a328eedc692acf81fb05444bdf1891747c25af7529e39ddafaf68a4f3f"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c0f31130ebc2d37cdd8e44605fb5fa7ad59049298b3f745c74fa74c62fbfcfc4"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f8e709127c6c77446a8c0a8c8bf3c8ee706a06cd44b1e827c3e6a2ee6b8c098"}, + {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:748dcd1e3d3d7cd5443ef03ce8685043294ad6bd7c02a38d1bd367cfd968e000"}, + {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8895613bcc094d4a1b2dbe179d88d7fb4a15cee43c052e8885783fac397d91fe"}, + {file = "cffi-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed86a35631f7bfbb28e108dd96773b9d5a6ce4811cf6ea468bb6a359b256b1e4"}, + {file = "cffi-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:3686dffb02459559c74dd3d81748269ffb0eb027c39a6fc99502de37d501faa8"}, + {file = "cffi-1.16.0.tar.gz", hash = "sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0"}, +] + +[package.dependencies] +pycparser = "*" + [[package]] name = "cfgv" version = "3.4.0" @@ -832,6 +932,60 @@ files = [ [package.extras] toml = ["tomli"] +[[package]] +name = "cryptography" +version = "42.0.5" +description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." +optional = true +python-versions = ">=3.7" +files = [ + {file = "cryptography-42.0.5-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:a30596bae9403a342c978fb47d9b0ee277699fa53bbafad14706af51fe543d16"}, + {file = "cryptography-42.0.5-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:b7ffe927ee6531c78f81aa17e684e2ff617daeba7f189f911065b2ea2d526dec"}, + {file = "cryptography-42.0.5-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2424ff4c4ac7f6b8177b53c17ed5d8fa74ae5955656867f5a8affaca36a27abb"}, + {file = "cryptography-42.0.5-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:329906dcc7b20ff3cad13c069a78124ed8247adcac44b10bea1130e36caae0b4"}, + {file = "cryptography-42.0.5-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:b03c2ae5d2f0fc05f9a2c0c997e1bc18c8229f392234e8a0194f202169ccd278"}, + {file = "cryptography-42.0.5-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f8837fe1d6ac4a8052a9a8ddab256bc006242696f03368a4009be7ee3075cdb7"}, + {file = "cryptography-42.0.5-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:0270572b8bd2c833c3981724b8ee9747b3ec96f699a9665470018594301439ee"}, + {file = "cryptography-42.0.5-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:b8cac287fafc4ad485b8a9b67d0ee80c66bf3574f655d3b97ef2e1082360faf1"}, + {file = "cryptography-42.0.5-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:16a48c23a62a2f4a285699dba2e4ff2d1cff3115b9df052cdd976a18856d8e3d"}, + {file = "cryptography-42.0.5-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:2bce03af1ce5a5567ab89bd90d11e7bbdff56b8af3acbbec1faded8f44cb06da"}, + {file = "cryptography-42.0.5-cp37-abi3-win32.whl", hash = "sha256:b6cd2203306b63e41acdf39aa93b86fb566049aeb6dc489b70e34bcd07adca74"}, + {file = "cryptography-42.0.5-cp37-abi3-win_amd64.whl", hash = "sha256:98d8dc6d012b82287f2c3d26ce1d2dd130ec200c8679b6213b3c73c08b2b7940"}, + {file = "cryptography-42.0.5-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:5e6275c09d2badf57aea3afa80d975444f4be8d3bc58f7f80d2a484c6f9485c8"}, + {file = "cryptography-42.0.5-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4985a790f921508f36f81831817cbc03b102d643b5fcb81cd33df3fa291a1a1"}, + {file = "cryptography-42.0.5-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7cde5f38e614f55e28d831754e8a3bacf9ace5d1566235e39d91b35502d6936e"}, + {file = "cryptography-42.0.5-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:7367d7b2eca6513681127ebad53b2582911d1736dc2ffc19f2c3ae49997496bc"}, + {file = "cryptography-42.0.5-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:cd2030f6650c089aeb304cf093f3244d34745ce0cfcc39f20c6fbfe030102e2a"}, + {file = "cryptography-42.0.5-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:a2913c5375154b6ef2e91c10b5720ea6e21007412f6437504ffea2109b5a33d7"}, + {file = "cryptography-42.0.5-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:c41fb5e6a5fe9ebcd58ca3abfeb51dffb5d83d6775405305bfa8715b76521922"}, + {file = "cryptography-42.0.5-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:3eaafe47ec0d0ffcc9349e1708be2aaea4c6dd4978d76bf6eb0cb2c13636c6fc"}, + {file = "cryptography-42.0.5-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:1b95b98b0d2af784078fa69f637135e3c317091b615cd0905f8b8a087e86fa30"}, + {file = "cryptography-42.0.5-cp39-abi3-win32.whl", hash = "sha256:1f71c10d1e88467126f0efd484bd44bca5e14c664ec2ede64c32f20875c0d413"}, + {file = "cryptography-42.0.5-cp39-abi3-win_amd64.whl", hash = "sha256:a011a644f6d7d03736214d38832e030d8268bcff4a41f728e6030325fea3e400"}, + {file = "cryptography-42.0.5-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:9481ffe3cf013b71b2428b905c4f7a9a4f76ec03065b05ff499bb5682a8d9ad8"}, + {file = "cryptography-42.0.5-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:ba334e6e4b1d92442b75ddacc615c5476d4ad55cc29b15d590cc6b86efa487e2"}, + {file = "cryptography-42.0.5-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:ba3e4a42397c25b7ff88cdec6e2a16c2be18720f317506ee25210f6d31925f9c"}, + {file = "cryptography-42.0.5-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:111a0d8553afcf8eb02a4fea6ca4f59d48ddb34497aa8706a6cf536f1a5ec576"}, + {file = "cryptography-42.0.5-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:cd65d75953847815962c84a4654a84850b2bb4aed3f26fadcc1c13892e1e29f6"}, + {file = "cryptography-42.0.5-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:e807b3188f9eb0eaa7bbb579b462c5ace579f1cedb28107ce8b48a9f7ad3679e"}, + {file = "cryptography-42.0.5-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:f12764b8fffc7a123f641d7d049d382b73f96a34117e0b637b80643169cec8ac"}, + {file = "cryptography-42.0.5-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:37dd623507659e08be98eec89323469e8c7b4c1407c85112634ae3dbdb926fdd"}, + {file = "cryptography-42.0.5.tar.gz", hash = "sha256:6fe07eec95dfd477eb9530aef5bead34fec819b3aaf6c5bd6d20565da607bfe1"}, +] + +[package.dependencies] +cffi = {version = ">=1.12", markers = "platform_python_implementation != \"PyPy\""} + +[package.extras] +docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=1.1.1)"] +docstest = ["pyenchant (>=1.6.11)", "readme-renderer", "sphinxcontrib-spelling (>=4.0.1)"] +nox = ["nox"] +pep8test = ["check-sdist", "click", "mypy", "ruff"] +sdist = ["build"] +ssh = ["bcrypt (>=3.1.5)"] +test = ["certifi", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] +test-randomorder = ["pytest-randomly"] + [[package]] name = "cycler" version = "0.12.1" @@ -2080,6 +2234,22 @@ local-models = ["optimum[onnxruntime] (>=1.13.2,<2.0.0)", "sentencepiece (>=0.1. postgres = ["asyncpg (>=0.28.0,<0.29.0)", "pgvector (>=0.1.0,<0.2.0)", "psycopg2-binary (>=2.9.9,<3.0.0)"] query-tools = ["guidance (>=0.0.64,<0.0.65)", "jsonpath-ng (>=1.6.0,<2.0.0)", "lm-format-enforcer (>=0.4.3,<0.5.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "scikit-learn", "spacy (>=3.7.1,<4.0.0)"] +[[package]] +name = "llama-index-embeddings-azure-openai" +version = "0.1.6" +description = "llama-index embeddings azure openai integration" +optional = true +python-versions = ">=3.8.1,<4.0" +files = [ + {file = "llama_index_embeddings_azure_openai-0.1.6-py3-none-any.whl", hash = "sha256:a84a6d7d67296690e5d20070ce5d9920ec56b0d339338d276eae2a7b2f822b9e"}, + {file = "llama_index_embeddings_azure_openai-0.1.6.tar.gz", hash = "sha256:05092b1b31bd0f45257d161f1e5a17261c60e688f4c6a4fe316557349ac2aebc"}, +] + +[package.dependencies] +llama-index-core = ">=0.10.11.post1,<0.11.0" +llama-index-embeddings-openai = ">=0.1.3,<0.2.0" +llama-index-llms-azure-openai = ">=0.1.3,<0.2.0" + [[package]] name = "llama-index-embeddings-huggingface" version = "0.1.4" @@ -2125,6 +2295,23 @@ files = [ [package.dependencies] llama-index-core = ">=0.10.1,<0.11.0" +[[package]] +name = "llama-index-llms-azure-openai" +version = "0.1.5" +description = "llama-index llms azure openai integration" +optional = true +python-versions = ">=3.8.1,<4.0" +files = [ + {file = "llama_index_llms_azure_openai-0.1.5-py3-none-any.whl", hash = "sha256:180805a7114198155aad7cc3abdf599142c59242d366b11ee8a9150de35b7773"}, + {file = "llama_index_llms_azure_openai-0.1.5.tar.gz", hash = "sha256:5a1c3d1a6a4fe4d03acb50b61594e6775dc86a431738afa291f3708029299a92"}, +] + +[package.dependencies] +azure-identity = ">=1.15.0,<2.0.0" +httpx = "*" +llama-index-core = ">=0.10.11.post1,<0.11.0" +llama-index-llms-openai = ">=0.1.1,<0.2.0" + [[package]] name = "llama-index-llms-llama-cpp" version = "0.1.3" @@ -2572,6 +2759,44 @@ docs = ["sphinx"] gmpy = ["gmpy2 (>=2.1.0a4)"] tests = ["pytest (>=4.6)"] +[[package]] +name = "msal" +version = "1.27.0" +description = "The Microsoft Authentication Library (MSAL) for Python library enables your app to access the Microsoft Cloud by supporting authentication of users with Microsoft Azure Active Directory accounts (AAD) and Microsoft Accounts (MSA) using industry standard OAuth2 and OpenID Connect." +optional = true +python-versions = ">=2.7" +files = [ + {file = "msal-1.27.0-py2.py3-none-any.whl", hash = "sha256:572d07149b83e7343a85a3bcef8e581167b4ac76befcbbb6eef0c0e19643cdc0"}, + {file = "msal-1.27.0.tar.gz", hash = "sha256:3109503c038ba6b307152b0e8d34f98113f2e7a78986e28d0baf5b5303afda52"}, +] + +[package.dependencies] +cryptography = ">=0.6,<45" +PyJWT = {version = ">=1.0.0,<3", extras = ["crypto"]} +requests = ">=2.0.0,<3" + +[package.extras] +broker = ["pymsalruntime (>=0.13.2,<0.15)"] + +[[package]] +name = "msal-extensions" +version = "1.1.0" +description = "Microsoft Authentication Library extensions (MSAL EX) provides a persistence API that can save your data on disk, encrypted on Windows, macOS and Linux. Concurrent data access will be coordinated by a file lock mechanism." +optional = true +python-versions = ">=3.7" +files = [ + {file = "msal-extensions-1.1.0.tar.gz", hash = "sha256:6ab357867062db7b253d0bd2df6d411c7891a0ee7308d54d1e4317c1d1c54252"}, + {file = "msal_extensions-1.1.0-py3-none-any.whl", hash = "sha256:01be9711b4c0b1a151450068eeb2c4f0997df3bba085ac299de3a66f585e382f"}, +] + +[package.dependencies] +msal = ">=0.4.1,<2.0.0" +packaging = "*" +portalocker = [ + {version = ">=1.0,<3", markers = "platform_system != \"Windows\""}, + {version = ">=1.6,<3", markers = "platform_system == \"Windows\""}, +] + [[package]] name = "multidict" version = "6.0.4" @@ -2948,6 +3173,7 @@ optional = true python-versions = ">=3" files = [ {file = "nvidia_nvjitlink_cu12-12.3.101-py3-none-manylinux1_x86_64.whl", hash = "sha256:64335a8088e2b9d196ae8665430bc6a2b7e6ef2eb877a9c735c804bd4ff6467c"}, + {file = "nvidia_nvjitlink_cu12-12.3.101-py3-none-manylinux2014_aarch64.whl", hash = "sha256:211a63e7b30a9d62f1a853e19928fbb1a750e3f17a13a3d1f98ff0ced19478dd"}, {file = "nvidia_nvjitlink_cu12-12.3.101-py3-none-win_amd64.whl", hash = "sha256:1b2e317e437433753530792f13eece58f0aec21a2b05903be7bffe58a606cbd1"}, ] @@ -3708,6 +3934,17 @@ files = [ [package.dependencies] pyasn1 = ">=0.4.6,<0.6.0" +[[package]] +name = "pycparser" +version = "2.21" +description = "C parser in Python" +optional = true +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"}, + {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, +] + [[package]] name = "pydantic" version = "2.5.2" @@ -3902,6 +4139,26 @@ files = [ plugins = ["importlib-metadata"] windows-terminal = ["colorama (>=0.4.6)"] +[[package]] +name = "pyjwt" +version = "2.8.0" +description = "JSON Web Token implementation in Python" +optional = true +python-versions = ">=3.7" +files = [ + {file = "PyJWT-2.8.0-py3-none-any.whl", hash = "sha256:59127c392cc44c2da5bb3192169a91f429924e17aff6534d70fdc02ab3e04320"}, + {file = "PyJWT-2.8.0.tar.gz", hash = "sha256:57e28d156e3d5c10088e0c68abb90bfac3df82b40a71bd0daa20c65ccd5c23de"}, +] + +[package.dependencies] +cryptography = {version = ">=3.4.0", optional = true, markers = "extra == \"crypto\""} + +[package.extras] +crypto = ["cryptography (>=3.4.0)"] +dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pytest (>=6.0.0,<7.0.0)", "sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"] +docs = ["sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"] +tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] + [[package]] name = "pymupdf" version = "1.23.25" @@ -5938,10 +6195,12 @@ docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.link testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy (>=0.9.1)", "pytest-ruff"] [extras] +embeddings-azopenai = ["llama-index-embeddings-azure-openai"] embeddings-huggingface = ["llama-index-embeddings-huggingface"] embeddings-ollama = ["llama-index-embeddings-ollama"] embeddings-openai = ["llama-index-embeddings-openai"] embeddings-sagemaker = ["boto3"] +llms-azopenai = ["llama-index-llms-azure-openai"] llms-llama-cpp = ["llama-index-llms-llama-cpp"] llms-ollama = ["llama-index-llms-ollama"] llms-openai = ["llama-index-llms-openai"] @@ -5956,4 +6215,4 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"] [metadata] lock-version = "2.0" python-versions = ">=3.11,<3.12" -content-hash = "689df29f4f2209e7ae6638563f4bb25700d1454098d0c728a164a708d42fa377" +content-hash = "3d5f21e5e41ea66d655891a6d9b01bcdd8348b275e27a54e90b65ac9d5719981" diff --git a/private_gpt/components/embedding/embedding_component.py b/private_gpt/components/embedding/embedding_component.py index f384262db..2967c38b9 100644 --- a/private_gpt/components/embedding/embedding_component.py +++ b/private_gpt/components/embedding/embedding_component.py @@ -72,6 +72,24 @@ def __init__(self, settings: Settings) -> None: model_name=ollama_settings.embedding_model, base_url=ollama_settings.api_base, ) + case "azopenai": + try: + from llama_index.embeddings.azure_openai import ( # type: ignore + AzureOpenAIEmbedding, + ) + except ImportError as e: + raise ImportError( + "Azure OpenAI dependencies not found, install with `poetry install --extras embeddings-azopenai`" + ) from e + + azopenai_settings = settings.azopenai + self.embedding_model = AzureOpenAIEmbedding( + model=azopenai_settings.embedding_model, + deployment_name=azopenai_settings.embedding_deployment_name, + api_key=azopenai_settings.api_key, + azure_endpoint=azopenai_settings.azure_endpoint, + api_version=azopenai_settings.api_version, + ) case "mock": # Not a random number, is the dimensionality used by # the default embedding model diff --git a/private_gpt/components/llm/llm_component.py b/private_gpt/components/llm/llm_component.py index d4e13a585..953209a86 100644 --- a/private_gpt/components/llm/llm_component.py +++ b/private_gpt/components/llm/llm_component.py @@ -132,5 +132,23 @@ def __init__(self, settings: Settings) -> None: context_window=settings.llm.context_window, additional_kwargs=settings_kwargs, ) + case "azopenai": + try: + from llama_index.llms.azure_openai import ( # type: ignore + AzureOpenAI, + ) + except ImportError as e: + raise ImportError( + "Azure OpenAI dependencies not found, install with `poetry install --extras llms-azopenai`" + ) from e + + azopenai_settings = settings.azopenai + self.llm = AzureOpenAI( + model=azopenai_settings.llm_model, + deployment_name=azopenai_settings.llm_deployment_name, + api_key=azopenai_settings.api_key, + azure_endpoint=azopenai_settings.azure_endpoint, + api_version=azopenai_settings.api_version, + ) case "mock": self.llm = MockLLM() diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py index 868a3cde4..4c274384a 100644 --- a/private_gpt/settings/settings.py +++ b/private_gpt/settings/settings.py @@ -81,7 +81,9 @@ class DataSettings(BaseModel): class LLMSettings(BaseModel): - mode: Literal["llamacpp", "openai", "openailike", "sagemaker", "mock", "ollama"] + mode: Literal[ + "llamacpp", "openai", "openailike", "azopenai", "sagemaker", "mock", "ollama" + ] max_new_tokens: int = Field( 256, description="The maximum number of token that the LLM is authorized to generate in one completion.", @@ -152,7 +154,7 @@ class HuggingFaceSettings(BaseModel): class EmbeddingSettings(BaseModel): - mode: Literal["huggingface", "openai", "sagemaker", "ollama", "mock"] + mode: Literal["huggingface", "openai", "azopenai", "sagemaker", "ollama", "mock"] ingest_mode: Literal["simple", "batch", "parallel"] = Field( "simple", description=( @@ -239,6 +241,25 @@ class OllamaSettings(BaseModel): ) +class AzureOpenAISettings(BaseModel): + api_key: str + azure_endpoint: str + api_version: str = Field( + "2023_05_15", + description="The API version to use for this operation. This follows the YYYY-MM-DD format.", + ) + embedding_deployment_name: str + embedding_model: str = Field( + "text-embedding-ada-002", + description="OpenAI Model to use. Example: 'text-embedding-ada-002'.", + ) + llm_deployment_name: str + llm_model: str = Field( + "gpt-35-turbo", + description="OpenAI Model to use. Example: 'gpt-4'.", + ) + + class UISettings(BaseModel): enabled: bool path: str @@ -349,6 +370,7 @@ class Settings(BaseModel): sagemaker: SagemakerSettings openai: OpenAISettings ollama: OllamaSettings + azopenai: AzureOpenAISettings vectorstore: VectorstoreSettings nodestore: NodeStoreSettings qdrant: QdrantSettings | None = None diff --git a/pyproject.toml b/pyproject.toml index 1391fb214..d56899987 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,9 +21,11 @@ llama-index-llms-llama-cpp = {version = "^0.1.3", optional = true} llama-index-llms-openai = {version = "^0.1.6", optional = true} llama-index-llms-openai-like = {version ="^0.1.3", optional = true} llama-index-llms-ollama = {version ="^0.1.2", optional = true} +llama-index-llms-azure-openai = {version ="^0.1.5", optional = true} llama-index-embeddings-ollama = {version ="^0.1.2", optional = true} llama-index-embeddings-huggingface = {version ="^0.1.4", optional = true} llama-index-embeddings-openai = {version ="^0.1.6", optional = true} +llama-index-embeddings-azure-openai = {version ="^0.1.6", optional = true} llama-index-vector-stores-qdrant = {version ="^0.1.3", optional = true} llama-index-vector-stores-chroma = {version ="^0.1.4", optional = true} llama-index-vector-stores-postgres = {version ="^0.1.2", optional = true} @@ -45,10 +47,12 @@ llms-openai = ["llama-index-llms-openai"] llms-openai-like = ["llama-index-llms-openai-like"] llms-ollama = ["llama-index-llms-ollama"] llms-sagemaker = ["boto3"] +llms-azopenai = ["llama-index-llms-azure-openai"] embeddings-ollama = ["llama-index-embeddings-ollama"] embeddings-huggingface = ["llama-index-embeddings-huggingface"] embeddings-openai = ["llama-index-embeddings-openai"] embeddings-sagemaker = ["boto3"] +embeddings-azopenai = ["llama-index-embeddings-azure-openai"] vector-stores-qdrant = ["llama-index-vector-stores-qdrant"] vector-stores-chroma = ["llama-index-vector-stores-chroma"] vector-stores-postgres = ["llama-index-vector-stores-postgres"] diff --git a/settings-azopenai.yaml b/settings-azopenai.yaml new file mode 100644 index 000000000..7e4b47c1c --- /dev/null +++ b/settings-azopenai.yaml @@ -0,0 +1,17 @@ +server: + env_name: ${APP_ENV:azopenai} + +llm: + mode: azopenai + +embedding: + mode: azopenai + +azopenai: + api_key: ${AZ_OPENAI_API_KEY:} + azure_endpoint: ${AZ_OPENAI_ENDPOINT:} + embedding_deployment_name: ${AZ_OPENAI_EMBEDDING_DEPLOYMENT_NAME:} + llm_deployment_name: ${AZ_OPENAI_LLM_DEPLOYMENT_NAME:} + api_version: "2023-05-15" + embedding_model: text-embedding-ada-002 + llm_model: gpt-35-turbo \ No newline at end of file diff --git a/settings.yaml b/settings.yaml index 862844a19..0b4cb3414 100644 --- a/settings.yaml +++ b/settings.yaml @@ -89,3 +89,12 @@ ollama: llm_model: llama2 embedding_model: nomic-embed-text api_base: http://localhost:11434 + +azopenai: + api_key: ${AZ_OPENAI_API_KEY:} + azure_endpoint: ${AZ_OPENAI_ENDPOINT:} + embedding_deployment_name: ${AZ_OPENAI_EMBEDDING_DEPLOYMENT_NAME:} + llm_deployment_name: ${AZ_OPENAI_LLM_DEPLOYMENT_NAME:} + api_version: "2023-05-15" + embedding_model: text-embedding-ada-002 + llm_model: gpt-35-turbo