Skip to content

Add encoding model to entity/claim extraction config sections (#740) #2

Add encoding model to entity/claim extraction config sections (#740)

Add encoding model to entity/claim extraction config sections (#740) #2

Workflow file for this run

name: Python CI
on:
push:
branches: [main]
pull_request:
branches: [main]
permissions:
contents: read
pull-requests: read
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
# Only run the for the latest commit
cancel-in-progress: true
env:
POETRY_VERSION: 1.8.3
jobs:
python-ci:
strategy:
matrix:
python-version: ["3.10", "3.11"] # add 3.12 once gensim supports it. TODO: watch this issue - https://github.com/piskvorky/gensim/issues/3510
os: [ubuntu-latest, windows-latest]
env:
DEBUG: 1
GRAPHRAG_LLM_TYPE: "azure_openai_chat"
GRAPHRAG_EMBEDDING_TYPE: "azure_openai_embedding"
GRAPHRAG_API_KEY: ${{ secrets.OPENAI_API_KEY }}
GRAPHRAG_API_BASE: ${{ secrets.GRAPHRAG_API_BASE }}
GRAPHRAG_API_VERSION: ${{ secrets.GRAPHRAG_API_VERSION }}
GRAPHRAG_LLM_DEPLOYMENT_NAME: ${{ secrets.GRAPHRAG_LLM_DEPLOYMENT_NAME }}
GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME: ${{ secrets.GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME }}
GRAPHRAG_CACHE_TYPE: "blob"
GRAPHRAG_CACHE_CONNECTION_STRING: ${{ secrets.BLOB_STORAGE_CONNECTION_STRING }}
GRAPHRAG_CACHE_CONTAINER_NAME: "cicache"
GRAPHRAG_CACHE_BASE_DIR": "cache"
GRAPHRAG_LLM_MODEL: gpt-3.5-turbo-16k
GRAPHRAG_EMBEDDING_MODEL: text-embedding-ada-002
# We have Windows + Linux runners in 3.10 and 3.11, so we need to divide the rate limits by 4
GRAPHRAG_LLM_TPM: 45_000 # 180,000 / 4
GRAPHRAG_LLM_RPM: 270 # 1,080 / 4
GRAPHRAG_EMBEDDING_TPM: 87_500 # 350,000 / 4
GRAPHRAG_EMBEDDING_RPM: 525 # 2,100 / 4
GRAPHRAG_CHUNK_SIZE: 1200
GRAPHRAG_CHUNK_OVERLAP: 0
# Azure AI Search config
AZURE_AI_SEARCH_URL_ENDPOINT: ${{ secrets.AZURE_AI_SEARCH_URL_ENDPOINT }}
AZURE_AI_SEARCH_API_KEY: ${{ secrets.AZURE_AI_SEARCH_API_KEY }}
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
- uses: dorny/paths-filter@v3
id: changes
with:
filters: |
python:
- 'graphrag/**/*'
- 'poetry.lock'
- 'pyproject.toml'
- '**/*.py'
- '**/*.toml'
- '**/*.ipynb'
- '.github/workflows/python*.yml'
- 'tests/smoke/*'
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install Poetry
uses: abatilo/actions-poetry@v3.0.0
with:
poetry-version: $POETRY_VERSION
- name: Install dependencies
shell: bash
run: |
poetry self add setuptools wheel
poetry run python -m pip install gensim
poetry install
- name: Check Semversioner
run: |
poetry run semversioner check
- name: Check
run: |
poetry run poe check
- name: Build
run: |
poetry build
- name: Install Azurite
id: azuright
uses: potatoqualitee/azuright@v1.1
- name: Unit Test
run: |
poetry run poe test_unit
- name: Integration Test
run: |
poetry run poe test_integration
# - name: Smoke Test
# if: steps.changes.outputs.python == 'true'
# run: |
# poetry run poe test_smoke
# - uses: actions/upload-artifact@v4
# if: always()
# with:
# name: smoke-test-artifacts-${{ matrix.python-version }}-${{ matrix.poetry-version }}-${{ runner.os }}
# path: tests/fixtures/*/output
# - name: E2E Test
# if: steps.changes.outputs.python == 'true'
# run: |
# ./scripts/e2e-test.sh