Merge 3c34e81 into 876ad3c

hongbo-miao · Dec 25, 2024 · 6c391e4 · 6c391e4
2 parents 876ad3c + 3c34e81
commit 6c391e4
Show file tree

Hide file tree

Showing 25 changed files with 2,181 additions and 1 deletion.
diff --git a/.dockerignore b/.dockerignore
@@ -120,6 +120,7 @@ machine-learning/graph-neural-network/wandb
 machine-learning/hm-autogluon/AutogluonModels
 machine-learning/hm-docling/data
 machine-learning/hm-langchain/applications/*/data
+machine-learning/hm-langgraph/applications/*/data
 machine-learning/hm-llama-index/applications/*/data
 machine-learning/hm-mlflow/experiments/*/data
 machine-learning/hm-mlflow/experiments/*/lightning_logs

diff --git a/.github/workflows/.static-type-check.yml b/.github/workflows/.static-type-check.yml
@@ -154,19 +154,22 @@ jobs:
           uv run poe static-type-check-python --package=hm-xxhash
           uv run poe static-type-check-python --package=machine-learning.convolutional-neural-network
           uv run poe static-type-check-python --package=machine-learning.dali
+          uv run poe static-type-check-python --package=machine-learning.hm-docling
           uv run poe static-type-check-python --package=machine-learning.feature-store
           uv run poe static-type-check-python --package=machine-learning.graph-neural-network
           uv run poe static-type-check-python --package=machine-learning.hm-gradio.applications.classify-image
           uv run poe static-type-check-python --package=machine-learning.hm-kubeflow.pipelines.calculate
           uv run poe static-type-check-python --package=machine-learning.hm-kubeflow.pipelines.classify-mnist
           uv run poe static-type-check-python --package=machine-learning.hm-langchain.applications.chat-pdf
+          uv run poe static-type-check-python --package=machine-learning.hm-langgraph.applications.chat-pdf
           uv run poe static-type-check-python --package=machine-learning.hm-mlflow.experiments.classify-mnist
           uv run poe static-type-check-python --package=machine-learning.hm-mlflow.experiments.predict-diabetes
           uv run poe static-type-check-python --package=machine-learning.hm-rasa
           uv run poe static-type-check-python --package=machine-learning.hm-streamlit.applications.live-line-chart
           uv run poe static-type-check-python --package=machine-learning.hm-streamlit.applications.map
           uv run poe static-type-check-python --package=machine-learning.hm-supervision.detect-objects
           uv run poe static-type-check-python --package=machine-learning.hugging-face
+          uv run poe static-type-check-python --package=machine-learning.mineru
           uv run poe static-type-check-python --package=machine-learning.neural-forecasting.forecast-air-passenger-number
           uv run poe static-type-check-python --package=machine-learning.reinforcement-learning.cart-pole
           uv run poe static-type-check-python --package=machine-learning.triton.amazon-sagamaker-triton-resnet-50.deploy

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -67,6 +67,7 @@ jobs:
       hm-kubeflow-calculate: ${{ steps.filter.outputs.hm-kubeflow-calculate }}
       hm-kubeflow-classify-mnist: ${{ steps.filter.outputs.hm-kubeflow-classify-mnist }}
       hm-langchain-chat-pdf: ${{ steps.filter.outputs.hm-langchain-chat-pdf }}
+      hm-langgraph-chat-pdf: ${{ steps.filter.outputs.hm-langgraph-chat-pdf }}
       hm-llama-index-chat-pdf: ${{ steps.filter.outputs.hm-llama-index-chat-pdf }}
       hm-mlflow-classify-mnist: ${{ steps.filter.outputs.hm-mlflow-classify-mnist }}
       hm-mlflow-predict-diabetes: ${{ steps.filter.outputs.hm-mlflow-predict-diabetes }}
@@ -269,6 +270,9 @@ jobs:
             hm-langchain-chat-pdf:
               - '.github/workflows/test.yml'
               - 'machine-learning/hm-langchain/applications/chat-pdf/**'
+            hm-langgraph-chat-pdf:
+              - '.github/workflows/test.yml'
+              - 'machine-learning/hm-langgraph/applications/chat-pdf/**'
             hm-llama-index-chat-pdf:
               - '.github/workflows/test.yml'
               - 'machine-learning/hm-llama-index/applications/chat-pdf/**'
@@ -2028,6 +2032,39 @@ jobs:
         with:
           directory: machine-learning/hm-langchain/applications/chat-pdf
 
+  langgraph-chat-pdf-test:  # Test job for machine-learning/hm-langgraph/applications/chat-pdf
+    name: LangGraph (chat-pdf) | Test  # The .mergify.yml check-success/check-skipped conditions match this exact name
+    needs: detect-changes
+    if: ${{ needs.detect-changes.outputs.hm-langgraph-chat-pdf == 'true' }}  # Run only when detect-changes flagged this application
+    runs-on: ubuntu-24.04
+    environment: test
+    timeout-minutes: 10
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4.2.2
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5.1.0
+        with:
+          version: 0.5.11  # uv version pinned for this job
+          enable-cache: true
+          cache-dependency-glob: machine-learning/hm-langgraph/applications/chat-pdf/uv.lock  # NOTE(review): presumably the cache is keyed on this lock file — confirm against setup-uv docs
+      - name: Set up Python
+        uses: actions/setup-python@v5.3.0
+        with:
+          python-version-file: machine-learning/hm-langgraph/applications/chat-pdf/pyproject.toml  # Python version comes from this file, not a literal here
+      - name: Install dependencies
+        working-directory: machine-learning/hm-langgraph/applications/chat-pdf
+        run: |
+          uv sync --dev
+      - name: Test
+        working-directory: machine-learning/hm-langgraph/applications/chat-pdf
+        run: |
+          uv run poe test-coverage
+      - name: Upload coverage to Codecov
+        uses: codecov/codecov-action@v5.1.2
+        with:
+          directory: machine-learning/hm-langgraph/applications/chat-pdf  # Same directory the Test step runs in
+
   llama-index-chat-pdf-test:
     name: LlamaIndex (chat-pdf) | Test
     needs: detect-changes

diff --git a/.gitignore b/.gitignore
@@ -120,6 +120,7 @@ machine-learning/graph-neural-network/wandb
 machine-learning/hm-autogluon/AutogluonModels
 machine-learning/hm-docling/data
 machine-learning/hm-langchain/applications/*/data
+machine-learning/hm-langgraph/applications/*/data
 machine-learning/hm-llama-index/applications/*/data
 machine-learning/hm-mlflow/experiments/*/data
 machine-learning/hm-mlflow/experiments/*/lightning_logs

diff --git a/.markdownlint-cli2.jsonc b/.markdownlint-cli2.jsonc
@@ -128,6 +128,7 @@
     "machine-learning/hm-autogluon/AutogluonModels",
     "machine-learning/hm-docling/data",
     "machine-learning/hm-langchain/applications/*/data",
+    "machine-learning/hm-langgraph/applications/*/data",
     "machine-learning/hm-llama-index/applications/*/data",
     "machine-learning/hm-mlflow/experiments/*/data",
     "machine-learning/hm-mlflow/experiments/*/lightning_logs",

diff --git a/.mergify.yml b/.mergify.yml
@@ -320,6 +320,9 @@ pull_request_rules:
       - or:
           - check-success=LangChain (chat-pdf) | Test
           - check-skipped=LangChain (chat-pdf) | Test
+      - or:
+          - check-success=LangGraph (chat-pdf) | Test
+          - check-skipped=LangGraph (chat-pdf) | Test
       - or:
           - check-success=LlamaIndex (chat-pdf) | Test
           - check-skipped=LlamaIndex (chat-pdf) | Test

diff --git a/.prettierignore b/.prettierignore
@@ -120,6 +120,7 @@ machine-learning/graph-neural-network/wandb
 machine-learning/hm-autogluon/AutogluonModels
 machine-learning/hm-docling/data
 machine-learning/hm-langchain/applications/*/data
+machine-learning/hm-langgraph/applications/*/data
 machine-learning/hm-llama-index/applications/*/data
 machine-learning/hm-mlflow/experiments/*/data
 machine-learning/hm-mlflow/experiments/*/lightning_logs

diff --git a/.rubocop.yml b/.rubocop.yml
@@ -125,6 +125,7 @@ AllCops:
     - 'machine-learning/hm-autogluon/AutogluonModels/**/*'
     - 'machine-learning/hm-docling/data/**/*'
     - 'machine-learning/hm-langchain/applications/*/data/**/*'
+    - 'machine-learning/hm-langgraph/applications/*/data/**/*'
     - 'machine-learning/hm-llama-index/applications/*/data/**/*'
     - 'machine-learning/hm-mlflow/experiments/*/data/**/*'
     - 'machine-learning/hm-mlflow/experiments/*/lightning_logs/**/*'

diff --git a/.ruff.toml b/.ruff.toml
@@ -119,6 +119,7 @@ exclude = [
   "machine-learning/hm-autogluon/AutogluonModels",
   "machine-learning/hm-docling/data",
   "machine-learning/hm-langchain/applications/*/data",
+  "machine-learning/hm-langgraph/applications/*/data",
   "machine-learning/hm-llama-index/applications/*/data",
   "machine-learning/hm-mlflow/experiments/*/data",
   "machine-learning/hm-mlflow/experiments/*/lightning_logs",

diff --git a/.solhintignore b/.solhintignore
@@ -118,6 +118,7 @@ machine-learning/graph-neural-network/wandb
 machine-learning/hm-autogluon/AutogluonModels
 machine-learning/hm-docling/data
 machine-learning/hm-langchain/applications/*/data
+machine-learning/hm-langgraph/applications/*/data
 machine-learning/hm-llama-index/applications/*/data
 machine-learning/hm-mlflow/experiments/*/data
 machine-learning/hm-mlflow/experiments/*/lightning_logs

diff --git a/.sqlfluffignore b/.sqlfluffignore
@@ -119,6 +119,7 @@ machine-learning/graph-neural-network/wandb
 machine-learning/hm-autogluon/AutogluonModels
 machine-learning/hm-docling/data
 machine-learning/hm-langchain/applications/*/data
+machine-learning/hm-langgraph/applications/*/data
 machine-learning/hm-llama-index/applications/*/data
 machine-learning/hm-mlflow/experiments/*/data
 machine-learning/hm-mlflow/experiments/*/lightning_logs

diff --git a/.stylelintignore b/.stylelintignore
@@ -120,6 +120,7 @@ machine-learning/graph-neural-network/wandb
 machine-learning/hm-autogluon/AutogluonModels
 machine-learning/hm-docling/data
 machine-learning/hm-langchain/applications/*/data
+machine-learning/hm-langgraph/applications/*/data
 machine-learning/hm-llama-index/applications/*/data
 machine-learning/hm-mlflow/experiments/*/data
 machine-learning/hm-mlflow/experiments/*/lightning_logs

diff --git a/.textlintignore b/.textlintignore
@@ -120,6 +120,7 @@ machine-learning/graph-neural-network/wandb/**/*
 machine-learning/hm-autogluon/AutogluonModels/**/*
 machine-learning/hm-docling/data/**/*
 machine-learning/hm-langchain/applications/*/data/**/*
+machine-learning/hm-langgraph/applications/*/data/**/*
 machine-learning/hm-llama-index/applications/*/data/**/*
 machine-learning/hm-mlflow/experiments/*/data/**/*
 machine-learning/hm-mlflow/experiments/*/lightning_logs/**/*

diff --git a/.yamllint.yaml b/.yamllint.yaml
@@ -125,6 +125,7 @@ ignore: |
   machine-learning/hm-autogluon/AutogluonModels
   machine-learning/hm-docling/data
   machine-learning/hm-langchain/applications/*/data
+  machine-learning/hm-langgraph/applications/*/data
   machine-learning/hm-llama-index/applications/*/data
   machine-learning/hm-mlflow/experiments/*/data
   machine-learning/hm-mlflow/experiments/*/lightning_logs

diff --git a/Makefile b/Makefile
@@ -372,19 +372,22 @@ static-type-check-python:
 	uv run poe static-type-check-python --package=hm-xxhash
 	uv run poe static-type-check-python --package=machine-learning.convolutional-neural-network
 	uv run poe static-type-check-python --package=machine-learning.dali
+	uv run poe static-type-check-python --package=machine-learning.hm-docling
 	uv run poe static-type-check-python --package=machine-learning.feature-store
 	uv run poe static-type-check-python --package=machine-learning.graph-neural-network
 	uv run poe static-type-check-python --package=machine-learning.hm-gradio.applications.classify-image
 	uv run poe static-type-check-python --package=machine-learning.hm-kubeflow.pipelines.calculate
 	uv run poe static-type-check-python --package=machine-learning.hm-kubeflow.pipelines.classify-mnist
 	uv run poe static-type-check-python --package=machine-learning.hm-langchain.applications.chat-pdf
+	uv run poe static-type-check-python --package=machine-learning.hm-langgraph.applications.chat-pdf
 	uv run poe static-type-check-python --package=machine-learning.hm-mlflow.experiments.classify-mnist
 	uv run poe static-type-check-python --package=machine-learning.hm-mlflow.experiments.predict-diabetes
 	uv run poe static-type-check-python --package=machine-learning.hm-rasa
 	uv run poe static-type-check-python --package=machine-learning.hm-streamlit.applications.live-line-chart
 	uv run poe static-type-check-python --package=machine-learning.hm-streamlit.applications.map
 	uv run poe static-type-check-python --package=machine-learning.hm-supervision.detect-objects
 	uv run poe static-type-check-python --package=machine-learning.hugging-face
+	uv run poe static-type-check-python --package=machine-learning.mineru
 	uv run poe static-type-check-python --package=machine-learning.neural-forecasting.forecast-air-passenger-number
 	uv run poe static-type-check-python --package=machine-learning.reinforcement-learning.cart-pole
 	uv run poe static-type-check-python --package=machine-learning.triton.amazon-sagamaker-triton-resnet-50.deploy

diff --git a/README.md b/README.md
@@ -421,7 +421,8 @@ The diagram illustrates the repository's architecture, which is considered overl
 
 #### Large language model (LLM)
 
-- **LlamaIndex** - LLM application framework
+- **LlamaIndex** - LLM data framework
+- **LangGraph** - LLM workflows library
 - **LangChain** - LLM application framework
 - **MinerU** - Document data extraction
 - **Docling** - Document data extraction

diff --git a/eslint.config.mjs b/eslint.config.mjs
@@ -132,6 +132,7 @@ export default [
       'machine-learning/hm-autogluon/AutogluonModels',
       'machine-learning/hm-docling/data',
       'machine-learning/hm-langchain/applications/*/data',
+      'machine-learning/hm-langgraph/applications/*/data',
       'machine-learning/hm-llama-index/applications/*/data',
       'machine-learning/hm-mlflow/experiments/*/data',
       'machine-learning/hm-mlflow/experiments/*/lightning_logs',

diff --git a/machine-learning/hm-langgraph/applications/chat-pdf/.env.development b/machine-learning/hm-langgraph/applications/chat-pdf/.env.development
@@ -0,0 +1,2 @@
+OPENAI_API_KEY=xxx
+TOKENIZERS_PARALLELISM=true
diff --git a/machine-learning/hm-langgraph/applications/chat-pdf/.env.production b/machine-learning/hm-langgraph/applications/chat-pdf/.env.production
@@ -0,0 +1,2 @@
+OPENAI_API_KEY=xxx
+TOKENIZERS_PARALLELISM=true
diff --git a/machine-learning/hm-langgraph/applications/chat-pdf/Makefile b/machine-learning/hm-langgraph/applications/chat-pdf/Makefile
@@ -0,0 +1,13 @@
+uv-install-python:
+	uv python install
+uv-update-lock-file:
+	uv lock
+uv-install-dependencies:
+	uv sync --dev
+
+uv-run-dev:
+	uv run poe dev
+uv-run-test:
+	uv run poe test
+uv-run-test-coverage:
+	uv run poe test-coverage
diff --git a/machine-learning/hm-langgraph/applications/chat-pdf/pyproject.toml b/machine-learning/hm-langgraph/applications/chat-pdf/pyproject.toml
@@ -0,0 +1,28 @@
+[project]
+name = "hm-langgraph-chat-pdf"
+version = "1.0.0"
+requires-python = "~=3.12.0"
+dependencies = [
+  "docling==2.14.0",
+  "faiss-cpu==1.9.0.post1",
+  "langgraph==0.2.60",
+  "openai==1.58.1",
+  "pydantic-settings==2.7.0",
+  "python-dotenv==1.0.1",
+  "sentence-transformers==3.3.1",
+]
+
+[dependency-groups]
+dev = [
+  "poethepoet==0.31.1",
+  "pytest==8.3.4",
+  "pytest-cov==6.0.0",
+]
+
+[tool.uv]
+package = false
+
+[tool.poe.tasks]
+dev = "python src/main.py"
+test = "pytest --verbose --verbose"
+test-coverage = "pytest --cov=. --cov-report=xml"
diff --git a/machine-learning/hm-langgraph/applications/chat-pdf/src/config.py b/machine-learning/hm-langgraph/applications/chat-pdf/src/config.py
@@ -0,0 +1,16 @@
+import os
+
+from pydantic_settings import BaseSettings
+
+
+def get_env_file() -> str:  # Choose the dotenv file name from the ENV environment variable
+    env = os.getenv("ENV")  # None when ENV is not set
+    return ".env.production" if env == "production" else ".env.development"  # Anything other than "production" selects the development file
+
+
+class Config(BaseSettings):  # Settings model for this application; neither field declares a default
+    OPENAI_API_KEY: str
+    TOKENIZERS_PARALLELISM: bool
+    model_config = {
+        "env_file": get_env_file(),  # Evaluated once, when the class body executes
+    }
diff --git a/machine-learning/hm-langgraph/applications/chat-pdf/src/dummy_test.py b/machine-learning/hm-langgraph/applications/chat-pdf/src/dummy_test.py
@@ -0,0 +1,3 @@
+class TestDummy:  # NOTE(review): looks like a placeholder suite for the test tasks — confirm
+    def test_dummy(self):
+        assert 1 + 1 == 2  # Trivially true arithmetic check
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		OPENAI_API_KEY=xxx
		TOKENIZERS_PARALLELISM=true