From 1a94d95e47621c602cd662ba0b689e5796497487 Mon Sep 17 00:00:00 2001 From: cragwolfe Date: Sun, 5 Jan 2025 13:50:19 -0800 Subject: [PATCH] chore: dependency bumps, release commit for 0.16.12 (#3831) --- CHANGELOG.md | 6 ++--- requirements/base.txt | 8 +++---- requirements/dev.txt | 6 ++--- requirements/extra-paddleocr.txt | 8 +++---- requirements/extra-pdf-image.txt | 24 +++++++++---------- requirements/extra-pptx.txt | 2 +- requirements/huggingface.txt | 10 ++++---- requirements/test.txt | 22 ++++++++--------- .../outlook/21be155fb0c95885.eml.json | 2 +- .../outlook/497eba8c81c801c6.eml.json | 2 +- .../outlook/4a16a411f162ebbb.eml.json | 2 +- unstructured/__version__.py | 2 +- 12 files changed, 47 insertions(+), 47 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bbb31a75b4..165cd0e077 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,15 +1,15 @@ -## 0.16.12-dev5 +## 0.16.12 ### Enhancements - **Prepare auto-partitioning for pluggable partitioners**. Move toward a uniform partitioner call signature so a custom or override partitioner can be registered without code changes. -- **Add NDJSON file type support** +- **Add NDJSON file type support.** ### Features ### Fixes -- Base image has been updated, trigger new workflows +- **Base image has been updated.** - **Upgrade ruff to latest.** Previously the ruff version was pinned to <0.5. Remove that pin and fix the handful of lint items that resulted. - **CSV with asserted XLS content-type is correctly identified as CSV.** Resolves a bug where a CSV file with an asserted content-type of `application/vnd.ms-excel` was incorrectly identified as an XLS file. - **Improve element-type mapping for Chinese text.** Fixes bug where Chinese text would produce large numbers of false-positive `Title` elements. diff --git a/requirements/base.txt b/requirements/base.txt index 2eb934db6c..6fecb30c04 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -20,11 +20,11 @@ cffi==1.17.1 # via cryptography chardet==5.2.0 # via -r ./base.in -charset-normalizer==3.4.0 +charset-normalizer==3.4.1 # via # requests # unstructured-client -click==8.1.7 +click==8.1.8 # via # nltk # python-oxmsg @@ -64,7 +64,7 @@ langdetect==1.0.9 # via -r ./base.in lxml==5.3.0 # via -r ./base.in -marshmallow==3.23.1 +marshmallow==3.23.2 # via # dataclasses-json # unstructured-client @@ -88,7 +88,7 @@ packaging==24.2 # via # marshmallow # unstructured-client -psutil==6.1.0 +psutil==6.1.1 # via -r ./base.in pycparser==2.22 # via cffi diff --git a/requirements/dev.txt b/requirements/dev.txt index 8f60a228db..30e42eb0ac 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -8,7 +8,7 @@ build==1.2.2.post1 # via pip-tools cfgv==3.4.0 # via pre-commit -click==8.1.7 +click==8.1.8 # via # -c ./base.txt # -c ./test.txt @@ -17,7 +17,7 @@ distlib==0.3.9 # via virtualenv filelock==3.16.1 # via virtualenv -identify==2.6.3 +identify==2.6.4 # via pre-commit importlib-metadata==8.5.0 # via @@ -51,7 +51,7 @@ tomli==2.2.1 # -c ./test.txt # build # pip-tools -virtualenv==20.28.0 +virtualenv==20.28.1 # via pre-commit wheel==0.45.1 # via pip-tools diff --git a/requirements/extra-paddleocr.txt b/requirements/extra-paddleocr.txt index 2a0a0ec835..d7c0fe7226 100644 --- a/requirements/extra-paddleocr.txt +++ b/requirements/extra-paddleocr.txt @@ -16,7 +16,7 @@ certifi==2024.12.14 # httpcore # httpx # requests -charset-normalizer==3.4.0 +charset-normalizer==3.4.1 # via # -c ./base.txt # requests @@ -58,7 +58,7 @@ imageio==2.36.1 # scikit-image imgaug==0.4.0 # via unstructured-paddleocr -importlib-resources==6.4.5 +importlib-resources==6.5.1 # via matplotlib kiwisolver==1.4.7 # via matplotlib @@ -104,7 +104,7 @@ paddlepaddle==3.0.0b1 # via -r ./extra-paddleocr.in pdf2image==1.17.0 # via unstructured-paddleocr -pillow==11.0.0 +pillow==11.1.0 # via # imageio # imgaug @@ -119,7 +119,7 @@ protobuf==4.25.5 # paddlepaddle pyclipper==1.3.0.post6 # via unstructured-paddleocr -pyparsing==3.2.0 +pyparsing==3.2.1 # via matplotlib python-dateutil==2.9.0.post0 # via diff --git a/requirements/extra-pdf-image.txt b/requirements/extra-pdf-image.txt index eb0b478502..81b61276ef 100644 --- a/requirements/extra-pdf-image.txt +++ b/requirements/extra-pdf-image.txt @@ -16,7 +16,7 @@ cffi==1.17.1 # via # -c ./base.txt # cryptography -charset-normalizer==3.4.0 +charset-normalizer==3.4.1 # via # -c ./base.txt # pdfminer-six @@ -40,11 +40,11 @@ filelock==3.16.1 # huggingface-hub # torch # transformers -flatbuffers==24.3.25 +flatbuffers==24.12.23 # via onnxruntime fonttools==4.55.3 # via matplotlib -fsspec==2024.10.0 +fsspec==2024.12.0 # via # huggingface-hub # torch @@ -79,11 +79,11 @@ idna==3.10 # via # -c ./base.txt # requests -importlib-resources==6.4.5 +importlib-resources==6.5.1 # via matplotlib iopath==0.1.10 # via layoutparser -jinja2==3.1.4 +jinja2==3.1.5 # via torch kiwisolver==1.4.7 # via matplotlib @@ -149,13 +149,13 @@ pdfminer-six==20231228 # via # -r ./extra-pdf-image.in # pdfplumber -pdfplumber==0.11.4 +pdfplumber==0.11.5 # via layoutparser pi-heif==0.21.0 # via -r ./extra-pdf-image.in -pikepdf==9.4.2 +pikepdf==9.5.0 # via -r ./extra-pdf-image.in -pillow==11.0.0 +pillow==11.1.0 # via # layoutparser # matplotlib @@ -165,7 +165,7 @@ pillow==11.0.0 # pikepdf # torchvision # unstructured-pytesseract -portalocker==3.0.0 +portalocker==3.1.1 # via iopath proto-plus==1.25.0 # via @@ -193,13 +193,13 @@ pycparser==2.22 # via # -c ./base.txt # cffi -pyparsing==3.2.0 +pyparsing==3.2.1 # via matplotlib pypdf==5.1.0 # via # -c ./base.txt # -r ./extra-pdf-image.in -pypdfium2==4.30.0 +pypdfium2==4.30.1 # via pdfplumber python-dateutil==2.9.0.post0 # via @@ -233,7 +233,7 @@ requests==2.32.3 # transformers rsa==4.9 # via google-auth -safetensors==0.4.5 +safetensors==0.5.0 # via # timm # transformers diff --git a/requirements/extra-pptx.txt b/requirements/extra-pptx.txt index 87119d3047..5740ad2f5a 100644 --- a/requirements/extra-pptx.txt +++ b/requirements/extra-pptx.txt @@ -6,7 +6,7 @@ # lxml==5.3.0 # via python-pptx -pillow==11.0.0 +pillow==11.1.0 # via python-pptx python-pptx==1.0.2 # via -r ./extra-pptx.in diff --git a/requirements/huggingface.txt b/requirements/huggingface.txt index 3d662ea51b..7051a2233b 100644 --- a/requirements/huggingface.txt +++ b/requirements/huggingface.txt @@ -8,11 +8,11 @@ certifi==2024.12.14 # via # -c ./base.txt # requests -charset-normalizer==3.4.0 +charset-normalizer==3.4.1 # via # -c ./base.txt # requests -click==8.1.7 +click==8.1.8 # via # -c ./base.txt # sacremoses @@ -21,7 +21,7 @@ filelock==3.16.1 # huggingface-hub # torch # transformers -fsspec==2024.10.0 +fsspec==2024.12.0 # via # huggingface-hub # torch @@ -33,7 +33,7 @@ idna==3.10 # via # -c ./base.txt # requests -jinja2==3.1.4 +jinja2==3.1.5 # via torch joblib==1.4.2 # via @@ -74,7 +74,7 @@ requests==2.32.3 # transformers sacremoses==0.1.1 # via -r ./huggingface.in -safetensors==0.4.5 +safetensors==0.5.0 # via transformers sentencepiece==0.2.0 # via -r ./huggingface.in diff --git a/requirements/test.txt b/requirements/test.txt index 1bc2a98271..a7e1d2cfa2 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -12,7 +12,7 @@ anyio==4.7.0 # httpx appdirs==1.4.4 # via label-studio-sdk -argcomplete==3.5.2 +argcomplete==3.5.3 # via datamodel-code-generator attrs==24.3.0 # via @@ -30,16 +30,16 @@ certifi==2024.12.14 # httpcore # httpx # requests -charset-normalizer==3.4.0 +charset-normalizer==3.4.1 # via # -c ./base.txt # requests -click==8.1.7 +click==8.1.8 # via # -c ./base.txt # black # nltk -coverage[toml]==7.6.9 +coverage[toml]==7.6.10 # via # -r ./test.in # pytest-cov @@ -98,7 +98,7 @@ iniconfig==2.0.0 # via pytest isort==5.13.2 # via datamodel-code-generator -jinja2==3.1.4 +jinja2==3.1.5 # via datamodel-code-generator joblib==1.4.2 # via @@ -126,7 +126,7 @@ mccabe==0.7.0 # via flake8 multidict==6.1.0 # via yarl -mypy==1.13.0 +mypy==1.14.1 # via -r ./test.in mypy-extensions==1.0.0 # via @@ -152,7 +152,7 @@ pandas==2.2.3 # via label-studio-sdk pathspec==0.12.1 # via black -pillow==11.0.0 +pillow==11.1.0 # via label-studio-sdk platformdirs==4.3.6 # via black @@ -164,13 +164,13 @@ pycodestyle==2.12.1 # via # flake8 # flake8-print -pydantic[email]==2.10.3 +pydantic[email]==2.10.4 # via # -r ./test.in # datamodel-code-generator # jsf # label-studio-sdk -pydantic-core==2.27.1 +pydantic-core==2.27.2 # via pydantic pyflakes==3.2.0 # via @@ -218,7 +218,7 @@ rpds-py==0.22.3 # referencing rstr==3.2.2 # via jsf -ruff==0.8.3 +ruff==0.8.5 # via -r ./test.in semantic-version==2.10.0 # via liccheck @@ -279,7 +279,7 @@ urllib3==1.26.20 # -c ./base.txt # requests # vcrpy -vcrpy==6.0.2 +vcrpy==7.0.0 # via -r ./test.in wrapt==1.17.0 # via diff --git a/test_unstructured_ingest/expected-structured-output/outlook/21be155fb0c95885.eml.json b/test_unstructured_ingest/expected-structured-output/outlook/21be155fb0c95885.eml.json index e093ce0335..f141adb731 100644 --- a/test_unstructured_ingest/expected-structured-output/outlook/21be155fb0c95885.eml.json +++ b/test_unstructured_ingest/expected-structured-output/outlook/21be155fb0c95885.eml.json @@ -26,7 +26,7 @@ }, "date_created": "1689435368.0", "date_modified": "1689435537.0", - "filesize_bytes": 9189 + "filesize_bytes": 9179 } } } diff --git a/test_unstructured_ingest/expected-structured-output/outlook/497eba8c81c801c6.eml.json b/test_unstructured_ingest/expected-structured-output/outlook/497eba8c81c801c6.eml.json index d4beb7d76a..228e8f2ab5 100644 --- a/test_unstructured_ingest/expected-structured-output/outlook/497eba8c81c801c6.eml.json +++ b/test_unstructured_ingest/expected-structured-output/outlook/497eba8c81c801c6.eml.json @@ -26,7 +26,7 @@ }, "date_created": "1690248382.0", "date_modified": "1690248401.0", - "filesize_bytes": 9207 + "filesize_bytes": 9197 } } } diff --git a/test_unstructured_ingest/expected-structured-output/outlook/4a16a411f162ebbb.eml.json b/test_unstructured_ingest/expected-structured-output/outlook/4a16a411f162ebbb.eml.json index 8906138fd6..786108b092 100644 --- a/test_unstructured_ingest/expected-structured-output/outlook/4a16a411f162ebbb.eml.json +++ b/test_unstructured_ingest/expected-structured-output/outlook/4a16a411f162ebbb.eml.json @@ -26,7 +26,7 @@ }, "date_created": "1688960344.0", "date_modified": "1689460572.0", - "filesize_bytes": 9254 + "filesize_bytes": 9244 } } } diff --git a/unstructured/__version__.py b/unstructured/__version__.py index 07eda39112..dcd9ca00b7 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.16.12-dev5" # pragma: no cover +__version__ = "0.16.12" # pragma: no cover