Skip to content

Commit

Permalink
Online video support for VLMs (#10020)
Browse files Browse the repository at this point in the history
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Co-authored-by: litianjian <litianjian@bytedance.com>
Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk>
  • Loading branch information
3 people authored Nov 7, 2024
1 parent 97b8475 commit 28b2877
Show file tree
Hide file tree
Showing 12 changed files with 598 additions and 31 deletions.
1 change: 1 addition & 0 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ def setup(app):
"soundfile",
"gguf",
"lark",
"decord",
]

for mock_target in autodoc_mock_imports:
Expand Down
6 changes: 4 additions & 2 deletions requirements-test.in
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,21 @@ pytest-shard

# testing utils
awscli
decord # required for video tests
einops # required for MPT, qwen-vl and Mamba
httpx
librosa # required for audio tests
opencv-python # required for video tests
peft
requests
ray[adag]==2.35
sentence-transformers # required for embedding
soundfile # required for audio test
sentence-transformers # required for embedding tests
soundfile # required for audio tests
timm # required for internvl test
torch==2.5.1
transformers_stream_generator # required for qwen-vl test
matplotlib # required for qwen-vl test
mistral_common[opencv] >= 1.4.4 # required for pixtral test
datamodel_code_generator # required for minicpm3 test
lm-eval[api]==0.4.4 # required for model evaluation test

Expand Down
57 changes: 50 additions & 7 deletions requirements-test.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#
# This file is autogenerated by pip-compile with Python 3.12
# This file is autogenerated by pip-compile with Python 3.9
# by the following command:
#
# pip-compile --output-file=requirements-test.txt requirements-test.in
# pip-compile requirements-test.in
#
absl-py==2.1.0
# via rouge-score
Expand All @@ -28,6 +28,10 @@ anyio==4.6.2.post1
# via httpx
argcomplete==3.5.1
# via datamodel-code-generator
async-timeout==4.0.3
# via
# aiohttp
# redis
attrs==24.2.0
# via
# aiohttp
Expand Down Expand Up @@ -90,6 +94,8 @@ datasets==3.0.2
# lm-eval
decorator==5.1.1
# via librosa
decord==0.6.0
# via -r requirements-test.in
dill==0.3.8
# via
# datasets
Expand All @@ -106,6 +112,10 @@ email-validator==2.2.0
# via pydantic
evaluate==0.4.3
# via lm-eval
exceptiongroup==1.2.2
# via
# anyio
# pytest
fastrlock==0.8.2
# via cupy-cuda12x
filelock==3.16.1
Expand Down Expand Up @@ -156,6 +166,8 @@ idna==3.10
# httpx
# requests
# yarl
importlib-resources==6.4.5
# via matplotlib
inflect==5.6.2
# via datamodel-code-generator
iniconfig==2.0.0
Expand All @@ -178,7 +190,9 @@ joblib==1.4.2
jsonlines==4.0.0
# via lm-eval
jsonschema==4.23.0
# via ray
# via
# mistral-common
# ray
jsonschema-specifications==2024.10.1
# via jsonschema
kiwisolver==1.4.7
Expand All @@ -204,6 +218,10 @@ mbstrdecoder==1.1.3
# dataproperty
# pytablewriter
# typepy
mistral-common[opencv]==1.4.4
# via
# -r requirements-test.in
# mistral-common
more-itertools==10.5.0
# via lm-eval
mpmath==1.3.0
Expand Down Expand Up @@ -238,12 +256,15 @@ numpy==1.26.4
# contourpy
# cupy-cuda12x
# datasets
# decord
# evaluate
# librosa
# matplotlib
# mistral-common
# numba
# numexpr
# opencv-python
# opencv-python-headless
# pandas
# peft
# rouge-score
Expand Down Expand Up @@ -288,6 +309,8 @@ nvidia-nvtx-cu12==12.4.127
# via torch
opencv-python==4.10.0.84
# via -r requirements-test.in
opencv-python-headless==4.10.0.84
# via mistral-common
packaging==24.1
# via
# accelerate
Expand Down Expand Up @@ -317,9 +340,10 @@ peft==0.13.2
# via
# -r requirements-test.in
# lm-eval
pillow==11.0.0
pillow==10.4.0
# via
# matplotlib
# mistral-common
# sentence-transformers
# torchvision
platformdirs==4.3.6
Expand Down Expand Up @@ -354,7 +378,9 @@ pybind11==2.13.6
pycparser==2.22
# via cffi
pydantic[email]==2.9.2
# via datamodel-code-generator
# via
# datamodel-code-generator
# mistral-common
pydantic-core==2.23.4
# via pydantic
pyparsing==3.2.0
Expand Down Expand Up @@ -420,6 +446,7 @@ requests==2.32.3
# evaluate
# huggingface-hub
# lm-eval
# mistral-common
# pooch
# ray
# tiktoken
Expand Down Expand Up @@ -456,6 +483,8 @@ scipy==1.13.1
# sentence-transformers
sentence-transformers==3.2.1
# via -r requirements-test.in
sentencepiece==0.2.0
# via mistral-common
six==1.16.0
# via
# python-dateutil
Expand Down Expand Up @@ -486,12 +515,20 @@ tensorizer==2.9.0
# via -r requirements-test.in
threadpoolctl==3.5.0
# via scikit-learn
tiktoken==0.8.0
# via lm-eval
tiktoken==0.7.0
# via
# lm-eval
# mistral-common
timm==1.0.11
# via -r requirements-test.in
tokenizers==0.20.1
# via transformers
toml==0.10.2
# via datamodel-code-generator
tomli==2.0.2
# via
# black
# pytest
torch==2.5.1
# via
# -r requirements-test.in
Expand Down Expand Up @@ -535,8 +572,12 @@ typepy[datetime]==1.3.2
# tabledata
typing-extensions==4.12.2
# via
# anyio
# black
# huggingface-hub
# librosa
# mistral-common
# multidict
# pydantic
# pydantic-core
# torch
Expand All @@ -554,6 +595,8 @@ xxhash==3.5.0
# evaluate
yarl==1.17.1
# via aiohttp
zipp==3.20.2
# via importlib-resources
zstandard==0.23.0
# via lm-eval

Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -554,7 +554,8 @@ def _read_requirements(filename: str) -> List[str]:
ext_modules=ext_modules,
extras_require={
"tensorizer": ["tensorizer>=2.9.0"],
"audio": ["librosa", "soundfile"] # Required for audio processing
"audio": ["librosa", "soundfile"], # Required for audio processing
"video": ["decord"] # Required for video processing
},
cmdclass={"build_ext": cmake_build_ext} if len(ext_modules) > 0 else {},
package_data=package_data,
Expand Down
Loading

0 comments on commit 28b2877

Please sign in to comment.