Skip to content

Commit

Permalink
Merge branch 'master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
Borda authored Apr 17, 2024
2 parents 2465cc8 + be39708 commit 0bc80a7
Show file tree
Hide file tree
Showing 65 changed files with 621 additions and 184 deletions.
8 changes: 4 additions & 4 deletions .azure/gpu-integrations.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ jobs:
matrix:
"torch | 1.x":
docker-image: "pytorchlightning/torchmetrics:ubuntu22.04-cuda11.8.0-py3.9-torch1.13"
torch-ver: "1.13.1"
torch-ver: "1.13"
requires: "oldest"
"torch | 2.x":
docker-image: "pytorch/pytorch:2.2.1-cuda12.1-cudnn8-runtime"
torch-ver: "2.2.1"
docker-image: "pytorch/pytorch:2.2.2-cuda12.1-cudnn8-runtime"
torch-ver: "2.2"
# how long to run the job before automatically cancelling
timeoutInMinutes: "40"
# how much time to give 'run always even if cancelled tasks' before stopping them
Expand Down Expand Up @@ -91,7 +91,7 @@ jobs:
- bash: |
set -e
pip list
python -c "from torch import __version__ as ver ; assert str(ver).split('+')[0] == '$(torch-ver)', f'PyTorch: {ver}'"
python -c "from torch import __version__ as ver ; assert '.'.join(str(ver).split('.')[:2]) == '$(torch-ver)', f'PyTorch: {ver}'"
python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu >= 2, f'found GPUs: {mgpu}'"
displayName: "Sanity check"
Expand Down
68 changes: 41 additions & 27 deletions .azure/gpu-unittests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,21 @@ jobs:
- job: unitest_GPU
strategy:
matrix:
"PyTorch | 1.10":
"PyTorch | 1.10 oldest":
# Torch does not have build wheels with old Torch versions for newer CUDA
docker-image: "pytorchlightning/torchmetrics:ubuntu20.04-cuda11.3.1-py3.9-torch1.10"
torch-ver: "1.10.2"
"PyTorch | 1.X":
docker-image: "pytorchlightning/torchmetrics:ubuntu22.04-cuda11.8.0-py3.9-torch1.13"
torch-ver: "1.13.1"
"PyTorch | 2.X":
docker-image: "pytorchlightning/torchmetrics:ubuntu22.04-cuda12.1.1-py3.11-torch2.2"
torch-ver: "2.2.1"
docker-image: "ubuntu20.04-cuda11.3.1-py3.9-torch1.10"
torch-ver: "1.10"
"PyTorch | 1.X LTS":
docker-image: "ubuntu22.04-cuda11.8.0-py3.9-torch1.13"
torch-ver: "1.13"
"PyTorch | 2.X stable":
docker-image: "ubuntu22.04-cuda12.1.1-py3.11-torch2.2"
torch-ver: "2.2"
"PyTorch | 2.X future":
docker-image: "ubuntu22.04-cuda12.1.1-py3.11-torch2.3"
torch-ver: "2.3"
# how long to run the job before automatically cancelling
timeoutInMinutes: "120"
timeoutInMinutes: "180"
# how much time to give 'run always even if cancelled tasks' before stopping them
cancelTimeoutInMinutes: "2"

Expand All @@ -47,11 +50,12 @@ jobs:
# MKL_THREADING_LAYER: "GNU"
MKL_SERVICE_FORCE_INTEL: "1"
TEST_DIRS: "unittests"
CACHED_REFERENCES: "/var/tmp/cached-references.zip"
# todo: consider unfreeze for master too
FREEZE_REQUIREMENTS: 1

container:
image: "$(docker-image)"
image: "pytorchlightning/torchmetrics:$(docker-image)"
options: "--gpus=all --shm-size=8g -v /var/tmp:/var/tmp"

workspace:
Expand Down Expand Up @@ -119,22 +123,32 @@ jobs:
set -e
pip list
python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu >= 2, f'found GPUs: {mgpu}'"
python -c "from torch import __version__ as ver ; assert str(ver).split('+')[0] == '$(torch-ver)', f'PyTorch: installed {ver} but expected $(torch-ver)'"
python -c "from torch import __version__ as ver ; assert '.'.join(str(ver).split('.')[:2]) == '$(torch-ver)', f'PyTorch: installed {ver} but expected $(torch-ver)'"
displayName: "Sanity check"
- bash: |
pip install -q py-tree
py-tree /var/tmp/torch
py-tree /var/tmp/hf
# this gives more than 60k lines and takes a few minutes to run
#py-tree $(PYTEST_REFERENCE_CACHE) --show_hidden
# make sure the cache exists even if it is empty
mkdir -p /var/tmp/cached-references
# copy the cache to the tests folder to be used in the next steps
cp -r /var/tmp/cached-references tests/_cache-references
du -h --max-depth=1 tests/
displayName: "Show caches"
- bash: |
# Check if the cached references file exists
if [ -f $(CACHED_REFERENCES) ]; then
# Create a directory if it doesn't already exist
mkdir -p tests/_cache-references
# Unzip the cached references archive into the directory inside the tests folder
unzip -q $(CACHED_REFERENCES) -d tests/_cache-references
ls -lh tests/_cache-references/
else
echo "The file '$(CACHED_REFERENCES)' does not exist."
fi
du -h --max-depth=1 tests/
timeoutInMinutes: "5"
# if pull request, copy the cache to the tests folder to be used in the next steps
condition: eq(variables['Build.Reason'], 'PullRequest')
displayName: "Copy cached refs"
- bash: |
python -m pytest torchmetrics --cov=torchmetrics \
--timeout=240 --durations=50 \
Expand All @@ -143,23 +157,23 @@ jobs:
env:
DOCTEST_DOWNLOAD_TIMEOUT: "180"
SKIP_SLOW_DOCTEST: "1"
workingDirectory: src
workingDirectory: "src/"
timeoutInMinutes: "40"
displayName: "DocTesting"
- bash: |
wget https://pl-public-data.s3.amazonaws.com/metrics/data.zip
unzip -o data.zip
ls -l _data/*
workingDirectory: tests
workingDirectory: "tests/"
displayName: "Pull testing data from S3"
- bash: |
python -m pytest $(TEST_DIRS) \
-m "not DDP" --numprocesses=5 --dist=loadfile \
--cov=torchmetrics --timeout=240 --durations=100 \
--reruns 3 --reruns-delay 1
workingDirectory: tests
workingDirectory: "tests/"
# skip for PR if there is nothing to test, note that outside PR there is default 'unittests'
condition: and(succeeded(), ne(variables['TEST_DIRS'], ''))
timeoutInMinutes: "90"
Expand All @@ -171,16 +185,16 @@ jobs:
--timeout=240 --durations=100
env:
USE_PYTEST_POOL: "1"
workingDirectory: tests
workingDirectory: "tests/"
# skip for PR if there is nothing to test, note that outside PR there is default 'unittests'
condition: and(succeeded(), ne(variables['TEST_DIRS'], ''))
timeoutInMinutes: "90"
displayName: "UnitTesting DDP"
- bash: |
# archive potentially updated cache to the machine filesystem to be reused with next jobs
zip -q -r $(CACHED_REFERENCES) tests/_cache-references
du -h --max-depth=1 tests/
# copy potentially updated cache to the machine filesystem to be reused with next jobs
cp -r --update tests/_cache-references /var/tmp/cached-references
# set as extra step to not pollute general cache when jobs fails or crashes
# so do this update only with successful jobs on master
condition: and(succeeded(), ne(variables['Build.Reason'], 'PullRequest'))
Expand All @@ -192,7 +206,7 @@ jobs:
python -m codecov --token=$(CODECOV_TOKEN) --name="GPU-coverage" \
--commit=$(Build.SourceVersion) --flags=gpu,unittest --env=linux,azure
ls -l
workingDirectory: tests
workingDirectory: "tests/"
# skip for PR if there is nothing to test, note that outside PR there is default 'unittests'
condition: and(succeeded(), ne(variables['TEST_DIRS'], ''))
displayName: "Statistics"
Expand All @@ -205,7 +219,7 @@ jobs:
echo "Processing $fn example..."
python $fn
done
workingDirectory: examples
workingDirectory: "examples/"
# skip for PR if there is nothing to test, note that outside PR there is default 'unittests'
condition: and(succeeded(), ne(variables['TEST_DIRS'], ''))
displayName: "Examples"
Expand Down
7 changes: 6 additions & 1 deletion .github/actions/pull-caches/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ runs:
shell: bash

- name: Cache References
# do not use this cache for dispatch and cron runs, so that the caches can be rebuilt if needed
if: github.event_name != 'workflow_dispatch' && github.event_name != 'schedule'
continue-on-error: true
uses: actions/cache/restore@v3
with:
Expand All @@ -90,5 +92,8 @@ runs:

- name: Restored References
continue-on-error: true
run: py-tree tests/_cache-references/ --show_hidden
working-directory: tests/
run: |
mkdir -p _cache-references
ls -lh _cache-references/
shell: bash
12 changes: 6 additions & 6 deletions .github/workflows/ci-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,19 @@ concurrency:

jobs:
check-code:
uses: Lightning-AI/utilities/.github/workflows/check-typing.yml@v0.10.1
uses: Lightning-AI/utilities/.github/workflows/check-typing.yml@v0.11.2
with:
actions-ref: v0.10.1
actions-ref: v0.11.2
extra-typing: "typing"

check-schema:
uses: Lightning-AI/utilities/.github/workflows/check-schema.yml@v0.10.1
uses: Lightning-AI/utilities/.github/workflows/check-schema.yml@v0.11.2

check-package:
if: github.event.pull_request.draft == false
uses: Lightning-AI/utilities/.github/workflows/check-package.yml@v0.10.1
uses: Lightning-AI/utilities/.github/workflows/check-package.yml@v0.11.2
with:
actions-ref: v0.10.1
actions-ref: v0.11.2
artifact-name: dist-packages-${{ github.sha }}
import-name: "torchmetrics"
testing-matrix: |
Expand All @@ -35,7 +35,7 @@ jobs:
}
check-md-links:
uses: Lightning-AI/utilities/.github/workflows/check-md-links.yml@v0.10.1
uses: Lightning-AI/utilities/.github/workflows/check-md-links.yml@v0.11.2
with:
base-branch: master
config-file: ".github/markdown-links-config.json"
12 changes: 6 additions & 6 deletions .github/workflows/ci-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,24 +40,24 @@ jobs:
- "1.13.1"
- "2.0.1"
- "2.1.2"
- "2.2.1"
- "2.2.2"
include:
# cover additional python and PR combinations
- { os: "ubuntu-22.04", python-version: "3.8", pytorch-version: "1.13.1" }
- { os: "ubuntu-22.04", python-version: "3.10", pytorch-version: "2.0.1" }
- { os: "ubuntu-22.04", python-version: "3.10", pytorch-version: "2.2.1" }
- { os: "ubuntu-22.04", python-version: "3.11", pytorch-version: "2.2.1" }
- { os: "ubuntu-22.04", python-version: "3.10", pytorch-version: "2.2.2" }
- { os: "ubuntu-22.04", python-version: "3.11", pytorch-version: "2.2.2" }
# standard mac machine, not the M1
- { os: "macOS-12", python-version: "3.8", pytorch-version: "1.13.1" }
- { os: "macOS-12", python-version: "3.10", pytorch-version: "2.0.1" }
- { os: "macOS-12", python-version: "3.11", pytorch-version: "2.2.1" }
- { os: "macOS-12", python-version: "3.11", pytorch-version: "2.2.2" }
# using the ARM based M1 machine
- { os: "macOS-14", python-version: "3.10", pytorch-version: "2.0.1" }
- { os: "macOS-14", python-version: "3.11", pytorch-version: "2.2.1" }
- { os: "macOS-14", python-version: "3.11", pytorch-version: "2.2.2" }
# some windows
- { os: "windows-2022", python-version: "3.8", pytorch-version: "1.13.1" }
- { os: "windows-2022", python-version: "3.10", pytorch-version: "2.0.1" }
- { os: "windows-2022", python-version: "3.11", pytorch-version: "2.2.1" }
- { os: "windows-2022", python-version: "3.11", pytorch-version: "2.2.2" }
env:
PYTORCH_URL: "https://download.pytorch.org/whl/cpu/torch_stable.html"
FREEZE_REQUIREMENTS: ${{ ! (github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/heads/release/')) }}
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/clear-cache.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@ on:
jobs:
cron-clear:
if: github.event_name == 'schedule'
uses: Lightning-AI/utilities/.github/workflows/clear-cache.yml@v0.10.1
uses: Lightning-AI/utilities/.github/workflows/clear-cache.yml@v0.11.2
with:
pattern: "pip-latest"

direct-clear:
if: github.event_name == 'workflow_dispatch'
uses: Lightning-AI/utilities/.github/workflows/clear-cache.yml@v0.10.1
uses: Lightning-AI/utilities/.github/workflows/clear-cache.yml@v0.11.2
with:
pattern: ${{ inputs.pattern }}
8 changes: 3 additions & 5 deletions .github/workflows/docker-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,11 @@ jobs:
# These are the base images for PL release docker images,
# so include at least all the combinations in release-dockers.yml.
- { python: "3.9", pytorch: "1.10", cuda: "11.3.1", ubuntu: "20.04" }
- { python: "3.9", pytorch: "1.11", cuda: "11.8.0", ubuntu: "22.04" }
#- { python: "3.9", pytorch: "1.11", cuda: "11.8.0", ubuntu: "22.04" }
- { python: "3.9", pytorch: "1.13", cuda: "11.8.0", ubuntu: "22.04" }
- { python: "3.10", pytorch: "2.0", cuda: "11.8.0", ubuntu: "22.04" }
- { python: "3.10", pytorch: "2.0", cuda: "12.1.1", ubuntu: "22.04" }
- { python: "3.10", pytorch: "2.1", cuda: "12.1.1", ubuntu: "22.04" }
- { python: "3.11", pytorch: "2.1", cuda: "12.1.1", ubuntu: "22.04" }
- { python: "3.10", pytorch: "2.2", cuda: "12.1.1", ubuntu: "22.04" }
- { python: "3.11", pytorch: "2.2", cuda: "12.1.1", ubuntu: "22.04" }
- { python: "3.11", pytorch: "2.3", cuda: "12.1.1", ubuntu: "22.04" }
steps:
- uses: actions/checkout@v4

Expand Down
1 change: 1 addition & 0 deletions .github/workflows/docs-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ env:
TOKENIZERS_PARALLELISM: false
SPHINX_MOCK_REQUIREMENTS: 0
SPHINX_FETCH_ASSETS: 0
SPHINX_PIN_RELEASE_VERSIONS: 1

jobs:
docs-make:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/publish-pkg.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ jobs:
- run: ls -lh dist/
# We do this, since failures on test.pypi aren't that bad
- name: Publish to Test PyPI
uses: pypa/gh-action-pypi-publish@v1.8.12
uses: pypa/gh-action-pypi-publish@v1.8.14
with:
user: __token__
password: ${{ secrets.test_pypi_password }}
Expand All @@ -94,7 +94,7 @@ jobs:
path: dist
- run: ls -lh dist/
- name: Publish distribution 📦 to PyPI
uses: pypa/gh-action-pypi-publish@v1.8.12
uses: pypa/gh-action-pypi-publish@v1.8.14
with:
user: __token__
password: ${{ secrets.pypi_password }}
Expand Down
7 changes: 4 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ ci:

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
rev: v4.6.0
hooks:
- id: end-of-file-fixer
- id: trailing-whitespace
Expand All @@ -46,7 +46,7 @@ repos:
exclude: pyproject.toml

- repo: https://github.com/crate-ci/typos
rev: v1.16.26
rev: v1.20.7
hooks:
- id: typos
# left empty so that fixes are not written
Expand Down Expand Up @@ -83,6 +83,7 @@ repos:
rev: v3.1.0
hooks:
- id: prettier
files: \.(json|yml|yaml|toml)
# https://prettier.io/docs/en/options.html#print-width
args: ["--print-width=120"]

Expand Down Expand Up @@ -111,7 +112,7 @@ repos:
- id: text-unicode-replacement-char

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.3.2
rev: v0.3.5
hooks:
# try to fix what is possible
- id: ruff
Expand Down
2 changes: 0 additions & 2 deletions .prettierignore

This file was deleted.

Loading

0 comments on commit 0bc80a7

Please sign in to comment.