Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add unittest job to CircleCI #2328

Merged
merged 1 commit into from
Jun 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
432 changes: 272 additions & 160 deletions .circleci/config.yml

Large diffs are not rendered by default.

273 changes: 159 additions & 114 deletions .circleci/config.yml.in
Original file line number Diff line number Diff line change
Expand Up @@ -155,106 +155,7 @@ jobs:
- store_test_results:
path: build_results/

# Linux CUDA conda job: runs on a GPU machine executor (not a Docker executor),
# provisions Docker + the NVIDIA container toolkit + the NVIDIA driver by hand,
# then runs packaging/build_conda.sh inside the pytorch/conda-cuda image.
# Shared settings come from the `binary_common` anchor, defined outside this view.
binary_linux_conda_cuda:
<<: *binary_common
machine:
image: ubuntu-1604:201903-01
resource_class: gpu.medium
steps:
- checkout_merge
# Step 1: provision the VM. In order: register apt signing keys, install apt
# prerequisites, add the Docker apt repository and install pinned docker-ce /
# docker-ce-cli / containerd.io versions, add the nvidia-docker repository and
# install a pinned nvidia-container-toolkit, restart Docker, then download the
# NVIDIA driver installer and run it non-interactively. If the installer fails,
# its log is printed and the step still fails (`&& false`). `nvidia-smi` runs
# last; under `set -e` a non-zero exit from it aborts the step.
- run:
name: Setup environment
command: |
set -ex

curl -L https://packagecloud.io/circleci/trusty/gpgkey | sudo apt-key add -
curl -L https://dl.google.com/linux/linux_signing_key.pub | sudo apt-key add -

sudo apt-get update

sudo apt-get install \
apt-transport-https \
ca-certificates \
curl \
gnupg-agent \
software-properties-common

curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -

sudo add-apt-repository \
"deb [arch=amd64] https://download.docker.com/linux/ubuntu \
$(lsb_release -cs) \
stable"

sudo apt-get update
export DOCKER_VERSION="5:19.03.2~3-0~ubuntu-xenial"
sudo apt-get install docker-ce=${DOCKER_VERSION} docker-ce-cli=${DOCKER_VERSION} containerd.io=1.2.6-3

# Add the package repositories
distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list

export NVIDIA_CONTAINER_VERSION="1.0.3-1"
sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit=${NVIDIA_CONTAINER_VERSION}
sudo systemctl restart docker

DRIVER_FN="NVIDIA-Linux-x86_64-440.59.run"
wget "https://s3.amazonaws.com/ossci-linux/nvidia_driver/$DRIVER_FN"
sudo /bin/bash "$DRIVER_FN" -s --no-drm || (sudo cat /var/log/nvidia-installer.log && false)
nvidia-smi

# Step 2: pull the build image up front; the pull's stdout is discarded
# (`>/dev/null`), so only the echo line and any stderr output reach the log.
- run:
name: Pull docker image
command: |
set -ex
export DOCKER_IMAGE=pytorch/conda-cuda
echo Pulling docker image $DOCKER_IMAGE
docker pull $DOCKER_IMAGE >/dev/null

# Step 3: run the conda build script inside the container with all GPUs
# exposed. The checkout is mounted at /remote and used as the working
# directory; the variables named in VARS_TO_PASS are forwarded from the host
# environment into the container via `-e`.
- run:
name: Build and run tests
command: |
set -ex

cd ${HOME}/project/

export DOCKER_IMAGE=pytorch/conda-cuda
export VARS_TO_PASS="-e PYTHON_VERSION -e BUILD_VERSION -e PYTORCH_VERSION -e UNICODE_ABI -e CU_VERSION"

docker run --gpus all --ipc=host -v $(pwd):/remote -w /remote ${VARS_TO_PASS} ${DOCKER_IMAGE} ./packaging/build_conda.sh

# Windows conda job on the `windows-cpu` executor: builds the conda package with
# packaging/build_conda.sh and uploads whatever lands in build_results/ as test
# results. Shared settings come from the `binary_common` anchor (defined outside
# this view).
binary_win_conda:
<<: *binary_common
executor: windows-cpu
steps:
- checkout_merge
# Sources the Visual C++ install helper, hooks conda into the bash session and
# activates the base environment, installs conda-build (excluding
# conda-package-handling 1.5.0 -- the reason for the exclusion is not recorded
# here), then runs the build script.
- run:
command: |
set -ex
source packaging/windows/internal/vc_install_helper.sh
eval "$('/C/tools/miniconda3/Scripts/conda.exe' 'shell.bash' 'hook')"
conda activate base
conda install -yq conda-build "conda-package-handling!=1.5.0"
packaging/build_conda.sh
# Publish the contents of build_results/ to CircleCI's test-results view.
- store_test_results:
path: build_results/

# Windows conda job on the `windows-gpu` executor. The run step is the same
# script as in `binary_win_conda`; unlike that job, no `store_test_results`
# step follows it. Shared settings come from the `binary_common` anchor
# (defined outside this view).
binary_win_conda_cuda:
<<: *binary_common
executor: windows-gpu
steps:
- checkout_merge
# Sources the Visual C++ install helper, hooks conda into the bash session and
# activates the base environment, installs conda-build (excluding
# conda-package-handling 1.5.0), then runs the build script.
- run:
command: |
set -ex
source packaging/windows/internal/vc_install_helper.sh
eval "$('/C/tools/miniconda3/Scripts/conda.exe' 'shell.bash' 'hook')"
conda activate base
conda install -yq conda-build "conda-package-handling!=1.5.0"
packaging/build_conda.sh

binary_win_conda_release:
<<: *binary_common
executor: windows-cpu
steps:
Expand All @@ -279,7 +180,7 @@ jobs:
- store_test_results:
path: build_results/

binary_win_wheel_release:
binary_win_wheel:
<<: *binary_common
executor: windows-cpu
steps:
Expand Down Expand Up @@ -385,34 +286,178 @@ jobs:
aws s3 cp "$pkg" "s3://pytorch/whl/${UPLOAD_CHANNEL}/<< parameters.subfolder >>" --acl public-read
done

unittest_linux_cpu:
<<: *binary_common
docker:
- image: "pytorch/manylinux-cuda102"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason why a CUDA image is used to run the CPU tests?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When I added the original version of this to torchaudio, I could not find a CPU version. If you know of an appropriate image for CPU, let me know and I will update the torchaudio side as well.

resource_class: 2xlarge+
steps:
- checkout
- run:
name: Generate cache key
# This will refresh cache on Sundays, nightly build should generate new cache.
command: echo "$(date +"%Y-%U")" > .circleci-weekly
- restore_cache:
{% raw %}
keys:
- env-v2-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }}
{% endraw %}
- run:
name: Setup
command: .circleci/unittest/linux/scripts/setup_env.sh
- save_cache:
{% raw %}
key: env-v2-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }}
{% endraw %}
paths:
- conda
- env
- run:
name: Install torchvision
command: .circleci/unittest/linux/scripts/install.sh
- run:
name: Run tests
command: .circleci/unittest/linux/scripts/run_test.sh
- run:
name: Post process
command: .circleci/unittest/linux/scripts/post_process.sh
- store_test_results:
path: test-results

unittest_linux_gpu:
<<: *binary_common
machine:
image: ubuntu-1604-cuda-10.1:201909-23
resource_class: gpu.small
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aren't this job and unittest_linux_cpu basically the same job?

Should we just consolidate them and then add an extra parameter to designate resource_class?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@seemethere When I worked on this, I could not find a Docker runner with a GPU. My understanding is that for GPU jobs CircleCI provides a virtual machine, so we need to run the same scripts through `docker run`, whereas the non-GPU environment CircleCI provides is a Docker container.
Let me know if you know a way to request a GPU Docker environment (not a VM).

environment:
image_name: "pytorch/manylinux-cuda101"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason why you prefer CUDA 10.1 over CUDA 10.2?

steps:
- checkout
- run:
name: Generate cache key
# This will refresh cache on Sundays, nightly build should generate new cache.
command: echo "$(date +"%Y-%U")" > .circleci-weekly
- restore_cache:
{% raw %}
keys:
- env-v2-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }}
{% endraw %}
- run:
name: Setup
command: docker run -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/setup_env.sh
- save_cache:
{% raw %}
key: env-v2-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }}
{% endraw %}
paths:
- conda
- env
- run:
name: Install torchvision
command: docker run -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/install.sh
- run:
name: Run tests
command: docker run -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/run_test.sh
- run:
name: Post Process
command: docker run -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/post_process.sh
- store_test_results:
path: test-results

# Windows CPU unit-test job: restores a cached conda environment, runs the
# setup / install / test / post-process scripts from
# .circleci/unittest/windows/scripts/, and uploads test-results/.
# Shared settings come from the `binary_common` anchor (defined outside this view).
unittest_windows_cpu:
<<: *binary_common
executor:
name: windows-cpu
steps:
- checkout
# Writes "<year>-<week number>" to .circleci-weekly; the checksum of that file
# is the last component of the cache key below, so the key rolls over weekly.
- run:
name: Generate cache key
# This will refresh the cache on Sundays; the nightly build should generate a new cache.
command: echo "$(date +"%Y-%U")" > .circleci-weekly
# `{% raw %}` keeps Jinja (this file is rendered by regenerate.py) from
# consuming the `{{ arch }}` / `{{ checksum ... }}` expressions, which are
# CircleCI cache-key templates and must reach the generated config verbatim.
# Key components: key version, OS, arch, Python version, environment.yml
# checksum, weekly stamp.
- restore_cache:
{% raw %}
keys:
- env-v2-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }}
{% endraw %}
- run:
name: Setup
command: .circleci/unittest/windows/scripts/setup_env.sh
# Saved right after Setup, under the same key that restore_cache looks up.
# The `conda` and `env` directories are cached -- presumably created by
# setup_env.sh; confirm against that script.
- save_cache:
{% raw %}
key: env-v2-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }}
{% endraw %}
paths:
- conda
- env
- run:
name: Install torchvision
command: .circleci/unittest/windows/scripts/install.sh
- run:
name: Run tests
command: .circleci/unittest/windows/scripts/run_test.sh
- run:
name: Post process
command: .circleci/unittest/windows/scripts/post_process.sh
# Publish the contents of test-results/ to CircleCI's test-results view.
- store_test_results:
path: test-results

unittest_windows_gpu:
<<: *binary_common
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment here about consolidating build jobs

executor:
name: windows-gpu
environment:
CUDA_VERSION: "10.1"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above: why CUDA 10.1 rather than CUDA 10.2?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We used CUDA 10.1 before this change simply because it is already installed in the image. If you want, we can change it in a follow-up PR.

steps:
- checkout
- run:
name: Generate cache key
# This will refresh cache on Sundays, nightly build should generate new cache.
command: echo "$(date +"%Y-%U")" > .circleci-weekly
- restore_cache:
{% raw %}
keys:
- env-v1-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }}
{% endraw %}
- run:
name: Setup
command: .circleci/unittest/windows/scripts/setup_env.sh
- save_cache:
{% raw %}
key: env-v1-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }}
{% endraw %}
paths:
- conda
- env
- run:
name: Install torchvision
command: .circleci/unittest/windows/scripts/install.sh
- run:
name: Run tests
command: .circleci/unittest/windows/scripts/run_test.sh
- run:
name: Post process
command: .circleci/unittest/windows/scripts/post_process.sh
- store_test_results:
path: test-results

workflows:
build:
{%- if True %}
jobs:
- circleci_consistency
{{ workflows(windows_latest_only=True) }}
- binary_linux_conda_cuda:
name: torchvision_linux_py3.8_cu102_cuda
python_version: "3.8"
cu_version: "cu102"
- binary_win_conda:
name: torchvision_win_py3.6_cpu
python_version: "3.6"
cu_version: "cpu"
- binary_win_conda_cuda:
name: torchvision_win_py3.6_cu101
python_version: "3.6"
cu_version: "cu101"
{{ build_workflows(windows_latest_only=True) }}
- python_lint
- python_type_check
- clang_format

unittest:
jobs:
{{ unittest_workflows() }}
nightly:
{%- endif %}
jobs:
- circleci_consistency
- python_lint
- python_type_check
- clang_format
{{ workflows(prefix="nightly_", filter_branch="nightly", upload=True) }}
{{ build_workflows(prefix="nightly_", filter_branch="nightly", upload=True) }}
40 changes: 36 additions & 4 deletions .circleci/regenerate.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,14 @@
import os.path


def workflows(prefix='', filter_branch=None, upload=False, indentation=6, windows_latest_only=False):
PYTHON_VERSIONS = ["3.6", "3.7", "3.8"]


def build_workflows(prefix='', filter_branch=None, upload=False, indentation=6, windows_latest_only=False):
w = []
for btype in ["wheel", "conda"]:
for os_type in ["linux", "macos", "win"]:
python_versions = ["3.6", "3.7", "3.8"]
python_versions = PYTHON_VERSIONS
cu_versions = (["cpu", "cu92", "cu101", "cu102"] if os_type == "linux" or os_type == "win" else ["cpu"])
for python_version in python_versions:
for cu_version in cu_versions:
Expand Down Expand Up @@ -97,10 +100,14 @@ def generate_base_workflow(base_workflow_name, python_version, cu_version,
}
}

w = f"binary_{os_type}_{btype}_release" if os_type == "win" else f"binary_{os_type}_{btype}"
w = f"binary_{os_type}_{btype}"
return {w: d}


def gen_filter_branch_tree(*branches):
    """Build a ``filters`` mapping that restricts a job to the given branches.

    Args:
        *branches: Names of the branches the job may run on, in the order
            they should be listed.

    Returns:
        dict: ``{"branches": {"only": [<branch>, ...]}}``. The inner list is a
        fresh list on every call; with no arguments it is empty.
    """
    # Materialise the varargs tuple as a list (a plain copy, no per-item work).
    return {"branches": {"only": list(branches)}}


def generate_upload_workflow(base_workflow_name, os_type, btype, cu_version, *, filter_branch=None):
d = {
"name": f"{base_workflow_name}_upload",
Expand Down Expand Up @@ -131,6 +138,28 @@ def indent(indentation, data_list):
yaml.dump(data_list, default_flow_style=False).splitlines())


def unittest_workflows(indentation=6):
    """Generate the job entries for the ``unittest`` workflow.

    One entry is emitted for every combination of OS (``linux``, ``windows``),
    device (``cpu``, ``gpu``) and Python version in ``PYTHON_VERSIONS``. Each
    entry maps the job name ``unittest_<os>_<device>`` to its parameters.

    GPU entries always use ``cu_version`` ``cu101``; GPU entries for Python
    versions other than 3.8 are additionally filtered to the ``master`` and
    ``nightly`` branches. CPU entries use ``cu_version`` ``cpu`` and carry no
    branch filter.

    Args:
        indentation: Forwarded unchanged to ``indent`` (presumably the number
            of spaces to indent the rendered YAML by -- confirm against
            ``indent``).

    Returns:
        Whatever ``indent(indentation, jobs)`` returns for the generated list.
    """
    jobs = []
    for os_type in ["linux", "windows"]:
        for device_type in ["cpu", "gpu"]:
            for python_version in PYTHON_VERSIONS:
                job = {
                    "name": f"unittest_{os_type}_{device_type}_py{python_version}",
                    "python_version": python_version,
                }

                if device_type == 'gpu':
                    # Only the Python 3.8 GPU job is left unfiltered; the other
                    # GPU jobs are limited to master/nightly.
                    if python_version != "3.8":
                        job['filters'] = gen_filter_branch_tree('master', 'nightly')
                    job['cu_version'] = 'cu101'
                else:
                    job['cu_version'] = 'cpu'

                jobs.append({f"unittest_{os_type}_{device_type}": job})

    return indent(indentation, jobs)


if __name__ == "__main__":
d = os.path.dirname(__file__)
env = jinja2.Environment(
Expand All @@ -140,4 +169,7 @@ def indent(indentation, data_list):
)

with open(os.path.join(d, 'config.yml'), 'w') as f:
f.write(env.get_template('config.yml.in').render(workflows=workflows))
f.write(env.get_template('config.yml.in').render(
build_workflows=build_workflows,
unittest_workflows=unittest_workflows,
))
14 changes: 14 additions & 0 deletions .circleci/unittest/linux/scripts/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Conda environment specification for the Linux unit-test jobs. The checksum of
# this file is part of the CI cache key, so editing it invalidates the cached
# environment. Presumably consumed by setup_env.sh -- confirm against that script.
channels:
- defaults
# Packages resolved through conda.
dependencies:
- numpy
- pytest
- pytest-cov
- codecov
- pip
- ca-certificates
# Packages installed through pip rather than conda.
- pip:
- future
- pillow>=4.1.1
- scipy
- av
Loading