Skip to content

Commit

Permalink
Merge branch 'awslabs:main' into sharding
Browse files Browse the repository at this point in the history
  • Loading branch information
Tonny-Gu authored Jul 31, 2022
2 parents c1ff936 + 9f26c8e commit 6acdac5
Show file tree
Hide file tree
Showing 108 changed files with 3,037 additions and 632 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/ci_unit_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ jobs:
if: github.repository == 'awslabs/raf'
runs-on: ubuntu-latest
outputs:
cpu_image: "metaprojdev/raf:ci_cpu-v0.20"
gpu_image: "metaprojdev/raf:ci_gpu-v0.22"
cpu_image: "metaprojdev/raf:ci_cpu-v0.21"
gpu_image: "metaprojdev/raf:ci_gpu-v0.23"
skip_ci: ${{ steps.job_info.outputs.skip_ci }}
ref: ${{ steps.job_info.outputs.ref }}
repo: ${{ steps.job_info.outputs.repo }}
Expand All @@ -48,6 +48,8 @@ jobs:
echo "::set-output name=ref::${ref}"
repo=$(head -n 1 artifact/repo.txt)
echo "::set-output name=repo::${repo}"
lint=${{ github.event.workflow_run.conclusion }}
echo "Linting result: ${lint}"
- name: Parse PR job info
id: pr_job_info
continue-on-error: true
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ python/raf/_op/sym.py
python/raf/_op/imp.py
python/raf/ir/op.py
python/raf/_ffi
python/raf/version.py

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
2 changes: 1 addition & 1 deletion 3rdparty/tvm
Submodule tvm updated from 609d6a to 75ec1c
50 changes: 29 additions & 21 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -90,26 +90,19 @@ set_property(
)

file(GLOB_RECURSE RAF_CXX_SOURCE_FILES
${CMAKE_CURRENT_LIST_DIR}/src/analysis/*.cc
${CMAKE_CURRENT_LIST_DIR}/src/common/*.cc
${CMAKE_CURRENT_LIST_DIR}/src/device_api/cpu/*.cc
${CMAKE_CURRENT_LIST_DIR}/src/memory_pool/*.cc
${CMAKE_CURRENT_LIST_DIR}/src/op/schema/*.cc
${CMAKE_CURRENT_LIST_DIR}/src/op/declare/*.cc
${CMAKE_CURRENT_LIST_DIR}/src/op/regs/*.cc
${CMAKE_CURRENT_LIST_DIR}/src/op/grad/*.cc
${CMAKE_CURRENT_LIST_DIR}/src/op/dialect/tvm/*.cc
${CMAKE_CURRENT_LIST_DIR}/src/op/base_ops.cc
${CMAKE_CURRENT_LIST_DIR}/src/op/from_relay/*.cc
${CMAKE_CURRENT_LIST_DIR}/src/op/ty/*.cc
${CMAKE_CURRENT_LIST_DIR}/src/pass/*.cc
${CMAKE_CURRENT_LIST_DIR}/src/impl/*.cc
${CMAKE_CURRENT_LIST_DIR}/src/profiler/memory_profiler.cc
${CMAKE_CURRENT_LIST_DIR}/src/profiler/op_profiler.cc
${CMAKE_CURRENT_LIST_DIR}/src/profiler/scope_timer.cc
${CMAKE_CURRENT_LIST_DIR}/src/profiler/base/*.cc
${CMAKE_CURRENT_LIST_DIR}/src/distributed/common/*.cc
${CMAKE_CURRENT_LIST_DIR}/src/*/*.cc
)
# Collect every .cc file under the CUDA-related directories listed below
# (device API, distributed, the cublas/cuda/cudnn/cutlass/nccl dialects and
# the CUDA profiler) so that they can be dropped from RAF_CXX_SOURCE_FILES.
file(GLOB_RECURSE RAF_EXCLUDE_CXX_SOURCE_FILES
${CMAKE_CURRENT_LIST_DIR}/src/device_api/cuda/*.cc
${CMAKE_CURRENT_LIST_DIR}/src/distributed/cuda/*.cc
${CMAKE_CURRENT_LIST_DIR}/src/op/dialect/cublas/*.cc
${CMAKE_CURRENT_LIST_DIR}/src/op/dialect/cuda/*.cc
${CMAKE_CURRENT_LIST_DIR}/src/op/dialect/cudnn/*.cc
${CMAKE_CURRENT_LIST_DIR}/src/op/dialect/cutlass/*.cc
${CMAKE_CURRENT_LIST_DIR}/src/op/dialect/nccl/*.cc
${CMAKE_CURRENT_LIST_DIR}/src/profiler/cuda/*.cc
)
# REMOVE_ITEM compares list entries as exact strings; this relies on both
# globs producing paths rooted at the same ${CMAKE_CURRENT_LIST_DIR} prefix.
# NOTE(review): presumably these files are re-added through a CUDA-only source
# list when RAF_USE_CUDA is enabled -- confirm against the CUDA branch.
list(REMOVE_ITEM RAF_CXX_SOURCE_FILES ${RAF_EXCLUDE_CXX_SOURCE_FILES})

if (${RAF_USE_CUDA} STREQUAL "OFF")
set(RAF_CUDA_SOURCE_FILES "")
Expand All @@ -121,8 +114,15 @@ else ()
enable_language(CUDA)
set(CMAKE_CUDA_ARCHITECTURES ${RAF_CUDA_ARCH})
set(RAF_CXX_FLAGS ${RAF_CXX_FLAGS} -DRAF_USE_CUDA)
set(RAF_CUDA_FLAGS ${RAF_CUDA_FLAGS} -DRAF_USE_CUDA
-gencode=arch=compute_${RAF_CUDA_ARCH},code=sm_${RAF_CUDA_ARCH})

# Build one -gencode flag per requested CUDA architecture. Each flag asks for
# both the native binary (sm_XX) and the PTX (compute_XX) of that architecture,
# e.g. RAF_CUDA_ARCH="70;80" yields
#   -gencode=arch=compute_70,code=[sm_70,compute_70]
#   -gencode=arch=compute_80,code=[sm_80,compute_80]
#
# Start from an explicitly empty normal variable so that a stale value (from a
# parent scope or a cache entry of the same name) cannot leak extra flags in.
set(RAF_CUDA_ARCH_LIST "")
foreach(ARCH ${RAF_CUDA_ARCH})
  # The brackets are escaped so that CMake passes them through literally.
  list(APPEND RAF_CUDA_ARCH_LIST
       "-gencode=arch=compute_${ARCH},code=\[sm_${ARCH},compute_${ARCH}\]")
endforeach()
set(RAF_CUDA_FLAGS ${RAF_CUDA_FLAGS} -DRAF_USE_CUDA ${RAF_CUDA_ARCH_LIST})

file(GLOB_RECURSE RAF_CUDA_SOURCE_FILES
${CMAKE_CURRENT_LIST_DIR}/src/device_api/cuda/*.cc
Expand Down Expand Up @@ -232,6 +232,14 @@ set_target_properties(raf PROPERTIES
)
raf_target_add_sanitizer(raf)

# Install rules.
# 1) Copy the header tree: every *.h found under include/ (directory layout
#    preserved) goes to <prefix>/include. FILES_MATCHING restricts the copy to
#    files that match the PATTERN.
install(DIRECTORY "include/." DESTINATION "include"
        FILES_MATCHING PATTERN "*.h")
# 2) Install the raf target's LIBRARY artifact into <prefix>/lib.
install(
  TARGETS raf
  LIBRARY DESTINATION lib
)

################# Apps #################
add_subdirectory(${PROJECT_SOURCE_DIR}/apps/include_raf/)

Expand Down
18 changes: 17 additions & 1 deletion ci/task_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,20 @@ git checkout --recurse-submodules .

# build
mkdir -p $BUILD_DIR
cd $BUILD_DIR && cmake .. && make $MAKE_FLAGS && make raf-cpptest $MAKE_FLAGS && cd ..
pushd .
cd $BUILD_DIR && cmake .. && make $MAKE_FLAGS && make raf-cpptest $MAKE_FLAGS
popd

# Smoke-test wheel packaging: build a wheel for TVM and then for RAF, and
# force-install each one (without dependencies) right after it is built.
# TVM_LIBRARY_PATH is exported so the setup scripts can locate the libraries
# under ./build/lib.
export TVM_LIBRARY_PATH="${PWD}/build/lib"

# TVM wheel; the -d output path is relative to the package directory.
pushd 3rdparty/tvm/python
python3 setup.py bdist_wheel -d ../build/pip/public/tvm_latest
python3 -m pip install ../build/pip/public/tvm_latest/*.whl --upgrade --force-reinstall --no-deps
popd

# RAF wheel, built with TVM_FFI=auto.
pushd python
TVM_FFI=auto python3 setup.py bdist_wheel -d ../build/pip/public/raf
python3 -m pip install ../build/pip/public/raf/*.whl --upgrade --force-reinstall --no-deps
popd

24 changes: 19 additions & 5 deletions docker/Dockerfile.ci_cpu
Original file line number Diff line number Diff line change
@@ -1,24 +1,38 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

FROM nvidia/cuda:11.3.1-cudnn8-devel-ubuntu18.04

# Base scripts
# Install basic dependencies.
RUN apt-get update --fix-missing

COPY install/ubuntu_install_core.sh /install/ubuntu_install_core.sh
RUN bash /install/ubuntu_install_core.sh

# Install docker
RUN apt-get install -y docker.io

# Install Python packages.
COPY install/ubuntu_install_python.sh /install/ubuntu_install_python.sh
RUN bash /install/ubuntu_install_python.sh cpu
RUN bash /install/ubuntu_install_python.sh

# Install LLVM.
COPY install/ubuntu_install_llvm.sh /install/ubuntu_install_llvm.sh
RUN bash /install/ubuntu_install_llvm.sh

# AWS Batch setup
COPY batch/entry.sh /batch/entry.sh
RUN DEBIAN_FRONTEND=noninteractive apt-get install -y awscli
RUN pip3 install --upgrade awscli
RUN python3 -m pip install --upgrade awscli

# Environment variables
ENV PATH=/usr/local/cuda/bin:${PATH}
ENV C_INCLUDE_PATH=/usr/local/cuda/include:${C_INCLUDE_PATH}
ENV CPLUS_INCLUDE_PATH=/usr/local/cuda/include:${CPLUS_INCLUDE_PATH}
ENV LIBRARY_PATH=/usr/local/cuda/lib64:${LIBRARY_PATH}
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH}

# Install PyTorch without CUDA
COPY install/ubuntu_install_torch.sh /install/ubuntu_install_torch.sh
RUN bash /install/ubuntu_install_torch.sh cpu

# To prevent `black` command line errors caused by ASCII encoding
ENV LC_ALL=C.UTF-8
Expand Down
21 changes: 16 additions & 5 deletions docker/Dockerfile.ci_gpu
Original file line number Diff line number Diff line change
@@ -1,24 +1,27 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

FROM nvidia/cuda:11.3.1-cudnn8-devel-ubuntu18.04

# Base scripts
# Install basic dependencies.
RUN apt-get update --fix-missing

COPY install/ubuntu_install_core.sh /install/ubuntu_install_core.sh
RUN bash /install/ubuntu_install_core.sh

# Install docker
RUN apt-get install -y docker.io

# Install Python packages.
COPY install/ubuntu_install_python.sh /install/ubuntu_install_python.sh
RUN bash /install/ubuntu_install_python.sh gpu
RUN bash /install/ubuntu_install_python.sh

# Install LLVM.
COPY install/ubuntu_install_llvm.sh /install/ubuntu_install_llvm.sh
RUN bash /install/ubuntu_install_llvm.sh

# AWS Batch setup
COPY batch/entry.sh /batch/entry.sh
RUN DEBIAN_FRONTEND=noninteractive apt-get install -y awscli
RUN pip3 install --upgrade awscli
RUN python3 -m pip install --upgrade awscli

# Environment variables
ENV PATH=/usr/local/cuda/bin:${PATH}
Expand All @@ -27,6 +30,14 @@ ENV CPLUS_INCLUDE_PATH=/usr/local/cuda/include:${CPLUS_INCLUDE_PATH}
ENV LIBRARY_PATH=/usr/local/cuda/lib64:${LIBRARY_PATH}
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH}

# Install PyTorch with CUDA
COPY install/ubuntu_install_torch.sh /install/ubuntu_install_torch.sh
RUN bash /install/ubuntu_install_torch.sh cu113

# Install apex
COPY install/ubuntu_install_apex.sh /install/ubuntu_install_apex.sh
RUN bash /install/ubuntu_install_apex.sh

# To prevent `black` command line errors caused by ASCII encoding
ENV LC_ALL=C.UTF-8
ENV LANG=C.UTF-8
12 changes: 12 additions & 0 deletions docker/install/ubuntu_install_apex.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

set -e
set -u
set -o pipefail

# Build and install NVIDIA apex (the "lans" branch of the szhengac fork) with
# its C++ and CUDA extensions enabled.
git clone https://github.com/szhengac/apex --branch lans
cd apex
# Invoke pip through python3 so the extensions are built by, and installed
# into, the same interpreter the other install scripts use (they all call
# `python3 -m pip`); a bare `pip3` may be bound to a different Python.
python3 -m pip install -v --disable-pip-version-check --no-cache-dir \
  --global-option="--cpp_ext" --global-option="--cuda_ext" ./
# Return to the directory the clone was made from (one level up, not two).
cd ..
6 changes: 6 additions & 0 deletions docker/install/ubuntu_install_core.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,9 @@ cmake -DZSTD_FROM_INTERNET=ON -DCMAKE_BUILD_TYPE=Release ..
make -j
make install
popd

# Update symbolic links: for every compiler driver present in /usr/bin, create
# /usr/local/bin/<name> pointing at ccache.
# NOTE(review): this presumably relies on /usr/local/bin preceding /usr/bin on
# PATH so that the wrappers are picked up first -- confirm.
# NOTE(review): `[0-9+]` matches exactly one character (a digit or '+'), so
# gcc-7 matches but gcc-10 would not, and plain `clang++` is not in the list --
# confirm both are intentional.
for t in /usr/bin/{gcc,gcc-[0-9+],g++,g++-[0-9+],clang,clang-[0-9+],clang++-[0-9+]}; do
  # Iterate the patterns directly instead of parsing `ls` output. A pattern
  # with no match stays literal, so skip anything that does not exist.
  if [ ! -e "$t" ] && [ ! -L "$t" ]; then
    continue
  fi
  ln -fvs /usr/local/bin/ccache "/usr/local/bin/$(basename "$t")"
done

39 changes: 19 additions & 20 deletions docker/install/ubuntu_install_python.sh
Original file line number Diff line number Diff line change
@@ -1,29 +1,28 @@
#!/bin/bash
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

set -e
set -u
set -o pipefail

apt-get update
apt-get install -y python-dev python3-dev
apt-get install -y python-pip python3-pip
pip3 install pip --upgrade
pip3 install cmake
pip3 install scikit-build==0.11.1
pip3 install pylint==2.4.3 cpplint black==22.3.0
pip3 install six numpy pytest cython decorator scipy tornado typed_ast pytest mypy orderedset \
antlr4-python3-runtime attrs requests Pillow packaging psutil dataclasses pycparser \
pydot
pip3 install torch==1.10.1+cu113 torchvision==0.11.2+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
pip3 install transformers==4.3
pip3 install mxnet==1.6.0
pip3 install gluoncv==0.10.1
apt-get install -y build-essential
apt-get install -y python3 python3-dev python3-pip
apt-get install -y python3.7 python3.7-dev python3.7-venv
rm /usr/bin/python3
ln -s /usr/bin/python3.7 /usr/bin/python3

if [[ "$1" == "gpu" ]]; then
apt-get install -y ninja-build
git clone https://github.com/szhengac/apex --branch lans
cd apex
pip3 install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./
cd ..
fi
python3 -m pip install -U --force-reinstall pip
python3 -m pip install cmake
python3 -m pip install scikit-build==0.11.1
python3 -m pip install pylint==2.4.3 cpplint black==22.3.0
python3 -m pip install six numpy pytest cython decorator scipy tornado typed_ast \
pytest mypy orderedset antlr4-python3-runtime attrs requests \
Pillow packaging psutil dataclasses pycparser pydot filelock
python3 -m pip install astunparse numpy ninja pyyaml mkl mkl-include setuptools cffi \
typing_extensions future glob2 pygithub boto3
python3 -m pip install mxnet==1.6.0
python3 -m pip install gluoncv==0.10.1
python3 -m pip install datasets==1.15.1
python3 -m pip install transformers==4.17
19 changes: 19 additions & 0 deletions docker/install/ubuntu_install_torch.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/bash
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Install pinned PyTorch and torchvision wheels for the given platform tag.
#
# Usage: ubuntu_install_torch.sh <cpu|cu113>
#   The argument is appended to the package versions as a local version tag
#   (e.g. torch==1.12.0+cpu) and selects the matching wheel index URL.

# Fail on errors, unset variables and broken pipes; echo each command.
set -euxo pipefail

if [ "$#" -lt 1 ]; then
  echo "Usage: ubuntu_install_torch.sh <cpu|cu113>"
  exit 1
fi
PLATFORM=$1

PT_VERSION=1.12.0
TV_VERSION=0.13.0

# Install PyTorch and torchvision
python3 -m pip install --force-reinstall \
  "torch==${PT_VERSION}+${PLATFORM}" "torchvision==${TV_VERSION}+${PLATFORM}" \
  -f "https://download.pytorch.org/whl/${PLATFORM}/torch_stable.html"

Loading

0 comments on commit 6acdac5

Please sign in to comment.