Merge branch 'meta-project:main' into multi-comm
Tonny-Gu authored Feb 27, 2022
2 parents efea2ff + df572d2 commit c924e80
Showing 40 changed files with 587 additions and 455 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci_lint.yml
@@ -21,7 +21,7 @@ jobs:
if: github.repository == 'meta-project/meta'
runs-on: self-hosted
container:
-image: metaprojdev/meta:ci_cpu-v0.18
+image: metaprojdev/raf:ci_cpu-v0.18
steps:
- name: Checkout repository
uses: actions/checkout@v2
36 changes: 34 additions & 2 deletions .github/workflows/ci_unit_test.yml
@@ -20,8 +20,8 @@ jobs:
if: github.repository == 'meta-project/meta'
runs-on: self-hosted
outputs:
cpu_image: "metaprojdev/meta:ci_cpu-v0.18"
gpu_image: "metaprojdev/meta:ci_gpu-v0.20"
cpu_image: "metaprojdev/raf:ci_cpu-v0.18"
gpu_image: "metaprojdev/raf:ci_gpu-v0.20"
skip_ci: ${{ steps.job_info.outputs.skip_ci }}
ref: ${{ steps.job_info.outputs.ref }}
repo: ${{ steps.job_info.outputs.repo }}
@@ -205,3 +205,35 @@ jobs:
--command "bash ./ci/batch/cli.sh config_cmake GPU 75 &&
bash ./ci/batch/cli.sh compile build multi-GPU ${{ needs.check_status.outputs.job_tag }} &&
bash ./ci/batch/cli.sh unit_test multi-GPU"
+update_ci_badge:
+needs: [test_on_cpu, test_on_gpu, test_on_multi_gpu]
+if: github.repository == 'meta-project/meta'
+runs-on: self-hosted
+steps:
+- uses: haya14busa/action-workflow_run-status@v1
+- name: Checkout repository
+# No need to checkout submodules because we only need to get the HEAD commit hash.
+uses: actions/checkout@v2
+- name: Generate CI badge
+id: badge
+run: |
+# env vars are unavailable in job.if so we have to implement it here.
+if [ "${{ needs.check_status.outputs.pr }}" != "" ]; then
+echo "No need to update badge for PR CI. Skip."
+exit 0
+fi
+head_commit=$(git rev-parse --short HEAD)
+echo "::set-output name=gist_id::630a36600930c8d68e6b15f16333b532"
+echo "::set-output name=message::${head_commit}"
+- name: Update CI badge
+# Intentionally fail this step with empty gist_id.
+uses: schneegans/dynamic-badges-action@v1.1.0
+continue-on-error: true
+with:
+auth: ${{ secrets.DEPLOY_ACCESS_TOKEN }}
+gistID: ${{ steps.badge.outputs.gist_id }}
+filename: raf-ci-badge-last-pass.json
+label: CI-Last-Success
+message: ${{ steps.badge.outputs.message }}
+color: blue
2 changes: 1 addition & 1 deletion .github/workflows/deploy_docker.yml
@@ -35,7 +35,7 @@ jobs:
with:
context: docker
file: docker/Dockerfile.${{ github.event.inputs.type }}
-tags: metaprojdev/meta:${{ github.event.inputs.type }}-${{ github.event.inputs.tag }}
+tags: metaprojdev/raf:${{ github.event.inputs.type }}-${{ github.event.inputs.tag }}
push: true
- name: Image digest
run: echo ${{ steps.docker_build.outputs.digest }}
10 changes: 3 additions & 7 deletions README.md
@@ -1,14 +1,10 @@
<!--- Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -->
<!--- SPDX-License-Identifier: Apache-2.0 -->

-RAF
-===

-[![CI-Lint](https://github.com/meta-project/meta/actions/workflows/ci_lint.yml/badge.svg)](https://github.com/meta-project/meta/actions/workflows/ci_lint.yml)
-[![CI-UnitTest](https://github.com/meta-project/meta/actions/workflows/ci_unit_test.yml/badge.svg)](https://github.com/meta-project/meta/actions/workflows/ci_unit_test.yml)
+RAF: RAF Accelerates deep learning Frameworks
+=============================================

![CI-Lass-Pass](https://img.shields.io/endpoint?url=https://gist.githubusercontent.com/aire-meta-bot/630a36600930c8d68e6b15f16333b532/raw/raf-ci-badge-last-pass.json)


Please refer to our [wiki](docs/wiki) for more information.


6 changes: 3 additions & 3 deletions ci/batch/README.md
@@ -167,7 +167,7 @@ AWS Batch has to be properly configured to make the above flow working as expected
"type": "container",
"parameters": {},
"containerProperties": {
"image": "metaprojdev/meta:ci_gpu-v0.20",
"image": "metaprojdev/raf:ci_gpu-v0.20",
"command": [],
"jobRoleArn": ***,
"executionRoleArn": ***,
@@ -210,7 +210,7 @@ AWS Batch has to be properly configured to make the above flow working as expected
"type": "container",
"parameters": {},
"containerProperties": {
"image": "metaprojdev/meta:ci_cpu-v0.18",
"image": "metaprojdev/raf:ci_cpu-v0.18",
"command": [],
"jobRoleArn": ***,
"executionRoleArn": ***,
@@ -249,7 +249,7 @@ AWS Batch has to be properly configured to make the above flow working as expected
"type": "container",
"parameters": {},
"containerProperties": {
"image": "metaprojdev/meta:ci_gpu-v0.20",
"image": "metaprojdev/raf:ci_gpu-v0.20",
"command": [],
"jobRoleArn": ***,
"executionRoleArn": ***,
2 changes: 1 addition & 1 deletion ci/batch/backup-ccache.sh
@@ -10,7 +10,7 @@ MODE=$1 # upload or download
PLATFORM=$2 # CPU, GPU, or multi-GPU
TAG=$3 # e.g., refs/heads/main, pr-7

S3_BUCKET="ci-meta"
S3_BUCKET="ci-raf"
S3_FOLDER=`echo cache-${TAG} | sed 's/\//_/g'`
S3_PATH="s3://$S3_BUCKET/$S3_FOLDER"

2 changes: 1 addition & 1 deletion docker/batch/entry.sh
@@ -13,7 +13,7 @@ SOURCE_REF=$1
REPO=$2
COMMAND=$3
SAVE_OUTPUT=$4
-REMOTE_FOLDER=$5 # e.g., s3://ci-meta/pr-7
+REMOTE_FOLDER=$5 # e.g., s3://ci-raf/pr-7

echo "Job Info"
echo "-------------------------------------"
2 changes: 1 addition & 1 deletion docker/push.sh
@@ -31,7 +31,7 @@ PASSWORD="$1"
shift 1

LOCAL_IMAGE_NAME=raf.${CONTAINER_TYPE}:latest
-REMOTE_IMAGE_NAME=${DOCKER_HUB_ACCOUNT}/meta:${CONTAINER_TYPE}-${VERSION}
+REMOTE_IMAGE_NAME=${DOCKER_HUB_ACCOUNT}/raf:${CONTAINER_TYPE}-${VERSION}

echo "Login docker hub"
docker login -u ${DOCKER_HUB_ACCOUNT} -p ${PASSWORD}
4 changes: 2 additions & 2 deletions docs/wiki/3_dev_guide/Memory-Pool.md
@@ -7,7 +7,7 @@ This document introduces the Memory Pool of RAF.

## Strategies

-Currently, there are two types of memory pool in meta: (1) no_pool, (2) page_unit_pool.
+Currently, there are two types of memory pool in RAF: (1) no_pool, (2) page_unit_pool.
By default, we choose page_unit_pool as our memory pool, which could bring down the running time by almost 50% for rn50/vgg/etc compared with no_pool.

The memory usage of these two strategies are similar. Here is an experiment on ResNet50 with Tesla T4 (15109MB)
@@ -115,4 +115,4 @@ Then you can create the Pool Class that derived from `raf::memory_pool::MemoryPool`
Remember to register your pool in the cpp file you created, the code should be like:
`RAF_REGISTER_GLOBAL("raf.memory_pool._make.your_pool").set_body_typed(YourPool::make);`

-After re-make meta, you can enable your pool by calling `InitPool(contxt, pool_name)`.
+After re-make RAF, you can enable your pool by calling `InitPool(contxt, pool_name)`.
7 changes: 4 additions & 3 deletions include/raf/op_utils.h
@@ -121,7 +121,7 @@ inline bool IsInOpSet(const Expr& op, const OpSet& op_set) {
inline bool IsReshapeOp(const Op& op) {
static std::unordered_set<Op, ObjectPtrHash, ObjectPtrEqual> reshape_ops{
Op::Get("raf.op.reshape"), Op::Get("raf.op.expand_dims"), Op::Get("raf.op.squeeze"),
Op::Get("raf.op.batch_flatten")};
Op::Get("raf.op.batch_flatten"), Op::Get("raf.op.reshape_like")};
return IsInOpSet(op, reshape_ops);
}

@@ -179,8 +179,9 @@ inline Array<tvm::PrimExpr> GetShapeExprFromValue(const Value& value) {
ICHECK(value.defined());
Array<tvm::PrimExpr> shape;
if (auto ttv = value.as<TensorTypeValueObj>()) {
-auto ndim = ttv->type->shape.size();
-for (size_t i = 0; i < ndim; ++i) {
+auto ndim = ttv->type->shape[0].as<ir::IntImmNode>();
+ICHECK(ndim) << "Expected IntImm, but got " << ttv->type->shape[0]->GetTypeKey();
+for (size_t i = 0; i < ndim->value; ++i) {
shape.push_back(Any());
}
} else {
1 change: 1 addition & 0 deletions python/raf/_tvm_op/transform.py
@@ -129,6 +129,7 @@ def fcompute(*args):
_reg.register_injective_schedule("raf.op.tvm.batch_flatten")
_reg.register_injective_schedule("raf.op.tvm.arange")
_reg.register_injective_schedule("raf.op.tvm.strided_slice")
+_reg.register_reduce_schedule("raf.op.tvm.collapse_sum_like")


@register_compute("raf.op.tvm.take_dx")
1 change: 1 addition & 0 deletions python/raf/amp/type_hints.py
@@ -203,6 +203,7 @@ def _gen(args, ret_type, amp_dtype):
register_op_cast_rule("raf.op.trunc", infer_cast(1))
register_op_cast_rule("raf.op.mesh_grid", infer_cast(2))
register_op_cast_rule("raf.op.reshape", infer_cast(1))
register_op_cast_rule("raf.op.reshape_like", infer_cast(1))
register_op_cast_rule("raf.op.resize2d", infer_cast(1))
register_op_cast_rule("raf.op.ndarray_size", infer_cast(1))
register_op_cast_rule("raf.op.transpose", infer_cast(1))
112 changes: 112 additions & 0 deletions python/raf/testing/mlp.py
@@ -0,0 +1,112 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

"""MLP model"""
# pylint: disable=protected-access, attribute-defined-outside-init, too-many-locals
# pylint: disable=missing-class-docstring, too-many-arguments, missing-function-docstring
import torch.nn as nn
import torch.nn.functional as F

import raf
from raf.model import Linear
from .common import check, randn_torch, t2m_param, one_hot_torch
from .utils import get_param, set_param


class TorchMlp(nn.Module): # pylint: disable=abstract-method
def __init__(self, num_inputs, num_outputs, num_hiddens1, num_hiddens2):
super(TorchMlp, self).__init__()
self.fc1 = nn.Linear(num_inputs, num_hiddens1)
self.fc2 = nn.Linear(num_hiddens1, num_hiddens2)
self.fc3 = nn.Linear(num_hiddens2, num_outputs)

def forward_infer(self, x):
y = self.fc1(x)
y = F.relu(y)
y = self.fc2(y)
y = F.relu(y)
y = self.fc3(y)
return y

def forward(self, x, y_true=None): # pylint: disable=arguments-differ
y = self.forward_infer(x)
if self.training:
y_pred = F.log_softmax(y, dim=-1)
loss = F.nll_loss(y_pred, y_true)
return loss
return y


class RAFMlp(raf.Model):
# pylint: disable=attribute-defined-outside-init
def build(self, num_inputs, num_outputs, num_hiddens1, num_hiddens2):
self.fc1 = Linear(num_inputs, num_hiddens1)
self.fc2 = Linear(num_hiddens1, num_hiddens2)
self.fc3 = Linear(num_hiddens2, num_outputs)

@raf.model.trace
def forward_infer(self, x):
y = self.fc1(x)
y = raf.relu(y)
y = self.fc2(y)
y = raf.relu(y)
y = self.fc3(y)
return y

@raf.model.trace
def forward(self, x, y_true):
y = self.forward_infer(x)
y_pred = raf.log_softmax(y)
loss = raf.nll_loss(y_true, y_pred)
return loss


def _param_map(t_model):
"""maps from m_model parameter name to t_model parameter value"""
res = {
"fc1.w": t_model.fc1.weight,
"fc1.b": t_model.fc1.bias,
"fc2.w": t_model.fc2.weight,
"fc2.b": t_model.fc2.bias,
"fc3.w": t_model.fc3.weight,
"fc3.b": t_model.fc3.bias,
}
return res


def _init(m_model, t_model, device="cpu"):
"""initialize meta model with parameters of torch model"""
# pylint: disable=no-member, line-too-long, too-many-statements
for m_name, t_w in _param_map(t_model).items():
set_param(m_model, m_name, t2m_param(t_w, device=device))


def check_params(m_model, t_model, atol=1e-4, rtol=1e-4):
"""check the parameters of m_model and t_model"""
# pylint: disable=no-member, line-too-long, too-many-statements
for m_name, t_w in _param_map(t_model).items():
m_w = get_param(m_model, m_name)
check(m_w, t_w, atol=atol, rtol=rtol)


def get_model(config, train=True):
"""get MLP model"""
m_model = RAFMlp(*config)
t_model = TorchMlp(*config)
_init(m_model, t_model)
if train:
m_model.train_mode()
t_model.train()
else:
m_model.infer_mode()
t_model.eval()
return m_model, t_model


def get_input(config, batch_size=1, device="cpu", train=True):
"""get MLP input"""
m_x, t_x = randn_torch([batch_size, config[0]], device=device, requires_grad=True)
if not train:
return [(m_x,), (t_x,)]
m_y, t_y = one_hot_torch(batch_size, num_classes=config[1], device=device)
return [(m_x, m_y), (t_x, t_y)]
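
The new `python/raf/testing/mlp.py` module is a test utility: it builds a RAF MLP and a matching PyTorch MLP, copies the PyTorch weights into the RAF model, and hands back paired inputs for parity tests. A minimal usage sketch follows (not part of this commit; the layer sizes, batch size, and test flow are illustrative assumptions):

```python
# Illustrative sketch of how the helpers above are meant to be used in a parity test.
# The config values and batch size are arbitrary; device defaults to "cpu".
from raf.testing.mlp import get_model, get_input, check_params

config = (784, 10, 256, 128)  # num_inputs, num_outputs, num_hiddens1, num_hiddens2
m_model, t_model = get_model(config, train=True)  # RAF model initialized from the PyTorch weights
m_args, t_args = get_input(config, batch_size=4)  # (m_x, m_y) for RAF, (t_x, t_y) for PyTorch

m_loss = m_model(*m_args)       # RAF forward returns the NLL loss in train mode
t_loss = t_model(*t_args)       # PyTorch forward returns the NLL loss in train mode
check_params(m_model, t_model)  # weights should still agree before any optimizer step
```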
11 changes: 6 additions & 5 deletions scripts/src_codegen/def_op.py
@@ -96,12 +96,12 @@
Op(name="cross_entropy_dpred", schema_name="loss"),
Op(name="cross_entropy_dtrue", schema_name="loss"),
Op(name="reshape", schema_name="reshape"),
Op(name="reshape_like", schema_name="binary_like"),
Op(name="resize2d", schema_name="resize2d"),
Op(name="resize2d_dx", schema_name="resize2d_dx"),
Op(name="ndarray_size", schema_name="unary"),
Op(name="transpose", schema_name="transpose"),
Op(name="transpose_dx", schema_name="transpose_dx"),
Op(name="collapse_sum_like", schema_name="collapse_like"),
Op(name="transpose_dx", schema_name="transpose"),
Op(name="sum", schema_name="sum"),
Op(name="sum_dx", schema_name="sum_dx"),
Op(name="cumsum", schema_name="cumsum"),
@@ -135,8 +135,9 @@
Op(name="sequence_mask", schema_name="sequence_mask"),
Op(name="reverse_sequence", schema_name="reverse_sequence"),
Op(name="reverse", schema_name="reverse"),
Op(name="broadcast_to", schema_name="broadcast_to"),
Op(name="broadcast_to_like", schema_name="broadcast_to_like"),
Op(name="broadcast_to", schema_name="binary_to"),
Op(name="broadcast_to_like", schema_name="binary_like"),
Op(name="collapse_sum_like", schema_name="binary_like"),
Op(name="concatenate", schema_name="concatenate"),
Op(name="squeeze", schema_name="squeeze"),
Op(name="stack", schema_name="stack"),
@@ -159,7 +160,7 @@
Op(name="fuse_tensor", schema_name="fuse_tensor"),
Op(name="defuse_tensor", schema_name="defuse_tensor"),
Op(name="cast", schema_name="cast"),
Op(name="cast_like", schema_name="cast_like"),
Op(name="cast_like", schema_name="binary_like"),
Op(name="gather", schema_name="gather"),
Op(name="gather_dx", schema_name="gather_dx"),
Op(name="gather_nd", schema_name="gather_nd"),
(The remaining changed files are not shown in this view.)