This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit 4fefd84
Merge remote-tracking branch 'upstream/master' into float64_fallback
xinyu-intel committed Aug 26, 2019
2 parents: 455bd1a + 9410cc4
Showing 41 changed files with 4,436 additions and 198 deletions.
6 changes: 3 additions & 3 deletions 3rdparty/mshadow/mshadow/tensor.h
@@ -69,15 +69,15 @@ struct Shape {
* \param idx dimension index
* \return the corresponding dimension size
*/
MSHADOW_XINLINE index_t &operator[](index_t idx) {
MSHADOW_XINLINE index_t &operator[](int idx) {
return shape_[idx];
}
/*!
* \brief get corresponding index
* \param idx dimension index
* \return the corresponding dimension size
*/
MSHADOW_XINLINE const index_t &operator[](index_t idx) const {
MSHADOW_XINLINE const index_t &operator[](int idx) const {
return shape_[idx];
}
/*!
@@ -484,7 +484,7 @@ struct Tensor: public TRValue<Tensor<Device, dimension, DType>,
* \param idx the dimension count from the highest dimension
* \return the size
*/
MSHADOW_XINLINE index_t size(index_t idx) const {
MSHADOW_XINLINE index_t size(int idx) const {
return shape_[idx];
}
/*!
3 changes: 2 additions & 1 deletion CODEOWNERS
@@ -47,7 +47,8 @@ CMakeLists.txt @szha @pllarroy

# MXNet CI
dev_menu.py @pllarroy
/ci/ @pllarroy
/ci/ @pllarroy @marcoabreu
/docker/ @marcoabreu
/tests/ci_build/ @marcoabreu
Jenkinsfile @marcoabreu
.travis.yml @marcoabreu
4 changes: 2 additions & 2 deletions ci/jenkins/Jenkinsfile_clang
@@ -37,8 +37,8 @@ core_logic: {
custom_steps.compile_unix_clang_3_9_cpu(),
custom_steps.compile_unix_clang_6_cpu(),
custom_steps.compile_unix_clang_tidy_cpu(),
custom_steps.compile_unix_clang_3_9_mkldnn_cpu(),
custom_steps.compile_unix_clang_6_mkldnn_cpu()
custom_steps.compile_unix_clang_3_9_mkldnn_cpu()
// custom_steps.compile_unix_clang_6_mkldnn_cpu()
])
}
,
1 change: 1 addition & 0 deletions contrib/tvmop/__init__.py
@@ -18,5 +18,6 @@
# coding: utf-8
from .opdef import defop
from .utils import AllTypes, RealTypes
from .utils import assign_by_req, reduce_axes

from . import basic
54 changes: 52 additions & 2 deletions contrib/tvmop/basic/ufunc.py
@@ -18,6 +18,7 @@
# coding: utf-8
import tvm
from .. import defop, AllTypes
from .. import assign_by_req, reduce_axes

def compute_add(dtype, ndim):
A = tvm.placeholder([tvm.var() for _ in range(ndim)], name='A', dtype=dtype)
@@ -27,8 +28,9 @@ def compute_add(dtype, ndim):
s = tvm.create_schedule(C.op)
return s, A, B, C


@defop(name="vadd", target="cpu", auto_broadcast=True,
dtype=AllTypes, ndim=list(range(1, 6)))
dtype=AllTypes, ndim=[5])
def vadd(dtype, ndim):
s, A, B, C = compute_add(dtype, ndim)
axes = [axis for axis in C.op.axis]
@@ -37,8 +39,9 @@ def vadd(dtype, ndim):

return s, [A, B, C]


@defop(name="cuda_vadd", target="cuda", auto_broadcast=True,
dtype=["float32", "float64"], ndim=list(range(1, 6)))
dtype=["float32", "float64"], ndim=[5])
def vadd_gpu(dtype, ndim):
s, A, B, C = compute_add(dtype, ndim)
s = tvm.create_schedule(C.op)
@@ -48,3 +51,50 @@ def vadd_gpu(dtype, ndim):
s[C].bind(bx, tvm.thread_axis("blockIdx.x"))
s[C].bind(tx, tvm.thread_axis("threadIdx.x"))
return s, [A, B, C]


def compute_backward_vadd(dtype, ndim, reduce1st, req):
# The backward of broadcast op is basically a reduction on broadcast axes.
# We label the reduce axes as 1 and other axes as 0, and they form a bit string.
# Each bit string corresponds to a kernel, so there can be as many as `2^n` kernels.
# To reduce this, the bit string is compressed by combining consecutive 0s or 1s.
# In this way, the number of bit strings (and hence kernels) is reduced to `2 * n`.
# The compressed bit string is stored in `axes`, and `reduce1st` represents the first bit
# of the compressed bit string. Credit to @junrushao1994 and @yzhliu.
axes = ([reduce1st, 1 - reduce1st] * ndim)[:ndim]
X = tvm.placeholder([tvm.var() for _ in range(ndim)], name='X', dtype=dtype)
reducer = tvm.comm_reducer(lambda x, y: x + y,
lambda t: tvm.const(0, dtype=t), name="sum")
ret = reduce_axes(X, axes, reducer)
in_grad_a, in_grad = assign_by_req(ret, req)
s = tvm.create_schedule(in_grad.op)
return s, X, in_grad_a, in_grad, [ret, in_grad]


@defop(name="backward_vadd", target="cpu", dtype=AllTypes,
ndim=[5], reduce1st=[0, 1],
req=["kWriteTo", "kAddTo"], attrs=["reduce1st", "req"])
def backward_vadd(dtype, ndim, reduce1st, req):
s, X, in_grad_a, in_grad, c_list = compute_backward_vadd(dtype, ndim, reduce1st, req)
for t in c_list:
axes = [axis for axis in t.op.axis]
fused = s[t].fuse(*axes)
s[t].parallel(fused)
return s, [X, in_grad_a, in_grad]


@defop(name="cuda_backward_vadd", target="gpu", dtype=["float32", "float64"],
ndim=[5], reduce1st=[0, 1],
req=["kWriteTo", "kAddTo"], attrs=["reduce1st", "req"])
def backward_vadd_gpu(dtype, ndim, reduce1st, req):
s, X, in_grad_a, in_grad, c_list = compute_backward_vadd(dtype, ndim, reduce1st, req)
num_thread = 64
for t in c_list:
block_x = tvm.thread_axis("blockIdx.x")
thread_x = tvm.thread_axis("threadIdx.x")
axes = [axis for axis in t.op.axis]
fused = s[t].fuse(*axes)
bx, tx = s[t].split(fused, factor=num_thread)
s[t].bind(bx, block_x)
s[t].bind(tx, thread_x)
return s, [X, in_grad_a, in_grad]
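
The comment in compute_backward_vadd above describes the compressed bit-string scheme. As a rough illustration (not part of this commit; the helper name `describe_axes` is made up), the following sketch reproduces the `axes` expression and lists which compressed axes each kernel reduces over:

# Illustration only: reproduce the compressed bit string used by compute_backward_vadd.
def describe_axes(ndim, reduce1st):
    # Same expression as in compute_backward_vadd: bits alternate starting
    # with `reduce1st`, one bit per compressed group of consecutive axes.
    axes = ([reduce1st, 1 - reduce1st] * ndim)[:ndim]
    reduced = [i for i, bit in enumerate(axes) if bit == 1]
    return axes, reduced

for reduce1st in (0, 1):
    axes, reduced = describe_axes(5, reduce1st)
    print("reduce1st =", reduce1st, "axes =", axes, "-> reduce over", reduced)

# With ndim=5 and reduce1st=1 this prints axes = [1, 0, 1, 0, 1], i.e. the kernel
# sums over compressed axes 0, 2 and 4 and keeps axes 1 and 3; reduce1st=0 gives
# the complementary pattern, so the two variants together cover both cases.
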
29 changes: 29 additions & 0 deletions contrib/tvmop/utils.py
@@ -16,5 +16,34 @@
# under the License.

# coding: utf-8
import tvm

AllTypes = ["float32", "float64", "float16", "uint8", "int8", "int32", "int64"]
RealTypes = ["float32", "float64", "float16"]

def assign_by_req(a, req):
b = tvm.placeholder(a.shape, name='assign_by_req_b', dtype=a.dtype)
if (req == "kAddTo"):
c = tvm.compute(a.shape, lambda *idx: a[idx] + b[idx])
else:
c = tvm.compute(a.shape, lambda *idx: a[idx])
return b, c


def reduce_axes(X, axes, reducer):
def get_index(idx, ridx):
j = 0
k = 0
ret = []
for val in axes:
ret.append(idx[j] if val == 0 else ridx[k])
j += (val == 0)
k += (val != 0)
return tuple(ret)

ishape = X.shape
odim = (len(ishape) + 1 - axes[0]) // 2
oshape = [tvm.var() for _ in range(odim)]
ridx = [tvm.reduce_axis((0, ishape[i])) for (i, val) in enumerate(axes) if val == 1]
ret = tvm.compute(oshape, lambda *idx: reducer(X[get_index(idx, ridx)], axis=ridx), name='ret')
return ret
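
As a rough guide to what these two helpers compute, here is a NumPy analogue (an illustration under my reading of the code, not part of this commit; the `_np` names are made up): `reduce_axes` sums the input over the dimensions flagged with 1 in the compressed pattern, and `assign_by_req` either overwrites the output (kWriteTo) or accumulates into it (kAddTo).

import numpy as np

def reduce_axes_np(x, axes):
    # Sum over every dimension whose bit in the compressed pattern is 1,
    # keeping the dimensions whose bit is 0 (mirrors reduce_axes above).
    reduce_dims = tuple(i for i, bit in enumerate(axes) if bit == 1)
    return x.sum(axis=reduce_dims)

def assign_by_req_np(out, val, req):
    # kAddTo accumulates into the existing output, anything else overwrites it
    # (mirrors assign_by_req above).
    return out + val if req == "kAddTo" else val

x = np.arange(24, dtype=np.float32).reshape(2, 3, 4)
out = np.ones(3, dtype=np.float32)

grad = reduce_axes_np(x, axes=[1, 0, 1])     # sums over dims 0 and 2 -> shape (3,)
out = assign_by_req_np(out, grad, "kAddTo")  # accumulate, as for req="kAddTo"
print(grad, out)
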
1 change: 1 addition & 0 deletions docs/api/python/symbol/symbol.md
@@ -612,6 +612,7 @@ Composite multiple symbols into a new one by an operator.
random.normal
random.poisson
random.randint
random.randn
random.shuffle
random.uniform
mxnet.random.seed
3 changes: 2 additions & 1 deletion example/quantization/imagenet_gen_qsym_mkldnn.py
@@ -216,7 +216,8 @@ def save_params(fname, arg_params, aux_params, logger=None):
if exclude_first_conv:
excluded_sym_names += ['resnetv10_conv0_fwd']
elif args.model.find('resnet') != -1 and args.model.find('v2') != -1:
excluded_sym_names += ['resnetv20_flatten0_flatten0']
# resnetv20_stage1_batchnorm0_fwd is excluded for the sake of accuracy
excluded_sym_names += ['resnetv20_flatten0_flatten0', 'resnetv20_stage1_batchnorm0_fwd']
if exclude_first_conv:
excluded_sym_names += ['resnetv20_conv0_fwd']
elif args.model.find('vgg') != -1:
@@ -17,7 +17,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Let us first load and display the demo image (try to use other images you like). You will need to install `Images.jl` and `Colors.jl` to load the image."
"Let us first load and display the demo image (download it from [here](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/doc/tutorials/python/predict_image/cat.jpg) or try to use other images you like). You will need to install `Images.jl` and `Colors.jl` to load the image."
]
},
{
@@ -46,7 +46,7 @@
],
"source": [
"using Images, Colors, ImageMagick\n",
"img = load(\"cat.png\")"
"img = load(\"cat.jpg\")"
]
},
{
Binary file not shown.
2 changes: 1 addition & 1 deletion make/maven/maven_darwin_mkl.mk
@@ -77,7 +77,7 @@ USE_CUDNN = 0
# CUDA_ARCH :=

# whether use cuda runtime compiling for writing kernels in native language (i.e. Python)
ENABLE_CUDA_RTC = 0
USE_NVRTC = 0

# use openmp for parallelization
USE_OPENMP = 0
3 changes: 1 addition & 2 deletions make/maven/maven_linux_cu90mkl.mk
@@ -79,9 +79,8 @@ USE_NCCL = 1
# CUDA_ARCH :=

# whether use cuda runtime compiling for writing kernels in native language (i.e. Python)
ENABLE_CUDA_RTC = 1

USE_NVTX=1
USE_NVRTC = 1

# use openmp for parallelization
USE_OPENMP = 1
3 changes: 1 addition & 2 deletions make/maven/maven_linux_cu92mkl.mk
@@ -79,9 +79,8 @@ USE_NCCL = 1
# CUDA_ARCH :=

# whether use cuda runtime compiling for writing kernels in native language (i.e. Python)
ENABLE_CUDA_RTC = 1

USE_NVTX=1
USE_NVRTC = 1

# use openmp for parallelization
USE_OPENMP = 1
2 changes: 1 addition & 1 deletion make/maven/maven_linux_mkl.mk
@@ -76,7 +76,7 @@ USE_CUDNN = 0
# CUDA_ARCH :=

# whether use cuda runtime compiling for writing kernels in native language (i.e. Python)
ENABLE_CUDA_RTC = 0
USE_NVRTC = 0

# use openmp for parallelization
USE_OPENMP = 1
7 changes: 1 addition & 6 deletions python/mxnet/gluon/utils.py
@@ -28,12 +28,7 @@
import warnings
import collections
import weakref
try:
import requests
except ImportError:
class requests_failed_to_import(object):
pass
requests = requests_failed_to_import
import requests

import numpy as np

(The remaining changed files are not shown.)
