[Minor][MemHammer] Minor tweaks in code review #14

Merged
merged 4 commits on Jan 16, 2022
158 changes: 0 additions & 158 deletions .github/CODEOWNERS
@@ -1,158 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Github code owners file
# This file is used as a convenient tool to map
# committers' areas of expertise and facilitate the review process.
#
# This list may not be comprehensive and is meant to be
# updated over time.

# Per ASF policy, committers have global write permission.
# We normally recommend that committers shepherd code in their areas of expertise.
* @apache/tvm-committers

# Order is important; the last matching pattern takes the highest precedence.
# The sub-modules should be ordered first by depth.
# Make sure to append new sub-module rules after existing module rules.

##############################
# Top-level Fallbacks
##############################
include/** @tqchen @jroesch @yzhliu @icemelon @junrushao1994 @comaniac @zhiics
src/** @tqchen @jroesch @yzhliu @icemelon @junrushao1994 @comaniac @zhiics
apps/** @tqchen @jroesch @yzhliu @icemelon @junrushao1994 @comaniac @zhiics
python/** @tqchen @jroesch @yzhliu @icemelon @junrushao1994 @comaniac @zhiics

# Thirdparty license audit
3rdparty/** @tqchen @jroesch
licenses/** @tqchen @jroesch

# JVM language
jvm/** @yzhliu

# Golang
golang/** @srkreddy1238

# WASM
web/** @tqchen @jroesch

# Docker
docker/** @areusch @leandron @jroesch

# Conda
conda/** @tqchen @junrushao1994 @comaniac

# CMake
cmake/** @jroesch @tqchen @areusch @junrushao1994 @comaniac

# rust bindings
rust/** @jroesch @nhynes

# vta
vta/** @tmoreau89 @vegaluisjose

# docs
docs/** @comaniac @junrushao1994 @tqchen @jroesch @areusch @yzhliu @merrymercy @icemelon
tutorials/** @comaniac @junrushao1994 @tqchen @jroesch @areusch @yzhliu @merrymercy @icemelon

# tests
tests/** @comaniac @junrushao1994 @tqchen @jroesch @areusch @yzhliu @merrymercy @icemelon

##############################
# Specific modules
##############################

# automation related
src/auto_scheduler/** @merrymercy @jcf94 @comaniac @junrushao1994 @vinx13 @Hzfengsy
include/tvm/auto_scheduler/** @merrymercy @jcf94 @comaniac @junrushao1994 @vinx13 @Hzfengsy
python/tvm/auto_scheduler/** @merrymercy @jcf94 @comaniac @junrushao1994 @vinx13 @Hzfengsy

python/tvm/autotvm/** @merrymercy @jcf94 @comaniac @junrushao1994 @vinx13

# node system and reflection
src/node/** @junrushao1994 @vinx13 @tqchen @jroesch @comaniac
include/tvm/node/** @junrushao1994 @vinx13 @tqchen @jroesch @comaniac

# ir: Common IR
src/ir/** @junrushao1994 @vinx13 @tqchen @jroesch @comaniac
include/tvm/ir/** @junrushao1994 @vinx13 @tqchen @jroesch @comaniac
python/tvm/ir/** @junrushao1994 @vinx13 @tqchen @jroesch @comaniac

# tir
src/tir/** @junrushao1994 @vinx13 @tqchen @kparzysz-quic @ZihengJiang @masahi @were @Hzfengsy
include/tvm/tir/** @junrushao1994 @vinx13 @tqchen @kparzysz-quic @ZihengJiang @masahi @were @Hzfengsy
python/tvm/tir/** @junrushao1994 @vinx13 @tqchen @kparzysz-quic @ZihengJiang @masahi @were @Hzfengsy

# te
src/te/** @junrushao1994 @vinx13 @tqchen @kparzysz-quic @ZihengJiang @masahi @were
include/tvm/te/** @junrushao1994 @vinx13 @tqchen @kparzysz-quic @ZihengJiang @masahi @were
python/tvm/te/** @junrushao1994 @vinx13 @tqchen @kparzysz-quic @ZihengJiang @masahi @were

# target
src/target/** @junrushao1994 @vinx13 @tqchen @kparzysz-quic @ZihengJiang @masahi
include/tvm/target/** @junrushao1994 @vinx13 @tqchen @kparzysz-quic @ZihengJiang @masahi
python/tvm/target/** @junrushao1994 @vinx13 @tqchen @kparzysz-quic @ZihengJiang @masahi

# arith: Arithmetic module and simplifiers
src/arith/** @tqchen @junrushao1994 @vinx13
include/tvm/arith/** @tqchen @junrushao1994 @vinx13
python/tvm/arith/** @tqchen @junrushao1994 @vinx13

# parser
src/parser/** @jroesch @slyubomirsky

# runtime
src/runtime/** @vinx13 @tqchen @FronzenGene @liangfu @areusch @tmoreau89 @ajtulloch @masahi @kazum @ZihengJiang @junrushao1994
include/tvm/runtime/** @vinx13 @tqchen @FronzenGene @liangfu @areusch @tmoreau89 @ajtulloch @masahi @kazum @ZihengJiang @junrushao1994
python/tvm/runtime/** @vinx13 @tqchen @FronzenGene @liangfu @areusch @tmoreau89 @ajtulloch @masahi @kazum @ZihengJiang @junrushao1994

# runtime/micro
src/runtime/micro/** @areusch @liangfu @tmoreau89 @manupa-arm
src/runtime/crt/** @areusch @liangfu @tmoreau89 @manupa-arm
include/tvm/runtime/crt/** @areusch @liangfu @tmoreau89 @manupa-arm
include/tvm/runtime/micro/** @areusch @liangfu @tmoreau89 @manupa-arm
python/tvm/micro/** @areusch @liangfu @tmoreau89 @manupa-arm

# relay
src/relay/** @jroesch @slyubomirsky @icemelon @MarisaKirisame @ZihengJiang @yzhliu @vinx13 @mbrookhart @jwfromm @zhiics @anijain2305 @wweic @eqy @junrushao1994
include/tvm/relay/** @jroesch @slyubomirsky @icemelon @MarisaKirisame @ZihengJiang @yzhliu @vinx13 @mbrookhart @jwfromm @zhiics @anijain2305 @wweic @eqy @junrushao1994
python/tvm/relay/** @jroesch @slyubomirsky @icemelon @MarisaKirisame @ZihengJiang @yzhliu @vinx13 @mbrookhart @jwfromm @zhiics @anijain2305 @wweic @eqy @junrushao1994


# relay/qnn
src/relay/qnn/** @jwfromm @anijain2305 @ZihengJiang
include/tvm/relay/qnn/** @jwfromm @anijain2305 @ZihengJiang
python/tvm/relay/qnn/** @jwfromm @anijain2305 @ZihengJiang

# relay/backend/contrib: BYOC
src/relay/backend/contrib/** @zhiics @trevor-m @comaniac @mbaret @manupa-arm

# relay/frontends
python/tvm/relay/frontend/** @jwfromm @mbrookhart @srkreddy1238 @siju-samuel @Huyuwei @hlu1 @kazum @PariksheetPinjari909

# topi: Operator definitions
src/topi/** @Laurawly @Huyuwei @kevinthesun @jwfromm @vinx13 @masahi @FronzenGene @yzhliu @mbrookhart @ZihengJiang @jcf94
include/tvm/topi/** @Laurawly @Huyuwei @kevinthesun @jwfromm @vinx13 @masahi @FronzenGene @yzhliu @mbrookhart @ZihengJiang @jcf94
python/tvm/topi/** @Laurawly @Huyuwei @kevinthesun @jwfromm @vinx13 @masahi @FronzenGene @yzhliu @mbrookhart @ZihengJiang @jcf94


# tvm/driver/
python/tvm/driver/** @leandron @jwfromm @tqchen @jroesch

# tvm/driver/tvmc
python/tvm/driver/tvmc/** @leandron @jwfromm
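
The CODEOWNERS header above notes that the last matching pattern takes precedence and that more specific sub-module rules are appended after the broad fallbacks. A small stand-alone illustration of that ordering rule, using Python's fnmatch as a rough stand-in for GitHub's gitignore-style matching (the patterns and handles are examples only), is:

    # "Last match wins": a later, more specific pattern overrides the broad fallback.
    # fnmatch only approximates GitHub's matcher (it treats '*' loosely), so this
    # illustrates the ordering rule, not full CODEOWNERS semantics.
    from fnmatch import fnmatch

    codeowners = [
        ("src/**", ["@apache/tvm-committers"]),         # broad fallback, listed first
        ("src/tir/**", ["@junrushao1994", "@vinx13"]),  # more specific, listed later
    ]

    def owners_for(path):
        owners = []
        for pattern, entry in codeowners:  # later matches overwrite earlier ones
            if fnmatch(path, pattern):
                owners = entry
        return owners

    print(owners_for("src/tir/ir/stmt_functor.cc"))  # ['@junrushao1994', '@vinx13']
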
4 changes: 3 additions & 1 deletion python/tvm/meta_schedule/testing/te_workload.py
@@ -136,7 +136,9 @@ def conv3d_ndhwc( # pylint: disable=invalid-name,missing-docstring
groups: int = 1,
) -> Tuple[te.Tensor, te.Tensor, te.Tensor]:
inputs = te.placeholder((N, D, H, W, CI), name="inputs")
weight = te.placeholder((kernel_size, kernel_size, kernel_size, CI // groups, CO), name="weight")
weight = te.placeholder(
(kernel_size, kernel_size, kernel_size, CI // groups, CO), name="weight"
)
batch_size, in_d, in_h, in_w, _ = inputs.shape
k_d, k_h, k_w, channel_per_group, out_channel = weight.shape
out_channel_per_group = out_channel // groups
2 changes: 1 addition & 1 deletion python/tvm/meta_schedule/testing/tir_tensor_intrin.py
@@ -304,4 +304,4 @@ def wmma_store_impl(a: T.handle, c: T.handle) -> None:
"wmma_store",
wmma_store_desc,
wmma_store_impl,
)
)
3 changes: 1 addition & 2 deletions python/tvm/script/tir/special_stmt.py
@@ -430,8 +430,7 @@ def block_attr(attrs: Mapping[str, Object], span: Span = None):
span,
)
attrs = {
key: String(val) if isinstance(val, str) else val
for key, val in attrs.items()
key: String(val) if isinstance(val, str) else val for key, val in attrs.items()
}
block_scope.annotations = attrs

1 change: 0 additions & 1 deletion src/arith/int_set.cc
@@ -787,7 +787,6 @@ Array<IntSet> EvalSet(const Array<PrimExpr>& exprs, const Map<Var, IntSet>& dom_
return result;
}


IntSet EvalSet(Range r, const Map<Var, IntSet>& dom_map) {
Analyzer ana;
IntervalSetEvaluator m(&ana, dom_map);
5 changes: 3 additions & 2 deletions src/arith/iter_affine_map.cc
@@ -917,7 +917,7 @@ Array<IterSumExpr> DetectIterMap(const Array<PrimExpr>& indices, const Map<Var,
<< "Fail to collect constraints from iteration predicate: " << predicate);
return Array<IterSumExpr>();
}

// We have to make sure when we visit an iterator, all the constraints related with its successors
// in the iter var graph has been visited, where the expression of this iterator will contain the
// expression of its successor, so we sort them by their sizes.
@@ -1308,7 +1308,8 @@ class IterMapToExprNormalizer : public ExprMutator {
} else if (analyzer_->CanProve(expr->source->extent == expr->lower_factor * expr->extent)) {
return floordiv(source, expr->lower_factor) * expr->scale;
} else {
return floordiv(floormod(source, expr->lower_factor * expr->extent), expr->lower_factor) * expr->scale;
return floordiv(floormod(source, expr->lower_factor * expr->extent), expr->lower_factor) *
expr->scale;
}
}

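
The reformatted return statement in IterMapToExprNormalizer above lowers a split iterator to floordiv(floormod(source, lower_factor * extent), lower_factor) * scale. A standalone sanity check of that index arithmetic, in plain Python with arbitrarily chosen factors (not the TVM API), looks like this:

    # The lowered value should step through 0, scale, ..., (extent - 1) * scale,
    # holding each value for `lower_factor` consecutive source values.
    def split_iter_value(source: int, lower_factor: int, extent: int, scale: int) -> int:
        return (source % (lower_factor * extent)) // lower_factor * scale

    lower_factor, extent, scale = 4, 8, 2  # arbitrary example factors
    values = [split_iter_value(s, lower_factor, extent, scale)
              for s in range(lower_factor * extent)]
    assert values == [v * scale for v in range(extent) for _ in range(lower_factor)]
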
2 changes: 1 addition & 1 deletion src/arith/modular_set.cc
@@ -204,7 +204,7 @@ class ModularSetAnalyzer::Impl : public ExprFunctor<ModularSetAnalyzer::Entry(co
Entry a = VisitExpr(op->a);
int64_t coeff = ZeroAwareGCD(a.coeff, c2);
return Entry(coeff, a.base % c2);
}
}
return Everything();
}

1 change: 0 additions & 1 deletion src/driver/driver_api.cc
@@ -35,7 +35,6 @@
#include <algorithm>
#include <mutex>
#include <stack>
#include <tvm/ir/transform.h>

#include "../printer/text_printer.h"

3 changes: 1 addition & 2 deletions src/tir/ir/stmt_functor.cc
@@ -700,8 +700,7 @@ Array<Range> Substitute(const Array<Range>& region, const Map<Var, PrimExpr>& vm
}

void PreOrderVisit(const ObjectRef& stmt_or_expr,
const std::function<bool(const ObjectRef&)>& fvisit,
bool visit_init_block) {
const std::function<bool(const ObjectRef&)>& fvisit, bool visit_init_block) {
class PreOrderVisitor : public StmtExprVisitor {
public:
explicit PreOrderVisitor(const std::function<bool(const ObjectRef&)>& f, bool visit_init_block)
5 changes: 3 additions & 2 deletions src/tir/schedule/primitive/blockize_tensorize.cc
@@ -500,8 +500,9 @@ StmtSRef Blockize(ScheduleState self, const StmtSRef& loop_sref) {
}
arith::Analyzer analyzer;
DiagnosticContext diag_ctx(DiagnosticContext::Default(IRModule()));
Array<Array<arith::IterMark>> division = arith::SubspaceDivide(
block_realize->iter_values, iters, inner_iters, block_realize->predicate, false, &analyzer, diag_ctx);
Array<Array<arith::IterMark>> division =
arith::SubspaceDivide(block_realize->iter_values, iters, inner_iters,
block_realize->predicate, false, &analyzer, diag_ctx);
if (division.empty()) {
// It is possible to blockize if we can not do perfect subspace division if we can divide
// the block var bindings into two categories
5 changes: 2 additions & 3 deletions src/tir/transforms/compact_buffer_region.cc
@@ -53,9 +53,8 @@ Region SimplifyAndNarrowBufferRegionFromNDIntSet(const NDIntSet& nd_int_set,
for (size_t i = 0; i < nd_int_set.size(); ++i) {
const arith::IntSet& int_set = nd_int_set[i];
Range range = int_set.CoverRange(Range(/*begin=*/0, /*end=*/original_shape[i]));
result.push_back(
Range::FromMinExtent(analyzer->Simplify(range->min),
analyzer->Simplify(min(original_shape[i], range->extent))));
result.push_back(Range::FromMinExtent(
analyzer->Simplify(range->min), analyzer->Simplify(min(original_shape[i], range->extent))));
}
return result;
}
31 changes: 15 additions & 16 deletions src/tir/transforms/memhammer_lower_auto_copy.cc
@@ -17,21 +17,18 @@
* under the License.
*/

#include <tvm/arith/iter_affine_map.h>
#include <tvm/runtime/registry.h>
#include <tvm/target/target.h>
#include <tvm/tir/expr.h>
#include <tvm/tir/op.h>
#include <tvm/tir/stmt_functor.h>
#include <tvm/tir/transform.h>

#include "../../runtime/thread_storage_scope.h"
#include "../schedule/utils.h"
#include "ir_utils.h"
#include "memhammer_rewrite_rule.h"
#include "tvm/arith/iter_affine_map.h"
#include "tvm/runtime/registry.h"
#include "tvm/target/target.h"
#include "tvm/tir/expr.h"
#include "tvm/tir/op.h"
#include "tvm/tir/stmt_functor.h"
#include "tvm/tir/transform.h"
/*!
* \brief Automatically do memory optimizations for auto copy blocks
* \file memhammer_lower_auto_copy.cc
*/
#include "./ir_utils.h"
#include "./memhammer_rewrite_rule.h"

namespace tvm {
namespace tir {
@@ -54,7 +51,7 @@ static WmmaToShared wmma_to_shared;
* and choose the one with minimal conflict. However, this algorithm has exponential complexity.
* Suppose we have d dimensions and the padding size is 0-31, we need to calculate bank
* conflict for 32^{d-1} times.
* We propose a fast incremental algorithm that works for affine inputs, and it only caluculate
* We propose a fast incremental algorithm that works for affine inputs, and it only calculate
* bank conflict for 32*{d-1} times. To be specific, we first decide the optimal padding size for
* dimension d-2, then for dimension d-3, ..., finally for dimension 0. It involves 2 steps.
*
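
As a rough illustration of the complexity claim in the comment above: with d dimensions and 32 candidate paddings per dimension, exhaustive search costs 32^(d-1) conflict evaluations while the per-dimension sweep costs 32*(d-1), e.g. 1024 versus 64 for d = 3. The plain-Python sketch below mirrors the search order only; the cost function is a stand-in, not the MemHammer conflict model.

    import itertools
    import random

    def bank_conflict_cost(padding):  # stand-in cost, NOT the real conflict estimate
        random.seed(hash(padding))
        return random.random()

    def brute_force(d, candidates=range(32)):
        evals, best, best_cost = 0, None, float("inf")
        for padding in itertools.product(candidates, repeat=d - 1):
            evals += 1
            cost = bank_conflict_cost(padding)
            if cost < best_cost:
                best, best_cost = padding, cost
        return best, evals  # evals == 32 ** (d - 1)

    def incremental(d, candidates=range(32)):
        evals, padding = 0, [0] * (d - 1)
        for dim in reversed(range(d - 1)):  # dimension d-2, then d-3, ..., then 0
            best_p, best_cost = 0, float("inf")
            for p in candidates:
                padding[dim] = p
                evals += 1
                cost = bank_conflict_cost(tuple(padding))
                if cost < best_cost:
                    best_p, best_cost = p, cost
            padding[dim] = best_p
        return tuple(padding), evals  # evals == 32 * (d - 1)

    print(brute_force(3)[1], incremental(3)[1])  # 1024 64

In the source comment the per-dimension sweep is justified for affine inputs; with the toy cost above, the sketch only demonstrates the difference in evaluation counts.
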
@@ -550,7 +547,7 @@ class AutoPadder {
}
StmtExprVisitor::VisitExpr_(op);
}

/*!
* \brief Take a typical warp and collect the iteration space for load_matrix_sync and
* store_matrix_sync
@@ -684,7 +681,7 @@ class AutoCopyMutator : public StmtExprMutator {

Stmt VisitStmt_(const BlockNode* op) final {
Block block;
//only rewrite the block annotated with "auto_copy"
// only rewrite the block annotated with "auto_copy"
if (is_one(Downcast<PrimExpr>(op->annotations.Get("auto_copy").value_or(Integer(0))))) {
block = runtime::Downcast<Block>(StmtMutator::VisitStmt_(op));
ICHECK(block->reads.size() == 1 && block->writes.size() == 1);
@@ -764,6 +761,7 @@ class ThreadExtentCollector : public StmtVisitor {
};

namespace transform {

Pass LowerAutoCopy() {
auto pass_func = [](PrimFunc f, IRModule m, PassContext ctx) {
auto* n = f.CopyOnWrite();
@@ -776,6 +774,7 @@ Pass LowerAutoCopy() {
}

TVM_REGISTER_GLOBAL("tir.transform.LowerAutoCopy").set_body_typed(LowerAutoCopy);

} // namespace transform
} // namespace tir
} // namespace tvm