[Adreno] Add markup pass of relay tensors for static texture planning #11878

Merged 15 commits on Aug 2, 2022
44 changes: 29 additions & 15 deletions src/relay/transforms/annotate_texture_storage.cc
@@ -30,7 +30,7 @@
*
* - AnnotateMemoryScope calls *target.CollectStorageInfo for all target been represented
* in the graph and rewrites graph modifying or inserting of VirtualDevice with required
* memory_scop collected from the CollectStorageInfo
* memory_scope collected from the CollectStorageInfo
*/

#include <tvm/relay/attrs/nn.h>
@@ -119,9 +119,7 @@ class StorageInfo : private transform::DeviceAwareExprVisitor {
if (call->checked_type().as<TensorTypeNode>()) {
std::string scope = "global.texture";
if (const auto* ttype = call->checked_type().as<TensorTypeNode>()) {
if (ttype->shape.size() == 5) {
scope = Scope(ttype->shape, GetVirtualDevice(GetRef<Expr>(call)));
}
scope = Scope(ttype->shape, GetVirtualDevice(GetRef<Expr>(call)));
}
storage_scope_[call].push_back(scope);
} else {
@@ -175,8 +173,26 @@ class StorageInfo : private transform::DeviceAwareExprVisitor {
}
}

/**
 * Determines the name of the memory scope that can hold a tensor of the given shape.
 *
 * The scope falls back to "global" if the tensor does not satisfy the current flattening
 * rules for textures (currently the tensor must be 5d with the last dimension equal to 4).
 *
 * The packing layout inside the texture scope (the part after the dash) is derived from
 * the shape itself. Hardware may limit the texture spatial dimensions, and we must not
 * exceed those sizes. Besides fitting the hardware limits, we also want balanced packing,
 * so that the final spatial sizes of the texture do not differ too much.
 * @param shape shape to be analyzed
 * @param vd VirtualDevice of the tensor whose memory scope is being determined
 * @return string representing the memory scope, either "global" or "global.texture-layout"
 */
std::string Scope(Array<PrimExpr> shape, const VirtualDevice& vd) {
if (vd != VirtualDevice::FullyUnconstrained()) {
// currently we support only textures been made from 5d tensors
Review comment (Contributor): TODO(@csullivan, @elvin-n): Support more layouts with Buffer.axis_separators lowering.

// The 5d requirement is not a limitation of textures in general; it is a limitation of
// how we currently represent memory scopes/layouts and flatten textures in TIR.
if (vd != VirtualDevice::FullyUnconstrained() && shape.size() == 5 &&
shape[4].as<IntImmNode>()->value == 4) {
std::map<int, std::string> diffs;
int limit =
vd->target->GetAttr<Integer>("texture_spatial_limit").value_or(Integer(16384))->value;
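For reference, the snippet below is a minimal standalone sketch of the selection heuristic the new comment describes; it is not the pass's actual implementation, and the function name PickScope, the split enumeration, and the returned layout suffix are illustrative placeholders. The real Scope() reads the hardware limit from the target's "texture_spatial_limit" attribute (defaulting to 16384, as the context lines above show) and maps the chosen packing onto the texture layout names the runtime expects.

#include <cstdint>
#include <string>
#include <vector>

// Illustrative only: choose a texture scope for a 5d, RGBA-packed tensor shape.
std::string PickScope(const std::vector<int64_t>& shape, int64_t spatial_limit) {
  // Only 5d tensors whose innermost dimension equals 4 can be flattened to a texture.
  if (shape.size() != 5 || shape[4] != 4) return "global";
  int best_axis = -1;
  int64_t best_diff = INT64_MAX;
  // Try splitting the outer four dimensions into (height, width) at each axis and keep
  // the most balanced split whose two spatial extents both fit the hardware limit.
  for (int axis = 1; axis < 4; ++axis) {
    int64_t height = 1, width = 1;
    for (int i = 0; i < axis; ++i) height *= shape[i];
    for (int i = axis; i < 4; ++i) width *= shape[i];
    if (height > spatial_limit || width > spatial_limit) continue;
    int64_t diff = height > width ? height - width : width - height;
    if (diff < best_diff) {
      best_diff = diff;
      best_axis = axis;
    }
  }
  if (best_axis < 0) return "global";  // nothing fits within the hardware limit
  return "global.texture-split" + std::to_string(best_axis);  // placeholder layout name
}

For example, with a limit of 16384, a shape of (1, 64, 16, 16, 4) admits the splits (1, 16384), (64, 256) and (1024, 16); the sketch picks (64, 256) as the most balanced candidate that fits.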
@@ -220,13 +236,11 @@ class StorageInfo : private transform::DeviceAwareExprVisitor {

bool expr_is_rgba_vectorizable = false;
if (const auto* ttype = expr->checked_type().as<TensorTypeNode>()) {
if (ttype->shape.size() == 5) {
scope = Scope(ttype->shape, GetVirtualDevice(GetRef<Expr>(expr)));
if (scope != "global") {
auto inner_dim = ttype->shape.back().as<IntImmNode>();
if (inner_dim && inner_dim->value == 4) {
expr_is_rgba_vectorizable = true;
}
scope = Scope(ttype->shape, GetVirtualDevice(GetRef<Expr>(expr)));
if (scope != "global") {
auto inner_dim = ttype->shape.back().as<IntImmNode>();
if (inner_dim && inner_dim->value == 4) {
expr_is_rgba_vectorizable = true;
}
}
}
@@ -347,11 +361,11 @@ class StorageInfo : private transform::DeviceAwareExprVisitor {
* Currently this workflow supports analysis and rewriting of VirtualDevice for
* Constants and function Variables
*/
class VDRewriter : public transform::DeviceAwareExprMutator {
class RewriteVDStorageScopes : public transform::DeviceAwareExprMutator {
using VarMap = std::unordered_map<Expr, Var, ObjectPtrHash, ObjectPtrEqual>;

public:
explicit VDRewriter(const Map<Expr, Array<String>>& storage_scope)
explicit RewriteVDStorageScopes(const Map<Expr, Array<String>>& storage_scope)
: transform::DeviceAwareExprMutator(Optional<IRModule>()), storage_scope_(storage_scope) {}

Function Rewrite(const Expr& expr) { return Downcast<Function>(Mutate(expr)); }
@@ -486,7 +500,7 @@ Map<Expr, Array<String>> CollectStorageInfo(const Expr& expr) {
Expr AnnotateMemoryScopeExpr(const Expr& expr, const IRModule& mod, CompilationConfig config) {
auto storage_scope = CollectStorageInfo(expr);
if (storage_scope.size()) {
return VDRewriter(storage_scope).Rewrite(expr);
return RewriteVDStorageScopes(storage_scope).Rewrite(expr);
} else {
return expr;
}
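AnnotateMemoryScopeExpr above shows the overall shape of the pass: a read-only analysis (CollectStorageInfo, backed by the StorageInfo visitor) produces a map from expressions to candidate memory scopes, and only when that map is non-empty does a separate mutator (RewriteVDStorageScopes) reconstruct the function with the scopes stamped into its VirtualDevice annotations. The toy snippet below mirrors that two-phase structure with plain standard-library types; it is only a sketch of the pattern, not TVM code, and the node type, map keys and hard-coded scope string are invented for illustration.

#include <string>
#include <unordered_map>
#include <vector>

// Toy stand-in for a relay expression; the real pass keys its map on Expr nodes.
struct ToyNode {
  std::string name;
  std::string memory_scope;  // empty until the rewrite phase fills it in
};

using ScopeMap = std::unordered_map<std::string, std::string>;

// Phase 1: read-only analysis, analogous to StorageInfo / CollectStorageInfo.
ScopeMap CollectScopes(const std::vector<ToyNode>& graph) {
  ScopeMap scopes;
  for (const auto& node : graph) {
    // The real pass chooses between "global" and "global.texture*" per tensor;
    // here every node simply gets the texture scope.
    scopes[node.name] = "global.texture";
  }
  return scopes;
}

// Phase 2: rewriting, analogous to RewriteVDStorageScopes, applied only when
// the analysis produced something to rewrite.
std::vector<ToyNode> AnnotateScopes(std::vector<ToyNode> graph) {
  ScopeMap scopes = CollectScopes(graph);
  if (scopes.empty()) return graph;  // nothing to annotate, return the graph unchanged
  for (auto& node : graph) {
    auto it = scopes.find(node.name);
    if (it != scopes.end()) node.memory_scope = it->second;
  }
  return graph;
}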
5 changes: 2 additions & 3 deletions tests/python/relay/test_conv2d_nchw_texture.py
@@ -606,7 +606,6 @@ def test_residual_block():
build_run_compare(mod, params1, {"data": input_shape}, dtype, target, static_memory_scope)



@tvm.testing.requires_opencl
def test_concat():
"""
@@ -737,7 +736,7 @@ def test_pooling_branching_texture_params():
\ /
add <- to have the only one output, will be fused
| <- buffer
layout_transform (NCHW4c->NCHW)
layout_transform (NCHW4c->NCHW)
"""
target = "opencl --device=adreno"
dtype = "float16"
@@ -865,7 +864,7 @@ def test_branching_texture_params():
\ /
add <- to have the only one output
| <- buffer
layout_transform (NCHW4c->NCHW)
layout_transform (NCHW4c->NCHW)
"""
target = "opencl --device=adreno"
dtype = "float16"