diff --git a/include/tvm/relay/attrs/nn.h b/include/tvm/relay/attrs/nn.h
index 6518f122b72e..0a3246ecf5a1 100644
--- a/include/tvm/relay/attrs/nn.h
+++ b/include/tvm/relay/attrs/nn.h
@@ -987,6 +987,22 @@ struct MatmulAttrs : public tvm::AttrsNode<MatmulAttrs> {
   }
 };
 
+/*! \brief Attributes for dense operator */
+struct DenseAttrs : public tvm::AttrsNode<DenseAttrs> {
+  IndexExpr units;
+  tvm::String auto_scheduler_rewritten_layout;  // The layout after auto-scheduler's layout rewrite
+  DataType out_dtype;
+
+  TVM_DECLARE_ATTRS(DenseAttrs, "relay.attrs.DenseAttrs") {
+    TVM_ATTR_FIELD(units).describe("Number of hidden units of the dense transformation.");
+
+    // use 0 bits to indicate none.
+    TVM_ATTR_FIELD(out_dtype)
+        .set_default(NullValue<DataType>())
+        .describe("Output data type, set to explicit type under mixed precision setting");
+  }
+};
+
 /*! \brief Attributes for batch matmul operator */
 struct BatchMatmulAttrs : public tvm::AttrsNode<BatchMatmulAttrs> {
   tvm::String auto_scheduler_rewritten_layout;  // The layout after auto-scheduler's layout rewrite
diff --git a/python/tvm/relay/op/nn/nn.py b/python/tvm/relay/op/nn/nn.py
index a6cf147cc564..d8d36d3a544d 100644
--- a/python/tvm/relay/op/nn/nn.py
+++ b/python/tvm/relay/op/nn/nn.py
@@ -1510,7 +1510,7 @@ def matmul(data, weight, units=None, out_dtype="", data_transposed=False, weight
     return _make.matmul(data, weight, units, out_dtype, data_transposed, weight_transposed)
 
 
-def dense(data, weight, units=None, out_dtype="", data_transposed=False, weight_transposed=True):
+def dense(data, weight, units=None, out_dtype=""):
     """Dense operator.
     Applies a linear transformation
 
@@ -1535,21 +1535,11 @@ def dense(data, weight, units=None, out_dtype="", data_transposed=False, weight_
         Specifies the output data type for mixed precision dense,
         of shape `(d_1, d_2, ..., d_n, units)`.
 
-    data_transposed : bool, optional
-        Whether the data tensor is in transposed format. Expected to be False.
-
-    weight_transposed : bool, optional
-        Whether the weight tensor is in transposed format. Expected to be True.
-
     Returns
     -------
     result : tvm.relay.Expr
         The computed result.
""" - # Add data_transposed & weight_transposed parameters for some API requires to apply - # attrs to this function - assert not data_transposed - assert weight_transposed return _make.dense(data, weight, units, out_dtype) diff --git a/python/tvm/relay/op/op_attrs.py b/python/tvm/relay/op/op_attrs.py index 323190e309b8..68c745beb027 100644 --- a/python/tvm/relay/op/op_attrs.py +++ b/python/tvm/relay/op/op_attrs.py @@ -66,7 +66,12 @@ class BiasAddAttrs(Attrs): @tvm._ffi.register_object("relay.attrs.MatmulAttrs") class MatmulAttrs(Attrs): - """Attributes for nn.matmul and nn.dense""" + """Attributes for nn.matmul""" + + +@tvm._ffi.register_object("relay.attrs.DenseAttrs") +class DenseAttrs(Attrs): + """Attributes for nn.dense""" @tvm._ffi.register_object("relay.attrs.SoftmaxAttrs") diff --git a/rust/tvm/src/ir/relay/attrs/nn.rs b/rust/tvm/src/ir/relay/attrs/nn.rs index e77972e45f86..e0a1d5bf02cd 100644 --- a/rust/tvm/src/ir/relay/attrs/nn.rs +++ b/rust/tvm/src/ir/relay/attrs/nn.rs @@ -66,6 +66,16 @@ pub struct MatmulAttrsNode { pub weight_transposed: bool, } +#[repr(C)] +#[derive(Object, Debug)] +#[ref_name = "DenseAttrs"] +#[type_key = "relay.attrs.DenseAttrs"] +pub struct DenseAttrsNode { + pub base: BaseAttrsNode, + pub units: IndexExpr, + pub out_dtype: DataType, +} + #[repr(C)] #[derive(Object, Debug)] #[ref_name = "GlobalPool2DAttrs"] diff --git a/src/relay/op/nn/nn.cc b/src/relay/op/nn/nn.cc index 20179a4597ed..1cd825618473 100644 --- a/src/relay/op/nn/nn.cc +++ b/src/relay/op/nn/nn.cc @@ -164,18 +164,22 @@ Useful for // ------------------- relay.nn.matmul TVM_REGISTER_NODE_TYPE(MatmulAttrs); +TVM_REGISTER_NODE_TYPE(DenseAttrs); Expr MakeMatmul(Expr data, Expr weight, IndexExpr units, DataType out_dtype, bool data_transposed, bool weight_transposed) { - auto attrs = make_object(); - attrs->units = units; - attrs->out_dtype = out_dtype; - attrs->data_transposed = data_transposed; - attrs->weight_transposed = weight_transposed; if (!data_transposed && weight_transposed) { + auto attrs = make_object(); + attrs->units = units; + attrs->out_dtype = out_dtype; static const Op& dense_op = Op::Get("nn.dense"); return Call(dense_op, {data, weight}, Attrs(attrs), {}); } else { + auto attrs = make_object(); + attrs->units = units; + attrs->out_dtype = out_dtype; + attrs->data_transposed = data_transposed; + attrs->weight_transposed = weight_transposed; static const Op& matmul_op = Op::Get("nn.matmul"); return Call(matmul_op, {data, weight}, Attrs(attrs), {}); } @@ -215,22 +219,20 @@ RELAY_REGISTER_OP("nn.dense") - **out**: `(x1, x2, ..., xn, units)`. )code" TVM_ADD_FILELINE) - .set_attrs_type() + .set_attrs_type() .set_num_inputs(2) .add_argument("data", "nD Tensor", "Input data.") .add_argument("weight", "2D Tensor", "Weight matrix.") .set_support_level(1) - .add_type_rel("Dense", MatmulRel); + .add_type_rel("Dense", MatmulRel); // ------------------- relay.nn.dense // ------------------- relay.nn.contrib_dense_pack // Positional relay function to create dense_pack operator used by frontend FFI. Expr MakeDensePack(Expr data, Expr weight, IndexExpr units, DataType out_dtype) { - auto attrs = make_object(); + auto attrs = make_object(); attrs->units = units; attrs->out_dtype = out_dtype; - attrs->data_transposed = false; - attrs->weight_transposed = true; static const Op& op = Op::Get("nn.contrib_dense_pack"); return Call(op, {data, weight}, Attrs(attrs), {}); } @@ -245,12 +247,12 @@ RELAY_REGISTER_OP("nn.contrib_dense_pack") - **out**: `(x1, x2, ..., xn, units)`. 
)code" TVM_ADD_FILELINE) - .set_attrs_type() + .set_attrs_type() .set_num_inputs(2) .add_argument("data", "nD Tensor", "Input data.") .add_argument("weight", "3D Tensor", "Packed weight matrix.") .set_support_level(10) - .add_type_rel("DensePack", DensePackRel); + .add_type_rel("DensePack", DensePackRel); // ------------------- relay.nn.contrib_dense_pack // relay.leaky_relu diff --git a/src/relay/op/nn/nn.h b/src/relay/op/nn/nn.h index 60bff5d3c69a..97591981c2a3 100644 --- a/src/relay/op/nn/nn.h +++ b/src/relay/op/nn/nn.h @@ -42,23 +42,28 @@ bool MatmulRel(const Array& types, int num_inputs, const Attrs& attrs, const auto* data = types[0].as(); const auto* weight = types[1].as(); if (data == nullptr) return false; + ICHECK(static_cast(data->shape.size()) != 0); const AttrType* param = attrs.as(); ICHECK(param != nullptr); - - ICHECK(static_cast(data->shape.size()) != 0); + bool data_transposed = false; + bool weight_transposed = true; + if (attrs->IsInstance()) { + data_transposed = param->data_transposed; + weight_transposed = param->weight_transposed; + } const Array& dshape = data->shape; Array oshape = dshape; tvm::PrimExpr reduce = dshape[dshape.size() - 1]; - if (param->data_transposed) { + if (data_transposed) { reduce = dshape[dshape.size() - 2]; oshape.Set((oshape.size() - 2), dshape[oshape.size() - 1]); } if (param->units.defined()) { // validate the weight shape is proper if defined // Assign weight type - const Array& wshape = param->weight_transposed + const Array& wshape = weight_transposed ? Array({param->units, reduce}) : Array({reduce, param->units}); // It is possible for weight to be nullptr in which case we will use @@ -90,12 +95,12 @@ bool MatmulRel(const Array& types, int num_inputs, const Attrs& attrs, } else { ICHECK(static_cast(weight->shape.size()) == 2); if (!data->shape.back().as()) { - ICHECK((param->weight_transposed && reporter->AssertEQ(reduce, weight->shape[1])) || - (!param->weight_transposed && reporter->AssertEQ(reduce, weight->shape[0]))) + ICHECK((weight_transposed && reporter->AssertEQ(reduce, weight->shape[1])) || + (!weight_transposed && reporter->AssertEQ(reduce, weight->shape[0]))) << "MatmulRel: input dimension doesn't match," << " data shape=" << data->shape << ", weight shape=" << weight->shape; } - oshape.Set((oshape.size() - 1), param->weight_transposed ? wshape[0] : wshape[1]); + oshape.Set((oshape.size() - 1), weight_transposed ? wshape[0] : wshape[1]); } } diff --git a/src/relay/qnn/op/dense.cc b/src/relay/qnn/op/dense.cc index f47fe71ebab7..6284524bff27 100644 --- a/src/relay/qnn/op/dense.cc +++ b/src/relay/qnn/op/dense.cc @@ -45,8 +45,8 @@ bool QnnDenseRel(const Array& types, int num_inputs, const Attrs& attrs, const auto* data = types[0].as(); const auto* weight = types[1].as(); if (data == nullptr || weight == nullptr) return false; - const auto* param = attrs.as(); - ICHECK(param != nullptr) << "MatmulAttrs cannot be nullptr."; + const auto* param = attrs.as(); + ICHECK(param != nullptr) << "DenseAttrs cannot be nullptr."; ICHECK(data->dtype == DataType::Int(8) || data->dtype == DataType::UInt(8)) << "Expected quantized dense type(int8, uint8) for input but was " << data->dtype; ICHECK(weight->dtype == DataType::Int(8) || weight->dtype == DataType::UInt(8)) @@ -70,24 +70,22 @@ bool QnnDenseRel(const Array& types, int num_inputs, const Attrs& attrs, // Collect the input tensor and output tensor devoid of scale and zero points to reuse Relay // Dense infer type function. 
   Array<Type> tensor_types = {types[0], types[1], types[6]};
-  return MatmulRel<MatmulAttrs>(tensor_types, 3, attrs, reporter);
+  return DenseRel<DenseAttrs>(tensor_types, 3, attrs, reporter);
 }
 
 // Positional relay function to create quantized dense operator used by frontend FFI.
 Expr MakeQuantizedDense(Expr data, Expr weight, Expr input_zero_point, Expr kernel_zero_point,
                         Expr input_scale, Expr kernel_scale, IndexExpr units, DataType out_dtype) {
-  auto attrs = make_object<MatmulAttrs>();
+  auto attrs = make_object<DenseAttrs>();
   attrs->units = std::move(units);
   attrs->out_dtype = out_dtype;
-  attrs->data_transposed = false;
-  attrs->weight_transposed = true;
   static const Op& op = Op::Get("qnn.dense");
   return Call(op, {data, weight, input_zero_point, kernel_zero_point, input_scale, kernel_scale},
               Attrs(attrs), {});
 }
 
 Expr DenseFirstTerm(const Expr& quantized_data, const Expr& quantized_kernel,
-                    const MatmulAttrs* attrs) {
+                    const DenseAttrs* attrs) {
   return Dense(quantized_data, quantized_kernel, attrs->units, attrs->out_dtype);
 }
 
@@ -163,7 +161,7 @@ Expr QnnDenseCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
   const auto in_shape = get_shape(arg_types[0]);
   const int reduction_dim_size = get_const_int(in_shape[1]);
 
-  const auto* qnn_dense_attrs = attrs.as<MatmulAttrs>();
+  const auto* qnn_dense_attrs = attrs.as<DenseAttrs>();
 
   auto term1 = DenseFirstTerm(quantized_data, quantized_kernel, qnn_dense_attrs);
   auto term2 = DenseSecondTerm(quantized_data, kernel_zero_point);
@@ -206,7 +204,7 @@ RELAY_REGISTER_OP("qnn.dense")
 - **weight**: quantized(int8, unit8) `(units, input_dim)`
 - **out**: quantized(int32) `(x1, x2, ..., xn, units)`.
 )code" TVM_ADD_FILELINE)
-    .set_attrs_type<MatmulAttrs>()
+    .set_attrs_type<DenseAttrs>()
     .set_num_inputs(6)
     .add_argument("data", "quantized nD Tensor", "Input data.")
     .add_argument("weight", "quantized 2D Tensor", "Weight matrix.")
diff --git a/src/relay/quantize/realize.cc b/src/relay/quantize/realize.cc
index 09572b04ed4b..968628fbfe39 100644
--- a/src/relay/quantize/realize.cc
+++ b/src/relay/quantize/realize.cc
@@ -280,13 +280,11 @@ Expr DenseRealize(const Call& ref_call, const Array<Expr>& new_args, const Objec
   }
   Expr rdata = Cast(rhs->data, cfg->dtype_weight);
 
-  const auto ref_attrs = ref_call->attrs.as<MatmulAttrs>();
-  auto attrs = make_object<MatmulAttrs>();
+  const auto ref_attrs = ref_call->attrs.as<DenseAttrs>();
+  auto attrs = make_object<DenseAttrs>();
   *attrs = *ref_attrs;
   DataType out_dtype = cfg->dtype_activation;
   attrs->out_dtype = out_dtype;
-  attrs->data_transposed = false;
-  attrs->weight_transposed = true;
 
   Expr ret = Call(ref_call->op, {ldata, rdata}, Attrs(attrs), ref_call->type_args);
   Expr mul = Multiply(lhs->dom_scale, rhs->dom_scale);
diff --git a/src/relay/transforms/auto_scheduler_layout_rewrite.cc b/src/relay/transforms/auto_scheduler_layout_rewrite.cc
index 8aa5117dee1e..da0bd35a332a 100644
--- a/src/relay/transforms/auto_scheduler_layout_rewrite.cc
+++ b/src/relay/transforms/auto_scheduler_layout_rewrite.cc
@@ -89,6 +89,8 @@ class FuncMutator : public ExprMutator {
       updated_attrs = CopyAttrsWithNewLayout(pattr, new_layout);
     } else if (auto pattr = call->attrs.as<MatmulAttrs>()) {
       updated_attrs = CopyAttrsWithNewLayout(pattr, new_layout);
+    } else if (auto pattr = call->attrs.as<DenseAttrs>()) {
+      updated_attrs = CopyAttrsWithNewLayout(pattr, new_layout);
     } else if (auto pattr = call->attrs.as<BatchMatmulAttrs>()) {
       updated_attrs = CopyAttrsWithNewLayout(pattr, new_layout);
     } else {
@@ -168,6 +170,8 @@ TVM_REGISTER_GLOBAL("relay.attrs.get_auto_scheduler_rewritten_layout")
         return attrs.as<Conv3DAttrs>()->auto_scheduler_rewritten_layout;
       } else if (attrs->IsInstance<MatmulAttrs>()) {
         return attrs.as<MatmulAttrs>()->auto_scheduler_rewritten_layout;
+      } else if (attrs->IsInstance<DenseAttrs>()) {
+        return attrs.as<DenseAttrs>()->auto_scheduler_rewritten_layout;
       } else if (attrs->IsInstance<BatchMatmulAttrs>()) {
         return attrs.as<BatchMatmulAttrs>()->auto_scheduler_rewritten_layout;
       } else {
diff --git a/src/relay/transforms/combine_parallel_dense.cc b/src/relay/transforms/combine_parallel_dense.cc
index 966210c3c882..3cd9cca4fec4 100644
--- a/src/relay/transforms/combine_parallel_dense.cc
+++ b/src/relay/transforms/combine_parallel_dense.cc
@@ -70,15 +70,15 @@ class ParallelDenseToBatchCombiner : public ParallelOpBatchCombiner {
     }
     CHECK_EQ(num_args, 2);
 
-    const auto* origin_attrs = branches[0][0]->attrs.as<MatmulAttrs>();
+    const auto* origin_attrs = branches[0][0]->attrs.as<DenseAttrs>();
     ICHECK(origin_attrs);
     return Downcast<Call>(MakeBatchMatmul(new_args[0], new_args[1], origin_attrs->out_dtype));
   }
 
   virtual bool CanOpsBeCombined(const CallNode* a, const CallNode* b) {
     StructuralEqual eq;
-    const auto* attrs_a = a->attrs.as<MatmulAttrs>();
-    const auto* attrs_b = b->attrs.as<MatmulAttrs>();
+    const auto* attrs_a = a->attrs.as<DenseAttrs>();
+    const auto* attrs_b = b->attrs.as<DenseAttrs>();
     ICHECK(attrs_a);
     ICHECK(attrs_b);
     const auto* weight_a = a->args[1]->type_as<TensorTypeNode>();
@@ -103,8 +103,8 @@ class ParallelDenseToDenseCombiner : public ParallelOpCombiner {
 
   bool CanOpsBeCombined(const CallNode* a, const CallNode* b) {
     StructuralEqual eq;
-    const auto* attrs_a = a->attrs.as<MatmulAttrs>();
-    const auto* attrs_b = b->attrs.as<MatmulAttrs>();
+    const auto* attrs_a = a->attrs.as<DenseAttrs>();
+    const auto* attrs_b = b->attrs.as<DenseAttrs>();
     const auto* weight_a = a->args[1]->type_as<TensorTypeNode>();
     const auto* weight_b = b->args[1]->type_as<TensorTypeNode>();
     ICHECK(attrs_a != nullptr && attrs_b != nullptr && weight_a != nullptr && weight_b != nullptr);
@@ -119,13 +119,11 @@ class ParallelDenseToDenseCombiner : public ParallelOpCombiner {
     IndexExpr new_output_dims;
     // concat all weights into one
     std::tie(new_weight, new_output_dims) = TransformWeight(branches);
-    const auto* origin_attrs = branches[0][0]->attrs.as<MatmulAttrs>();
+    const auto* origin_attrs = branches[0][0]->attrs.as<DenseAttrs>();
     ICHECK(origin_attrs);
-    const auto dense_attrs = make_object<MatmulAttrs>();
+    const auto dense_attrs = make_object<DenseAttrs>();
     dense_attrs->units = new_output_dims;
     dense_attrs->out_dtype = origin_attrs->out_dtype;
-    dense_attrs->data_transposed = false;
-    dense_attrs->weight_transposed = true;
     return Call(dense_op, {input, new_weight}, Attrs{dense_attrs}, {});
   }
diff --git a/tests/python/contrib/test_arm_compute_lib/test_dense.py b/tests/python/contrib/test_arm_compute_lib/test_dense.py
index 007a6b9bf8d0..e6620a4bc1cb 100644
--- a/tests/python/contrib/test_arm_compute_lib/test_dense.py
+++ b/tests/python/contrib/test_arm_compute_lib/test_dense.py
@@ -123,8 +123,6 @@ def _get_expected_codegen(shape, weight_shape, units, dtype, has_bias=False):
             "shape": [[list(output_shape)]],
             "dtype": [[dtype]],
             "units": [[str(units)]],
-            "data_transposed": [["0"]],
-            "weight_transposed": [["1"]],
         },
     }
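
Illustrative usage sketch (not part of the patch; a minimal check assuming a TVM build that includes it): with `DenseAttrs` restored, `relay.nn.dense` takes only `units` and `out_dtype`, while `relay.nn.matmul` keeps the transpose flags and, per `MakeMatmul` above, lowers to `nn.dense` when `data_transposed=False` and `weight_transposed=True`.

```python
import tvm
from tvm import relay

# nn.dense computes Y = X * W^T: data is (batch, in_dim), weight is (units, in_dim).
data = relay.var("data", shape=(8, 16), dtype="float32")
weight = relay.var("weight", shape=(4, 16), dtype="float32")

# After this patch nn.dense no longer accepts data_transposed/weight_transposed.
out = relay.nn.dense(data, weight, units=4)

# nn.matmul with the default flags should now emit an nn.dense call (see MakeMatmul).
mm = relay.nn.matmul(data, weight, units=4, data_transposed=False, weight_transposed=True)
assert mm.op.name == "nn.dense"

mod = tvm.IRModule.from_expr(relay.Function([data, weight], out))
mod = relay.transform.InferType()(mod)  # infers Tensor[(8, 4), float32]
print(mod)
```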