diff --git a/backends/arm/operators/op_avg_pool2d.py b/backends/arm/operators/op_avg_pool2d.py
index d84fe40d99..e6d07610c8 100644
--- a/backends/arm/operators/op_avg_pool2d.py
+++ b/backends/arm/operators/op_avg_pool2d.py
@@ -10,8 +10,8 @@
     NodeVisitor,
     register_node_visitor,
 )
-from executorch.backends.arm.operators.op_common import build_avg_pool_2d_common
 from executorch.backends.arm.tosa_mapping import TosaArg
+from executorch.backends.arm.tosa_utils import build_avg_pool_2d_common
 
 
 @register_node_visitor
diff --git a/backends/arm/operators/op_common.py b/backends/arm/operators/op_common.py
deleted file mode 100644
index eadf00c294..0000000000
--- a/backends/arm/operators/op_common.py
+++ /dev/null
@@ -1,52 +0,0 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-import serializer.tosa_serializer as ts
-import torch
-from executorch.backends.arm.tosa_mapping import TosaArg
-from executorch.backends.arm.tosa_quant_utils import get_quant_node_args
-from serializer.tosa_serializer import TosaOp
-
-
-def build_avg_pool_2d_common(
-    node: torch.fx.Node,
-    tosa_graph: ts.TosaSerializer,
-    input_tensor: TosaArg,
-    kernel_size: list,
-    stride: list,
-    padding: list,
-    is_quant_node: bool,
-    output: TosaArg,
-):
-    accumulator_type = input_tensor.dtype
-
-    if is_quant_node:
-        # Accumulator type always is int32 when input tensor is an integer type.
-        accumulator_type = ts.DType.INT32
-
-    # Initilize zero point to zero.
-    input_zp = 0
-    output_zp = 0
-
-    if is_quant_node:
-        input_zp = get_quant_node_args(node.args[0]).zp
-        output_zp = get_quant_node_args(list(node.users)[0]).zp
-
-    attr = ts.TosaSerializerAttribute()
-    attr.PoolAttribute(
-        kernel=kernel_size,
-        stride=stride,
-        pad=padding,
-        input_zp=input_zp,
-        output_zp=output_zp,
-        accum_dtype=accumulator_type,
-    )
-
-    tosa_graph.addOperator(
-        TosaOp.Op().AVG_POOL2D,
-        [input_tensor.name],
-        [output.name],
-        attr,
-    )
diff --git a/backends/arm/operators/op_mean_dim.py b/backends/arm/operators/op_mean_dim.py
index 5e8e3d74c0..20e1b2b8d7 100644
--- a/backends/arm/operators/op_mean_dim.py
+++ b/backends/arm/operators/op_mean_dim.py
@@ -10,8 +10,8 @@
     NodeVisitor,
     register_node_visitor,
 )
-from executorch.backends.arm.operators.op_common import build_avg_pool_2d_common
 from executorch.backends.arm.tosa_mapping import TosaArg
+from executorch.backends.arm.tosa_utils import build_avg_pool_2d_common
 
 
 @register_node_visitor
diff --git a/backends/arm/tosa_utils.py b/backends/arm/tosa_utils.py
index 68d090653a..a692b3a270 100644
--- a/backends/arm/tosa_utils.py
+++ b/backends/arm/tosa_utils.py
@@ -6,11 +6,12 @@
 import logging
 import os
 
-import executorch.backends.arm.tosa_quant_utils as tosa_quant_utils
-
 import numpy as np
 import serializer.tosa_serializer as ts
+import torch
 from executorch.backends.arm.tosa_mapping import TosaArg
+
+from executorch.backends.arm.tosa_quant_utils import get_quant_node_args, q_op
 from executorch.exir.dialects._ops import ops as exir_ops
 from serializer.tosa_serializer import TosaOp
 
@@ -158,7 +159,7 @@ def is_bias_node_for_addmm(node):
     # consumer node is addmm
     is_rank2_linear_bias = (
         consumer_node.target == exir_ops.edge.aten.addmm.default
-        and list(consumer_node.users)[0].target == tosa_quant_utils.q_op
+        and list(consumer_node.users)[0].target == q_op
     )
 
     # rank>2 linear layers
@@ -170,7 +171,7 @@
     ):
         consumer_consumer_node = list(consumer_node.users)[0]
         is_rank_greater_than_2_linear_bias = (
-            list(consumer_consumer_node.users)[0].target == tosa_quant_utils.q_op
+            list(consumer_consumer_node.users)[0].target == q_op
         )
 
     return is_rank2_linear_bias or is_rank_greater_than_2_linear_bias
@@ -189,3 +190,45 @@ def is_consumer_node_depthwise_conv2d(node):
             return True
 
     return False
+
+
+def build_avg_pool_2d_common(
+    node: torch.fx.Node,
+    tosa_graph: ts.TosaSerializer,
+    input_tensor: TosaArg,
+    kernel_size: list,
+    stride: list,
+    padding: list,
+    is_quant_node: bool,
+    output: TosaArg,
+):
+    accumulator_type = input_tensor.dtype
+
+    if is_quant_node:
+        # Accumulator type always is int32 when input tensor is an integer type.
+        accumulator_type = ts.DType.INT32
+
+    # Initialize zero point to zero.
+    input_zp = 0
+    output_zp = 0
+
+    if is_quant_node:
+        input_zp = get_quant_node_args(node.args[0]).zp
+        output_zp = get_quant_node_args(list(node.users)[0]).zp
+
+    attr = ts.TosaSerializerAttribute()
+    attr.PoolAttribute(
+        kernel=kernel_size,
+        stride=stride,
+        pad=padding,
+        input_zp=input_zp,
+        output_zp=output_zp,
+        accum_dtype=accumulator_type,
+    )
+
+    tosa_graph.addOperator(
+        TosaOp.Op().AVG_POOL2D,
+        [input_tensor.name],
+        [output.name],
+        attr,
+    )
diff --git a/backends/vulkan/partitioner/vulkan_partitioner.py b/backends/vulkan/partitioner/vulkan_partitioner.py
index 90d858f15c..78984f34e5 100644
--- a/backends/vulkan/partitioner/vulkan_partitioner.py
+++ b/backends/vulkan/partitioner/vulkan_partitioner.py
@@ -4,14 +4,19 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+# pyre-strict
+
 import logging
-from typing import Any, Dict, final, List, Optional
+from typing import Any, Dict, final, List, Mapping, Optional
 
 import executorch.backends.vulkan.serialization.vulkan_graph_schema as vk_graph_schema
 
 import torch
 
-from executorch.backends.vulkan.partitioner.supported_ops import enumerate_supported_ops
+from executorch.backends.vulkan.partitioner.supported_ops import (
+    enumerate_supported_ops,
+    OpList,
+)
 from executorch.backends.vulkan.vulkan_preprocess import VulkanBackend
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from executorch.exir.backend.partitioner import (
@@ -30,12 +35,13 @@
 
 
 class VulkanSupportedOperators(OperatorSupportBase):
-    _ops = enumerate_supported_ops()
+    _ops: OpList = enumerate_supported_ops()
 
-    def __init__(self, require_dynamic_shape: bool = False):
+    def __init__(self, require_dynamic_shape: bool = False) -> None:
         super().__init__()
         self.require_dynamic_shapes = require_dynamic_shape
 
+    # pyre-ignore
     def node_val_is_compatible(self, node_val: Any) -> bool:
         # Skip nodes that don't have a value
         if node_val is None:
@@ -94,7 +100,17 @@ def is_linear_permute(self, node: torch.fx.Node) -> bool:
 
         return False
 
-    def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
+    def is_node_supported(
+        self, submodules: Mapping[str, torch.nn.Module], node: torch.fx.Node
+    ) -> bool:
+        r = self._is_node_supported(submodules, node)
+        if not r and node.op == "call_function":
+            logging.info(f"Skipping node in Vulkan partitioning: {node.format_node()}")
+        return r
+
+    def _is_node_supported(
+        self, submodules: Mapping[str, torch.nn.Module], node: torch.fx.Node
+    ) -> bool:
         if self.is_linear_permute(node):
             return True
 
diff --git a/backends/vulkan/runtime/api/QueryPool.cpp b/backends/vulkan/runtime/api/QueryPool.cpp
index e11c44c357..ec6e15404c 100644
--- a/backends/vulkan/runtime/api/QueryPool.cpp
+++ b/backends/vulkan/runtime/api/QueryPool.cpp
@@ -199,19 +199,22 @@ std::string QueryPool::generate_string_report() {
   std::stringstream ss;
 
   int kernel_name_w = 40;
-  int global_size_w = 15;
+  int global_size_w = 25;
+  int local_size_w = 25;
   int duration_w = 25;
 
   ss << std::left;
   ss << std::setw(kernel_name_w) << "Kernel Name";
-  ss << std::setw(global_size_w) << "Workgroup Size";
+  ss << std::setw(global_size_w) << "Global Workgroup Size";
+  ss << std::setw(local_size_w) << "Local Workgroup Size";
   ss << std::right << std::setw(duration_w) << "Duration (ns)";
   ss << std::endl;
 
   ss << std::left;
   ss << std::setw(kernel_name_w) << "===========";
-  ss << std::setw(global_size_w) << "==============";
-  ss << std::right << std::setw(duration_w) << "===========";
+  ss << std::setw(global_size_w) << "=====================";
+  ss << std::setw(local_size_w) << "====================";
+  ss << std::right << std::setw(duration_w) << "=============";
   ss << std::endl;
 
   for (ShaderDuration& entry : shader_durations_) {
@@ -221,6 +224,7 @@ std::string QueryPool::generate_string_report() {
     ss << std::left;
     ss << std::setw(kernel_name_w) << entry.kernel_name;
     ss << std::setw(global_size_w) << stringize(entry.global_workgroup_size);
+    ss << std::setw(local_size_w) << stringize(entry.local_workgroup_size);
     ss << std::right << std::setw(duration_w) << exec_duration_ns.count();
     ss << std::endl;
   }
diff --git a/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp b/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp
index 471fd2c007..21493833e9 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp
@@ -99,14 +99,11 @@ ValueRef prepack_biases(
 
   api::ShaderInfo shader = get_nchw_to_tensor_shader(*t);
 
-  api::utils::uvec3 global_size = t->image_extents();
-  api::utils::uvec3 local_size = adaptive_work_group_size(global_size);
-
   graph.prepack_nodes().emplace_back(new PrepackNode(
       graph,
       shader,
-      global_size,
-      local_size,
+      graph.create_global_wg_size(v),
+      graph.create_local_wg_size(v),
       vref,
       v,
       {t->sizes_ubo()},
@@ -203,17 +200,14 @@ ValueRef prepack_weights(
       final_sizes, graph.dtype_of(vref), api::kTexture2D, api::kChannelsPacked);
   vTensorPtr t = graph.get_tensor(v);
 
-  api::utils::uvec3 global_size = t->image_extents();
-  api::utils::uvec3 local_size = adaptive_work_group_size(global_size);
-
   api::ShaderInfo shader =
       get_conv2d_shader(graph, *t, /*prepack_weights = */ true, method, vref);
 
   graph.prepack_nodes().emplace_back(new PrepackNode(
       graph,
       shader,
-      global_size,
-      local_size,
+      graph.create_global_wg_size(v),
+      graph.create_local_wg_size(v),
       vref,
       v,
       {t->sizes_ubo(),
@@ -343,9 +337,6 @@ void add_conv2d_node(
   }
   check_conv_args(*t_in, *t_out);
 
-  api::utils::uvec3 global_size = t_out->image_extents();
-  api::utils::uvec3 local_size = adaptive_work_group_size(global_size);
-
   Kernel2dParams kernel_params = create_kernel2d_params(
       graph,
       weight,
@@ -366,8 +357,8 @@ void add_conv2d_node(
   graph.execute_nodes().emplace_back(new ExecuteNode(
       graph,
       shader,
-      global_size,
-      local_size,
+      graph.create_global_wg_size(out),
+      graph.create_local_wg_size(out),
       // Inputs and Outputs
       {{out, api::MemoryAccessType::WRITE},
        {{arg_in, arg_weight, arg_bias}, api::MemoryAccessType::READ}},
diff --git a/backends/vulkan/test/test_vulkan_delegate.py b/backends/vulkan/test/test_vulkan_delegate.py
index 412dbe9df6..d42be482a6 100644
--- a/backends/vulkan/test/test_vulkan_delegate.py
+++ b/backends/vulkan/test/test_vulkan_delegate.py
@@ -4,6 +4,8 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+# pyre-unsafe
+
 import ctypes
 import unittest
 from typing import Tuple
@@ -117,7 +119,9 @@ def run_test(memory_layout):
             program: ExportedProgram = export(
                 model, sample_inputs, dynamic_shapes=dynamic_shapes
             )
-            edge_program: EdgeProgramManager = to_edge(program)
+            edge_program: EdgeProgramManager = to_edge(
+                program, compile_config=self._edge_compile_config
+            )
 
             edge_program = edge_program.transform([I64toI32(), MeanToSumDiv()])
 
diff --git a/build/cmake_deps.toml b/build/cmake_deps.toml
index 91174c08f7..80abd46409 100644
--- a/build/cmake_deps.toml
+++ b/build/cmake_deps.toml
@@ -262,14 +262,6 @@ deps = [
   "executorch_no_prim_ops",
 ]
 
-[targets.xnnpack_dynamic_quant_utils]
-buck_targets = [
-  "//backends/xnnpack:dynamic_quant_utils",
-]
-filters = [
-  ".cpp$",
-]
-
 [targets.xnnpack_schema]
 buck_targets = [
   "//backends/xnnpack/serialization:xnnpack_flatbuffer_header",
diff --git a/exir/capture/_config.py b/exir/capture/_config.py
index dd0ed94094..d959f10403 100644
--- a/exir/capture/_config.py
+++ b/exir/capture/_config.py
@@ -4,6 +4,8 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+# pyre-unsafe
+
 from dataclasses import dataclass, field
 from typing import Dict, List, Optional, Union
 
@@ -38,7 +40,8 @@ class EdgeCompileConfig:
     _use_edge_ops: bool = True
     _skip_type_promotion: bool = False
     # TODO(gasoonjia): remove this
-    _skip_dim_order: bool = False
+    # TODO(T192537614): reenable dim order as default
+    _skip_dim_order: bool = True
 
 
 @compatibility(is_backward_compatible=False)
diff --git a/exir/emit/test/test_emit.py b/exir/emit/test/test_emit.py
index 06e9e589e6..ca8ffde813 100644
--- a/exir/emit/test/test_emit.py
+++ b/exir/emit/test/test_emit.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-# pye-strict
+# pyre-unsafe
 
 import typing
 import unittest
@@ -866,7 +866,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         # Success if you use dim_order
         to_edge(
             export(model, inputs),
-            compile_config=exir.EdgeCompileConfig(_check_ir_validity=False),
+            compile_config=exir.EdgeCompileConfig(
+                _check_ir_validity=False, _skip_dim_order=False
+            ),
         ).to_executorch()
 
     def test_emit_multiple_entry_points(self) -> None:
diff --git a/exir/sym_util.py b/exir/sym_util.py
index 2a55d51a81..64f4b64a32 100644
--- a/exir/sym_util.py
+++ b/exir/sym_util.py
@@ -4,7 +4,9 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-from typing import List, Optional, Set, Union
+# pyre-strict
+
+from typing import Iterable, List, Optional, Set, Union
 
 import sympy
 
@@ -39,12 +41,14 @@ def eval_upper_bound(maybe_symint: Union[int, torch.SymInt]) -> int:
     node = maybe_symint.node
     shape_env = node.shape_env
     expr = node.expr
-    var_range: ValueRanges = bound_sympy(expr, shape_env.var_to_range)
+    var_range: ValueRanges = bound_sympy(  # pyre-ignore[24]
+        expr, shape_env.var_to_range
+    )
     upper_bound = var_range.upper
 
     # This import is needed temporarily until we update the pinned torch version.
     try:
-        from torch.utils._sympy.numbers import int_oo  # @manual # pyre-ignore
+        from torch.utils._sympy.numbers import int_oo  # @manual
     except ImportError:
         int_oo = None
 
@@ -54,15 +58,15 @@
             concrete_upper, int
         ), f"Expect upper bound to be a concrete int but got {concrete_upper}"
         return concrete_upper
-    elif int_oo is not None and upper_bound is int_oo:  # pyre-ignore
-        return int_oo  # pyre-ignore
+    elif int_oo is not None and upper_bound is int_oo:
+        return int_oo
     else:
         raise RuntimeError(
             f"Expect upper bound to be sympy.Integer or int_oo. but got {upper_bound}"
         )
 
 
-def eval_shape(shape):
+def eval_shape(shape: Iterable[Union[int, torch.SymInt]]):  # pyre-ignore[3]
     """
     Shape maybe immutable so we return a new shape. Return None for dimensions that are unbacked
     e.g. first dimension of nonzero's output.
@@ -73,14 +77,16 @@ def eval_shape(shape):
     return new_shape
 
 
-def eval_shape_upper_bound(shape) -> List[int]:
+def eval_shape_upper_bound(shape: Iterable[Union[int, torch.SymInt]]) -> List[int]:
     new_shape = []
     for _, s in enumerate(shape):
         new_shape.append(eval_upper_bound(s))
     return new_shape
 
 
-def collect_free_symbols(shape) -> Set[sympy.Symbol]:
+def collect_free_symbols(
+    shape: Iterable[Union[int, torch.SymInt]]
+) -> Set[sympy.Symbol]:
     symset = set()
     for sz in shape:
         if not isinstance(sz, torch.SymInt):
diff --git a/exir/tests/test_memory_format_ops_pass_utils.py b/exir/tests/test_memory_format_ops_pass_utils.py
index 6a97fb96ea..93d790d491 100644
--- a/exir/tests/test_memory_format_ops_pass_utils.py
+++ b/exir/tests/test_memory_format_ops_pass_utils.py
@@ -4,12 +4,15 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+# pyre-unsafe
+
 import unittest
 from dataclasses import dataclass
 from typing import Any, Tuple
 
 import torch
 from executorch.exir import to_edge
+from executorch.exir.capture._config import EdgeCompileConfig
 
 from executorch.exir.dim_order_utils import (
     is_channel_last_dim_order,
@@ -70,7 +73,7 @@ def memory_format_test_runner(
             edge_op_str
         ).run(before.graph_module.code)
 
-        epm = to_edge(before)
+        epm = to_edge(before, compile_config=EdgeCompileConfig(_skip_dim_order=False))
 
         # check op strings
         FileCheck().check_not(aten_op_str).check_count(
diff --git a/exir/tests/test_passes.py b/exir/tests/test_passes.py
index 292cdfd471..61d3af8afb 100644
--- a/exir/tests/test_passes.py
+++ b/exir/tests/test_passes.py
@@ -1134,7 +1134,10 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
 
         add = Add()
 
-        edge = to_edge(export(add, (torch.ones(1),)))
+        edge = to_edge(
+            export(add, (torch.ones(1),)),
+            compile_config=EdgeCompileConfig(_skip_dim_order=False),
+        )
         edge = edge.transform([ScalarToTensorPass(), RemoveMixedTypeOperators()])
         exported_program = lift_constant_tensor_pass(edge.exported_program())
 
diff --git a/exir/verification/test/test_verifier.py b/exir/verification/test/test_verifier.py
index eaf05c6b63..b2e31dbc59 100644
--- a/exir/verification/test/test_verifier.py
+++ b/exir/verification/test/test_verifier.py
@@ -4,8 +4,11 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+# pyre-unsafe
+
 import unittest
 from contextlib import contextmanager
+from typing import Any
 
 import torch
 from executorch.exir import EdgeCompileConfig, to_edge
@@ -20,7 +23,7 @@
 
 class TestEdgeDialectVerifier(unittest.TestCase):
     @contextmanager
-    def assertNotRaises(self, exc_type):
+    def assertNotRaises(self, exc_type: Any) -> Any:
         try:
             yield None
         except exc_type:
@@ -81,8 +84,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
 
         export_model = export(m, example_input)
 
-        # In default we use dim order.
-        compile_config_without_edge_op = EdgeCompileConfig(_use_edge_ops=False)
+        compile_config_without_edge_op = EdgeCompileConfig(
+            _use_edge_ops=False, _skip_dim_order=False
+        )
 
         edge_manager = to_edge(
             export_model, compile_config=compile_config_without_edge_op
@@ -128,8 +132,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
 
         export_model = export(m, example_input)
 
-        # In default we use dim order.
-        compile_config_with_dim_order = EdgeCompileConfig()
+        compile_config_with_dim_order = EdgeCompileConfig(_skip_dim_order=False)
         compile_config_with_stride = EdgeCompileConfig(_skip_dim_order=True)
 
         dim_order_edge_model = to_edge(
diff --git a/kernels/quantized/cpu/op_dequantize.cpp b/kernels/quantized/cpu/op_dequantize.cpp
index 5310feaaef..0722a733cb 100644
--- a/kernels/quantized/cpu/op_dequantize.cpp
+++ b/kernels/quantized/cpu/op_dequantize.cpp
@@ -196,8 +196,8 @@ Tensor& dequantize_per_channel_out(
       "Failed to resize out Tensor in dequantize_per_channel_out");
 
   ET_CHECK_MSG(
-      scale.scalar_type() == ScalarType::Double,
-      "scale.scalar_type() %" PRId8 " is not double type",
+      scale.scalar_type() == ScalarType::Float,
+      "scale.scalar_type() %" PRId8 " is not float type",
       static_cast<int8_t>(scale.scalar_type()));
 
   ET_CHECK_MSG(
@@ -224,15 +224,15 @@ Tensor& dequantize_per_channel_out(
       input, quant_min, quant_max, dtype, out_dtype, out);
 
   // a list contains all dimensions except axis
-  int64_t dims[input.dim() - 1];
+  int64_t dims[kTensorDimensionLimit];
   for (int64_t i = 0; i < input.dim() - 1; i++) {
     if (i < axis) {
       dims[i] = i;
     } else {
-      dims[i] = i - 1;
+      dims[i] = i + 1;
     }
   }
-  const double* scale_data = scale.const_data_ptr<double>();
+  const float* scale_data = scale.const_data_ptr<float>();
   const int64_t* zero_point_data;
   if (opt_zero_points.has_value()) {
     zero_point_data = opt_zero_points.value().const_data_ptr<int64_t>();
@@ -253,8 +253,34 @@
   // in other words you are dequantizing in_data[in_ix]
 #define DEQUANTIZE_IMPL(CTYPE_IN, CTYPE_OUT, out_dtype) \
   case ScalarType::out_dtype: \
+    if (input.dim() == 1) { \
+      auto* out_data_ptr = out.mutable_data_ptr<CTYPE_OUT>(); \
+      const auto* input_data_ptr = input.const_data_ptr<CTYPE_IN>(); \
+      ET_CHECK_MSG( \
+          axis == 0, "Axis must be 0 for a single dimensional tensor"); \
+      const optional<int64_t> dim; \
+      apply_over_dim( \
+          [input_data_ptr, out_data_ptr, scale_data, zero_point_data]( \
+              size_t numel, size_t stride, size_t base_ix) { \
+            for (size_t i = 0; i < numel; i++) { \
+              size_t current_ix = base_ix * stride + i; \
+              float _scale = scale_data[current_ix]; \
+              int64_t zero_point = 0; \
+              if (zero_point_data != nullptr) { \
+                zero_point = zero_point_data[current_ix]; \
+              } \
+              out_data_ptr[current_ix] = \
+                  static_cast<CTYPE_OUT>( \
+                      input_data_ptr[current_ix] - zero_point) * \
+                  _scale; \
+            } \
+          }, \
+          input, \
+          dim); \
+      break; \
+    } \
   for (size_t channel_ix = 0; channel_ix < input.size(axis); ++channel_ix) { \
-    double _scale = scale_data[channel_ix]; \
+    float _scale = scale_data[channel_ix]; \
    int64_t _zero_point = 0; \
    if (zero_point_data != nullptr) { \
       _zero_point = zero_point_data[channel_ix]; \
diff --git a/kernels/quantized/test/op_dequantize_test.cpp b/kernels/quantized/test/op_dequantize_test.cpp
index 1004126d04..4cdebb662b 100644
--- a/kernels/quantized/test/op_dequantize_test.cpp
+++ b/kernels/quantized/test/op_dequantize_test.cpp
@@ -116,11 +116,11 @@ TEST(OpDequantizeOutTest, TensorArgOverload) {
 
 TEST(OpDequantizeOutTest, DequantizePerChannel) {
   TensorFactory<ScalarType::Byte> tf_byte;
-  TensorFactory<ScalarType::Double> tf_double;
+  TensorFactory<ScalarType::Float> tf_float;
   TensorFactory<ScalarType::Long> tf_long;
 
   Tensor input = tf_byte.full({3, 2}, 100);
-  Tensor scale = tf_double.make({2}, {0.5, 1});
+  Tensor scale = tf_float.make({2}, {0.5, 1});
   Tensor zero_point = tf_long.make({2}, {30, 60});
   int64_t quant_min = 0;
   int64_t quant_max = 255;
@@ -145,7 +145,7 @@ TEST(OpDequantizeOutTest, DequantizePerChannel) {
 
   // Test with a different axis
   out = tfo.zeros({3, 2});
-  scale = tf_double.make({3}, {0.5, 0.75, 1});
+  scale = tf_float.make({3}, {0.5, 0.75, 1});
   zero_point = tf_long.make({3}, {30, 50, 60});
   // (100 - 30) * 0.5
   // (100 - 50) * 0.75
@@ -163,4 +163,25 @@ TEST(OpDequantizeOutTest, DequantizePerChannel) {
       out);
 
   EXPECT_TENSOR_EQ(out, expected);
+
+  // Test with a 1-dimensional tensor
+  out = tfo.zeros({3});
+  input = tf_byte.make({3}, {100, 100, 100});
+  scale = tf_float.make({3}, {0.5, 0.75, 1});
+  zero_point = tf_long.make({3}, {30, 50, 60});
+  // (100 - 30) * 0.5
+  // (100 - 50) * 0.75
+  // (100 - 60) * 1
+  expected = tfo.make({3}, {35, 37.5, 40});
+  dequantize_per_channel_out(
+      input,
+      scale,
+      zero_point,
+      /*axis=*/0,
+      quant_min,
+      quant_max,
+      ScalarType::Byte,
+      optional<ScalarType>(),
+      out);
+  EXPECT_TENSOR_EQ(out, expected);
 }
diff --git a/shim/xplat/executorch/build/env_interface.bzl b/shim/xplat/executorch/build/env_interface.bzl
index de82ad3ef9..9a97be98e8 100644
--- a/shim/xplat/executorch/build/env_interface.bzl
+++ b/shim/xplat/executorch/build/env_interface.bzl
@@ -129,6 +129,7 @@ def _remove_unsupported_kwargs(kwargs):
     kwargs.pop("tags", None)  # tags = ["long_running"] doesn't work in oss
     kwargs.pop("types", None)  # will have to find a different way to handle .pyi files in oss
     kwargs.pop("resources", None)  # doesn't support resources in python_library/python_binary yet
+    kwargs.pop("feature", None)  # internal-only, used for Product-Feature Hierarchy (PFH)
     return kwargs
 
 def _patch_headers(kwargs):
diff --git a/shim/xplat/executorch/codegen/codegen.bzl b/shim/xplat/executorch/codegen/codegen.bzl
index 3fadd20f0d..34a8f81e87 100644
--- a/shim/xplat/executorch/codegen/codegen.bzl
+++ b/shim/xplat/executorch/codegen/codegen.bzl
@@ -350,7 +350,7 @@ def copy_portable_header_files(name):
         default_outs = ["."],
     )
 
-def build_portable_lib(name, oplist_header_name):
+def build_portable_lib(name, oplist_header_name, feature = None):
     """Build portable lib from source. We build from source so that the generated header file,
     selected_op_variants.h, can be used to selectively build the lib for different dtypes.
     """
@@ -400,6 +400,7 @@ def build_portable_lib(name, oplist_header_name):
         # via static initializers that run at program startup.
         # @lint-ignore BUCKLINT link_whole
         link_whole = True,
+        feature = feature,
     )
 
 def executorch_generated_lib(
@@ -421,7 +422,8 @@ def executorch_generated_lib(
         platforms = get_default_executorch_platforms(),
        compiler_flags = [],
        kernel_deps = [],
-        dtype_selective_build = False):
+        dtype_selective_build = False,
+        feature = None):
     """Emits 0-3 C++ library targets (in fbcode or xplat) containing code to dispatch
     the operators specified in the provided yaml files.
 
@@ -469,6 +471,7 @@ def executorch_generated_lib(
         fbcode_deps: Additional fbcode deps, can be used to provide custom operator library.
         compiler_flags: compiler_flags args to runtime.cxx_library
         dtype_selective_build: In additional to operator selection, dtype selective build further selects the dtypes for each operator. Can be used with model or dict selective build APIs, where dtypes can be specified. Note: this is only available in xplat.
+        feature: Product-Feature Hierarchy (PFH). For internal use only, required for FoA in production. See: https://fburl.com/wiki/2wzjpyqy
     """
     if functions_yaml_target and aten_mode:
         fail("{} is providing functions_yaml_target in ATen mode, it will be ignored. `native_functions.yaml` will be the source of truth.".format(name))
@@ -560,7 +563,7 @@ def executorch_generated_lib(
 
     # Build portable lib.
     portable_lib_name = name + "_portable_lib"
-    build_portable_lib(portable_lib_name, oplist_header_name)
+    build_portable_lib(portable_lib_name, oplist_header_name, feature)
     portable_lib = [":{}".format(portable_lib_name)]
 
     # Exports headers that declare the function signatures of the C++ functions
@@ -583,6 +586,7 @@ def executorch_generated_lib(
             "//executorch/codegen:macros",
             "//executorch/runtime/kernel:kernel_runtime_context" + aten_suffix,
         ],
+        feature = feature,
     )
 
     if name in libs:
@@ -624,6 +628,7 @@ def executorch_generated_lib(
             # of //executorch.
             _is_external_target = True,
             platforms = platforms,
+            feature = feature,
         )
 
     if custom_ops_yaml_target and custom_ops_requires_aot_registration:
diff --git a/version.txt b/version.txt
index c181bf5996..f28aaa5cd4 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-0.3.0a0
+0.4.0a0