Commit
2024-06-18 nightly release (c6fb9da)
pytorchbot committed Jun 18, 2024
1 parent e9be241 commit f774775
Showing 20 changed files with 193 additions and 122 deletions.
2 changes: 1 addition & 1 deletion backends/arm/operators/op_avg_pool2d.py
@@ -10,8 +10,8 @@
    NodeVisitor,
    register_node_visitor,
)
from executorch.backends.arm.operators.op_common import build_avg_pool_2d_common
from executorch.backends.arm.tosa_mapping import TosaArg
from executorch.backends.arm.tosa_utils import build_avg_pool_2d_common


@register_node_visitor
52 changes: 0 additions & 52 deletions backends/arm/operators/op_common.py

This file was deleted.

2 changes: 1 addition & 1 deletion backends/arm/operators/op_mean_dim.py
@@ -10,8 +10,8 @@
    NodeVisitor,
    register_node_visitor,
)
from executorch.backends.arm.operators.op_common import build_avg_pool_2d_common
from executorch.backends.arm.tosa_mapping import TosaArg
from executorch.backends.arm.tosa_utils import build_avg_pool_2d_common


@register_node_visitor
51 changes: 47 additions & 4 deletions backends/arm/tosa_utils.py
@@ -6,11 +6,12 @@
import logging
import os

import executorch.backends.arm.tosa_quant_utils as tosa_quant_utils

import numpy as np
import serializer.tosa_serializer as ts
import torch
from executorch.backends.arm.tosa_mapping import TosaArg

from executorch.backends.arm.tosa_quant_utils import get_quant_node_args, q_op
from executorch.exir.dialects._ops import ops as exir_ops
from serializer.tosa_serializer import TosaOp

@@ -158,7 +159,7 @@ def is_bias_node_for_addmm(node):
    # consumer node is addmm
    is_rank2_linear_bias = (
        consumer_node.target == exir_ops.edge.aten.addmm.default
        and list(consumer_node.users)[0].target == tosa_quant_utils.q_op
        and list(consumer_node.users)[0].target == q_op
    )

    # rank>2 linear layers
@@ -170,7 +171,7 @@
    ):
        consumer_consumer_node = list(consumer_node.users)[0]
        is_rank_greater_than_2_linear_bias = (
            list(consumer_consumer_node.users)[0].target == tosa_quant_utils.q_op
            list(consumer_consumer_node.users)[0].target == q_op
        )

    return is_rank2_linear_bias or is_rank_greater_than_2_linear_bias
@@ -189,3 +190,45 @@ def is_consumer_node_depthwise_conv2d(node):
        return True

    return False


def build_avg_pool_2d_common(
    node: torch.fx.Node,
    tosa_graph: ts.TosaSerializer,
    input_tensor: TosaArg,
    kernel_size: list,
    stride: list,
    padding: list,
    is_quant_node: bool,
    output: TosaArg,
):
    accumulator_type = input_tensor.dtype

    if is_quant_node:
        # The accumulator type is always int32 when the input tensor is an integer type.
        accumulator_type = ts.DType.INT32

    # Initialize zero points to zero.
    input_zp = 0
    output_zp = 0

    if is_quant_node:
        input_zp = get_quant_node_args(node.args[0]).zp
        output_zp = get_quant_node_args(list(node.users)[0]).zp

    attr = ts.TosaSerializerAttribute()
    attr.PoolAttribute(
        kernel=kernel_size,
        stride=stride,
        pad=padding,
        input_zp=input_zp,
        output_zp=output_zp,
        accum_dtype=accumulator_type,
    )

    tosa_graph.addOperator(
        TosaOp.Op().AVG_POOL2D,
        [input_tensor.name],
        [output.name],
        attr,
    )
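
For orientation: this commit deletes backends/arm/operators/op_common.py and moves build_avg_pool_2d_common into tosa_utils, so op_avg_pool2d and op_mean_dim now import the shared helper from there. A rough sketch of how a visitor might call it after the move — the visitor class, define_node signature, and the literal kernel/stride/padding values are illustrative assumptions, not code from this commit:

# Sketch only: post-refactor import and call site (names below are hypothetical).
from executorch.backends.arm.operators.node_visitor import (
    NodeVisitor,
    register_node_visitor,
)
from executorch.backends.arm.tosa_utils import build_avg_pool_2d_common


@register_node_visitor
class ExampleAvgPool2dVisitor(NodeVisitor):  # hypothetical visitor
    target = "aten.avg_pool2d.default"

    def define_node(self, node, tosa_graph, inputs, output, is_quant_node):
        build_avg_pool_2d_common(
            node,
            tosa_graph,
            inputs[0],       # input tensor
            [2, 2],          # kernel_size (placeholder)
            [2, 2],          # stride (placeholder)
            [0, 0, 0, 0],    # padding (placeholder)
            is_quant_node,
            output,
        )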
26 changes: 21 additions & 5 deletions backends/vulkan/partitioner/vulkan_partitioner.py
@@ -4,14 +4,19 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# pyre-strict

import logging
from typing import Any, Dict, final, List, Optional
from typing import Any, Dict, final, List, Mapping, Optional

import executorch.backends.vulkan.serialization.vulkan_graph_schema as vk_graph_schema

import torch

from executorch.backends.vulkan.partitioner.supported_ops import enumerate_supported_ops
from executorch.backends.vulkan.partitioner.supported_ops import (
    enumerate_supported_ops,
    OpList,
)
from executorch.backends.vulkan.vulkan_preprocess import VulkanBackend
from executorch.exir.backend.compile_spec_schema import CompileSpec
from executorch.exir.backend.partitioner import (
@@ -30,12 +35,13 @@


class VulkanSupportedOperators(OperatorSupportBase):
    _ops = enumerate_supported_ops()
    _ops: OpList = enumerate_supported_ops()

    def __init__(self, require_dynamic_shape: bool = False):
    def __init__(self, require_dynamic_shape: bool = False) -> None:
        super().__init__()
        self.require_dynamic_shapes = require_dynamic_shape

    # pyre-ignore
    def node_val_is_compatible(self, node_val: Any) -> bool:
        # Skip nodes that don't have a value
        if node_val is None:
@@ -94,7 +100,17 @@ def is_linear_permute(self, node: torch.fx.Node) -> bool:

        return False

    def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
    def is_node_supported(
        self, submodules: Mapping[str, torch.nn.Module], node: torch.fx.Node
    ) -> bool:
        r = self._is_node_supported(submodules, node)
        if not r and node.op == "call_function":
            logging.info(f"Skipping node in Vulkan partitioning: {node.format_node()}")
        return r

    def _is_node_supported(
        self, submodules: Mapping[str, torch.nn.Module], node: torch.fx.Node
    ) -> bool:
        if self.is_linear_permute(node):
            return True

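
The partitioner change above wraps the support check so that rejected call_function nodes are logged before being skipped. A self-contained sketch of the same wrapper pattern using only torch.fx — the class name and the trivial inner check are illustrative, not the Vulkan partitioner's real logic:

import logging

import torch
from torch.fx.passes.operator_support import OperatorSupportBase


class LoggingOperatorSupport(OperatorSupportBase):  # hypothetical name
    def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
        supported = self._is_node_supported(submodules, node)
        if not supported and node.op == "call_function":
            # Surface the skipped node so partitioning decisions are visible.
            logging.info(f"Skipping node: {node.format_node()}")
        return supported

    def _is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
        # Placeholder check; the real partitioner inspects node.target,
        # tensor metadata, memory layout, and so on.
        return node.target is torch.ops.aten.add.Tensor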
12 changes: 8 additions & 4 deletions backends/vulkan/runtime/api/QueryPool.cpp
@@ -199,19 +199,22 @@ std::string QueryPool::generate_string_report() {
  std::stringstream ss;

  int kernel_name_w = 40;
  int global_size_w = 15;
  int global_size_w = 25;
  int local_size_w = 25;
  int duration_w = 25;

  ss << std::left;
  ss << std::setw(kernel_name_w) << "Kernel Name";
  ss << std::setw(global_size_w) << "Workgroup Size";
  ss << std::setw(global_size_w) << "Global Workgroup Size";
  ss << std::setw(local_size_w) << "Local Workgroup Size";
  ss << std::right << std::setw(duration_w) << "Duration (ns)";
  ss << std::endl;

  ss << std::left;
  ss << std::setw(kernel_name_w) << "===========";
  ss << std::setw(global_size_w) << "==============";
  ss << std::right << std::setw(duration_w) << "===========";
  ss << std::setw(global_size_w) << "=====================";
  ss << std::setw(local_size_w) << "====================";
  ss << std::right << std::setw(duration_w) << "=============";
  ss << std::endl;

  for (ShaderDuration& entry : shader_durations_) {
@@ -221,6 +224,7 @@ std::string QueryPool::generate_string_report() {
    ss << std::left;
    ss << std::setw(kernel_name_w) << entry.kernel_name;
    ss << std::setw(global_size_w) << stringize(entry.global_workgroup_size);
    ss << std::setw(local_size_w) << stringize(entry.local_workgroup_size);
    ss << std::right << std::setw(duration_w) << exec_duration_ns.count();
    ss << std::endl;
  }
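
With the widened columns and the new local-workgroup column, a report row would look roughly like this (widths approximate, values invented for illustration):

Kernel Name                             Global Workgroup Size    Local Workgroup Size              Duration (ns)
===========                             =====================    ====================              =============
conv2d                                  {64, 64, 16}             {8, 8, 1}                                123456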
21 changes: 6 additions & 15 deletions backends/vulkan/runtime/graph/ops/impl/Convolution.cpp
@@ -99,14 +99,11 @@ ValueRef prepack_biases(

  api::ShaderInfo shader = get_nchw_to_tensor_shader(*t);

  api::utils::uvec3 global_size = t->image_extents();
  api::utils::uvec3 local_size = adaptive_work_group_size(global_size);

  graph.prepack_nodes().emplace_back(new PrepackNode(
      graph,
      shader,
      global_size,
      local_size,
      graph.create_global_wg_size(v),
      graph.create_local_wg_size(v),
      vref,
      v,
      {t->sizes_ubo()},
@@ -203,17 +200,14 @@ ValueRef prepack_weights(
      final_sizes, graph.dtype_of(vref), api::kTexture2D, api::kChannelsPacked);
  vTensorPtr t = graph.get_tensor(v);

  api::utils::uvec3 global_size = t->image_extents();
  api::utils::uvec3 local_size = adaptive_work_group_size(global_size);

  api::ShaderInfo shader =
      get_conv2d_shader(graph, *t, /*prepack_weights = */ true, method, vref);

  graph.prepack_nodes().emplace_back(new PrepackNode(
      graph,
      shader,
      global_size,
      local_size,
      graph.create_global_wg_size(v),
      graph.create_local_wg_size(v),
      vref,
      v,
      {t->sizes_ubo(),
@@ -343,9 +337,6 @@ void add_conv2d_node(
  }
  check_conv_args(*t_in, *t_out);

  api::utils::uvec3 global_size = t_out->image_extents();
  api::utils::uvec3 local_size = adaptive_work_group_size(global_size);

  Kernel2dParams kernel_params = create_kernel2d_params(
      graph,
      weight,
@@ -366,8 +357,8 @@
  graph.execute_nodes().emplace_back(new ExecuteNode(
      graph,
      shader,
      global_size,
      local_size,
      graph.create_global_wg_size(out),
      graph.create_local_wg_size(out),
      // Inputs and Outputs
      {{out, api::MemoryAccessType::WRITE},
       {{arg_in, arg_weight, arg_bias}, api::MemoryAccessType::READ}},
6 changes: 5 additions & 1 deletion backends/vulkan/test/test_vulkan_delegate.py
@@ -4,6 +4,8 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# pyre-unsafe

import ctypes
import unittest
from typing import Tuple
@@ -117,7 +119,9 @@ def run_test(memory_layout):
            program: ExportedProgram = export(
                model, sample_inputs, dynamic_shapes=dynamic_shapes
            )
            edge_program: EdgeProgramManager = to_edge(program)
            edge_program: EdgeProgramManager = to_edge(
                program, compile_config=self._edge_compile_config
            )

            edge_program = edge_program.transform([I64toI32(), MeanToSumDiv()])

8 changes: 0 additions & 8 deletions build/cmake_deps.toml
@@ -262,14 +262,6 @@ deps = [
"executorch_no_prim_ops",
]

[targets.xnnpack_dynamic_quant_utils]
buck_targets = [
"//backends/xnnpack:dynamic_quant_utils",
]
filters = [
".cpp$",
]

[targets.xnnpack_schema]
buck_targets = [
"//backends/xnnpack/serialization:xnnpack_flatbuffer_header",
5 changes: 4 additions & 1 deletion exir/capture/_config.py
@@ -4,6 +4,8 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# pyre-unsafe

from dataclasses import dataclass, field
from typing import Dict, List, Optional, Union

@@ -38,7 +40,8 @@ class EdgeCompileConfig:
    _use_edge_ops: bool = True
    _skip_type_promotion: bool = False
    # TODO(gasoonjia): remove this
    _skip_dim_order: bool = False
    # TODO(T192537614): re-enable dim order as default
    _skip_dim_order: bool = True


@compatibility(is_backward_compatible=False)
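
Because _skip_dim_order now defaults to True, callers that still want dim-order IR must opt back in explicitly, as the test_emit change below does. A minimal sketch under that assumption — the toy module is illustrative:

import torch
from executorch import exir
from executorch.exir import to_edge
from torch.export import export


class TinyModel(torch.nn.Module):  # illustrative module
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return x + 1.0


# Opt back in to dim-order IR now that it is skipped by default.
edge = to_edge(
    export(TinyModel(), (torch.randn(2, 3),)),
    compile_config=exir.EdgeCompileConfig(_skip_dim_order=False),
)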
6 changes: 4 additions & 2 deletions exir/emit/test/test_emit.py
@@ -4,7 +4,7 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# pye-strict
# pyre-unsafe

import typing
import unittest
@@ -866,7 +866,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Success if you use dim_order
        to_edge(
            export(model, inputs),
            compile_config=exir.EdgeCompileConfig(_check_ir_validity=False),
            compile_config=exir.EdgeCompileConfig(
                _check_ir_validity=False, _skip_dim_order=False
            ),
        ).to_executorch()

    def test_emit_multiple_entry_points(self) -> None: