From d34f521bf5f9280c6139483a3e45d39e4427453a Mon Sep 17 00:00:00 2001 From: weilinquan <352977670@qq.com> Date: Sat, 9 Dec 2023 10:10:06 +0800 Subject: [PATCH] [frontend] Add Initial Graph Infra. Co-authored-by: xtayex Co-authored-by: zhanghb97 --- examples/BuddyBert/import-bert.py | 8 +- examples/BuddyGraph/README.md | 23 + examples/BuddyGraph/import-dynamo-break.py | 63 + examples/BuddyLlama/import-llama2.py | 15 +- examples/BuddyLlama/llama-main.cpp | 3 - examples/BuddyPython/bert.py | 10 +- examples/BuddyPython/module_gen.py | 29 +- examples/BuddyResNet18/import-resnet18.py | 45 + examples/lit.cfg.py | 1 + frontend/Interfaces/buddy/LLM/TextContainer.h | 2 +- frontend/Python/frontend.py | 627 ++-- frontend/Python/graph/__init__.py | 23 + frontend/Python/graph/graph.py | 487 ++++ frontend/Python/graph/operation.py | 456 +++ frontend/Python/graph/transform/__init__.py | 1 + .../graph/transform/useless_op_eliminate.py | 66 + frontend/Python/graph/type.py | 79 + frontend/Python/ops/linalg.py | 2590 ++++++----------- frontend/Python/ops/math.py | 11 +- frontend/Python/ops/tosa.py | 583 +++- frontend/Python/ops/utils.py | 56 + requirements.txt | 1 + tests/Python/test_addmm.py | 8 +- tests/Python/test_amax.py | 8 +- tests/Python/test_arange.py | 13 +- tests/Python/test_arith_add.py | 13 +- tests/Python/test_arith_div.py | 12 +- tests/Python/test_arith_mul.py | 15 +- tests/Python/test_arith_sub.py | 8 +- tests/Python/test_bmm.py | 15 +- tests/Python/test_cat.py | 12 +- tests/Python/test_clone.py | 14 +- tests/Python/test_convert_element_type.py | 10 +- tests/Python/test_convolution_default.py | 42 + tests/Python/test_embedding.py | 31 +- tests/Python/test_exp.py | 8 +- tests/Python/test_full.py | 12 +- tests/Python/test_index.py | 12 +- tests/Python/{test_expand.py => test_iota.py} | 27 +- tests/Python/test_lt.py | 14 +- tests/Python/test_masked_fill.py | 12 +- tests/Python/test_max_pool2d.py | 44 + tests/Python/test_mean.py | 26 +- tests/Python/test_mm.py | 12 +- tests/Python/test_neg.py | 14 +- tests/Python/test_ones.py | 12 +- tests/Python/test_permute.py | 8 +- tests/Python/test_pow.py | 12 +- tests/Python/test_reciprocal.py | 36 + tests/Python/test_relu.py | 36 + tests/Python/test_reshape.py | 8 +- tests/Python/test_rsqrt.py | 15 +- tests/Python/test_rsub.py | 20 +- tests/Python/test_select.py | 8 +- tests/Python/test_sigmoid.py | 35 + tests/Python/test_silu.py | 12 +- tests/Python/test_slice.py | 12 +- tests/Python/test_softmax.py | 22 +- tests/Python/test_sqrt.py | 36 + tests/Python/test_squeeze.py | 12 +- tests/Python/test_sum.py | 8 +- tests/Python/test_t.py | 16 +- tests/Python/test_tanh.py | 8 +- tests/Python/test_to_copy.py | 12 +- tests/Python/test_transpose.py | 13 +- tests/Python/test_unsqueeze.py | 12 +- tests/Python/test_var_mean.py | 37 +- tests/Python/test_view.py | 18 +- tests/Python/test_where.py | 38 + 69 files changed, 3766 insertions(+), 2241 deletions(-) create mode 100644 examples/BuddyGraph/README.md create mode 100644 examples/BuddyGraph/import-dynamo-break.py create mode 100644 examples/BuddyResNet18/import-resnet18.py create mode 100644 frontend/Python/graph/__init__.py create mode 100644 frontend/Python/graph/graph.py create mode 100644 frontend/Python/graph/operation.py create mode 100644 frontend/Python/graph/transform/__init__.py create mode 100644 frontend/Python/graph/transform/useless_op_eliminate.py create mode 100644 frontend/Python/graph/type.py create mode 100644 frontend/Python/ops/utils.py create mode 100644 tests/Python/test_convolution_default.py 
rename tests/Python/{test_expand.py => test_iota.py} (52%) create mode 100644 tests/Python/test_max_pool2d.py create mode 100644 tests/Python/test_reciprocal.py create mode 100644 tests/Python/test_relu.py create mode 100644 tests/Python/test_sigmoid.py create mode 100644 tests/Python/test_sqrt.py create mode 100644 tests/Python/test_where.py diff --git a/examples/BuddyBert/import-bert.py b/examples/BuddyBert/import-bert.py index c2044cb037..92e8e055ea 100644 --- a/examples/BuddyBert/import-bert.py +++ b/examples/BuddyBert/import-bert.py @@ -46,12 +46,16 @@ "attention_mask": torch.tensor([[1 for _ in range(5)]], dtype=torch.int64), } with torch.no_grad(): - module, params = dynamo_compiler.importer(model, **inputs) + graphs = dynamo_compiler.importer(model, **inputs) +assert len(graphs) == 1 +graph = graphs[0] +params = dynamo_compiler.imported_params[graph] +graph.lower_to_top_level_ir(do_params_pack=True) current_path = os.path.dirname(os.path.abspath(__file__)) with open(Path(current_path) / "bert.mlir", "w") as module_file: - module_file.write(str(module)) + module_file.write(str(graph._imported_module)) float32_param = np.concatenate( [param.detach().numpy().reshape([-1]) for param in params[:-1]] diff --git a/examples/BuddyGraph/README.md b/examples/BuddyGraph/README.md new file mode 100644 index 0000000000..d7b977f57e --- /dev/null +++ b/examples/BuddyGraph/README.md @@ -0,0 +1,23 @@ +# Buddy Graph Representation Examples + +## Run the Examples + +0. Enter your Python Env +``` +(base)$ conda activate buddy +(buddy)$ ... +``` +1. Build Python Packages +2. Configure Python Path +``` +(buddy)$ cd buddy-mlir/build +(buddy)$ export BUDDY_MLIR_BUILD_DIR=$PWD +(buddy)$ export LLVM_MLIR_BUILD_DIR=$PWD/../llvm/build +(buddy)$ export PYTHONPATH=${LLVM_MLIR_BUILD_DIR}/tools/mlir/python_packages/mlir_core:${BUDDY_MLIR_BUILD_DIR}/python_packages:${PYTHONPATH} + +``` +3. Run the Examples +``` +(buddy)$ cd examples/BuddyGraph +(buddy)$ python import-dynamo-break.py +``` \ No newline at end of file diff --git a/examples/BuddyGraph/import-dynamo-break.py b/examples/BuddyGraph/import-dynamo-break.py new file mode 100644 index 0000000000..42bbed6030 --- /dev/null +++ b/examples/BuddyGraph/import-dynamo-break.py @@ -0,0 +1,63 @@ +# ===- import-dynamo-break.py -------------------------------------------------- +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# ===--------------------------------------------------------------------------- +# +# The example for dynamo graph break, import, and execute. 
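+# TorchDynamo breaks the FX graph at the data-dependent `if` in the forward
+# function below, so the importer can yield more than one subgraph; the loop
+# at the end of this file lowers and prints each of them.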
+# +# ===--------------------------------------------------------------------------- + +import torch +import torch._dynamo as dynamo +from torch._inductor.decomposition import decompositions as inductor_decomp +from torch._functorch.aot_autograd import aot_autograd_decompositions + +from buddy.compiler.frontend import DynamoCompiler +from buddy.compiler.ops import tosa + + +class TestModule(torch.nn.Module): + def __init__(self, *args, **kwargs) -> None: + super().__init__(*args, **kwargs) + + def forward(self, b, c): + if torch.nn.functional.silu(b)[0][0]: + return torch.add(b, c) + else: + return torch.matmul(b, c) + +# Define a PyTorch model and run it with PyTorch runtime. +model = TestModule() +a, b = torch.randn((1024, 1024)), torch.randn((1024, 1024)) +print(model(a, b)) + +# JIT Mode +# Initialize Buddy Dynamo Compiler to compile and execute the PyTorch model. +dynamo_compiler = DynamoCompiler( + primary_registry=tosa.ops_registry, + aot_autograd_decomposition=aot_autograd_decompositions +) +model_opt = torch.compile(model, backend=dynamo_compiler) +print(model_opt(a, b)) + +torch._dynamo.reset() + +# AOT Mode +# Import PyTorch model to Buddy Graph and MLIR/LLVM IR. +graphs = dynamo_compiler.importer( + model, a, b +) +for g in graphs: + g.lower_to_top_level_ir() + print(g._imported_module) diff --git a/examples/BuddyLlama/import-llama2.py b/examples/BuddyLlama/import-llama2.py index d5a3a29e1b..47eb9e61ec 100644 --- a/examples/BuddyLlama/import-llama2.py +++ b/examples/BuddyLlama/import-llama2.py @@ -19,11 +19,13 @@ # ===--------------------------------------------------------------------------- import os +import time import numpy import torch from transformers import LlamaForCausalLM, LlamaTokenizer from torch._functorch.aot_autograd import aot_autograd_decompositions +from torch._inductor.decomposition import decompositions as inductor_decomp from buddy.compiler.frontend import DynamoCompiler from buddy.compiler.ops import tosa @@ -44,19 +46,22 @@ # Initialize Dynamo Compiler with specific configurations as an importer. dynamo_compiler = DynamoCompiler( primary_registry=tosa.ops_registry, - aot_autograd_decomposition=aot_autograd_decompositions, + aot_autograd_decomposition=inductor_decomp, ) # Import the model into MLIR module and parameters. with torch.no_grad(): - gm, params = dynamo_compiler.importer( - model, torch.tensor([[1 for _ in range(40)]], dtype=torch.int64) - ) + data = torch.tensor([[1 for i in range(40)]], dtype=torch.int64) + graphs = dynamo_compiler.importer(model, data) +assert len(graphs) == 1 +graph = graphs[0] +params = dynamo_compiler.imported_params[graph] +graph.lower_to_top_level_ir(True) path_prefix = os.path.dirname(os.path.abspath(__file__)) # Write the MLIR module to the file. with open(os.path.join(path_prefix, "llama.mlir"), "w") as module_file: - print(gm, file=module_file) + print(graph._imported_module, file=module_file) # Concatenate all parameters into a single numpy array and write to a file. 
 all_param = numpy.concatenate(
diff --git a/examples/BuddyLlama/llama-main.cpp b/examples/BuddyLlama/llama-main.cpp
index 78b5cec027..55530a01c2 100644
--- a/examples/BuddyLlama/llama-main.cpp
+++ b/examples/BuddyLlama/llama-main.cpp
@@ -18,12 +18,9 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
-#include
-#include
 
 using namespace buddy;
 
diff --git a/examples/BuddyPython/bert.py b/examples/BuddyPython/bert.py
index 7f4f004359..e57dc991b1 100644
--- a/examples/BuddyPython/bert.py
+++ b/examples/BuddyPython/bert.py
@@ -15,6 +15,10 @@
 text = "Replace me by any text you'd like."
 encoded_text = tokenizer(text, return_tensors="pt")
 with torch.no_grad():
-    module, params = dynamo_compiler.importer(model, **encoded_text)
-    print(module)
-    print(params)
+    graphs = dynamo_compiler.importer(model, **encoded_text)
+
+graph = graphs[0]
+params = dynamo_compiler.imported_params[graph]
+graph.lower_to_top_level_ir(do_params_pack=True)
+print(graph._imported_module)
+print(params)
diff --git a/examples/BuddyPython/module_gen.py b/examples/BuddyPython/module_gen.py
index 10a1e2ee1c..e2c722cebf 100644
--- a/examples/BuddyPython/module_gen.py
+++ b/examples/BuddyPython/module_gen.py
@@ -43,23 +43,12 @@ def foo(x, y):
     aot_autograd_decomposition=inductor_decomp,
 )
 
-# The first way to generate an MLIR Module:
-# Pass the function and input data to the dynamo compiler's importer,
-# and accepts the generated module and weight parameters.
-module, params = dynamo_compiler.importer(foo, *(float32_in1, float32_in2))
-
-print(module)
-print(params)
-
-# The second way to generate an MLIR Module:
-# Execute the target function using a define-by-run style,
-# and get the module and weight parameters from the dynamo compiler's attribute.
-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-
-foo_mlir(float32_in1, float32_in2)
-print(dynamo_compiler.imported_module)
-print(dynamo_compiler.imported_params)
-
-foo_mlir(int32_in1, int32_in2)
-print(dynamo_compiler.imported_module)
-print(dynamo_compiler.imported_params)
+# Pass the function and input data to the dynamo compiler's importer; the
+# importer first builds a Buddy Graph. Then, lower the graph to top-level IR
+# (tosa, linalg, etc.). Finally, accept the generated module and weight parameters.
+graphs = dynamo_compiler.importer(foo, *(float32_in1, float32_in2))
+graph = graphs[0]
+graph.lower_to_top_level_ir(do_params_pack=True)
+
+print(graph._imported_module)
+print(dynamo_compiler.imported_params[graph])
diff --git a/examples/BuddyResNet18/import-resnet18.py b/examples/BuddyResNet18/import-resnet18.py
new file mode 100644
index 0000000000..c58f4a604a
--- /dev/null
+++ b/examples/BuddyResNet18/import-resnet18.py
@@ -0,0 +1,45 @@
+# ===- import-resnet18.py ------------------------------------------------------
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ===---------------------------------------------------------------------------
+#
+# This is an import example of the ResNet-18 model.
+# +# ===--------------------------------------------------------------------------- + +import torch +import torchvision +from torch._inductor.decomposition import decompositions as inductor_decomp + +from buddy.compiler.frontend import DynamoCompiler +from buddy.compiler.ops import tosa + + +model = torchvision.models.resnet18() +model = model.eval() + +# Initialize Dynamo Compiler with specific configurations as an importer. +dynamo_compiler = DynamoCompiler( + primary_registry=tosa.ops_registry, + aot_autograd_decomposition=inductor_decomp, +) + +data = torch.randn([1, 3, 224, 224]) +# Import the model into MLIR module and parameters. +with torch.no_grad(): + graphs = dynamo_compiler.importer(model, data) + +assert len(graphs) == 1 +graphs[0].lower_to_top_level_ir(do_params_pack=True) +print(graphs[0]._imported_module) diff --git a/examples/lit.cfg.py b/examples/lit.cfg.py index fa37a2e51f..724d9cdaad 100644 --- a/examples/lit.cfg.py +++ b/examples/lit.cfg.py @@ -36,6 +36,7 @@ # subdirectories contain auxiliary inputs for various tests in their parent # directories. config.excludes = [ + 'BuddyBert', 'BuddyLlama', 'BuddyBert', 'ConvOpt', diff --git a/frontend/Interfaces/buddy/LLM/TextContainer.h b/frontend/Interfaces/buddy/LLM/TextContainer.h index b5e307abdf..28432b3c19 100644 --- a/frontend/Interfaces/buddy/LLM/TextContainer.h +++ b/frontend/Interfaces/buddy/LLM/TextContainer.h @@ -325,7 +325,7 @@ template std::string Text::revertLlama() { const int CLS_ID = 1; const int SEP_ID = 2; - for (size_t i = 0; i < this->getSize(); i++) { + for (size_t i = 0; i < this->tokenCnt; i++) { int id = this->aligned[i]; if (id == PAD_ID || id == CLS_ID) continue; diff --git a/frontend/Python/frontend.py b/frontend/Python/frontend.py index 24002fd641..e89597800c 100644 --- a/frontend/Python/frontend.py +++ b/frontend/Python/frontend.py @@ -16,32 +16,44 @@ # # This is the entry of the Buddy Compiler frontend. # +# TODO[Low]: When integrating more frameworks, `frontend.py` acts as a unified +# entry and driver, separating out compilers/importers for various platforms +# (e.g. DynamoCompiler). +# # ===--------------------------------------------------------------------------- -import operator from typing import Any, List, Optional -import functools +import operator +import os +import ctypes +import platform -import mlir.dialects.func as func import mlir.ir as ir +import mlir.dialects.func as func +from mlir.passmanager import * +from mlir.execution_engine import * +from mlir import runtime as rt import torch import torch._dynamo as dynamo from torch._functorch.aot_autograd import aot_module_simplified import torch.utils._pytree as pytree -from .ops.math import ops_registry as math_ops_registry -from .ops.tosa import ops_registry as tosa_ops_registry from .ops.linalg import ops_registry as linalg_ops_registry +from .ops.tosa import ops_registry as tosa_ops_registry +from .ops.math import ops_registry as math_ops_registry +from .graph import Graph, TensorDType, TensorMeta +from .graph.operation import * +from .graph.transform import maxpool2d_simplify class DynamoCompiler: """ Dynamo Compiler is one of the frontends of Buddy Compiler. - Dynamo Compiler acts as a custom compiler for the Torch Dynamo framework, - which converts an FX Graph into an equivalent MLIR module. + Dynamo Compiler acts as a custom compiler for the TorchDynamo framework, + which converts an FX Graph into an equivalent Buddy Graph and MLIR module. Attributes: - imported_module: The imported MLIR module after compilation. 
+        imported_graphs: The imported graphs.
         imported_params: The imported parameters from the model.
     """
 
@@ -50,80 +62,279 @@ def __init__(
         func_name: str = "forward",
         primary_registry: Optional[dict] = None,
         aot_autograd_decomposition: Optional[dict] = None,
-        do_param_pack: bool = True,
     ) -> None:
         """
         Initializes the Dynamo Compiler.
 
         Args:
-            func_name (str, optional): The function name to be used.
+            func_name: The function name to be used.
             primary_registry (dict, optional): The primary operations registry.
             aot_autograd_decomposition (Optional[dict], optional):
-                The ahead-of-time autograd decomposition dictionary.
+            The ahead-of-time autograd decomposition dictionary.
+        Attributes:
+            _func_name: The function name to be used.
+            _aot_autograd_decomposition (Optional[dict], optional):
+            The ahead-of-time autograd decomposition dictionary.
+            _imported_graphs: The Buddy Graphs produced by the dynamo importer.
+            _ops_registry (dict, optional): The registry of lowering functions
+            for buddy operations.
+            _imported_params: The model params extracted from torch.
+            _ops_map: The mapping from torch aten ops to buddy ops.
+
+        """
         if primary_registry is None:
             primary_registry = {}
         self._func_name = func_name
         self._aot_autograd_decomposition = aot_autograd_decomposition
-        self._imported_module = None
-        self._imported_params = None
-        self._do_param_pack = do_param_pack
+        self._imported_graphs = []
         self._ops_registry = {}
+        self._imported_params = {}
         self._ops_registry.update(math_ops_registry)
         self._ops_registry.update(linalg_ops_registry)
         self._ops_registry.update(tosa_ops_registry)
         self._ops_registry.update(primary_registry)
+        self._ops_map = {
+            "output": OutputOp,
+            "placeholder": PlaceholderOp,
+            "arange.start": ArangeOp,
+            "arange.default": ArangeOp,
+            "unsqueeze.default": UnsqueezeOp,
+            "view.default": ViewOp,
+            "ones.default": OnesOp,
+            "full.default": FullOp,
+            "lt.Tensor": LessThanOp,
+            "embedding.default": EmbeddingOp,
+            "masked_fill.Scalar": MaskedFillOp,
+            "slice.Tensor": SliceOp,
+            "expand.default": ExpandOp,
+            "_to_copy.default": ToCopyOp,
+            "rsub.Scalar": RsubOp,
+            "pow.Tensor_Scalar": PowOp,
+            "mean.dim": MeanOp,
+            "rsqrt.default": RsqrtOp,
+            "mul.Tensor": MulOp,
+            "t.default": TOp,
+            "mm.default": MatmulOp,
+            "transpose.int": TransposeOp,
+            "index.Tensor": IndexOp,
+            "neg.default": NegOp,
+            "cat.default": CatOp,
+            "squeeze.dim": SqueezeOp,
+            "bmm.default": BatchMatmulOp,
+            "div.Tensor": DivOp,
+            "_softmax.default": SoftmaxOp,
+            "clone.default": CloneOp,
+            "silu.default": SiluOp,
+            "add.Tensor": AddOp,
+            "addmm.default": AddMMOp,
+            "permute.default": PermuteOp,
+            "convert_element_type.default": ConvertElementTypeOp,
+            "sum.dim_IntList": SumDimOp,
+            "tanh.default": TanhOp,
+            "sub.Tensor": SubOp,
+            "var_mean.correction": VarMeanOp,
+            "amax.default": AmaxOp,
+            "select.int": SelectOp,
+            "exp.default": ExpOp,
+            "erf.default": ErfOp,
+            "getitem": GetItemOp,
+            "convolution.default": Conv2dOp,
+            "max_pool2d_with_indices.default": MaxPool2dWithIndicesOp,
+            "relu.default": ReluOp,
+            "iota.default": IotaOp,
+            "sigmoid.default": SigmoidOp,
+            "scalar_tensor.default": ScalarTensorOp,
+            "where.self": WhereOp,
+            "sqrt.default": SqrtOp,
+            "reciprocal.default": ReciprocalOp,
+        }
 
     @property
-    def imported_module(self):
-        """Returns the imported MLIR module after compilation."""
-        return self._imported_module
+    def imported_graphs(self):
+        """Returns the imported buddy graphs after compilation."""
+        return self._imported_graphs
 
     @property
     def imported_params(self):
-        """Returns the imported parameters from the model."""
+        """Returns the imported model params after compilation."""
        return self._imported_params
 
+    def _torch_dtype_translate(self, dtype):
+        match dtype:
+            case "torch.int64":
+                return TensorDType.Int64
+            case "torch.int32":
+                return TensorDType.Int32
+            case "torch.float16":
+                return TensorDType.Float16
+            case "torch.float32":
+                return TensorDType.Float32
+            case "torch.float64":
+                return TensorDType.Float64
+            case "torch.bool":
+                return TensorDType.Bool
+            case _:
+                raise NotImplementedError(f"Unsupported dtype: {dtype}")
+
+    def _create_node(
+        self,
+        gm_node_name: str,
+        node_name: str,
+        node_input: Tuple,
+        node_users: List[str],
+        node_output_shape: list = [],
+        node_output_dtype: TensorDType = None,
+        node_kwargs: Optional[Dict] = None,
+    ):
+        """
+        Create a buddy op node from a torch aten op.
+
+        Args:
+            gm_node_name: The torch aten op name, used to map to a buddy op
+            class via _ops_map.
+            node_name: The op node name to be used.
+            node_input: The input arguments of the op node.
+            node_users: The names of the nodes that consume this node's
+            output.
+            node_output_shape: The list of the op node's output shape.
+            node_output_dtype: The TensorDType enum type of the op node's
+            output data type.
+            node_kwargs: The remaining keyword attributes of the op node.
+        """
+        op_class = self._ops_map[gm_node_name]
+        buddy_node = op_class()
+        buddy_node._name = node_name
+        if gm_node_name == "output":
+            for input_arg in node_input[0]:
+                buddy_node.add_argument(str(input_arg))
+            return buddy_node
+        for input_arg in node_input:
+            if isinstance(input_arg, torch.fx.Node):
+                buddy_node.add_argument(str(input_arg))
+                buddy_node.add_parent(str(input_arg))
+            elif isinstance(input_arg, torch.dtype):
+                buddy_node.add_argument(self._torch_dtype_translate(str(input_arg)))
+            else:
+                buddy_node.add_argument(input_arg)
+        for user in node_users:
+            buddy_node.add_children(user)
+        if node_kwargs is None:
+            node_kwargs = {}
+        buddy_node._keyword_arguments.update(node_kwargs)
+        buddy_node._tensor_meta["shape"] = node_output_shape
+        buddy_node._tensor_meta["dtype"] = node_output_dtype
+        return buddy_node
+
     def _compile_fx(
         self, gm: torch.fx.GraphModule, inputs: List[torch.Tensor]
     ) -> Any:
         """
-        Compiles the provided FX Graph to MLIR module.
+        Compiles the provided FX Graph to Buddy Graph.
 
         Args:
             gm (torch.fx.GraphModule): The GraphModule to be compiled.
             inputs (List[torch.Tensor]): The input tensors.
 
         Returns:
-            Any: The result of the ahead-of-time compiled module.
+            dynamo_run: The function of the ahead-of-time compiled module,
+            returned for torchdynamo's call.
         """
 
-        def _compiler(_gm: torch.fx.GraphModule, _inputs: List[torch.Tensor]):
-            """Compile a FX graph in Aten/Prims IR to MLIR."""
-            func_params = _inputs[: len(self.imported_params)]
-            func_inputs = _inputs[len(self.imported_params) :]
-
-            # Initializes the MLIR context.
-            ctx = ir.Context()
-            with ir.Location.unknown(ctx):
-                fx_importer = FXGraphImporter(
-                    _gm,
-                    func_params,
-                    func_inputs,
-                    self._do_param_pack,
-                    self._func_name,
-                    self._ops_registry,
-                )
-                self._imported_module = fx_importer.import_graph()
-            # TODO: Lower to LLVM dialect and use JIT engine to execute.
- return _gm.forward - params = { **dict(gm.named_parameters(remove_duplicate=False)), **dict(gm.named_buffers(remove_duplicate=False)), } params_flat, _ = pytree.tree_flatten(params) - self._imported_params = params_flat + + def _compiler(_gm: torch.fx.GraphModule, _inputs: List[torch.Tensor]): + """Compile a FX graph in Aten/Prims IR to MLIR.""" + nonlocal params_flat + func_inputs = [] + for inp in _inputs[len(params_flat) :]: + inp_shape = inp.shape + inp_dtype = self._torch_dtype_translate(str(inp.dtype)) + func_inputs.append(TensorMeta(inp_shape, inp_dtype)) + fake_params = [] + for param in params_flat: + param_dtype = self._torch_dtype_translate(str(param.dtype)) + fake_params.append(TensorMeta(param.shape, param_dtype)) + graph = Graph( + func_inputs, + fake_params, + self._ops_registry, + self._func_name, + ) + for gm_node in _gm.graph.nodes: + node_users = [] + for user in gm_node.users.keys(): + node_users.append(str(user)) + if gm_node.op == "placeholder": + node_dtype = self._torch_dtype_translate( + str(gm_node.meta["tensor_meta"].dtype) + ) + buddy_node = self._create_node( + gm_node.op, + gm_node.name, + gm_node.args, + node_users, + gm_node.meta["tensor_meta"].shape, + node_dtype, + ) + + elif gm_node.op == "output": + buddy_node = self._create_node( + gm_node.op, + gm_node.name, + gm_node.args, + node_users + ) + + elif gm_node.target is operator.getitem: + node_dtype = self._torch_dtype_translate( + str(gm_node.meta["tensor_meta"].dtype) + ) + buddy_node = self._create_node( + str(gm_node.target.__name__), + gm_node.name, + gm_node.args, + node_users, + gm_node.meta["tensor_meta"].shape, + node_dtype, + ) + + else: + tensor_meta = gm_node.meta.get("tensor_meta") + val = gm_node.meta.get("val") + num_returns = len(gm_node.target._schema.returns) + if num_returns == 1: + node_dtype = self._torch_dtype_translate( + str(tensor_meta.dtype) + ) + node_shape = tensor_meta.shape + elif num_returns > 1: + node_dtype = tuple( + [ + self._torch_dtype_translate(str(val_item.dtype)) + for val_item in val + ] + ) + node_shape = tuple([val_item.shape for val_item in val]) + else: + raise RuntimeError("Zero returns is not supported.") + + buddy_node = self._create_node( + str(gm_node.target.__name__), + gm_node.name, + gm_node.args, + node_users, + node_shape, + node_dtype, + node_kwargs=gm_node.kwargs, + ) + + graph.add_node(buddy_node) + transform_list = [maxpool2d_simplify] + graph.perform(transform_list) + self._imported_graphs.append(graph) + self._imported_params[graph] = params_flat + return self.dynamo_run() return aot_module_simplified( gm, @@ -143,11 +354,12 @@ def __call__( inputs (List[torch.Tensor]): The input tensors. Returns: - Any: The result of the ahead-of-time compiled module. + dynamo_run: The function of the ahead-of-time compiled module, + return for torchdynamo's call. """ return self._compile_fx(gm, inputs) - def importer(self, model, *args, **kwargs): + def importer(self, model, *args, **kwargs) -> List[Graph]: """ Imports the provided model as MLIR module and flat parameters. @@ -157,212 +369,145 @@ def importer(self, model, *args, **kwargs): kwargs: Keyword arguments for the model. Returns: - module: The imported MLIR module. - params: The imported flat parameters. + imported_graphs: The imported buddy graphs. 
""" model_opt = dynamo.optimize(self._compile_fx)(model) model_opt(*args, **kwargs) - module = self._imported_module - params = self._imported_params - return module, params - - -class FXGraphImporter: - """ - Imports an FX graph and generates an MLIR module in high-level dialects. - - Attributes: - _symbol_table (dict): A dictionary to keep track of the symbols. - _gm (torch.fx.GraphModule): The FX graph module to be imported. - _func_name (str): Name of the generated MLIR function. - _inputs (List[torch.Tensor]): Input tensor(s) of the FX graph. - _num_input_visited (int): Number of input nodes that have been visited. - _module (mlir.ir.Module): The generated MLIR module. - _ops_registry (dict): Registry for the candidate operations. - """ - - def __init__( - self, - gm: torch.fx.GraphModule, - params: List[torch.Tensor], - inputs: List[torch.Tensor], - do_param_pack: bool = True, - func_name: str = "forward", - ops_registry: Optional[dict] = None, - ): - """ - Initializes the FX Graph importer. - - Args: - gm (torch.fx.GraphModule): The FX graph that will be imported. - inputs (List[torch.Tensor]): Input tensor(s) of the FX graph. - func_name (str): Name of the generated MLIR function. - ops_registry (dict): Registry for the candidate operations. - """ - if ops_registry is None: - ops_registry = {} - self._symbol_table = {} - self._gm = gm - self._func_name = func_name - self._params = params - self._inputs = inputs - self._do_param_pack = do_param_pack - self._param_packs = [] - self._num_input_visited = 0 - self._module = ir.Module.create() - self._ops_registry = ops_registry - self._current_param_pack_offset = None - - def _torch_dtype_to_mlir_dtype(self, dtype: torch.dtype) -> ir.Type: - """ - Converts a torch dtype to the corresponding MLIR dtype. - - Args: - dtype (torch.dtype): The torch data type. + return self._imported_graphs - Returns: - mlir.ir.Type: The corresponding MLIR data type. - - Raises: - NotImplementedError: If the given dtype is not supported. - """ - match dtype: - case torch.int32: - return ir.IntegerType.get_signless(32) - case torch.int64: - return ir.IntegerType.get_signless(64) - case torch.float32: - return ir.F32Type.get() - case torch.bool: - return ir.IntegerType.get_signless(1) - case _: - raise NotImplementedError(f"Unsupported dtype {dtype}") - - def _pack_params(self) -> None: - dtypes = list(set([param.dtype for param in self._params])) - dtypes.sort(key=str) - self._current_param_pack_offset = {dtype: 0 for dtype in dtypes} - for dtype in dtypes: - params_of_dtype = [ - param for param in self._params if param.dtype == dtype - ] - param_total_size = 0 - for param in params_of_dtype: - param_total_size += functools.reduce( - lambda x, y: x * y, list(param.shape) - ) - mlir_dtype = self._torch_dtype_to_mlir_dtype(dtype) - self._param_packs.append( - ir.RankedTensorType.get([param_total_size], mlir_dtype) - ) - - def import_graph(self) -> ir.Module: + def dynamo_run(self): """ - Imports FX graph and generates an MLIR module in high-level dialects. + A callable method that wraps around the `exec_buddy_graph` method. Returns: - mlir.ir.Module: An MLIR module in high-level dialects. 
- """ - with ir.InsertionPoint(self._module.body): - arguments = [] - if self._do_param_pack: - self._pack_params() - arguments.extend(self._param_packs) - inputs = self._inputs - else: - inputs = self._params + self._inputs - for arg in inputs: - shape_list = list(arg.shape) - torch_dtype = arg.dtype - mlir_dtype = self._torch_dtype_to_mlir_dtype(torch_dtype) - tensor_arg = ir.RankedTensorType.get(shape_list, mlir_dtype) - arguments.append(tensor_arg) - - @func.FuncOp.from_py_func(*arguments, name=self._func_name) - def generated_func(*args): - args_list = list(args) - for node in self._gm.graph.nodes: - if not ( - node.op in ["output", "placeholder", "call_function"] - or node.target is operator.getitem - ): - continue - if node.op == "output": - output_node_args = node.args[0] - returns = [ - self._symbol_table.get((str(output_arg), 0)) - for output_arg in output_node_args - ] - self._symbol_table[("output", 0)] = returns - elif node.op == "placeholder": - self._import_placeholder(node, args_list) - elif node.target is operator.getitem: - self._symbol_table[ - (str(node.name), 0) - ] = self._symbol_table[ - (str(node.args[0]), node.args[1]) - ] - else: - self._import_op(node) - - return self._symbol_table.get(("output", 0)) - - return self._module - - def _import_placeholder( - self, node: torch.fx.Node, args_list: List[ir.BlockArgument] - ): + exec_buddy_graph: The function of the ahead-of-time compiled module, + return for torchdynamo's call. """ - Imports a placeholder node from the FX graph. - Args: - node (torch.fx.Node): The FX node representing the placeholder. - args_list (List[mlir.ir.BlockArgument]): List of input tensors. - """ - if self._num_input_visited < len(self._params): - dtype = node.meta["tensor_meta"].dtype - pack_of_dtype = None - for pack in args_list: - if ir.RankedTensorType( - pack.type - ).element_type == self._torch_dtype_to_mlir_dtype(dtype): - pack_of_dtype = pack - break - placeholder_name = self._ops_registry["param.extract"]( - node, self._current_param_pack_offset[dtype], pack_of_dtype - ).result - self._current_param_pack_offset[dtype] += functools.reduce( - lambda x, y: x * y, list(node.meta["tensor_meta"].shape) - ) - else: - if len(self._params) > 0: - placeholder_name = args_list[ - self._num_input_visited - - len(self._params) - + len(self._param_packs) - ] + def get_lib_extension(): + if platform.system() == "Linux": + return ".so" + elif platform.system() == "Darwin": + return ".dylib" else: - placeholder_name = args_list[self._num_input_visited] - - self._symbol_table[(str(node.name), 0)] = placeholder_name - self._num_input_visited += 1 - - def _import_op(self, node: torch.fx.Node): - """ - Imports an operation node from the FX graph. - - Args: - node (torch.fx.Node): The FX node representing the operation. - - """ - op_name = node.target.__name__ - op_ret: ir.Operation | ir.Value | tuple | ir.OpResult = ( - self._ops_registry[op_name](node, self._symbol_table) + raise RuntimeError("Unsupported platform") + + # Dynamo's graph break may import more than one graph. + graph = self._imported_graphs[-1] + graph.compile() + # Collect dependency libraries. 
+ lib_extension = get_lib_extension() + lib_names = ["libmlir_runner_utils", "libmlir_c_runner_utils", "libomp"] + path_prefix = os.path.dirname(os.path.abspath(__file__)) + lib_base_path = os.path.join(path_prefix, "../../../../llvm/build/lib/") + lib_base_path = os.path.abspath(lib_base_path) + shared_libs = [ + os.path.join(lib_base_path, lib_name + lib_extension) + for lib_name in lib_names + ] + # Define execution engine. + ee = ExecutionEngine( + graph._imported_module, opt_level=3, shared_libs=shared_libs ) - if isinstance(op_ret, tuple): - for i, operation in enumerate(op_ret): - self._symbol_table[(str(node.name), i)] = operation.result - elif isinstance(op_ret, ir.OpResult): - self._symbol_table[(str(node.name), 0)] = op_ret - else: - self._symbol_table[(str(node.name), 0)] = op_ret.result + + def cast_c_ptr(outdata_ptr, memref_ptr): + """ + Casts a C pointer (`outdata_ptr`) to the type of another C pointer + (`memref_ptr`). + + Args: + outdata_ptr: ctypes.POINTER + The C pointer whose type needs to be cast. + memref_ptr: ctypes.POINTER + The reference C pointer whose type will be used for casting. + + Returns: + ctypes.POINTER + A new C pointer with the type of `memref_ptr`, representing the + same memory location as `outdata_ptr`. + + Example: + outdata = ctypes.pointer(ctypes.c_int()) + memref = ctypes.pointer(ctypes.c_float()) + casted_ptr = cast_c_ptr(outdata, memref) + # Now `casted_ptr` points to the same memory location as `outdata`, + but with the type of `memref`. + """ + outdata_addr = ctypes.addressof(outdata_ptr.contents) + out_ptr = ctypes.cast(outdata_addr, type(memref_ptr)) + return out_ptr + + def move_c_ptr(outdata_ptr, memref_ptr): + """ + Moves a C pointer (`outdata_ptr`) to the next element in memory, + based on the size of the referenced type in another C pointer + (`memref_ptr`). + + Args: + outdata_ptr: ctypes.POINTER + The C pointer whose position needs to be moved. + memref_ptr: ctypes.POINTER + The reference C pointer whose type determines the size of each + element for the move. + + Returns: + ctypes.POINTER + A new C pointer pointing to the next element in memory, based on + the size of the type referenced by `memref_ptr`. + """ + elem_size = ctypes.sizeof(memref_ptr.contents) + outdata_addr = ctypes.addressof(outdata_ptr.contents) + out_ptr = ctypes.cast(outdata_addr + elem_size, type(memref_ptr)) + return out_ptr + + def exec_buddy_graph(*args): + """ + Execute a graph using TorchDynamo with the provided input tensors. + + Args: + *args: List[torch.Tensor] + Input tensors to be passed to the graph's function. + + Returns: + List[torch.Tensor] + The result of executing the graph, represented as a list of + output tensors. + """ + # A list of ctypes pointers representing memory references for input + # tensors. + input_memref = [ + ctypes.pointer( + ctypes.pointer( + rt.get_ranked_memref_descriptor(tensor.numpy()) + ) + ) + for tensor in args + ] + # A list of ctypes pointers representing memory references for + # output tensors. 
+ output_memref = [ + ctypes.pointer(ctypes.pointer(graph._output_descriptor())) + ] + args_memref = output_memref + input_memref + # Invoke the graph's function using the provided execution engine + # and memory references + ee.invoke(graph._func_name, *args_memref) + + output_tensor = [] + outdata_ptr = args_memref[0][0] + # Iterate through each output memory reference in the graph + for output_ptr in graph._output_memref: + # Cast the output data pointer to the type of the current output + # memory reference + data_ptr = cast_c_ptr(outdata_ptr, output_ptr[0]) + # Convert the C data pointer to a NumPy array and append it to + # the output_tensor list + output_tensor.append(rt.ranked_memref_to_numpy(data_ptr)) + # Move to the next element in memory based on the size of the + # current output type + outdata_ptr = move_c_ptr(outdata_ptr, output_ptr[0]) + # Convert each NumPy array to a PyTorch tensor and return the list + # of tensors + return [torch.from_numpy(tensor) for tensor in output_tensor] + + return exec_buddy_graph diff --git a/frontend/Python/graph/__init__.py b/frontend/Python/graph/__init__.py new file mode 100644 index 0000000000..bd927a3c0d --- /dev/null +++ b/frontend/Python/graph/__init__.py @@ -0,0 +1,23 @@ +# ===- __init__.py ------------------------------------------------------------- +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# ===--------------------------------------------------------------------------- +# +# Init the packages in graph directory. +# +# ===--------------------------------------------------------------------------- + +from .graph import Graph +from .operation import * +from .type import TensorDType, TensorMeta diff --git a/frontend/Python/graph/graph.py b/frontend/Python/graph/graph.py new file mode 100644 index 0000000000..be2ce438c4 --- /dev/null +++ b/frontend/Python/graph/graph.py @@ -0,0 +1,487 @@ +# ===- graph.py ---------------------------------------------------------------- +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# ===--------------------------------------------------------------------------- +# +# This is the graph level of the Buddy Compiler frontend. 
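+# A Graph records the imported op nodes, lowers them to top-level MLIR
+# dialects, and then drives the LLVM lowering pipeline so the module can be
+# JIT-executed.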
+# +# ===--------------------------------------------------------------------------- + +from typing import Any, List, Optional +from types import FunctionType +import ctypes +import functools + +import numpy as np +import mlir.ir as ir +import mlir.dialects.func as func +from mlir.passmanager import * +from mlir.execution_engine import * +from mlir import runtime as rt + +from .operation import * +from .type import * + + +def make_output_memref_descriptor(ranks, dtypes): + """ + Make an output memref descriptor for the given memref ranks and dtypes. + + Parameters: + - ranks: List[int] + A list of integers representing the ranks of each memref. + - dtypes: List[str] + A list of strings representing the data types of each memref. + + Returns: + ctypes.Structure + An output memref descriptor struct. + + Example: + ranks = [2, 3, 1] + dtypes = [np.float32, np.int64, np.bool] + descriptor = make_output_memref_descriptor(ranks, dtypes) + # Use the descriptor in your code + """ + memref_descriptor = [] + for i, rank, dtype in zip(range(len(ranks)), ranks, dtypes): + memref_descriptor.append( + (str(i), rt.make_nd_memref_descriptor(rank, dtype)) + ) + + class OutputDescriptor(ctypes.Structure): + """Builds an output struct descriptor for the multi memref.""" + + _fields_ = memref_descriptor + + return OutputDescriptor + + +class Graph: + """ + Graph is a graph-level expression for the Buddy Compiler frontends. + It acts as a model compute graph, which converts a Graph into an equivalent + MLIR module. + + Attributes: + - _body: List[Op] + The sequence of operation nodes in the graph. + - _inputs: List[TensorMeta] + The model inputs represented as TensorMeta objects. + - _fake_params: List[TensorMeta] + The fake parameters represented as TensorMeta objects. + - device: str + The hardware for graph runtime. + - _imported_module: Union[None, ImportedModuleType] + The imported MLIR module after compilation, if set. + - _ops_registry: dict + The ops lower strategy for the graph. + - _func_name: str + The function name for the MLIR module. + - _ctx: ir.Context + The context of the MLIR module. + - _output_memref: Union[None, ctypes.POINTER] + The memref pointer in the MLIR function output, if set. + - _output_descriptor: Union[None, OutputDescriptorType] + The output descriptor for the MLIR function, if set. + - ee_: Union[None, ExecutionEngineType] + The execution engine for the graph, if set. + """ + + def __init__( + self, + inputs: List[TensorMeta], + fake_params: List[TensorMeta], + ops_registry: dict, + func_name: str, + ) -> None: + """ + Initializes the Graph. + + Args: + inputs: List[TensorMeta] + The model inputs represented as TensorMeta objects. + fake_params: List[TensorMeta] + The fake parameters represented as TensorMeta objects. + ops_registry: dict + The ops lower strategy for the graph. + func_name: str + The function name for the MLIR module. + """ + self._body = [] + self._inputs = inputs + self.node_table: Dict[str, Op] = {} + self._fake_params = fake_params + self.device = "cpu" + self._imported_module = None + self._ops_registry = ops_registry + self._func_name = func_name + self._ctx = ir.Context() + self._output_memref = None + self._output_descriptor = None + self.execution_engine = None + + def add_node(self, node: Op): + """ + Adds an operation node to the graph's body. + + Parameters: + - node: Op + The operation node to be added to the graph. 
+ + Returns: + None + + Example: + graph_instance = Graph(inputs, fake_params, ops_registry, func_name) + op_node = Op() + graph_instance.add_node(op_node) + # The op_node is now part of the graph's body + """ + self._body.append(node) + self.node_table[node.name] = node + + def perform(self, func_list: List[FunctionType]): + for transform_func in func_list: + transform_func(self) + + def lower_to_top_level_ir(self, do_params_pack=False): + """ + Lowers the graph to top-level MLIR dialects. + + Parameters: + - do_params_pack: bool, optional (default=False) + Flag indicating whether to perform parameters packing to one memref. + + Returns: + None + + Example: + graph_instance = Graph(inputs, fake_params, ops_registry, func_name) + graph_instance.lower_to_top_level_ir(do_params_pack=True) + # The graph is now lowered to top-level MLIR dialects + """ + with ir.Location.unknown(self._ctx): + fx_importer = GraphImporter( + self._body, + self._fake_params, + self._inputs, + do_params_pack, + self._func_name, + self._ops_registry, + ) + self._imported_module = fx_importer.import_graph() + outputs = fx_importer.get_output_nodes() + self._output_memref = [] + output_ranks = [] + output_dtypes = [] + for out_node in outputs: + out_type = ir.RankedTensorType(out_node.type) + shape = list(out_type.shape) + dtype = out_type.element_type + match str(dtype): + case "i1": + np_type = np.dtype(np.bool_) + case "i32": + np_type = np.dtype(np.int32) + case "i64": + np_type = np.dtype(np.int64) + case "f32": + np_type = np.dtype(np.float32) + case _: + raise NotImplementedError(f"Unsupported dtype {dtype}") + self._output_memref.append( + ctypes.pointer( + ctypes.pointer( + rt.make_nd_memref_descriptor( + len(shape), rt.as_ctype(np_type) + )() + ) + ) + ) + output_ranks.append(len(shape)) + output_dtypes.append(rt.as_ctype(np_type)) + self._output_descriptor = make_output_memref_descriptor( + output_ranks, output_dtypes + ) + + def lower_to_llvm_ir(self): + """ + Lower graph to llvm ir. + """ + if self._imported_module is None: + self.lower_to_top_level_ir() + + with ir.Location.unknown(self._ctx): + pm = PassManager("builtin.module") + pm.add("func.func(tosa-to-linalg-named)") + pm.add("func.func(tosa-to-linalg)") + pm.add("func.func(tosa-to-tensor)") + pm.add("func.func(tosa-to-arith)") + pm.run(self._imported_module.operation) + pm.add("arith-expand") + pm.add("eliminate-empty-tensors") + pm.add("empty-tensor-to-alloc-tensor") + pm.add("convert-elementwise-to-linalg") + pm.add('one-shot-bufferize') + pm.add("func.func(convert-linalg-to-affine-loops)") + pm.add("affine-loop-fusion") + pm.add("func.func(affine-parallelize)") + pm.add("lower-affine") + pm.add("convert-scf-to-openmp") + pm.add("func-bufferize") + pm.add("arith-bufferize") + pm.add("func.func(tensor-bufferize)") + pm.add("func.func(buffer-deallocation)") + pm.add("func.func(finalizing-bufferize)") + pm.add("expand-strided-metadata") + pm.add("convert-vector-to-llvm") + pm.add("memref-expand") + pm.add("arith-expand") + pm.add("convert-arith-to-llvm") + pm.add("finalize-memref-to-llvm") + pm.add("convert-scf-to-cf") + pm.add("func.func(llvm-request-c-wrappers)") + pm.add("convert-openmp-to-llvm") + pm.add("convert-math-to-llvm") + pm.add("convert-math-to-libm") + pm.add("convert-func-to-llvm") + pm.add("reconcile-unrealized-casts") + pm.run(self._imported_module.operation) + + def compile(self): + """ + Compile graph from Buddy Graph to LLVM IR. 
+ """ + self.lower_to_top_level_ir() + self.lower_to_llvm_ir() + + +class GraphImporter: + """ + Imports an buddy graph and generates an MLIR module in high-level dialects. + + Attributes: + _symbol_table (dict): A dictionary to keep track of the symbols. + _body (List[Op]): The FX graph module to be imported. + _func_name (str): Name of the generated MLIR function. + _inputs (List[TensorMeta]): Input tensor(s) of the FX graph. + _num_input_visited (int): Number of input nodes that have been visited. + _module (mlir.ir.Module): The generated MLIR module. + _ops_registry (dict): Registry for the candidate operations. + """ + + def __init__( + self, + body: List[Op], + params: List[TensorMeta], + inputs: List[TensorMeta], + do_param_pack: bool, + func_name: str, + ops_registry: dict, + ): + """ + Initializes the buddy Graph importer. + + Args: + gm (Graph): The buddy graph that will be imported. + inputs (List[TensorMeta]): Input tensor(s) of the buddy graph. + func_name (str): Name of the generated MLIR function. + ops_registry (dict): Registry for the candidate operations. + """ + if ops_registry is None: + ops_registry = {} + self._symbol_table = {} + self._body = body + self._func_name = func_name + self._params = params + self._inputs = inputs + self._do_param_pack = do_param_pack + self._param_packs = [] + self._num_input_visited = 0 + self._module = ir.Module.create() + self._ops_registry = ops_registry + self._current_param_pack_offset = None + + def _str_to_mlir_dtype(self, dtype: str) -> ir.Type: + """ + Converts a str to the corresponding MLIR dtype. + + Args: + dtype (str): The tensor type. + + Returns: + mlir.ir.Type: The corresponding MLIR data type. + + Raises: + NotImplementedError: If the given dtype is not supported. + """ + match dtype: + case TensorDType.Int32: + return ir.IntegerType.get_signless(32) + case TensorDType.Int64: + return ir.IntegerType.get_signless(64) + case TensorDType.Float32: + return ir.F32Type.get() + case TensorDType.Bool: + return ir.IntegerType.get_signless(1) + case _: + raise NotImplementedError(f"Unsupported dtype {dtype}") + + def _pack_params(self) -> None: + """ + Packs parameters of the graph to one memref. + + Returns: + None + + Example: + graph_instance = Graph(inputs, fake_params, ops_registry, func_name) + graph_instance._pack_params() + # The parameters of the graph are now packed to one memref. + """ + dtypes = list(set([param.dtype for param in self._params])) + dtypes.sort(key=str) + self._current_param_pack_offset = {dtype: 0 for dtype in dtypes} + for dtype in dtypes: + params_of_dtype = [ + param for param in self._params if param.dtype == dtype + ] + param_total_size = 0 + for param in params_of_dtype: + param_total_size += functools.reduce( + lambda x, y: x * y, list(param.shape), 1 + ) + mlir_dtype = self._str_to_mlir_dtype(dtype) + self._param_packs.append( + ir.RankedTensorType.get([param_total_size], mlir_dtype) + ) + + def import_graph(self) -> ir.Module: + """ + Imports buddy graph and generates an MLIR module in high-level dialects. + + Returns: + mlir.ir.Module: An MLIR module in high-level dialects. 
+ """ + with ir.InsertionPoint(self._module.body): + arguments = [] + if self._do_param_pack: + self._pack_params() + arguments.extend(self._param_packs) + inputs = self._inputs + else: + inputs = self._params + self._inputs + for arg in inputs: + shape_list = list(arg.shape) + dtype = arg.dtype + mlir_dtype = self._str_to_mlir_dtype(dtype) + tensor_arg = ir.RankedTensorType.get(shape_list, mlir_dtype) + arguments.append(tensor_arg) + + @func.FuncOp.from_py_func(*arguments, name=self._func_name) + def generated_func(*args): + args_list = list(args) + for node in self._body: + if isinstance(node, OutputOp): + output_node_args = node.args + returns = [ + self._symbol_table.get((str(output_arg), 0)) + for output_arg in output_node_args + ] + self._symbol_table[("output", 0)] = returns + elif isinstance(node, PlaceholderOp): + self._import_placeholder(node, args_list) + elif isinstance(node, GetItemOp): + self._symbol_table[ + (str(node.name), 0) + ] = self._symbol_table[ + (str(node.args[0]), node.args[1]) + ] + else: + self._import_op(node) + + return self._symbol_table.get(("output", 0)) + + return self._module + + def _import_placeholder( + self, node: PlaceholderOp, args_list: List[ir.BlockArgument] + ): + """ + Imports a placeholder node from the Buddy graph. + + Parameters: + - node (PlaceholderOp): The PlaceholderOp node representing the + placeholder. + - args_list (List[mlir.ir.BlockArgument]): List of input memrefs. + + Returns: + None + """ + if self._num_input_visited < len(self._params) and self._do_param_pack: + dtype = node.tensor_meta["dtype"] + pack_of_dtype = None + for pack in args_list: + if ir.RankedTensorType( + pack.type + ).element_type == self._str_to_mlir_dtype(dtype): + pack_of_dtype = pack + break + placeholder_name = self._ops_registry["param.extract"]( + node, self._current_param_pack_offset[dtype], pack_of_dtype + ).result + self._current_param_pack_offset[dtype] += functools.reduce( + lambda x, y: x * y, list(node.tensor_meta["shape"]), 1 + ) + elif self._do_param_pack: + if len(self._params) > 0: + placeholder_name = args_list[ + self._num_input_visited + - len(self._params) + + len(self._param_packs) + ] + else: + placeholder_name = args_list[self._num_input_visited] + else: + placeholder_name = args_list[self._num_input_visited] + + self._symbol_table[(str(node.name), 0)] = placeholder_name + self._num_input_visited += 1 + + def _import_op(self, node: Op): + """ + Imports an operation node from the buddy graph. + + Args: + node (Op): The buddy node representing the operation. + + """ + op_name = node.__class__.__name__ + op_ret: ir.Operation | ir.Value | tuple | ir.OpResult = ( + self._ops_registry[op_name](node, self._symbol_table) + ) + if isinstance(op_ret, tuple): + for i, operation in enumerate(op_ret): + self._symbol_table[(str(node.name), i)] = operation.result + elif isinstance(op_ret, ir.OpResult): + self._symbol_table[(str(node.name), 0)] = op_ret + else: + self._symbol_table[(str(node.name), 0)] = op_ret.result + + def get_output_nodes(self): + """ + Get output nodes from the lowered mlir func. 
+ """ + return self._symbol_table.get(("output", 0)) diff --git a/frontend/Python/graph/operation.py b/frontend/Python/graph/operation.py new file mode 100644 index 0000000000..550f3f3211 --- /dev/null +++ b/frontend/Python/graph/operation.py @@ -0,0 +1,456 @@ +# ===- operation.py ------------------------------------------------------------ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# ===--------------------------------------------------------------------------- +# +# This is the operation structure of Buddy Compiler graph representation. +# +# ===--------------------------------------------------------------------------- + +from enum import Enum +from typing import Dict, Optional, List, Tuple + +from .type import TensorDType, TensorMeta + + +class OpType(Enum): + """ + Enum class for declaring operation types. + + Members: + - BroadcastType: int + Represents a broadcast operation. + - ElementwiseType: int + Represents an elementwise operation. + - ReshapeType: int + Represents a reshape operation. + - ReduceType: int + Represents a reduction operation. + - ConcatType: int + Represents a concatenation operation. + - PlaceholderType: int + Represents a placeholder operation. + - GetItemType: int + Represents an operation to retrieve an item. + + Note: The underlying values are integers for these operation types. + """ + + BroadcastType = 0 + ElementwiseType = 1 + ReshapeType = 2 + ReduceType = 3 + ConcatType = 4 + PlaceholderType = 5 + GetItemType = 6 + + +class Op: + """ + Base class for all operations in a computational graph. + + Attributes: + - _name: str + The unique name of the operation node. + - _arguments: list + The input arguments of the operation node. + - _keyword_arguments: dict + The keyword arguments of the operation node. + - _tensor_meta: dict + The metadata of the output tensor, including shape and data type. + - _op_type: OpType + The type of the operation node, as defined in the OpType enum. + """ + + def __init__(self) -> None: + """ + Initialize a new instance of the Op class. + """ + self._name = None + self._arguments = [] + self._keyword_arguments = {} + self._tensor_meta: List[TensorMeta] = {} + self._op_type: OpType = None + self._children: List[str] = [] + self._parents: List[str] = [] + + def add_argument(self, arg): + """ + Add an input argument to the operation node. + + Parameters: + - arg: Any + The input argument to be added. + """ + self._arguments.append(arg) + + def add_parent(self, parent: str): + """ + Add an parent node's name to the operation node. + + Parameters: + - parent: str + The parent node's name to be added. + """ + self._parents.append(parent) + + def add_children(self, child): + """ + Add an user node's name to the operation node. + + Parameters: + - user: str + The user node's name to be added. 
+ """ + self._children.append(child) + + @property + def args(self): + return self._arguments + + @property + def kwargs(self): + return self._keyword_arguments + + @property + def name(self): + return self._name + + @name.setter + def name(self, new_name): + self._name = new_name + + @property + def tensor_meta(self): + return self._tensor_meta + + +class PlaceholderOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.PlaceholderType + + +class MatmulOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ReduceType + + +class GetItemOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.GetItemType + + +class OutputOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.GetItemType + + +class ArangeOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.PlaceholderType + + +class UnsqueezeOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ReshapeType + + +class ViewOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ReshapeType + + +class EmbeddingOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ReshapeType + + +class OnesOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.PlaceholderType + + +class FullOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.PlaceholderType + + +class LessThanOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.BroadcastType + + +class MaskedFillOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ElementwiseType + + +class SliceOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ReshapeType + + +class ToCopyOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ElementwiseType + + +class RsubOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.BroadcastType + + +class PowOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.BroadcastType + + +class MeanOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ReduceType + + +class RsqrtOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ElementwiseType + + +class MulOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.BroadcastType + + +class TransposeOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ReshapeType + + +class IndexOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ReshapeType + + +class NegOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ElementwiseType + + +class CatOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ConcatType + + +class SqueezeOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ReshapeType + + +class BatchMatmulOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ReduceType + + +class DivOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.BroadcastType + + +class SoftmaxOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ReduceType + + +class CloneOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ReduceType + + 
+class SiluOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ElementwiseType + + +class AddOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.BroadcastType + + +class AddMMOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ReduceType + + +class AmaxOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ReduceType + + +class SubOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.BroadcastType + + +class ConvertElementTypeOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ElementwiseType + + +class ExpOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ElementwiseType + + +class ExpandOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ReshapeType + + +class PermuteOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ReshapeType + + +class ReshapeOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ReshapeType + + +class SelectOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ReshapeType + + +class SumDimOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ReduceType + + +class TanhOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ElementwiseType + + +class VarMeanOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ReduceType + + +class TOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ReshapeType + + +class ErfOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ElementwiseType + +class Conv2dOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ReduceType + self._layout = "NCHW_FCHW" + +class ReluOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ElementwiseType + +class SigmoidOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ElementwiseType + +class IotaOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.PlaceholderType + +class ScalarTensorOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.PlaceholderType + +class WhereOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ElementwiseType + +class MaxPool2dWithIndicesOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ReduceType + self._layout = "NCHW" + + +class MaxPool2dOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ReduceType + self._layout = "NCHW" + + +class ReciprocalOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ElementwiseType + + +class SqrtOp(Op): + def __init__(self) -> None: + super().__init__() + self._op_type = OpType.ElementwiseType diff --git a/frontend/Python/graph/transform/__init__.py b/frontend/Python/graph/transform/__init__.py new file mode 100644 index 0000000000..c4b7ac3d16 --- /dev/null +++ b/frontend/Python/graph/transform/__init__.py @@ -0,0 +1 @@ +from .useless_op_eliminate import maxpool2d_simplify \ No newline at end of file diff --git a/frontend/Python/graph/transform/useless_op_eliminate.py b/frontend/Python/graph/transform/useless_op_eliminate.py new file mode 100644 index 0000000000..1b3f592966 --- 
/dev/null
+++ b/frontend/Python/graph/transform/useless_op_eliminate.py
@@ -0,0 +1,66 @@
+# ===- useless_op_eliminate.py --------------------------------------------------
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ===---------------------------------------------------------------------------
+#
+# Simplify the maxpool2d with getitem op.
+#
+# ===---------------------------------------------------------------------------
+
+from .. import Graph
+from ..operation import *
+
+
+def maxpool2d_simplify(graph: Graph):
+    """
+    Fuse the maxpool op and getitem op to simplify the graph.
+
+    Args:
+        graph (Graph): The Graph to be simplified.
+    """
+    for i, node in enumerate(graph._body):
+        if isinstance(node, MaxPool2dWithIndicesOp):
+            getitem_num = 0
+            for user in node._children:
+                if isinstance(graph.node_table[user], GetItemOp):
+                    getitem_num += 1
+                    getitem_node = graph.node_table[user]
+            if (
+                getitem_num == 1
+                and len(node._children) == 1
+                and getitem_node.args[1] == 0
+            ):
+                new_node = MaxPool2dOp()
+                new_node.name = getitem_node.name
+                for arg in node.args:
+                    new_node.add_argument(arg)
+                for parent in node._parents:
+                    new_node.add_parent(parent)
+                for child in getitem_node._children:
+                    new_node.add_children(child)
+                new_node.tensor_meta["shape"] = getitem_node.tensor_meta[
+                    "shape"
+                ]
+                new_node.tensor_meta["dtype"] = getitem_node.tensor_meta[
+                    "dtype"
+                ]
+                new_node._layout = node._layout
+                del graph.node_table[node.name]
+                del graph.node_table[getitem_node.name]
+                graph.node_table[new_node.name] = new_node
+                del graph._body[i]
+                for j, op in enumerate(graph._body):
+                    if op == getitem_node:
+                        graph._body[j] = new_node
+                        break
diff --git a/frontend/Python/graph/type.py b/frontend/Python/graph/type.py
new file mode 100644
index 0000000000..5e1db3ed8a
--- /dev/null
+++ b/frontend/Python/graph/type.py
@@ -0,0 +1,79 @@
+# ===- type.py -----------------------------------------------------------------
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ===---------------------------------------------------------------------------
+#
+# This is the tensor type of the Buddy Compiler frontend.
+#
+# ===---------------------------------------------------------------------------
+
+from enum import Enum
+
+
+class TensorDType(Enum):
+    """
+    Enum class for declaring tensor data types.
+
+    Members:
+    - Int32: str
+        Represents the 32-bit integer data type.
+    - Int64: str
+        Represents the 64-bit integer data type.
+    - Float16: str
+        Represents the 16-bit floating-point data type.
+    - Float32: str
+        Represents the 32-bit floating-point data type.
+    - Float64: str
+        Represents the 64-bit floating-point data type.
+    - Bool: str
+        Represents the boolean data type.
+    """
+
+    Int32 = "int32"
+    Int64 = "int64"
+    Float16 = "float16"
+    Float32 = "float32"
+    Float64 = "float64"
+    Bool = "bool"
+
+
+class TensorMeta:
+    """
+    Store tensor metadata, including shape and data type, while overlooking raw
+    data.
+
+    Attributes:
+    - shape: tuple
+        Represents the shape of the tensor.
+    - dtype: str
+        Represents the data type of the tensor.
+
+    Methods:
+    - __init__(shape: tuple, dtype: str) -> None:
+        Initializes a new instance of the TensorMeta class with the specified
+        shape and data type.
+
+    Example:
+    meta = TensorMeta(shape=(3, 4), dtype='float32')
+    # Access metadata attributes: meta.shape, meta.dtype
+    """
+
+    def __init__(self, shape, dtype) -> None:
+        """
+        Initialize a new instance of the TensorMeta class.
+
+        Parameters:
+        - shape: tuple
+            Represents the shape of the tensor.
+        - dtype: str
+            Represents the data type of the tensor.
+        """
+        self.shape = shape
+        self.dtype = dtype
diff --git a/frontend/Python/ops/linalg.py b/frontend/Python/ops/linalg.py
index 6a6e161c93..0a22478e13 100644
--- a/frontend/Python/ops/linalg.py
+++ b/frontend/Python/ops/linalg.py
@@ -14,29 +14,70 @@
 #
 # ===---------------------------------------------------------------------------
 #
-# The registry of mappings from Torch node to MLIR linalg dialect operations.
+# The registry of mappings from Buddy Graph to MLIR linalg dialect operations.
 #
 # ===---------------------------------------------------------------------------
 
 from typing import Dict, Tuple, List
 
-import torch
-
 import mlir.ir as ir
 from mlir.dialects import tosa, linalg, arith, tensor, math
 import copy
 import numpy
 import functools
 
+from ..graph import *
+from ..graph.graph import TensorDType
+from .utils import *
+
+
+def add_op(node: AddOp, symbol_table: Dict[Tuple[str, int], ir.Operation]):
+    """
+    Import tensor add operation.
+    From buddy AddOp to MLIR TOSA `add` operation.
+
+    Note: if the second operand is a scalar, this function materializes it as
+    a splat constant tensor before the addition.
+
+    Args:
+        node: Containing information from the input graph node.
+        symbol_table: A dictionary mapping symbols to their corresponding
+        operations.
+
+    Returns:
+        op: The operation representing the result tensor of adding the two
+        input nodes.
+    """
+    input1 = symbol_table.get((str(node.args[0]), 0))
+    dtype = node.tensor_meta["dtype"]
+    mlir_dtype = mlir_element_type_get(dtype)
+    shape = list(node.tensor_meta["shape"])
+    if isinstance(node.args[1], str):
+        input2 = symbol_table.get((str(node.args[1]), 0))
+    else:
+        data = [node.args[1]]
+        input2_shape = numpy.array(data).shape
+        tensor_type = ir.RankedTensorType.get(input2_shape, mlir_dtype)
+        element = mlir_element_attr_get(dtype, node.args[1])
+        attr = ir.DenseElementsAttr.get_splat(tensor_type, element)
+        input2 = arith.ConstantOp(tensor_type, attr).result
+    if input1 is None or input2 is None:
+        return
+    add_result_tensor_type = ir.RankedTensorType.get(shape, mlir_dtype)
+    op = tosa.AddOp(
+        add_result_tensor_type,
+        input1,
+        input2,
+    )
+    return op.result
+
 
 def arange_op(
-    node: torch.fx.Node,
+    node: ArangeOp,
     symbol_table: Dict[Tuple[str, int], ir.Operation],
 ):
     """
     Import tensor arange operation.
-    From PyTorch `aten.arange.default` and `aten.arange.start` operator to MLIR
-    arith `constant` operation.
+    From buddy ArangeOp to MLIR arith `constant` operation.
 
     Note: this function initializes an output tensor according to the input
     range.
@@ -49,51 +90,34 @@ def arange_op(
         op: The operation representing the result tensor of ranging the start
         and end from input node.
""" - if node.target.__name__ == "arange.start": + if len(node.args) == 2: start = int(node.args[0]) end = int(node.args[1]) - stride = int(node.meta["tensor_meta"].stride[0]) - dtype = str(node.meta["tensor_meta"].dtype) - shape = list(node.meta["tensor_meta"].shape) - dtype = ir.IntegerType.get_signless(64) - tensor_type = ir.RankedTensorType.get(shape, dtype) - attr = ir.DenseElementsAttr.get( - numpy.array([i for i in range(start, end, stride)]), - signless=True, - type=tensor_type, - ) - op = arith.ConstantOp(tensor_type, attr) - - elif node.target.__name__ == "arange.default": + else: start = 0 end = int(node.args[0]) - stride = int(node.meta["tensor_meta"].stride[0]) - dtype = str(node.meta["tensor_meta"].dtype) - shape = list(node.meta["tensor_meta"].shape) - dtype = ir.IntegerType.get_signless(64) - tensor_type = ir.RankedTensorType.get(shape, dtype) - attr = ir.DenseElementsAttr.get( - numpy.array([i for i in range(start, end, stride)]), - signless=True, - type=tensor_type, - ) - op = arith.ConstantOp(tensor_type, attr) + stride = 1 + dtype = node.tensor_meta["dtype"] + shape = list(node.tensor_meta["shape"]) + dtype = mlir_element_type_get(dtype) + tensor_type = ir.RankedTensorType.get(shape, dtype) + attr = ir.DenseElementsAttr.get( + numpy.array([i for i in range(start, end, stride)]), + signless=True, + type=tensor_type, + ) + op = arith.ConstantOp(tensor_type, attr) return op def unsqueeze_op( - node: torch.fx.Node, + node: UnsqueezeOp, symbol_table: Dict[Tuple[str, int], ir.Operation], ): """ Import the unsqueeze operation. - From PyTorch `aten.unsqueeze.default` operator to MLIR TOSA `reshape` - operation. - - Note: "unsqueeze" means inserting a new dimension of size 1 at the specified - position. For more information, please refer to - https://pytorch.org/docs/stable/generated/torch.unsqueeze.html + From buddy UnsqueezeOp to MLIR TOSA `reshape` operation. Args: node: Containing information from the input graph node. @@ -118,12 +142,12 @@ def unsqueeze_op( def view_op( - node: torch.fx.Node, + node: ViewOp, symbol_table: Dict[Tuple[str, int], ir.Operation], ): """ Import the tensor view operation. - From PyTorch `aten.view.default` operator to MLIR TOSA `reshape` operation. + From buddy ViewOp to MLIR TOSA `reshape` operation. Note: If the new shape contains one and only one `-1`, the size of the new shape will be inferred automatically. @@ -160,13 +184,12 @@ def view_op( def embedding_op( - node: torch.fx.Node, + node: EmbeddingOp, symbol_table: Dict[Tuple[str, int], ir.Operation], ): """ Import the embedding operation. - From PyTorch `aten.embedding.default` operator to MLIR linalg `generic` - operation. + From buddy EmbeddingOp to MLIR linalg `generic` operation. Note: In this op, input node1's value is as index to get input node2's row slice. 
@@ -180,52 +203,51 @@ def embedding_op( """ input1 = symbol_table.get((str(node.args[0]), 0)) input2 = symbol_table.get((str(node.args[1]), 0)) - output_shape = list(node.meta["tensor_meta"].shape) - dtype = str(node.meta["tensor_meta"].dtype) - if dtype == "torch.float32": - tensor_type = ir.RankedTensorType.get(output_shape, ir.F32Type.get()) - output = tensor.EmptyOp(output_shape, ir.F32Type.get()) - generic_map = ir.AffineMap.get_permutation([0, 1, 2]) - op = linalg.GenericOp( - [tensor_type], - [input2], - [output], - ir.ArrayAttr.get( - [ - ir.AffineMapAttr.get(generic_map.get_submap([0, 1])), - ir.AffineMapAttr.get(generic_map.get_submap([0, 1, 2])), - ] - ), - ir.ArrayAttr.get( - [ir.Attribute.parse("#linalg.iterator_type")] * 3 - ), - ) - block = ir.Block.create_at_start( - op.region, + output_shape = list(node.tensor_meta["shape"]) + dtype = node.tensor_meta["dtype"] + dtype = mlir_element_type_get(dtype) + tensor_type = ir.RankedTensorType.get(output_shape, dtype) + output = tensor.EmptyOp(output_shape, dtype) + generic_map = ir.AffineMap.get_permutation([0, 1, 2]) + op = linalg.GenericOp( + [tensor_type], + [input2], + [output], + ir.ArrayAttr.get( [ - ir.RankedTensorType(input2.type).element_type, - ir.RankedTensorType(output.result.type).element_type, - ], - ) - index1 = arith.IndexCastOp(ir.IndexType.get(), block.arguments[0]) - index2 = linalg.IndexOp(ir._i64Attr(2, None)) - value = tensor.ExtractOp(input1, [index1.result, index2.result]) - block.append(index1) - block.append(index2) - block.append(value) - block.append(linalg.YieldOp([value.result])) + ir.AffineMapAttr.get(generic_map.get_submap([0, 1])), + ir.AffineMapAttr.get(generic_map.get_submap([0, 1, 2])), + ] + ), + ir.ArrayAttr.get( + [ir.Attribute.parse("#linalg.iterator_type")] * 3 + ), + ) + block = ir.Block.create_at_start( + op.region, + [ + ir.RankedTensorType(input2.type).element_type, + ir.RankedTensorType(output.result.type).element_type, + ], + ) + index1 = arith.IndexCastOp(ir.IndexType.get(), block.arguments[0]) + index2 = linalg.IndexOp(ir._i64Attr(2, None)) + value = tensor.ExtractOp(input1, [index1.result, index2.result]) + block.append(index1) + block.append(index2) + block.append(value) + block.append(linalg.YieldOp([value.result])) return op def ones_op( - node: torch.fx.Node, + node: OnesOp, symbol_table: Dict[Tuple[str, int], ir.Operation], ): """ Import the tensor ones operation. - From PyTorch `aten.ones.default` operator to MLIR arith `constant` - operation. + From buddy OnesOp to MLIR arith `constant` operation. Note: This op, input node1's value is as index to get input node2's row slice. @@ -238,30 +260,21 @@ def ones_op( op: The operation return the arith.constant op. 
""" output_shape = list(node.args[0]) - dtype = str(node.meta["tensor_meta"].dtype) - if dtype == "torch.bool": - element = ir.BoolAttr.get(1) - tensor_type = ir.RankedTensorType.get(output_shape, element.type) - attr = ir.DenseElementsAttr.get_splat(tensor_type, element) - elif dtype == "torch.int64": - dtype = ir.IntegerType.get_signless(64) - tensor_type = ir.RankedTensorType.get(output_shape, dtype) - attr = ir.DenseElementsAttr.get( - numpy.ones(output_shape), signless=True, type=tensor_type - ) + dtype = node.tensor_meta["dtype"] + element = mlir_element_attr_get(dtype, 1) + tensor_type = ir.RankedTensorType.get(output_shape, element.type) + attr = ir.DenseElementsAttr.get_splat(tensor_type, element) op = arith.ConstantOp(tensor_type, attr) return op - def full_op( - node: torch.fx.Node, + node: FullOp, symbol_table: Dict[Tuple[str, int], ir.Operation], ): """ Import the tensor full operation. - From PyTorch `aten.full.default` operator to MLIR arith `constant` - operation. + From buddy FullOp to MLIR arith `constant` operation. Note: This op, input node1's value is the shape of output tensor, input node2's value is the value of all elements in output tensor. @@ -275,39 +288,22 @@ def full_op( """ output_shape = list(node.args[0]) value = node.args[1] - dtype = str(node.meta["tensor_meta"].dtype) - if dtype == "torch.bool": - element = ir.BoolAttr.get(bool(value)) - tensor_type = ir.RankedTensorType.get(output_shape, element.type) - attr = ir.DenseElementsAttr.get_splat(tensor_type, element) - elif dtype == "torch.int64": - dtype = ir.IntegerType.get_signless(64) - tensor_type = ir.RankedTensorType.get(output_shape, dtype) - attr = ir.DenseElementsAttr.get( - numpy.full(output_shape, value, dtype=numpy.int64), - signless=True, - type=tensor_type, - ) - elif dtype == "torch.float32": - dtype = ir.F32Type.get() - tensor_type = ir.RankedTensorType.get(output_shape, dtype) - attr = ir.DenseElementsAttr.get( - numpy.full(output_shape, value, dtype=numpy.float32), - signless=True, - type=tensor_type, - ) + dtype = node.tensor_meta["dtype"] + element = mlir_element_attr_get(dtype, value) + tensor_type = ir.RankedTensorType.get(output_shape, element.type) + attr = ir.DenseElementsAttr.get_splat(tensor_type, element) op = arith.ConstantOp(tensor_type, attr) return op def lt_op( - node: torch.fx.Node, + node: LessThanOp, symbol_table: Dict[Tuple[str, int], ir.Operation], ): """ Import the tensor less than operation. - From PyTorch `aten.lt.Tensor` operator to MLIR arith `constant` operation. + From buddy LessThanOp to MLIR arith `constant` operation. Note: This op, campare two input nodes, and output bool tensor to represent compare result. 
@@ -321,93 +317,86 @@ def lt_op( """ input1 = symbol_table.get((str(node.args[0]), 0)) input2 = symbol_table.get((str(node.args[1]), 0)) - output_shape = list(node.meta["tensor_meta"].shape) - dtype = str(node.meta["tensor_meta"].dtype) + output_shape = list(node.tensor_meta["shape"]) + dtype = node.tensor_meta["dtype"] value = ir.IntegerAttr.get(ir.IntegerType.get_signless(64), 2) shp1 = list(ir.RankedTensorType(ir.Value(input1).type).shape) shp2 = list(ir.RankedTensorType(ir.Value(input2).type).shape) - if dtype == "torch.bool": - tensor_type = ir.RankedTensorType.get( - output_shape, ir.IntegerType.get_signless(1) - ) - output = tensor.EmptyOp(output_shape, ir.IntegerType.get_signless(1)) - if len(shp1) < len(shp2): - if int(shp1[-1]) > 1 and shp2[-1] == 1: - generic_map = ir.AffineMap.get_permutation( - [i for i in range(len(shp2) + 1)] - ) - op = linalg.GenericOp( - [tensor_type], - [input1, input2], - [output], - ir.ArrayAttr.get( - [ - ir.AffineMapAttr.get( - generic_map.get_submap( - [ - i - for i in range( - len(shp2) - len(shp1), len(shp2) - ) - ] - ) - ), - ir.AffineMapAttr.get( - generic_map.get_submap( - [i for i in range(0, len(shp2) - 1)] - + [len(shp2)] - ) - ), - ir.AffineMapAttr.get( - generic_map.get_submap( - [i for i in range(0, len(shp2))] - ) - ), - ] - ), - ir.ArrayAttr.get( - [ir.Attribute.parse("#linalg.iterator_type")] - * len(shp2) - + [ - ir.Attribute.parse( - "#linalg.iterator_type" + dtype = mlir_element_type_get(dtype) + tensor_type = ir.RankedTensorType.get(output_shape, dtype) + output = tensor.EmptyOp(output_shape, dtype) + if len(shp1) < len(shp2): + if int(shp1[-1]) > 1 and shp2[-1] == 1: + generic_map = ir.AffineMap.get_permutation( + [i for i in range(len(shp2) + 1)] + ) + op = linalg.GenericOp( + [tensor_type], + [input1, input2], + [output], + ir.ArrayAttr.get( + [ + ir.AffineMapAttr.get( + generic_map.get_submap( + [ + i + for i in range( + len(shp2) - len(shp1), len(shp2) + ) + ] ) - ] - ), + ), + ir.AffineMapAttr.get( + generic_map.get_submap( + [i for i in range(0, len(shp2) - 1)] + + [len(shp2)] + ) + ), + ir.AffineMapAttr.get( + generic_map.get_submap( + [i for i in range(0, len(shp2))] + ) + ), + ] + ), + ir.ArrayAttr.get( + [ir.Attribute.parse("#linalg.iterator_type")] + * len(shp2) + + [ir.Attribute.parse("#linalg.iterator_type")] + ), + ) + block = ir.Block.create_at_start( + op.region, + [ + ir.RankedTensorType(input2.type).element_type, + ir.RankedTensorType(input2.type).element_type, + dtype, + ], + ) + if ( + str(ir.RankedTensorType(input2.type).element_type).find("i") + != -1 + ): + cmpop = arith.CmpIOp( + value, block.arguments[0], block.arguments[1] ) - block = ir.Block.create_at_start( - op.region, - [ - ir.RankedTensorType(input2.type).element_type, - ir.RankedTensorType(input2.type).element_type, - ir.IntegerType.get_signless(1), - ], + else: + cmpop = arith.CmpFOp( + value, block.arguments[0], block.arguments[1] ) - if ( - str(ir.RankedTensorType(input2.type).element_type).find("i") - != -1 - ): - cmpop = arith.CmpIOp( - value, block.arguments[0], block.arguments[1] - ) - else: - cmpop = arith.CmpFOp( - value, block.arguments[0], block.arguments[1] - ) - block.append(cmpop) - block.append(linalg.YieldOp([cmpop.result])) + block.append(cmpop) + block.append(linalg.YieldOp([cmpop.result])) return op def masked_fill_op( - node: torch.fx.Node, + node: MaskedFillOp, symbol_table: Dict[Tuple[str, int], ir.Operation], ): """ Import the tensor masked fill operation. 
- From PyTorch `aten.masked_fill.Scalar` operator to MLIR linalg `generic` - operation. + From buddy MaskedFillOp to MLIR linalg `generic` operation. Note: This op, input node2 is a bool tensor. Select input node1's value or input node3's value by true or false in input node2's value. @@ -423,71 +412,67 @@ def masked_fill_op( input2 = symbol_table.get((str(node.args[1]), 0)) if input1 is None or input2 is None: return - if str(node.args[0].meta["tensor_meta"].dtype) == "torch.float32": - value = float(node.args[2]) - attr = ir.FloatAttr.get(ir.F32Type.get(), value) - value = arith.ConstantOp(ir.F32Type.get(), attr) - output_shape = list(node.meta["tensor_meta"].shape) - dtype = str(node.meta["tensor_meta"].dtype) - if dtype == "torch.float32": - tensor_type = ir.RankedTensorType.get(output_shape, ir.F32Type.get()) - output = tensor.EmptyOp(output_shape, ir.F32Type.get()) - generic_map = ir.AffineMap.get_permutation( - [i for i in range(len(output_shape))] - ) - op = linalg.GenericOp( - [tensor_type], - [input1, input2], - [output], - ir.ArrayAttr.get( - [ - ir.AffineMapAttr.get( - generic_map.get_submap( - [i for i in range(len(output_shape))] - ) - ), - ir.AffineMapAttr.get( - generic_map.get_submap( - [i for i in range(len(output_shape))] - ) - ), - ir.AffineMapAttr.get( - generic_map.get_submap( - [i for i in range(len(output_shape))] - ) - ), - ] - ), - ir.ArrayAttr.get( - [ir.Attribute.parse("#linalg.iterator_type")] - * len(output_shape) - ), - ) - block = ir.Block.create_at_start( - op.region, + dtype = node.tensor_meta["dtype"] + value = node.args[2] + attr = mlir_element_attr_get(dtype, value) + dtype = mlir_element_type_get(dtype) + value = arith.ConstantOp(dtype, attr) + output_shape = list(node.tensor_meta["shape"]) + tensor_type = ir.RankedTensorType.get(output_shape, dtype) + output = tensor.EmptyOp(output_shape, dtype) + generic_map = ir.AffineMap.get_permutation( + [i for i in range(len(output_shape))] + ) + op = linalg.GenericOp( + [tensor_type], + [input1, input2], + [output], + ir.ArrayAttr.get( [ - ir.RankedTensorType(input1.type).element_type, - ir.RankedTensorType(input2.type).element_type, - ir.RankedTensorType(output.result.type).element_type, - ], - ) - select_op = arith.SelectOp( - block.arguments[1], value, block.arguments[0] - ) - block.append(select_op) - block.append(linalg.YieldOp([select_op.result])) + ir.AffineMapAttr.get( + generic_map.get_submap( + [i for i in range(len(output_shape))] + ) + ), + ir.AffineMapAttr.get( + generic_map.get_submap( + [i for i in range(len(output_shape))] + ) + ), + ir.AffineMapAttr.get( + generic_map.get_submap( + [i for i in range(len(output_shape))] + ) + ), + ] + ), + ir.ArrayAttr.get( + [ir.Attribute.parse("#linalg.iterator_type")] + * len(output_shape) + ), + ) + block = ir.Block.create_at_start( + op.region, + [ + ir.RankedTensorType(input1.type).element_type, + ir.RankedTensorType(input2.type).element_type, + ir.RankedTensorType(output.result.type).element_type, + ], + ) + select_op = arith.SelectOp(block.arguments[1], value, block.arguments[0]) + block.append(select_op) + block.append(linalg.YieldOp([select_op.result])) return op def slice_op( - node: torch.fx.Node, + node: SliceOp, symbol_table: Dict[Tuple[str, int], ir.Operation], ): """ Import the tensor slice operation. - From PyTorch `aten.slice.Tensor` operator to MLIR tensor `extract_slice` - operation. + From buddy SliceOp to MLIR tensor `extract_slice` operation. Note: This op, get the slice of input node1. 
Args: @@ -514,18 +499,14 @@ def slice_op( offset = [0 for x in input_shape] offset[dim] = start offset_attr = ir._denseI64ArrayAttr(offset, None) - output_shape = list(node.meta["tensor_meta"].shape) + output_shape = list(node.tensor_meta["shape"]) size_attr = ir._denseI64ArrayAttr(output_shape, None) stride = [1 for x in output_shape] stride[dim] = step stride_attr = ir._denseI64ArrayAttr(stride, None) - dtype = str(node.meta["tensor_meta"].dtype) - if dtype == "torch.float32": - tensor_type = ir.RankedTensorType.get(output_shape, ir.F32Type.get()) - if dtype == "torch.bool": - tensor_type = ir.RankedTensorType.get( - output_shape, ir.IntegerType.get_signless(1) - ) + dtype = node.tensor_meta["dtype"] + dtype = mlir_element_type_get(dtype) + tensor_type = ir.RankedTensorType.get(output_shape, dtype) op = tensor.ExtractSliceOp( tensor_type, input1, [], [], [], offset_attr, size_attr, stride_attr @@ -535,13 +516,12 @@ def slice_op( def expand_op( - node: torch.fx.Node, + node: ExpandOp, symbol_table: Dict[Tuple[str, int], ir.Operation], ): """ Import the tensor expand operation. - From PyTorch `aten.expand.default` operator to MLIR tensor `extract_slice` - operation. + From buddy ExpandOp to MLIR tensor `extract_slice` operation. Note: This op, based on expand shape, create a new tensor and extract slice from origin tensor. @@ -559,26 +539,15 @@ def expand_op( if input1 is None: return input_shape = ir.RankedTensorType(input1.type).shape - output_shape = list(node.meta["tensor_meta"].shape) - dtype = str(node.meta["tensor_meta"].dtype) - if dtype == "torch.bool": - empty_tensor = tensor.EmptyOp( - output_shape, ir.IntegerType.get_signless(1) - ) - elif dtype == "torch.float32": - empty_tensor = tensor.EmptyOp(output_shape, ir.F32Type.get()) + output_shape = list(node.tensor_meta["shape"]) + dtype = node.tensor_meta["dtype"] + dtype = mlir_element_type_get(dtype) + empty_tensor = tensor.EmptyOp(output_shape, dtype) if list(input_shape) == list(node.args[1]): offset_attr = ir._denseI64ArrayAttr([0 for x in input_shape], None) size_attr = ir._denseI64ArrayAttr(output_shape, None) stride_attr = ir._denseI64ArrayAttr([1 for x in input_shape], None) - if dtype == "torch.bool": - tensor_type = ir.RankedTensorType.get( - output_shape, ir.IntegerType.get_signless(1) - ) - elif dtype == "torch.float32": - tensor_type = ir.RankedTensorType.get( - output_shape, ir.F32Type.get() - ) + tensor_type = ir.RankedTensorType.get(output_shape, dtype) extract_tensor = tensor.ExtractSliceOp( tensor_type, input1, [], [], [], offset_attr, size_attr, stride_attr ) @@ -602,16 +571,10 @@ def expand_op( [1] * (i + 1) + [x for x in output_shape[i + 1 :]], None ) stride_attr = ir._denseI64ArrayAttr([1] * len(offset), None) - if dtype == "torch.bool": - tensor_type = ir.RankedTensorType.get( - [1] * (i + 1) + [x for x in output_shape[i + 1 :]], - ir.IntegerType.get_signless(1), - ) - elif dtype == "torch.float32": - tensor_type = ir.RankedTensorType.get( - [1] * (i + 1) + [x for x in output_shape[i + 1 :]], - ir.F32Type.get(), - ) + tensor_type = ir.RankedTensorType.get( + [1] * (i + 1) + [x for x in output_shape[i + 1 :]], + dtype, + ) extract_tensor = tensor.ExtractSliceOp( tensor_type, input1, @@ -639,12 +602,12 @@ def expand_op( def to_copy_op( - node: torch.fx.Node, + node: ToCopyOp, symbol_table: Dict[Tuple[str, int], ir.Operation], ): """ Import the tensor copy operation. - From PyTorch `aten._to_copy.default` operator to MLIR linalg `generic` + From buddy ToCopyOp to MLIR linalg `generic` operation. 
Note: This op, will convert input node's value type, such as float32 to @@ -660,10 +623,10 @@ def to_copy_op( input1 = symbol_table.get((str(node.args[0]), 0)) if input1 is None: return - output_shape = list(node.meta["tensor_meta"].shape) - dtype = str(node.meta["tensor_meta"].dtype) + output_shape = list(node.tensor_meta["shape"]) + dtype = node.tensor_meta["dtype"] - if dtype == "torch.bool": + if dtype == TensorDType.Bool: if str(ir.RankedTensorType(input1.type).element_type) == "f32": tensor_type = ir.RankedTensorType.get( output_shape, ir.IntegerType.get_signless(1) @@ -713,7 +676,7 @@ def to_copy_op( block.append(fptosi_op) block.append(trunc_op) block.append(linalg.YieldOp([trunc_op.result])) - elif dtype == "torch.float32": + elif dtype == TensorDType.Float32: if str(ir.RankedTensorType(input1.type).element_type) == "i1": tensor_type = ir.RankedTensorType.get( output_shape, ir.F32Type.get() @@ -764,12 +727,12 @@ def to_copy_op( def rsub_op( - node: torch.fx.Node, + node: RsubOp, symbol_table: Dict[Tuple[str, int], ir.Operation], ): """ Import the tensor rsub operation. - From PyTorch `aten.rsub.Scalar` operator to MLIR linalg `generic` operation. + From buddy RsubOp to MLIR linalg `generic` operation. Note: This op, compute input node1 rsub input node2 Args: @@ -782,20 +745,94 @@ def rsub_op( """ input1 = symbol_table.get((str(node.args[0]), 0)) value = node.args[1] - output_shape = list(node.meta["tensor_meta"].shape) - dtype = str(node.meta["tensor_meta"].dtype) - if not isinstance(value, torch.fx.Node): - if dtype == "torch.float32": - value = arith.ConstantOp( - ir.F32Type.get(), ir.FloatAttr.get(ir.F32Type.get(), value) - ) + output_shape = list(node.tensor_meta["shape"]) + dtype = node.tensor_meta["dtype"] + mlir_dtype = mlir_element_type_get(dtype) + if not isinstance(value, str): + value = arith.ConstantOp( + mlir_dtype, mlir_element_attr_get(dtype, value) + ) + generic_map = ir.AffineMap.get_permutation( + [i for i in range(len(output_shape))] + ) + tensor_type = ir.RankedTensorType.get(output_shape, mlir_dtype) + output = tensor.EmptyOp(output_shape, mlir_dtype) + op = linalg.GenericOp( + [tensor_type], + [input1], + [output], + ir.ArrayAttr.get( + [ + ir.AffineMapAttr.get( + generic_map.get_submap( + [i for i in range(len(output_shape))] + ) + ), + ir.AffineMapAttr.get( + generic_map.get_submap( + [i for i in range(len(output_shape))] + ) + ), + ] + ), + ir.ArrayAttr.get( + [ir.Attribute.parse("#linalg.iterator_type")] + * len(output_shape) + ), + ) + block = ir.Block.create_at_start( + op.region, + [ + ir.RankedTensorType(input1.type).element_type, + ir.RankedTensorType(output.result.type).element_type, + ], + ) + if str(ir.RankedTensorType(input1.type).element_type).find("i") != -1: + sub_op = arith.SubIOp(value.result, block.arguments[0]) + else: + sub_op = arith.SubFOp(value.result, block.arguments[0]) + block.append(sub_op) + block.append(linalg.YieldOp([sub_op.result])) + + return op + + +def pow_op( + node: PowOp, + symbol_table: Dict[Tuple[str, int], ir.Operation], +): + """ + Import the tensor copy operation. + From buddy PowOp to MLIR linalg `generic` + operation. + + Note: This op, compute input node's power result. + Args: + node: Containing information from the input graph node. + symbol_table: A dictionary mapping symbols to their corresponding + operations. + + Returns: + op: The operation return the linalg.generic op. 
+ """ + input1 = symbol_table.get((str(node.args[0]), 0)) + if input1 is None: + return + value = node.args[1] + output_shape = list(node.tensor_meta["shape"]) + dtype = node.tensor_meta["dtype"] + dtype = mlir_element_type_get(dtype) + if not isinstance(value, str): + if abs(int(value) - float(value)) < 1e-6: generic_map = ir.AffineMap.get_permutation( [i for i in range(len(output_shape))] ) - tensor_type = ir.RankedTensorType.get( - output_shape, ir.F32Type.get() + tensor_type = ir.RankedTensorType.get(output_shape, dtype) + output = tensor.EmptyOp(output_shape, dtype) + value = arith.ConstantOp( + ir.IntegerType.get_signless(32), + ir.IntegerAttr.get(ir.IntegerType.get_signless(32), value), ) - output = tensor.EmptyOp(output_shape, ir.F32Type.get()) op = linalg.GenericOp( [tensor_type], [input1], @@ -826,23 +863,28 @@ def rsub_op( ir.RankedTensorType(output.result.type).element_type, ], ) - subf_op = arith.SubFOp(value.result, block.arguments[0]) - block.append(subf_op) - block.append(linalg.YieldOp([subf_op.result])) + if ( + str(ir.RankedTensorType(input1.type).element_type).find("i") + != -1 + ): + powi_op = math.IPowIOp(block.arguments[0], value.result) + else: + powi_op = math.FPowIOp(block.arguments[0], value.result) + block.append(powi_op) + block.append(linalg.YieldOp([powi_op.result])) return op -def pow_op( - node: torch.fx.Node, +def mean_op( + node: MeanOp, symbol_table: Dict[Tuple[str, int], ir.Operation], ): """ Import the tensor copy operation. - From PyTorch `aten.pow.Tensor_Scalar` operator to MLIR linalg `generic` - operation. + From buddy MeanOp to MLIR linalg `generic` operation. - Note: This op, compute input node's power result. + Note: This op, compute input node's mean result in a specified dim. Args: node: Containing information from the input graph node. 
         symbol_table: A dictionary mapping symbols to their corresponding
@@ -854,160 +896,91 @@ def pow_op(
         operations.
 
     Returns:
         op: The operation return the linalg.generic op.
     """
     input1 = symbol_table.get((str(node.args[0]), 0))
     if input1 is None:
         return
-    value = node.args[1]
-    output_shape = list(node.meta["tensor_meta"].shape)
-    dtype = str(node.meta["tensor_meta"].dtype)
-    if not isinstance(value, torch.fx.Node):
-        if dtype == "torch.float32":
+    dims = list(node.args[1])
+    keep_dim = bool(node.args[2])
+    output_shape = list(node.tensor_meta["shape"])
+    dtype = node.tensor_meta["dtype"]
+    mlir_dtype = mlir_element_type_get(dtype)
+    tensor_type = ir.RankedTensorType.get(output_shape, mlir_dtype)
+    element = mlir_element_attr_get(dtype, 0.0)
+    attr = ir.DenseElementsAttr.get_splat(tensor_type, element)
+    output = arith.ConstantOp(tensor_type, attr)
+    assert len(dims) == 1
+    for dim in dims:
+        if dim < 0:
+            dim = len(list(ir.RankedTensorType(input1.type).shape)) + dim
+        if keep_dim:
             generic_map = ir.AffineMap.get_permutation(
-                [i for i in range(len(output_shape))]
+                [i for i in range(len(output_shape) + 1)]
             )
-            tensor_type = ir.RankedTensorType.get(
-                output_shape, ir.F32Type.get()
+            tensor_type = ir.RankedTensorType.get(output_shape, mlir_dtype)
+            output_map = [i for i in range(len(output_shape))]
+            output_map[dim] = len(output_shape)
+            loop_type = [
+                ir.Attribute.parse("#linalg.iterator_type<parallel>")
+            ] * (len(output_shape) + 1)
+            loop_type[dim] = ir.Attribute.parse(
+                "#linalg.iterator_type<reduction>"
             )
-            output = tensor.EmptyOp(output_shape, ir.F32Type.get())
-            if abs(int(value) - float(value)) < 1e-6:
-                value = arith.ConstantOp(
-                    ir.IntegerType.get_signless(32),
-                    ir.IntegerAttr.get(ir.IntegerType.get_signless(32), value),
-                )
-                op = linalg.GenericOp(
-                    [tensor_type],
-                    [input1],
-                    [output],
-                    ir.ArrayAttr.get(
-                        [
-                            ir.AffineMapAttr.get(
-                                generic_map.get_submap(
-                                    [i for i in range(len(output_shape))]
-                                )
-                            ),
-                            ir.AffineMapAttr.get(
-                                generic_map.get_submap(
-                                    [i for i in range(len(output_shape))]
-                                )
-                            ),
-                        ]
-                    ),
-                    ir.ArrayAttr.get(
-                        [ir.Attribute.parse("#linalg.iterator_type<parallel>")]
-                        * len(output_shape)
-                    ),
-                )
-                block = ir.Block.create_at_start(
-                    op.region,
-                    [
-                        ir.RankedTensorType(input1.type).element_type,
-                        ir.RankedTensorType(output.result.type).element_type,
-                    ],
-                )
-                fpowi_op = math.FPowIOp(block.arguments[0], value.result)
-                block.append(fpowi_op)
-                block.append(linalg.YieldOp([fpowi_op.result]))
-
-    return op
-
-
-def mean_op(
-    node: torch.fx.Node,
-    symbol_table: Dict[Tuple[str, int], ir.Operation],
-):
-    """
-    Import the tensor copy operation.
-    From PyTorch `aten.mean.dim` operator to MLIR linalg `generic` operation.
-
-    Note: This op, compute input node's mean result in a specified dim.
-    Args:
-        node: Containing information from the input graph node.
-        symbol_table: A dictionary mapping symbols to their corresponding
-        operations.
-
-    Returns:
-        op: The operation return the linalg.generic op.
- """ - input1 = symbol_table.get((str(node.args[0]), 0)) - if input1 is None: - return - dims = list(node.args[1]) - keep_dim = bool(node.args[2]) - output_shape = list(node.meta["tensor_meta"].shape) - dtype = str(node.meta["tensor_meta"].dtype) - if dtype == "torch.float32": - tensor_type = ir.RankedTensorType.get(output_shape, ir.F32Type.get()) - element = ir.FloatAttr.get(ir.F32Type.get(), 0.0) - attr = ir.DenseElementsAttr.get_splat(tensor_type, element) - output = arith.ConstantOp(tensor_type, attr) - - assert len(dims) == 1 - - for dim in dims: - if dim == -1: - dim = len(list(ir.RankedTensorType(input1.type).shape)) - 1 - if keep_dim: - generic_map = ir.AffineMap.get_permutation( - [i for i in range(len(output_shape) + 1)] - ) - tensor_type = ir.RankedTensorType.get( - output_shape, ir.F32Type.get() - ) - output_map = [i for i in range(len(output_shape))] - output_map[dim] = len(output_shape) - loop_type = [ - ir.Attribute.parse("#linalg.iterator_type") - ] * (len(output_shape) + 1) - loop_type[dim] = ir.Attribute.parse( - "#linalg.iterator_type" - ) - op = linalg.GenericOp( - [tensor_type], - [input1], - [output], - ir.ArrayAttr.get( - [ - ir.AffineMapAttr.get( - generic_map.get_submap( - [i for i in range(len(output_shape))] - ) - ), - ir.AffineMapAttr.get( - generic_map.get_submap(output_map) - ), - ] - ), - ir.ArrayAttr.get(loop_type), - ) - block = ir.Block.create_at_start( - op.region, + op = linalg.GenericOp( + [tensor_type], + [input1], + [output], + ir.ArrayAttr.get( [ - ir.RankedTensorType(input1.type).element_type, - ir.RankedTensorType(output.result.type).element_type, - ], + ir.AffineMapAttr.get( + generic_map.get_submap( + [i for i in range(len(output_shape))] + ) + ), + ir.AffineMapAttr.get( + generic_map.get_submap(output_map) + ), + ] + ), + ir.ArrayAttr.get(loop_type), + ) + block = ir.Block.create_at_start( + op.region, + [ + ir.RankedTensorType(input1.type).element_type, + ir.RankedTensorType(output.result.type).element_type, + ], + ) + value = arith.ConstantOp( + mlir_dtype, + mlir_element_attr_get( + dtype, list(ir.RankedTensorType(input1.type).shape)[dim] + ), + ) + if ( + str(ir.RankedTensorType(input1.type).element_type).find("i") + != -1 + ): + block_div_op = arith.DivSIOp(block.arguments[0], value.result) + block_add_op = arith.AddIOp( + block_div_op.result, block.arguments[1] ) - value = arith.ConstantOp( - ir.F32Type.get(), - ir.FloatAttr.get( - ir.F32Type.get(), - list(ir.RankedTensorType(input1.type).shape)[dim], - ), + else: + block_div_op = arith.DivFOp(block.arguments[0], value.result) + block_add_op = arith.AddFOp( + block_div_op.result, block.arguments[1] ) - divf_op = arith.DivFOp(block.arguments[0], value.result) - addf_op = arith.AddFOp(divf_op.result, block.arguments[1]) - block.append(value) - block.append(divf_op) - block.append(addf_op) - block.append(linalg.YieldOp([addf_op.result])) + block.append(value) + block.append(block_div_op) + block.append(block_add_op) + block.append(linalg.YieldOp([block_add_op.result])) return op def rsqrt_op( - node: torch.fx.Node, + node: RsqrtOp, symbol_table: Dict[Tuple[str, int], ir.Operation], ): """ Import the tensor rsqrt operation. - From PyTorch `aten.rsqrt.default` operator to MLIR linalg `generic` - operation. + From buddy RsqrtOp to MLIR linalg `generic` operation. Note: This op, compute input node's rsqrt result. 
Args: @@ -1023,59 +996,58 @@ def rsqrt_op( if input1 is None: return - output_shape = list(node.meta["tensor_meta"].shape) - dtype = str(node.meta["tensor_meta"].dtype) - - if dtype == "torch.float32": - tensor_type = ir.RankedTensorType.get(output_shape, ir.F32Type.get()) - output = tensor.EmptyOp(output_shape, ir.F32Type.get()) - generic_map = ir.AffineMap.get_permutation( - [i for i in range(len(output_shape))] - ) - op = linalg.GenericOp( - [tensor_type], - [input1], - [output], - ir.ArrayAttr.get( - [ - ir.AffineMapAttr.get( - generic_map.get_submap( - [i for i in range(len(output_shape))] - ) - ), - ir.AffineMapAttr.get( - generic_map.get_submap( - [i for i in range(len(output_shape))] - ) - ), - ] - ), - ir.ArrayAttr.get( - [ir.Attribute.parse("#linalg.iterator_type")] - * len(output_shape) - ), - ) - block = ir.Block.create_at_start( - op.region, + output_shape = list(node.tensor_meta["shape"]) + dtype = node.tensor_meta["dtype"] + mlir_dtype = mlir_element_type_get(dtype) + tensor_type = ir.RankedTensorType.get(output_shape, mlir_dtype) + output = tensor.EmptyOp(output_shape, mlir_dtype) + generic_map = ir.AffineMap.get_permutation( + [i for i in range(len(output_shape))] + ) + op = linalg.GenericOp( + [tensor_type], + [input1], + [output], + ir.ArrayAttr.get( [ - ir.RankedTensorType(input1.type).element_type, - ir.RankedTensorType(output.result.type).element_type, - ], - ) - math_rsqrt_op = math.RsqrtOp(block.arguments[0]) - block.append(math_rsqrt_op) - block.append(linalg.YieldOp([math_rsqrt_op.result])) + ir.AffineMapAttr.get( + generic_map.get_submap( + [i for i in range(len(output_shape))] + ) + ), + ir.AffineMapAttr.get( + generic_map.get_submap( + [i for i in range(len(output_shape))] + ) + ), + ] + ), + ir.ArrayAttr.get( + [ir.Attribute.parse("#linalg.iterator_type")] + * len(output_shape) + ), + ) + block = ir.Block.create_at_start( + op.region, + [ + ir.RankedTensorType(input1.type).element_type, + ir.RankedTensorType(output.result.type).element_type, + ], + ) + math_rsqrt_op = math.RsqrtOp(block.arguments[0]) + block.append(math_rsqrt_op) + block.append(linalg.YieldOp([math_rsqrt_op.result])) return op def mul_op( - node: torch.fx.Node, + node: MulOp, symbol_table: Dict[Tuple[str, int], ir.Operation], ): """ Import the tensor mul operation. - From PyTorch `aten.mul.Tensor` operator to MLIR linalg `generic` operation. + From buddy MulOp to MLIR linalg `generic` operation. Note: This op, compute input node's mul result. Args: @@ -1087,257 +1059,38 @@ def mul_op( op: The operation return the linalg.generic op. 
""" assert len(node.args) == 2 - if isinstance(node.args[0], torch.fx.Node): - input1 = symbol_table.get((str(node.args[0]), 0)) - else: - input1 = node.args[0] - - if isinstance(node.args[1], torch.fx.Node): + input1 = symbol_table.get((str(node.args[0]), 0)) + dtype = node.tensor_meta["dtype"] + mlir_dtype = mlir_element_type_get(dtype) + shape = list(node.tensor_meta["shape"]) + if isinstance(node.args[1], str): input2 = symbol_table.get((str(node.args[1]), 0)) else: - input2 = node.args[1] - + data = [node.args[1]] + input2_shape = numpy.array(data).shape + tensor_type = ir.RankedTensorType.get(input2_shape, mlir_dtype) + element = mlir_element_attr_get(dtype, node.args[1]) + attr = ir.DenseElementsAttr.get_splat(tensor_type, element) + input2 = arith.ConstantOp(tensor_type, attr).result if input1 is None or input2 is None: return - - output_shape = list(node.meta["tensor_meta"].shape) - dtype = str(node.meta["tensor_meta"].dtype) - - if isinstance(node.args[0], torch.fx.Node): - if dtype == "torch.float32": - if not isinstance(node.args[1], torch.fx.Node): - input2 = arith.ConstantOp( - ir.F32Type.get(), ir.FloatAttr.get(ir.F32Type.get(), input2) - ) - tensor_type = ir.RankedTensorType.get( - output_shape, ir.F32Type.get() - ) - output = tensor.EmptyOp(output_shape, ir.F32Type.get()) - generic_map = ir.AffineMap.get_permutation( - [i for i in range(len(output_shape))] - ) - op = linalg.GenericOp( - [tensor_type], - [input1], - [output], - ir.ArrayAttr.get( - [ - ir.AffineMapAttr.get( - generic_map.get_submap( - [i for i in range(len(output_shape))] - ) - ), - ir.AffineMapAttr.get( - generic_map.get_submap( - [i for i in range(len(output_shape))] - ) - ), - ] - ), - ir.ArrayAttr.get( - [ir.Attribute.parse("#linalg.iterator_type")] - * len(output_shape) - ), - ) - block = ir.Block.create_at_start( - op.region, - [ - ir.RankedTensorType(input1.type).element_type, - ir.RankedTensorType(output.result.type).element_type, - ], - ) - mulf_op = arith.MulFOp(block.arguments[0], input2.result) - block.append(mulf_op) - block.append(linalg.YieldOp([mulf_op.result])) - else: - tensor_type = ir.RankedTensorType.get( - output_shape, ir.F32Type.get() - ) - output = tensor.EmptyOp(output_shape, ir.F32Type.get()) - input1_shape = list(ir.RankedTensorType(input1.type).shape) - if input1_shape != output_shape: - dims = [] - for i in range(len(input1_shape) - 1, -1, -1): - if ( - input1_shape[i] - != output_shape[ - len(output_shape) - (len(input1_shape) - i) - ] - ): - dims.append(i) - output1 = tensor.EmptyOp(output_shape, ir.F32Type.get()) - generic_map = ir.AffineMap.get_permutation( - [i for i in range(len(output_shape) + len(dims))] - ) - input1_map = [ - i - for i in range( - len(output_shape) - len(input1_shape), - len(output_shape), - ) - ] - for index, i in enumerate(dims): - input1_map[i] = len(output_shape) + index - input1_map = generic_map.get_submap(input1_map) - input1_op = linalg.GenericOp( - [tensor_type], - [input1], - [output1], - ir.ArrayAttr.get( - [ - ir.AffineMapAttr.get(input1_map), - ir.AffineMapAttr.get( - generic_map.get_submap( - [i for i in range(len(output_shape))] - ) - ), - ] - ), - ir.ArrayAttr.get( - [ - ir.Attribute.parse( - "#linalg.iterator_type" - ) - ] - * len(output_shape) - + [ - ir.Attribute.parse( - "#linalg.iterator_type" - ) - ] - * len(dims) - ), - ) - block = ir.Block.create_at_start( - input1_op.region, - [ - ir.RankedTensorType(input1.type).element_type, - ir.RankedTensorType( - output.result.type - ).element_type, - ], - ) - 
block.append(linalg.YieldOp([block.arguments[0]])) - input1 = input1_op.result - - input2_shape = list(ir.RankedTensorType(input2.type).shape) - if input2_shape != output_shape: - dims = [] - for i in range(len(input2_shape) - 1, -1, -1): - if ( - input2_shape[i] - != output_shape[ - len(output_shape) - (len(input2_shape) - i) - ] - ): - dims.append(i) - output2 = tensor.EmptyOp(output_shape, ir.F32Type.get()) - generic_map = ir.AffineMap.get_permutation( - [i for i in range(len(output_shape) + len(dims))] - ) - input2_map = [ - i - for i in range( - len(output_shape) - len(input2_shape), - len(output_shape), - ) - ] - for index, i in enumerate(dims): - input2_map[i] = len(output_shape) + index - input2_map = generic_map.get_submap(input2_map) - input2_op = linalg.GenericOp( - [tensor_type], - [input2], - [output2], - ir.ArrayAttr.get( - [ - ir.AffineMapAttr.get(input2_map), - ir.AffineMapAttr.get( - generic_map.get_submap( - [i for i in range(len(output_shape))] - ) - ), - ] - ), - ir.ArrayAttr.get( - [ - ir.Attribute.parse( - "#linalg.iterator_type" - ) - ] - * len(output_shape) - + [ - ir.Attribute.parse( - "#linalg.iterator_type" - ) - ] - * len(dims) - ), - ) - block = ir.Block.create_at_start( - input2_op.region, - [ - ir.RankedTensorType(input2.type).element_type, - ir.RankedTensorType( - output.result.type - ).element_type, - ], - ) - block.append(linalg.YieldOp([block.arguments[0]])) - input2 = input2_op.result - generic_map = ir.AffineMap.get_permutation( - [i for i in range(len(output_shape))] - ) - op = linalg.GenericOp( - [tensor_type], - [input1, input2], - [output], - ir.ArrayAttr.get( - [ - ir.AffineMapAttr.get( - generic_map.get_submap( - [i for i in range(len(output_shape))] - ) - ), - ir.AffineMapAttr.get( - generic_map.get_submap( - [i for i in range(len(output_shape))] - ) - ), - ir.AffineMapAttr.get( - generic_map.get_submap( - [i for i in range(len(output_shape))] - ) - ), - ] - ), - ir.ArrayAttr.get( - [ir.Attribute.parse("#linalg.iterator_type")] - * len(output_shape) - ), - ) - block = ir.Block.create_at_start( - op.region, - [ - ir.RankedTensorType(input1.type).element_type, - ir.RankedTensorType(input2.type).element_type, - ir.RankedTensorType(output.result.type).element_type, - ], - ) - mulf_op = arith.MulFOp(block.arguments[0], block.arguments[1]) - block.append(mulf_op) - block.append(linalg.YieldOp([mulf_op.result])) - - return op + mul_result_tensor_type = ir.RankedTensorType.get(shape, mlir_dtype) + op = tosa.MulOp( + mul_result_tensor_type, + input1, + input2, + ir.IntegerAttr.get(ir.IntegerType.get_signless(8), 0), + ) + return op.result def t_op( - node: torch.fx.Node, + node: TOp, symbol_table: Dict[Tuple[str, int], ir.Operation], ): """ Import the tensor tanspose operation. - From PyTorch `aten.t.default` operator to MLIR linalg `generic` operation. + From buddy TransposeOp to MLIR linalg `generic` operation. Note: This op, compute input node's transpose result. 
     Args:
@@ -1353,50 +1106,23 @@ def t_op(
     if input1 is None:
         return
-    input_shape = list(ir.RankedTensorType(input1.type).shape)
-    output_shape = list(node.meta["tensor_meta"].shape)
-    dtype = str(node.meta["tensor_meta"].dtype)
-    if len(input_shape) == 2:
-        if dtype == "torch.float32":
-            tensor_type = ir.RankedTensorType.get(
-                output_shape, ir.F32Type.get()
-            )
-            output = tensor.EmptyOp(output_shape, ir.F32Type.get())
-            generic_map = ir.AffineMap.get_permutation([0, 1])
-            op = linalg.GenericOp(
-                [tensor_type],
-                [input1],
-                [output],
-                ir.ArrayAttr.get(
-                    [
-                        ir.AffineMapAttr.get(generic_map.get_submap([0, 1])),
-                        ir.AffineMapAttr.get(generic_map.get_submap([1, 0])),
-                    ]
-                ),
-                ir.ArrayAttr.get(
-                    [ir.Attribute.parse("#linalg.iterator_type<parallel>")]
-                    * len(output_shape)
-                ),
-            )
-            block = ir.Block.create_at_start(
-                op.region,
-                [
-                    ir.RankedTensorType(input1.type).element_type,
-                    ir.RankedTensorType(output.result.type).element_type,
-                ],
-            )
-            block.append(linalg.YieldOp([block.arguments[0]]))
+    output_shape = list(node.tensor_meta["shape"])
+    dtype = node.tensor_meta["dtype"]
+    mlir_dtype = mlir_element_type_get(dtype)
+    perm = ir._denseI64ArrayAttr([1, 0], None)
+    output = tensor.EmptyOp(output_shape, mlir_dtype)
+    op = linalg.transpose(input=input1, outs=[output], permutation=perm)
 
-    return op
+    return op.result[0]
 
 
 def matmul_op(
-    node: torch.fx.Node,
+    node: MatmulOp,
     symbol_table: Dict[Tuple[str, int], ir.Operation],
 ):
     """
     Import the tensor matmul operation.
-    From PyTorch `aten.mm.default` operator to MLIR linalg `matmul` operation.
+    From Buddy MatmulOp to MLIR linalg `matmul` operation.
 
     Note: This op, compute input node's matrix multiplication result.
     Args:
@@ -1413,25 +1139,24 @@
     if input1 is None or input2 is None:
         return
-    output_shape = list(node.meta["tensor_meta"].shape)
-    dtype = str(node.meta["tensor_meta"].dtype)
-    if dtype == "torch.float32":
-        tensor_type = ir.RankedTensorType.get(output_shape, ir.F32Type.get())
-        f32 = ir.F32Type.get()
-        element = ir.FloatAttr.get(f32, 0.0)
-        attr = ir.DenseElementsAttr.get_splat(tensor_type, element)
-        matmul_result_buffer = arith.ConstantOp(tensor_type, attr).result
-        op = linalg.matmul(input1, input2, outs=[matmul_result_buffer])
+    output_shape = list(node.tensor_meta["shape"])
+    dtype = node.tensor_meta["dtype"]
+    mlir_dtype = mlir_element_type_get(dtype)
+    tensor_type = ir.RankedTensorType.get(output_shape, mlir_dtype)
+    element = mlir_element_attr_get(dtype, 0.0)
+    attr = ir.DenseElementsAttr.get_splat(tensor_type, element)
+    matmul_result_buffer = arith.ConstantOp(tensor_type, attr).result
+    op = linalg.matmul(input1, input2, outs=[matmul_result_buffer])
     return op
 
 
 def transpose_op(
-    node: torch.fx.Node,
+    node: TransposeOp,
     symbol_table: Dict[Tuple[str, int], ir.Operation],
 ):
     """
     Import the tensor transpose operation.
-    From PyTorch `aten.transpose.int` operator to MLIR linalg `generic`
+    From buddy TransposeOp to MLIR linalg `transpose`
     operation.
 
     Note: This op, compute input node's transpose result.
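transpose_op below follows the same pattern t_op just switched to: instead of a hand-written linalg.generic, it computes a permutation and calls the linalg.transpose helper. The permutation for swapping dim1 and dim2 is plain index bookkeeping, shown here in isolation:

```python
# Pure-Python model of the permutation built in transpose_op below.
def swap_permutation(rank: int, dim1: int, dim2: int) -> list:
    perm = list(range(rank))
    perm[dim1], perm[dim2] = perm[dim2], perm[dim1]
    return perm


# Swapping dims 1 and 3 of a rank-4 tensor:
assert swap_permutation(4, 1, 3) == [0, 3, 2, 1]
```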
@@ -1449,51 +1174,25 @@ def transpose_op( return dim1 = int(node.args[1]) dim2 = int(node.args[2]) - output_shape = list(node.meta["tensor_meta"].shape) - dtype = str(node.meta["tensor_meta"].dtype) - if dtype == "torch.float32": - tensor_type = ir.RankedTensorType.get(output_shape, ir.F32Type.get()) - output = tensor.EmptyOp(output_shape, ir.F32Type.get()) - generic_map = ir.AffineMap.get_permutation( - [i for i in range(len(output_shape))] - ) - input1_map = [i for i in range(len(output_shape))] - input1_map[dim1], input1_map[dim2] = input1_map[dim2], input1_map[dim1] - output_map = [i for i in range(len(output_shape))] - op = linalg.GenericOp( - [tensor_type], - [input1], - [output], - ir.ArrayAttr.get( - [ - ir.AffineMapAttr.get(generic_map.get_submap(input1_map)), - ir.AffineMapAttr.get(generic_map.get_submap(output_map)), - ] - ), - ir.ArrayAttr.get( - [ir.Attribute.parse("#linalg.iterator_type")] - * len(output_shape) - ), - ) - block = ir.Block.create_at_start( - op.region, - [ - ir.RankedTensorType(input1.type).element_type, - ir.RankedTensorType(output.result.type).element_type, - ], - ) - block.append(linalg.YieldOp([block.arguments[0]])) + output_shape = list(node.tensor_meta["shape"]) + dtype = node.tensor_meta["dtype"] + mlir_dtype = mlir_element_type_get(dtype) + output_perm = [i for i in range(len(output_shape))] + output_perm[dim2], output_perm[dim1] = output_perm[dim1], output_perm[dim2] + perm = ir._denseI64ArrayAttr(output_perm, None) + output = tensor.EmptyOp(output_shape, mlir_dtype) + op = linalg.transpose(input=input1, outs=[output], permutation=perm) - return op + return op.result[0] def index_op( - node: torch.fx.Node, + node: IndexOp, symbol_table: Dict[Tuple[str, int], ir.Operation], ): """ Import the tensor index operation. - From PyTorch `aten.index.Tensor` operator to MLIR linalg `generic` + From buddy IndexOp to MLIR linalg `generic` operation. Note: This op, get input node slice result by input index. 
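The index_op lowering that follows emits a linalg.generic which walks the index tensors and gathers slices from the input. Its reference semantics, for the `len(indices) < rank(input)` case it handles, can be modeled with numpy:

```python
# Reference semantics of index_op for a single index tensor (numpy model).
import numpy as np

x = np.arange(12, dtype=np.float32).reshape(3, 4)
idx = np.array([2, 0])  # fewer index tensors than input dimensions
out = x[idx]            # gathers rows 2 and 0; trailing dims pass through
assert out.shape == (2, 4)
```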
@@ -1511,70 +1210,66 @@ def index_op( return input1_shape = ir.RankedTensorType(input1.type).shape input2 = node.args[1] - output_shape = list(node.meta["tensor_meta"].shape) - dtype = str(node.meta["tensor_meta"].dtype) + output_shape = list(node.tensor_meta["shape"]) + dtype = node.tensor_meta["dtype"] + mlir_dtype = mlir_element_type_get(dtype) if len(input2) < len(input1_shape): - if dtype == "torch.float32": - tensor_type = ir.RankedTensorType.get( - output_shape, ir.F32Type.get() - ) - output = tensor.EmptyOp(output_shape, ir.F32Type.get()) - loops = ir.RankedTensorType( - symbol_table.get((str(input2[0]), 0)).type - ).shape - generic_map = ir.AffineMap.get_permutation( - [i for i in range(len(output_shape))] + tensor_type = ir.RankedTensorType.get(output_shape, mlir_dtype) + output = tensor.EmptyOp(output_shape, mlir_dtype) + loops = ir.RankedTensorType( + symbol_table.get((str(input2[0]), 0)).type + ).shape + generic_map = ir.AffineMap.get_permutation( + [i for i in range(len(output_shape))] + ) + input_map = [ + ir.AffineMapAttr.get( + generic_map.get_submap([j for j in range(len(loops))]) ) - input_map = [ - ir.AffineMapAttr.get( - generic_map.get_submap([j for j in range(len(loops))]) - ) - for i in range(len(input2)) - ] + [ - ir.AffineMapAttr.get( - generic_map.get_submap( - [j for j in range(len(output_shape))] - ) - ) - ] - operands = [symbol_table.get((str(i), 0)) for i in input2] - op = linalg.GenericOp( - [tensor_type], - operands, - [output], - ir.ArrayAttr.get(input_map), - ir.ArrayAttr.get( - [ir.Attribute.parse("#linalg.iterator_type")] - * len(output_shape) - ), + for i in range(len(input2)) + ] + [ + ir.AffineMapAttr.get( + generic_map.get_submap([j for j in range(len(output_shape))]) ) - arguments = [ - ir.RankedTensorType(i.type).element_type for i in operands - ] + [ir.RankedTensorType(output.result.type).element_type] - block = ir.Block.create_at_start(op.region, arguments) - index = [] - for i in block.arguments[:-1]: - indexcast_op = arith.IndexCastOp(ir.IndexType.get(), i) - block.append(indexcast_op) - index.append(indexcast_op.result) - for i in range(len(loops), len(output_shape) - len(input2) + 1): - index_op = linalg.IndexOp(ir._i64Attr(i, None)) - block.append(index_op) - index.append(index_op.result) - value = tensor.ExtractOp(input1, index) - block.append(value) - block.append(linalg.YieldOp([value.result])) + ] + operands = [symbol_table.get((str(i), 0)) for i in input2] + op = linalg.GenericOp( + [tensor_type], + operands, + [output], + ir.ArrayAttr.get(input_map), + ir.ArrayAttr.get( + [ir.Attribute.parse("#linalg.iterator_type")] + * len(output_shape) + ), + ) + arguments = [ + ir.RankedTensorType(i.type).element_type for i in operands + ] + [ir.RankedTensorType(output.result.type).element_type] + block = ir.Block.create_at_start(op.region, arguments) + index = [] + for i in block.arguments[:-1]: + indexcast_op = arith.IndexCastOp(ir.IndexType.get(), i) + block.append(indexcast_op) + index.append(indexcast_op.result) + for i in range(len(loops), len(output_shape) - len(input2) + 1): + index_op = linalg.IndexOp(ir._i64Attr(i, None)) + block.append(index_op) + index.append(index_op.result) + value = tensor.ExtractOp(input1, index) + block.append(value) + block.append(linalg.YieldOp([value.result])) return op def neg_op( - node: torch.fx.Node, + node: NegOp, symbol_table: Dict[Tuple[str, int], ir.Operation], ): """ Import the tensor neg operation. - From PyTorch `aten.neg.default` operator to MLIR linalg `matmul` operation. 
+ From buddy NegOp to MLIR linalg `negf` operation.

Note: This operation computes the elementwise negation of the input tensor.

Args:
@@ -1589,59 +1284,22 @@ def neg_op(
input1 = symbol_table.get((str(node.args[0]), 0))
if input1 is None:
return
-
- output_shape = list(node.meta["tensor_meta"].shape)
- dtype = str(node.meta["tensor_meta"].dtype)
- if dtype == "torch.float32":
- tensor_type = ir.RankedTensorType.get(output_shape, ir.F32Type.get())
- output = tensor.EmptyOp(output_shape, ir.F32Type.get())
- generic_map = ir.AffineMap.get_permutation(
- [i for i in range(len(output_shape))]
- )
- op = linalg.GenericOp(
- [tensor_type],
- [input1],
- [output],
- ir.ArrayAttr.get(
- [
- ir.AffineMapAttr.get(
- generic_map.get_submap(
- [i for i in range(len(output_shape))]
- )
- ),
- ir.AffineMapAttr.get(
- generic_map.get_submap(
- [i for i in range(len(output_shape))]
- )
- ),
- ]
- ),
- ir.ArrayAttr.get(
- [ir.Attribute.parse("#linalg.iterator_type")]
- * len(output_shape)
- ),
- )
- block = ir.Block.create_at_start(
- op.region,
- [
- ir.RankedTensorType(input1.type).element_type,
- ir.RankedTensorType(output.result.type).element_type,
- ],
- )
- negf_op = arith.NegFOp(block.arguments[0])
- block.append(negf_op)
- block.append(linalg.YieldOp([negf_op.result]))
+ output_shape = list(node.tensor_meta["shape"])
+ dtype = node.tensor_meta["dtype"]
+ mlir_dtype = mlir_element_type_get(dtype)
+ output = tensor.EmptyOp(output_shape, mlir_dtype)
+ op = linalg.negf(input1, outs=output)

return op


def cat_op(
- node: torch.fx.Node,
+ node: CatOp,
symbol_table: Dict[Tuple[str, int], ir.Operation],
):
"""
Import the tensor concatenate operation.
- From PyTorch `aten.cat.default` operator to MLIR tensor `insert_slice`
+ From buddy CatOp to MLIR tensor `insert_slice`
operation.

Note: This operation concatenates the two input tensors along the given
dimension, as the sketch below illustrates.
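A rough NumPy analogy for this insert_slice-based concatenation (illustrative only, not part of the patch): allocate the output buffer, then write each input at an increasing offset along the concatenation dimension.

```python
import numpy as np

def concat_via_insert(a: np.ndarray, b: np.ndarray, dim: int) -> np.ndarray:
    # Allocate the output buffer, mirroring tensor.EmptyOp.
    out_shape = list(a.shape)
    out_shape[dim] += b.shape[dim]
    out = np.empty(out_shape, dtype=a.dtype)
    # Write the first input at offset 0, like the first tensor.InsertSliceOp.
    index = [slice(None)] * a.ndim
    index[dim] = slice(0, a.shape[dim])
    out[tuple(index)] = a
    # Advance the offset along `dim` and write the second input.
    index[dim] = slice(a.shape[dim], out_shape[dim])
    out[tuple(index)] = b
    return out

x = np.ones((2, 3), dtype=np.float32)
y = np.zeros((2, 2), dtype=np.float32)
assert concat_via_insert(x, y, 1).shape == (2, 5)
```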
@@ -1660,52 +1318,52 @@ def cat_op( if input1 is None or input2 is None: return - output_shape = list(node.meta["tensor_meta"].shape) + output_shape = list(node.tensor_meta["shape"]) if dim < 0: dim = len(output_shape) + dim - dtype = str(node.meta["tensor_meta"].dtype) - if dtype == "torch.float32": - output = tensor.EmptyOp(output_shape, ir.F32Type.get()) - offset = [0 for x in output_shape] - offset_attr = ir._denseI64ArrayAttr(offset, None) - input1_shape = ir.RankedTensorType(input1.type).shape - size_attr = ir._denseI64ArrayAttr(input1_shape, None) - stride_attr = ir._denseI64ArrayAttr([1] * len(offset), None) - insert_input1 = tensor.InsertSliceOp( - input1, - output.result, - [], - [], - [], - offset_attr, - size_attr, - stride_attr, - ) - offset[dim] += input1_shape[dim] - offset_attr = ir._denseI64ArrayAttr(offset, None) - input2_shape = ir.RankedTensorType(input2.type).shape - size_attr = ir._denseI64ArrayAttr(input2_shape, None) - insert_input2 = tensor.InsertSliceOp( - input2, - insert_input1.result, - [], - [], - [], - offset_attr, - size_attr, - stride_attr, - ) + dtype = node.tensor_meta["dtype"] + mlir_dtype = mlir_element_type_get(dtype) + output = tensor.EmptyOp(output_shape, mlir_dtype) + offset = [0 for x in output_shape] + offset_attr = ir._denseI64ArrayAttr(offset, None) + input1_shape = ir.RankedTensorType(input1.type).shape + size_attr = ir._denseI64ArrayAttr(input1_shape, None) + stride_attr = ir._denseI64ArrayAttr([1] * len(offset), None) + insert_input1 = tensor.InsertSliceOp( + input1, + output.result, + [], + [], + [], + offset_attr, + size_attr, + stride_attr, + ) + offset[dim] += input1_shape[dim] + offset_attr = ir._denseI64ArrayAttr(offset, None) + input2_shape = ir.RankedTensorType(input2.type).shape + size_attr = ir._denseI64ArrayAttr(input2_shape, None) + insert_input2 = tensor.InsertSliceOp( + input2, + insert_input1.result, + [], + [], + [], + offset_attr, + size_attr, + stride_attr, + ) return insert_input2 def squeeze_op( - node: torch.fx.Node, + node: SqueezeOp, symbol_table: Dict[Tuple[str, int], ir.Operation], ): """ Import the tensor squeeze operation. - From PyTorch `aten.squeeze.dim` operator to MLIR linalg `generic` operation. + From buddy SqueezeOp to MLIR linalg `generic` operation. Note: This op, reduce the input tensor's shape dims by specified dim. 
Args: @@ -1722,78 +1380,78 @@ def squeeze_op( if input1 is None: return - output_shape = list(node.meta["tensor_meta"].shape) + output_shape = list(node.tensor_meta["shape"]) input1_shape = ir.RankedTensorType(input1.type).shape if dim < 0: dim = len(input1_shape) + dim - dtype = str(node.meta["tensor_meta"].dtype) - if dtype == "torch.float32": - tensor_type = ir.RankedTensorType.get(output_shape, ir.F32Type.get()) - output = tensor.EmptyOp(output_shape, ir.F32Type.get()) - if input1_shape[dim] != 1: - offset = [0 for x in output_shape] - offset_attr = ir._denseI64ArrayAttr(offset, None) - size_attr = ir._denseI64ArrayAttr(input1_shape, None) - stride_attr = ir._denseI64ArrayAttr([1] * len(offset), None) - op = tensor.InsertSliceOp( - input1, - output.result, - [], - [], - [], - offset_attr, - size_attr, - stride_attr, - ) - else: - output_map = ir.AffineMap.get( - len(output_shape), - 0, - [ir.AffineExpr.get_dim(i) for i in range(len(output_shape))], - ) - input1_map = [] - loop_index = 0 - for i in range(len(input1_shape)): - if len(input1_map) == dim: - input1_map.append(ir.AffineExpr.get_constant(0)) - else: - input1_map.append(ir.AffineExpr.get_dim(loop_index)) - loop_index += 1 - input1_map = ir.AffineMap.get(len(output_shape), 0, input1_map) - op = linalg.GenericOp( - [tensor_type], - [input1], - [output], - ir.ArrayAttr.get( - [ - ir.AffineMapAttr.get(input1_map), - ir.AffineMapAttr.get(output_map), - ] - ), - ir.ArrayAttr.get( - [ir.Attribute.parse("#linalg.iterator_type")] - * len(output_shape) - ), - ) - block = ir.Block.create_at_start( - op.region, + dtype = node.tensor_meta["dtype"] + mlir_dtype = mlir_element_type_get(dtype) + tensor_type = ir.RankedTensorType.get(output_shape, mlir_dtype) + output = tensor.EmptyOp(output_shape, mlir_dtype) + if input1_shape[dim] != 1: + offset = [0 for x in output_shape] + offset_attr = ir._denseI64ArrayAttr(offset, None) + size_attr = ir._denseI64ArrayAttr(input1_shape, None) + stride_attr = ir._denseI64ArrayAttr([1] * len(offset), None) + op = tensor.InsertSliceOp( + input1, + output.result, + [], + [], + [], + offset_attr, + size_attr, + stride_attr, + ) + else: + output_map = ir.AffineMap.get( + len(output_shape), + 0, + [ir.AffineExpr.get_dim(i) for i in range(len(output_shape))], + ) + input1_map = [] + loop_index = 0 + for i in range(len(input1_shape)): + if len(input1_map) == dim: + input1_map.append(ir.AffineExpr.get_constant(0)) + else: + input1_map.append(ir.AffineExpr.get_dim(loop_index)) + loop_index += 1 + input1_map = ir.AffineMap.get(len(output_shape), 0, input1_map) + op = linalg.GenericOp( + [tensor_type], + [input1], + [output], + ir.ArrayAttr.get( [ - ir.RankedTensorType(input1.type).element_type, - ir.RankedTensorType(output.result.type).element_type, - ], - ) - block.append(linalg.YieldOp([block.arguments[0]])) + ir.AffineMapAttr.get(input1_map), + ir.AffineMapAttr.get(output_map), + ] + ), + ir.ArrayAttr.get( + [ir.Attribute.parse("#linalg.iterator_type")] + * len(output_shape) + ), + ) + block = ir.Block.create_at_start( + op.region, + [ + ir.RankedTensorType(input1.type).element_type, + ir.RankedTensorType(output.result.type).element_type, + ], + ) + block.append(linalg.YieldOp([block.arguments[0]])) return op def batch_matmul_op( - node: torch.fx.Node, + node: BatchMatmulOp, symbol_table: Dict[Tuple[str, int], ir.Operation], ): """ Import the tensor batch matmul operation. - From PyTorch `aten.bmm.default` operator to MLIR linalg `batch_matmul` + From buddy BatchMatmulOp to MLIR linalg `batch_matmul` operation. 
Note: This operation computes the batch matrix multiplication of the two
input tensors.
@@ -1811,45 +1469,25 @@
if input1 is None or input2 is None:
return

- output_shape = list(node.meta["tensor_meta"].shape)
- dtype = str(node.meta["tensor_meta"].dtype)
- if dtype == "torch.float32":
- tensor_type = ir.RankedTensorType.get(output_shape, ir.F32Type.get())
- output = tensor.EmptyOp(output_shape, ir.F32Type.get())
- # use linalg.generic implementation
- generic_map = ir.AffineMap.get_permutation([0, 1, 2])
- zero_fill = linalg.GenericOp(
- [tensor_type],
- [],
- [output],
- ir.ArrayAttr.get(
- [ir.AffineMapAttr.get(generic_map.get_submap([0, 1, 2]))]
- ),
- ir.ArrayAttr.get(
- [ir.Attribute.parse("#linalg.iterator_type")] * 3
- ),
- )
- block = ir.Block.create_at_start(
- zero_fill.region,
- [ir.RankedTensorType(output.result.type).element_type],
- )
- zero_op = arith.ConstantOp(
- ir.F32Type.get(), ir.FloatAttr.get(ir.F32Type.get(), 0)
- )
- block.append(zero_op)
- block.append(linalg.YieldOp([zero_op.result]))
- op = linalg.batch_matmul(input1, input2, outs=[zero_fill.result])
+ output_shape = list(node.tensor_meta["shape"])
+ dtype = node.tensor_meta["dtype"]
+ mlir_dtype = mlir_element_type_get(dtype)
+ tensor_type = ir.RankedTensorType.get(output_shape, mlir_dtype)
+ element = mlir_element_attr_get(dtype, 0)
+ attr = ir.DenseElementsAttr.get_splat(tensor_type, element)
+ zero_fill = arith.ConstantOp(tensor_type, attr).result
+ op = linalg.batch_matmul(input1, input2, outs=[zero_fill])

return op


def div_op(
- node: torch.fx.Node,
+ node: DivOp,
symbol_table: Dict[Tuple[str, int], ir.Operation],
):
"""
Import the tensor division operation.
- From PyTorch `aten.div.Tensor` operator to MLIR linalg `generic` operation.
+ From buddy DivOp to MLIR TOSA `mul` and `reciprocal` operations.

Note: This operation computes the elementwise division of the two inputs.

Args:
@@ -1861,258 +1499,38 @@ def div_op(
node: Containing information from the input graph node.
symbol_table: A dictionary mapping symbols to their corresponding
operations.

Returns:
op: The result of the tosa.mul operation.
""" assert len(node.args) == 2 - if isinstance(node.args[0], torch.fx.Node): - input1 = symbol_table.get((str(node.args[0]), 0)) - else: - input1 = node.args[0] - - if isinstance(node.args[1], torch.fx.Node): + input1 = symbol_table.get((str(node.args[0]), 0)) + dtype = node.tensor_meta["dtype"] + mlir_dtype = mlir_element_type_get(dtype) + shape = list(node.tensor_meta["shape"]) + if isinstance(node.args[1], str): input2 = symbol_table.get((str(node.args[1]), 0)) else: - input2 = node.args[1] - + data = [node.args[1]] + input2_shape = numpy.array(data).shape + tensor_type = ir.RankedTensorType.get(input2_shape, mlir_dtype) + element = mlir_element_attr_get(dtype, node.args[1]) + attr = ir.DenseElementsAttr.get_splat(tensor_type, element) + input2 = arith.ConstantOp(tensor_type, attr).result if input1 is None or input2 is None: return - - output_shape = list(node.meta["tensor_meta"].shape) - dtype = str(node.meta["tensor_meta"].dtype) - - if isinstance(node.args[0], torch.fx.Node): - if dtype == "torch.float32": - if not isinstance(node.args[1], torch.fx.Node): - input2 = arith.ConstantOp( - ir.F32Type.get(), ir.FloatAttr.get(ir.F32Type.get(), input2) - ) - tensor_type = ir.RankedTensorType.get( - output_shape, ir.F32Type.get() - ) - output = tensor.EmptyOp(output_shape, ir.F32Type.get()) - generic_map = ir.AffineMap.get_permutation( - [i for i in range(len(output_shape))] - ) - op = linalg.GenericOp( - [tensor_type], - [input1], - [output], - ir.ArrayAttr.get( - [ - ir.AffineMapAttr.get( - generic_map.get_submap( - [i for i in range(len(output_shape))] - ) - ), - ir.AffineMapAttr.get( - generic_map.get_submap( - [i for i in range(len(output_shape))] - ) - ), - ] - ), - ir.ArrayAttr.get( - [ir.Attribute.parse("#linalg.iterator_type")] - * len(output_shape) - ), - ) - block = ir.Block.create_at_start( - op.region, - [ - ir.RankedTensorType(input1.type).element_type, - ir.RankedTensorType(output.result.type).element_type, - ], - ) - divf_op = arith.DivFOp(block.arguments[0], input2.result) - block.append(divf_op) - block.append(linalg.YieldOp([divf_op.result])) - else: - tensor_type = ir.RankedTensorType.get( - output_shape, ir.F32Type.get() - ) - output = tensor.EmptyOp(output_shape, ir.F32Type.get()) - input1_shape = list(ir.RankedTensorType(input1.type).shape) - if input1_shape != output_shape: - dims = [] - for i in range(len(input1_shape) - 1, -1, -1): - if ( - input1_shape[i] - != output_shape[ - len(output_shape) - (len(input1_shape) - i) - ] - ): - dims.append(i) - output1 = tensor.EmptyOp(output_shape, ir.F32Type.get()) - generic_map = ir.AffineMap.get_permutation( - [i for i in range(len(output_shape) + len(dims))] - ) - input1_map = [ - i - for i in range( - len(output_shape) - len(input1_shape), - len(output_shape), - ) - ] - for index, i in enumerate(dims): - input1_map[i] = len(output_shape) + index - input1_map = generic_map.get_submap(input1_map) - input1_op = linalg.GenericOp( - [tensor_type], - [input1], - [output1], - ir.ArrayAttr.get( - [ - ir.AffineMapAttr.get(input1_map), - ir.AffineMapAttr.get( - generic_map.get_submap( - [i for i in range(len(output_shape))] - ) - ), - ] - ), - ir.ArrayAttr.get( - [ - ir.Attribute.parse( - "#linalg.iterator_type" - ) - ] - * len(output_shape) - + [ - ir.Attribute.parse( - "#linalg.iterator_type" - ) - ] - * len(dims) - ), - ) - block = ir.Block.create_at_start( - input1_op.region, - [ - ir.RankedTensorType(input1.type).element_type, - ir.RankedTensorType( - output.result.type - ).element_type, - ], - ) - 
block.append(linalg.YieldOp([block.arguments[0]])) - input1 = input1_op.result - - input2_shape = list(ir.RankedTensorType(input2.type).shape) - if input2_shape != output_shape: - dims = [] - for i in range(len(input2_shape) - 1, -1, -1): - if ( - input2_shape[i] - != output_shape[ - len(output_shape) - (len(input2_shape) - i) - ] - ): - dims.append(i) - output2 = tensor.EmptyOp(output_shape, ir.F32Type.get()) - generic_map = ir.AffineMap.get_permutation( - [i for i in range(len(output_shape) + len(dims))] - ) - input2_map = [ - i - for i in range( - len(output_shape) - len(input2_shape), - len(output_shape), - ) - ] - for index, i in enumerate(dims): - input2_map[i] = len(output_shape) + index - input2_map = generic_map.get_submap(input2_map) - input2_op = linalg.GenericOp( - [tensor_type], - [input2], - [output2], - ir.ArrayAttr.get( - [ - ir.AffineMapAttr.get(input2_map), - ir.AffineMapAttr.get( - generic_map.get_submap( - [i for i in range(len(output_shape))] - ) - ), - ] - ), - ir.ArrayAttr.get( - [ - ir.Attribute.parse( - "#linalg.iterator_type" - ) - ] - * len(output_shape) - + [ - ir.Attribute.parse( - "#linalg.iterator_type" - ) - ] - * len(dims) - ), - ) - block = ir.Block.create_at_start( - input2_op.region, - [ - ir.RankedTensorType(input2.type).element_type, - ir.RankedTensorType( - output.result.type - ).element_type, - ], - ) - block.append(linalg.YieldOp([block.arguments[0]])) - input2 = input2_op.result - generic_map = ir.AffineMap.get_permutation( - [i for i in range(len(output_shape))] - ) - op = linalg.GenericOp( - [tensor_type], - [input1, input2], - [output], - ir.ArrayAttr.get( - [ - ir.AffineMapAttr.get( - generic_map.get_submap( - [i for i in range(len(output_shape))] - ) - ), - ir.AffineMapAttr.get( - generic_map.get_submap( - [i for i in range(len(output_shape))] - ) - ), - ir.AffineMapAttr.get( - generic_map.get_submap( - [i for i in range(len(output_shape))] - ) - ), - ] - ), - ir.ArrayAttr.get( - [ir.Attribute.parse("#linalg.iterator_type")] - * len(output_shape) - ), - ) - block = ir.Block.create_at_start( - op.region, - [ - ir.RankedTensorType(input1.type).element_type, - ir.RankedTensorType(input2.type).element_type, - ir.RankedTensorType(output.result.type).element_type, - ], - ) - divf_op = arith.DivFOp(block.arguments[0], block.arguments[1]) - block.append(divf_op) - block.append(linalg.YieldOp([divf_op.result])) - - return op + div_result_tensor_type = ir.RankedTensorType.get(shape, mlir_dtype) + op = tosa.MulOp( + div_result_tensor_type, + input1, + tosa.ReciprocalOp(input2.type, input2).result, + ir.IntegerAttr.get(ir.IntegerType.get_signless(8), 0), + ) + return op.result def softmax_op( - node: torch.fx.Node, + node: SoftmaxOp, symbol_table: Dict[Tuple[str, int], ir.Operation], ): """ Import the tensor softmax operation. - From PyTorch `aten._softmax.default` operator to MLIR linalg `generic` - operation. + From buddy SoftmaxOp to MLIR linalg `generic` operation. Note: This op, compute input node's softmax result. 
Args: @@ -2129,266 +1547,109 @@ def softmax_op( dim = int(node.args[1]) if input1 is None: return - output_shape = list(node.meta["tensor_meta"].shape) - dtype = str(node.meta["tensor_meta"].dtype) + output_shape = list(node.tensor_meta["shape"]) + dtype = node.tensor_meta["dtype"] if dim < 0: dim += len(output_shape) - if dtype == "torch.float32": - max_tensor_shape = copy.deepcopy(output_shape) - max_tensor_shape[dim] = 1 - max_tensor_type = ir.RankedTensorType.get( - max_tensor_shape, ir.F32Type.get() - ) - max_tensor = tensor.EmptyOp(max_tensor_shape, ir.F32Type.get()) - max_tensor_map = [ - ir.AffineExpr.get_dim(i) for i in range(len(max_tensor_shape)) - ] - max_tensor_map = ir.AffineMap.get( - len(max_tensor_shape), 0, max_tensor_map - ) - neg_inf_fill = linalg.GenericOp( - [max_tensor_type], - [], - [max_tensor], - ir.ArrayAttr.get([ir.AffineMapAttr.get(max_tensor_map)]), - ir.ArrayAttr.get( - [ir.Attribute.parse("#linalg.iterator_type")] - * len(max_tensor_shape) - ), - ) - block = ir.Block.create_at_start( - neg_inf_fill.region, - [ir.RankedTensorType(max_tensor.result.type).element_type], - ) - neg_inf_op = arith.ConstantOp( - ir.F32Type.get(), ir.FloatAttr.get(ir.F32Type.get(), float("-inf")) - ) - block.append(neg_inf_op) - block.append(linalg.YieldOp([neg_inf_op.result])) - - input1_map = [ - ir.AffineExpr.get_dim(i) for i in range(len(output_shape)) - ] - input1_map = ir.AffineMap.get(len(output_shape), 0, input1_map) - max_tensor_map = [ - ir.AffineExpr.get_dim(i) for i in range(len(output_shape)) - ] - max_tensor_map[dim] = ir.AffineExpr.get_constant(0) - max_tensor_map = ir.AffineMap.get(len(output_shape), 0, max_tensor_map) - loop_type = [ - ir.Attribute.parse("#linalg.iterator_type") - ] * len(output_shape) - loop_type[dim] = ir.Attribute.parse("#linalg.iterator_type") - max_tensor_op = linalg.GenericOp( - [max_tensor_type], - [input1], - [neg_inf_fill], - ir.ArrayAttr.get( - [ - ir.AffineMapAttr.get(input1_map), - ir.AffineMapAttr.get(max_tensor_map), - ] - ), - ir.ArrayAttr.get(loop_type), - ) - block = ir.Block.create_at_start( - max_tensor_op.region, - [ - ir.RankedTensorType(input1.type).element_type, - ir.RankedTensorType(neg_inf_fill.result.type).element_type, - ], - ) - max_op = arith.MaximumFOp(block.arguments[0], block.arguments[1]) - block.append(max_op) - block.append(linalg.YieldOp([max_op.result])) - - exp_tensor = tensor.EmptyOp(output_shape, ir.F32Type.get()) - exp_tensor_type = ir.RankedTensorType.get( - output_shape, ir.F32Type.get() - ) - input1_map = [ - ir.AffineExpr.get_dim(i) for i in range(len(output_shape)) - ] - input1_map = ir.AffineMap.get(len(output_shape), 0, input1_map) - max_tensor_map = [ - ir.AffineExpr.get_dim(i) for i in range(len(output_shape)) - ] - max_tensor_map[dim] = ir.AffineExpr.get_constant(0) - max_tensor_map = ir.AffineMap.get(len(output_shape), 0, max_tensor_map) - exp_tensor_map = [ - ir.AffineExpr.get_dim(i) for i in range(len(output_shape)) - ] - exp_tensor_map = ir.AffineMap.get(len(output_shape), 0, exp_tensor_map) - exp_tensor_op = linalg.GenericOp( - [exp_tensor_type], - [input1, max_tensor_op.result], - [exp_tensor], - ir.ArrayAttr.get( - [ - ir.AffineMapAttr.get(input1_map), - ir.AffineMapAttr.get(max_tensor_map), - ir.AffineMapAttr.get(exp_tensor_map), - ] - ), - ir.ArrayAttr.get( - [ir.Attribute.parse("#linalg.iterator_type")] - * len(output_shape) - ), - ) - block = ir.Block.create_at_start( - exp_tensor_op.region, - [ - ir.RankedTensorType(input1.type).element_type, - 
ir.RankedTensorType(max_tensor_op.result.type).element_type,
- ir.RankedTensorType(exp_tensor.result.type).element_type,
- ],
- )
- sub_op = arith.SubFOp(block.arguments[0], block.arguments[1])
- exp_op = math.ExpOp(sub_op.result)
- block.append(sub_op)
- block.append(exp_op)
- block.append(linalg.YieldOp([exp_op.result]))
-
- reduce_sum_tensor_shape = copy.deepcopy(output_shape)
- reduce_sum_tensor_shape[dim] = 1
- reduce_sum_tensor = tensor.EmptyOp(
- reduce_sum_tensor_shape, ir.F32Type.get()
- )
- reduce_sum_tensor_type = ir.RankedTensorType.get(
- reduce_sum_tensor_shape, ir.F32Type.get()
- )
- reduce_sum_tensor_map = [
- ir.AffineExpr.get_dim(i) for i in range(len(output_shape))
- ]
- reduce_sum_tensor_map = ir.AffineMap.get(
- len(output_shape), 0, reduce_sum_tensor_map
- )
- zero_fill_op = linalg.GenericOp(
- [reduce_sum_tensor_type],
- [],
- [reduce_sum_tensor.result],
- ir.ArrayAttr.get([ir.AffineMapAttr.get(reduce_sum_tensor_map)]),
- ir.ArrayAttr.get(
- [ir.Attribute.parse("#linalg.iterator_type")]
- * len(output_shape)
- ),
- )
- block = ir.Block.create_at_start(
- zero_fill_op.region,
- [ir.RankedTensorType(reduce_sum_tensor.result.type).element_type],
- )
- zero_op = arith.ConstantOp(
- ir.F32Type.get(), ir.FloatAttr.get(ir.F32Type.get(), 0)
- )
- block.append(zero_op)
- block.append(linalg.YieldOp([zero_op.result]))
-
- reduce_sum_tensor_shape = copy.deepcopy(output_shape)
- reduce_sum_tensor_shape[dim] = 1
- reduce_sum_tensor_type = ir.RankedTensorType.get(
- reduce_sum_tensor_shape, ir.F32Type.get()
- )
- exp_tensor_map = [
- ir.AffineExpr.get_dim(i) for i in range(len(output_shape))
- ]
- exp_tensor_map = ir.AffineMap.get(len(output_shape), 0, exp_tensor_map)
- reduce_sum_tensor_map = [
- ir.AffineExpr.get_dim(i) for i in range(len(output_shape))
- ]
- reduce_sum_tensor_map[dim] = ir.AffineExpr.get_constant(0)
- reduce_sum_tensor_map = ir.AffineMap.get(
- len(output_shape), 0, reduce_sum_tensor_map
- )
- loop_type = [
- ir.Attribute.parse("#linalg.iterator_type")
- ] * len(output_shape)
- loop_type[dim] = ir.Attribute.parse("#linalg.iterator_type")
- reduce_sum_tensor_op = linalg.GenericOp(
- [reduce_sum_tensor_type],
- [exp_tensor_op.result],
- [zero_fill_op.result],
- ir.ArrayAttr.get(
- [
- ir.AffineMapAttr.get(exp_tensor_map),
- ir.AffineMapAttr.get(reduce_sum_tensor_map),
- ]
- ),
- ir.ArrayAttr.get(loop_type),
- )
- block = ir.Block.create_at_start(
- reduce_sum_tensor_op.region,
+ mlir_dtype = mlir_element_type_get(dtype)
+ sum_tensor_shape = copy.deepcopy(output_shape)
+ sum_tensor_shape[dim] = 1
+ sum_tensor_type = ir.RankedTensorType.get(sum_tensor_shape, mlir_dtype)
+ element = mlir_element_attr_get(dtype, 0)
+ attr = ir.DenseElementsAttr.get_splat(sum_tensor_type, element)
+ sum_tensor = arith.ConstantOp(sum_tensor_type, attr).result
+ input1_map = [ir.AffineExpr.get_dim(i) for i in range(len(output_shape))]
+ input1_map = ir.AffineMap.get(len(output_shape), 0, input1_map)
+ sum_tensor_map = [
+ ir.AffineExpr.get_dim(i) for i in range(len(output_shape))
+ ]
+ sum_tensor_map[dim] = ir.AffineExpr.get_constant(0)
+ sum_tensor_map = ir.AffineMap.get(len(output_shape), 0, sum_tensor_map)
+ loop_type = [ir.Attribute.parse("#linalg.iterator_type")] * len(
+ output_shape
+ )
+
loop_type[dim] = ir.Attribute.parse("#linalg.iterator_type") + sum_tensor_op = linalg.GenericOp( + [sum_tensor_type], + [input1], + [sum_tensor], + ir.ArrayAttr.get( [ - ir.RankedTensorType(exp_tensor_op.result.type).element_type, - ir.RankedTensorType(zero_fill_op.result.type).element_type, - ], - ) - add_op = arith.AddFOp(block.arguments[0], block.arguments[1]) - block.append(add_op) - block.append(linalg.YieldOp([add_op.result])) - - reduce_sum_tensor_shape = copy.deepcopy(output_shape) - reduce_sum_tensor_shape[dim] = 1 - result_tensor_type = ir.RankedTensorType.get( - output_shape, ir.F32Type.get() - ) - result_tensor = tensor.EmptyOp(output_shape, ir.F32Type.get()) - exp_tensor_map = [ - ir.AffineExpr.get_dim(i) for i in range(len(output_shape)) - ] - exp_tensor_map = ir.AffineMap.get(len(output_shape), 0, exp_tensor_map) - reduce_sum_tensor_map = [ - ir.AffineExpr.get_dim(i) for i in range(len(output_shape)) - ] - reduce_sum_tensor_map[dim] = ir.AffineExpr.get_constant(0) - reduce_sum_tensor_map = ir.AffineMap.get( - len(output_shape), 0, reduce_sum_tensor_map - ) - result_tensor_map = [ - ir.AffineExpr.get_dim(i) for i in range(len(output_shape)) - ] - result_tensor_map = ir.AffineMap.get( - len(output_shape), 0, result_tensor_map - ) - op = linalg.GenericOp( - [result_tensor_type], - [exp_tensor_op.result, reduce_sum_tensor_op.result], - [result_tensor.result], - ir.ArrayAttr.get( - [ - ir.AffineMapAttr.get(exp_tensor_map), - ir.AffineMapAttr.get(reduce_sum_tensor_map), - ir.AffineMapAttr.get(result_tensor_map), - ] - ), - ir.ArrayAttr.get( - [ir.Attribute.parse("#linalg.iterator_type")] - * len(output_shape) - ), - ) - block = ir.Block.create_at_start( - op.region, + ir.AffineMapAttr.get(input1_map), + ir.AffineMapAttr.get(sum_tensor_map), + ] + ), + ir.ArrayAttr.get(loop_type), + ) + block = ir.Block.create_at_start( + sum_tensor_op.region, + [ + mlir_dtype, + mlir_dtype, + ], + ) + exp_op = math.ExpOp(block.arguments[0]) + add_op = arith.AddFOp(exp_op.result, block.arguments[1]) + block.append(exp_op) + block.append(add_op) + block.append(linalg.YieldOp([add_op.result])) + result_tensor_type = ir.RankedTensorType.get(output_shape, mlir_dtype) + result_tensor = tensor.EmptyOp(output_shape, mlir_dtype) + result_tensor_map = [ + ir.AffineExpr.get_dim(i) for i in range(len(output_shape)) + ] + result_tensor_map = ir.AffineMap.get( + len(output_shape), 0, result_tensor_map + ) + op = linalg.GenericOp( + [result_tensor_type], + [input1, sum_tensor_op.result], + [result_tensor.result], + ir.ArrayAttr.get( [ - ir.RankedTensorType(exp_tensor_op.result.type).element_type, - ir.RankedTensorType( - reduce_sum_tensor_op.result.type - ).element_type, - ir.RankedTensorType(result_tensor.result.type).element_type, - ], - ) - div_op = arith.DivFOp(block.arguments[0], block.arguments[1]) - block.append(div_op) - block.append(linalg.YieldOp([div_op.result])) + ir.AffineMapAttr.get(input1_map), + ir.AffineMapAttr.get(sum_tensor_map), + ir.AffineMapAttr.get(result_tensor_map), + ] + ), + ir.ArrayAttr.get( + [ir.Attribute.parse("#linalg.iterator_type")] + * len(output_shape) + ), + ) + block = ir.Block.create_at_start( + op.region, + [ + mlir_dtype, + mlir_dtype, + mlir_dtype, + ], + ) + exp_op = math.ExpOp(block.arguments[0]) + div_op = arith.DivFOp(exp_op.result, block.arguments[1]) + block.append(exp_op) + block.append(div_op) + block.append(linalg.YieldOp([div_op.result])) return op def clone_op( - node: torch.fx.Node, + node: CloneOp, symbol_table: Dict[Tuple[str, int], ir.Operation], ): """ 
Import the tensor clone operation. - From PyTorch `aten.clone.default` operator to MLIR tensor `extract_slice` + From buddy CloneOp to MLIR tensor `extract_slice` operation. Note: This op, clone input tensor to a new tensor. @@ -2405,31 +1666,29 @@ def clone_op( if input1 is None: return - output_shape = list(node.meta["tensor_meta"].shape) - dtype = str(node.meta["tensor_meta"].dtype) - if dtype == "torch.float32": - offset = [0 for x in output_shape] - offset_attr = ir._denseI64ArrayAttr(offset, None) - size_attr = ir._denseI64ArrayAttr(output_shape, None) - stride = [1 for x in output_shape] - stride_attr = ir._denseI64ArrayAttr(stride, None) - tensor_type = ir.RankedTensorType.get(output_shape, ir.F32Type.get()) - - op = tensor.ExtractSliceOp( - tensor_type, input1, [], [], [], offset_attr, size_attr, stride_attr - ) + output_shape = list(node.tensor_meta["shape"]) + dtype = node.tensor_meta["dtype"] + mlir_dtype = mlir_element_type_get(dtype) + offset = [0 for x in output_shape] + offset_attr = ir._denseI64ArrayAttr(offset, None) + size_attr = ir._denseI64ArrayAttr(output_shape, None) + stride = [1 for x in output_shape] + stride_attr = ir._denseI64ArrayAttr(stride, None) + tensor_type = ir.RankedTensorType.get(output_shape, mlir_dtype) + op = tensor.ExtractSliceOp( + tensor_type, input1, [], [], [], offset_attr, size_attr, stride_attr + ) return op def silu_op( - node: torch.fx.Node, + node: SiluOp, symbol_table: Dict[Tuple[str, int], ir.Operation], ): """ Import the tensor silu activation operation. - From PyTorch `aten.silu.default` operator to MLIR linalg `generic` - operation. + From Buddy SiluOp to MLIR linalg `generic` operation. Note: This op, compute input node's silu activation result. Args: @@ -2445,63 +1704,61 @@ def silu_op( if input1 is None: return - output_shape = list(node.meta["tensor_meta"].shape) - dtype = str(node.meta["tensor_meta"].dtype) - if dtype == "torch.float32": - tensor_type = ir.RankedTensorType.get(output_shape, ir.F32Type.get()) - output = tensor.EmptyOp(output_shape, ir.F32Type.get()) - generic_map = ir.AffineMap.get_permutation( - [i for i in range(len(output_shape))] - ) - op = linalg.GenericOp( - [tensor_type], - [input1], - [output], - ir.ArrayAttr.get( - [ - ir.AffineMapAttr.get( - generic_map.get_submap( - [i for i in range(len(output_shape))] - ) - ), - ir.AffineMapAttr.get( - generic_map.get_submap( - [i for i in range(len(output_shape))] - ) - ), - ] - ), - ir.ArrayAttr.get( - [ir.Attribute.parse("#linalg.iterator_type")] - * len(output_shape) - ), - ) - block = ir.Block.create_at_start( - op.region, + output_shape = list(node.tensor_meta["shape"]) + dtype = node.tensor_meta["dtype"] + mlir_dtype = mlir_element_type_get(dtype) + tensor_type = ir.RankedTensorType.get(output_shape, mlir_dtype) + output = tensor.EmptyOp(output_shape, mlir_dtype) + generic_map = ir.AffineMap.get_permutation( + [i for i in range(len(output_shape))] + ) + op = linalg.GenericOp( + [tensor_type], + [input1], + [output], + ir.ArrayAttr.get( [ - ir.RankedTensorType(input1.type).element_type, - ir.RankedTensorType(output.result.type).element_type, - ], - ) - neg_op = arith.NegFOp(block.arguments[0]) - exp_op = math.ExpOp(neg_op.result) - one_op = arith.ConstantOp( - ir.F32Type.get(), ir.FloatAttr.get(ir.F32Type.get(), 1) - ) - add_op = arith.AddFOp(one_op.result, exp_op.result) - div_op = arith.DivFOp(block.arguments[0], add_op.result) - block.append(neg_op) - block.append(exp_op) - block.append(one_op) - block.append(add_op) - block.append(div_op) - 
block.append(linalg.YieldOp([div_op.result]))
+ ir.AffineMapAttr.get(
+ generic_map.get_submap(
+ [i for i in range(len(output_shape))]
+ )
+ ),
+ ir.AffineMapAttr.get(
+ generic_map.get_submap(
+ [i for i in range(len(output_shape))]
+ )
+ ),
+ ]
+ ),
+ ir.ArrayAttr.get(
+ [ir.Attribute.parse("#linalg.iterator_type")]
+ * len(output_shape)
+ ),
+ )
+ block = ir.Block.create_at_start(
+ op.region,
+ [
+ ir.RankedTensorType(input1.type).element_type,
+ ir.RankedTensorType(output.result.type).element_type,
+ ],
+ )
+ neg_op = arith.NegFOp(block.arguments[0])
+ exp_op = math.ExpOp(neg_op.result)
+ one_op = arith.ConstantOp(mlir_dtype, mlir_element_attr_get(dtype, 1))
+ add_op = arith.AddFOp(one_op.result, exp_op.result)
+ div_op = arith.DivFOp(block.arguments[0], add_op.result)
+ block.append(neg_op)
+ block.append(exp_op)
+ block.append(one_op)
+ block.append(add_op)
+ block.append(div_op)
+ block.append(linalg.YieldOp([div_op.result]))

return op


def param_extract(
- node: torch.fx.Node,
+ node: PlaceholderOp,
offset,
params_mlir_node,
):
@@ -2519,12 +1776,12 @@ def param_extract(
op: The operation that returns the tensor.expand_shape op.
"""
dtype_mapping = {
- torch.float32: ir.F32Type.get(),
- torch.int64: ir.IntegerType.get_signless(64),
+ TensorDType.Float32: ir.F32Type.get(),
+ TensorDType.Int64: ir.IntegerType.get_signless(64),
}
- tensor_element_type = dtype_mapping[node.meta["tensor_meta"].dtype]
- output_shape = list(node.meta["tensor_meta"].shape)
- extract_size = functools.reduce(lambda x, y: x * y, output_shape)
+ tensor_element_type = dtype_mapping[node.tensor_meta["dtype"]]
+ output_shape = list(node.tensor_meta["shape"])
+ extract_size = functools.reduce(lambda x, y: x * y, output_shape, 1)
offset_attr = ir._denseI64ArrayAttr([offset], None)
size_attr = ir._denseI64ArrayAttr([extract_size], None)
stride = [1]
@@ -2540,7 +1797,7 @@ def param_extract(
size_attr,
stride_attr,
)
- if len(output_shape) == 1:
+ if len(output_shape) == 1 or len(output_shape) == 0:
return extract_slice_op
tensor_type = ir.RankedTensorType.get(output_shape, tensor_element_type)
axis = ir.ArrayAttr.get(
@@ -2553,36 +1810,123 @@
axis = ir.ArrayAttr.get([axis], None)
return tensor.ExpandShapeOp(tensor_type, extract_slice_op.result, axis)

+def where_op(
+ node: WhereOp,
+ symbol_table: Dict[Tuple[str, int], ir.Operation],
+):
+ """
+ Import the tensor where operation.
+ From Buddy WhereOp to MLIR linalg `generic` operation.
+
+ Note: This operation selects between the second and third inputs
+ element-wise, according to the condition tensor.
+ Args:
+ node: Containing information from the input graph node.
+ symbol_table: A dictionary mapping symbols to their corresponding
+ operations.
+
+ Returns:
+ op: The linalg.generic op that computes the selection.
+ """ + assert len(node.args) == 3 + input1 = symbol_table.get((str(node.args[0]), 0)) + input2 = symbol_table.get((str(node.args[1]), 0)) + input3 = symbol_table.get((str(node.args[2]), 0)) + if input1 is None or input2 is None or input3 is None: + return + + output_shape = list(node.tensor_meta["shape"]) + dtype = node.tensor_meta["dtype"] + mlir_dtype = mlir_element_type_get(dtype) + tensor_type = ir.RankedTensorType.get(output_shape, mlir_dtype) + output = tensor.EmptyOp(output_shape, mlir_dtype) + generic_map = ir.AffineMap.get_permutation( + [i for i in range(len(output_shape))] + ) + op = linalg.GenericOp( + [tensor_type], + [input1, input3], + [output], + ir.ArrayAttr.get( + [ + ir.AffineMapAttr.get( + generic_map.get_submap( + [i for i in range(len(output_shape))] + ) + ), + ir.AffineMapAttr.get( + generic_map.get_submap( + [i for i in range(len(output_shape))] + ) + ), + ir.AffineMapAttr.get( + generic_map.get_submap( + [i for i in range(len(output_shape))] + ) + ), + ] + ), + ir.ArrayAttr.get( + [ir.Attribute.parse("#linalg.iterator_type")] + * len(output_shape) + ), + ) + block = ir.Block.create_at_start( + op.region, + [ + ir.RankedTensorType(input1.type).element_type, + ir.RankedTensorType(input3.type).element_type, + ir.RankedTensorType(output.result.type).element_type, + ], + ) + select_op = arith.SelectOp(block.arguments[0], input2, block.arguments[1]) + block.append(select_op) + block.append(linalg.YieldOp([select_op.result])) + + return op + +def scalar_tensor_op(node: ScalarTensorOp, symbol_table): + """ + Import the tensor Scalar_Tensor operation. + From Buddy ScalarTensorOp to MLIR arith `ConstantOp` operation. + """ + assert len(node.args) == 1 + dtype = node.tensor_meta["dtype"] + attr = mlir_element_attr_get(dtype, node.args[0]) + op = arith.ConstantOp(dtype, attr) + + return op ops_registry = { - "arange.start": arange_op, - "arange.default": arange_op, - "unsqueeze.default": unsqueeze_op, - "view.default": view_op, - "ones.default": ones_op, - "full.default": full_op, - "lt.Tensor": lt_op, - "embedding.default": embedding_op, - "masked_fill.Scalar": masked_fill_op, - "slice.Tensor": slice_op, - "expand.default": expand_op, - "_to_copy.default": to_copy_op, - "rsub.Scalar": rsub_op, - "pow.Tensor_Scalar": pow_op, - "mean.dim": mean_op, - "rsqrt.default": rsqrt_op, - "mul.Tensor": mul_op, - "t.default": t_op, - "mm.default": matmul_op, - "transpose.int": transpose_op, - "index.Tensor": index_op, - "neg.default": neg_op, - "cat.default": cat_op, - "squeeze.dim": squeeze_op, - "bmm.default": batch_matmul_op, - "div.Tensor": div_op, - "_softmax.default": softmax_op, - "clone.default": clone_op, - "silu.default": silu_op, "param.extract": param_extract, + "MatmulOp": matmul_op, + "ArangeOp": arange_op, + "UnsqueezeOp": unsqueeze_op, + "ViewOp": view_op, + "EmbeddingOp": embedding_op, + "OnesOp": ones_op, + "FullOp": full_op, + "LessThanOp": lt_op, + "MaskedFillOp": masked_fill_op, + "SliceOp": slice_op, + "ExpandOp": expand_op, + "ToCopyOp": to_copy_op, + "RsubOp": rsub_op, + "PowOp": pow_op, + "MeanOp": mean_op, + "RsqrtOp": rsqrt_op, + "MulOp": mul_op, + "TOp": t_op, + "TransposeOp": transpose_op, + "IndexOp": index_op, + "NegOp": neg_op, + "CatOp": cat_op, + "SqueezeOp": squeeze_op, + "BatchMatmulOp": batch_matmul_op, + "DivOp": div_op, + "SoftmaxOp": softmax_op, + "CloneOp": clone_op, + "SiluOp": silu_op, + "AddOp": add_op, + "WhereOp": where_op, + "ScalarTensorOp": scalar_tensor_op, } diff --git a/frontend/Python/ops/math.py b/frontend/Python/ops/math.py index 
7e2de80b5f..19820c2b3b 100644
--- a/frontend/Python/ops/math.py
+++ b/frontend/Python/ops/math.py
@@ -22,11 +22,16 @@


def erf_op(node, symbol_table):
- input_ = symbol_table.get((str(node.args[0]), 0))
- op = math.ErfOp(input_)
+ input_tensor = symbol_table.get((str(node.args[0]), 0))
+ op = math.ErfOp(input_tensor)
return op


+def sqrt_op(node, symbol_table):
+ input_tensor = symbol_table.get((str(node.args[0]), 0))
+ return math.SqrtOp(input_tensor)
+
+
ops_registry = {
- "erf.default": erf_op,
+ "ErfOp": erf_op,
+ "SqrtOp": sqrt_op,
}
diff --git a/frontend/Python/ops/tosa.py b/frontend/Python/ops/tosa.py
index bf957002a9..8a0997a3a0 100644
--- a/frontend/Python/ops/tosa.py
+++ b/frontend/Python/ops/tosa.py
@@ -14,17 +14,52 @@
#
# ===---------------------------------------------------------------------------
#
-# The registry of mappings from Torch node to MLIR tosa dialect operations.
+# The registry of mappings from Buddy Graph to MLIR tosa dialect operations.
#
# ===---------------------------------------------------------------------------

-import torch
import array
from typing import Dict, List, Tuple, Union

+import numpy
import mlir.ir as ir
from mlir.dialects import tensor, tosa

+from ..graph import TensorDType
+from ..graph import (
+ AddOp,
+ PermuteOp,
+ AddMMOp,
+ BatchMatmulOp,
+ SubOp,
+ MulOp,
+ DivOp,
+ TanhOp,
+ ExpOp,
+ RsqrtOp,
+ AmaxOp,
+ ReshapeOp,
+ UnsqueezeOp,
+ SelectOp,
+ SliceOp,
+ ConvertElementTypeOp,
+ CloneOp,
+ VarMeanOp,
+ EmbeddingOp,
+ ExpandOp,
+ SumDimOp,
+ TOp,
+ TransposeOp,
+ MaxPool2dOp,
+ Conv2dOp,
+ ReluOp,
+ IotaOp,
+ SigmoidOp,
+ ReciprocalOp,
+ MeanOp,
+)
+from .utils import *
+

def _normalize_binary_operator_shape(shp1, shp2):
"""Normalize the shape of two input tensors according to the broadcasting
@@ -75,9 +110,8 @@ def _gen_arith_binary_op(input1, input2, op_func):
def _scalar_to_tensor(
scalar: Union[float, int], element_type: ir.Type, shape: List[int]
):
- """PyTorch allow the binary operation between tensor and scalar. But MLIR
- does not.
- So we need to convert scalars to the corresponding tensors."""
+ """Convert scalars to corresponding tensors since MLIR
+ doesn't support operations between scalars and tensors."""
element = (
ir.FloatAttr.get(element_type, float(scalar))
if str(element_type) == "f32"
@@ -128,11 +162,11 @@ def _normalize_binary_operator_args(arg1, arg2):


def addmm_op(
- node, symbol_table: Dict[Tuple[str, int], ir.Operation]
+ node: AddMMOp, symbol_table: Dict[Tuple[str, int], ir.Operation]
) -> ir.Operation:
"""
Import matrix multiplication operation.
- From PyTorch `aten.addmm.default` operator to MLIR TOSA `matmul` operation.
+ From buddy graph ir's `AddMMOp` operator to MLIR TOSA `matmul` operation.

Note: this function first reshapes the input matrices to 3D tensors
(since tosa.MatMulOp requires it). Then it multiplies these reshaped
@@ -146,8 +180,7 @@ def addmm_op(

Returns:
op: The operation representing the result of adding the matrix
- multiplication
- to the input tensor.
+ multiplication to the input tensor.
"""
# get input
input_ = symbol_table.get((str(node.args[0]), 0))
@@ -184,10 +217,11 @@ def addmm_op(
return op


-def bmm_op(node, symbol_table) -> ir.Operation:
+def bmm_op(node: BatchMatmulOp, symbol_table) -> ir.Operation:
"""
Import batch matrix multiplication operation.
- From PyTorch `aten.bmm.default` operator to MLIR TOSA `matmul` operation.
+ From buddy graph ir's `BatchMatmulOp` operator to MLIR TOSA `matmul`
+ operation.
""" input_ = symbol_table.get((str(node.args[0]), 0)) mat2 = symbol_table.get((str(node.args[1]), 0)) @@ -200,30 +234,30 @@ def bmm_op(node, symbol_table) -> ir.Operation: return op -def add_op(node, symbol_table): +def add_op(node: AddOp, symbol_table): """ Import tensor addition operation. - From PyTorch `aten.add.Tensor` operator to MLIR TOSA `add` operation. + From buddy graph ir's `AddOp` operator to MLIR TOSA `add` operation. """ input1 = symbol_table.get((str(node.args[0]), 0), node.args[0]) input2 = symbol_table.get((str(node.args[1]), 0), node.args[1]) return _gen_arith_binary_op(input1, input2, tosa.AddOp) -def sub_op(node, symbol_table): +def sub_op(node: SubOp, symbol_table): """ Import tensor subtraction operation. - From PyTorch `aten.sub.Tensor` operator to MLIR TOSA `sub` operation. + From buddy graph ir's `SubOp` operator to MLIR TOSA `sub` operation. """ input1 = symbol_table.get((str(node.args[0]), 0), node.args[0]) input2 = symbol_table.get((str(node.args[1]), 0), node.args[1]) return _gen_arith_binary_op(input1, input2, tosa.SubOp) -def mul_op(node, symbol_table): +def mul_op(node: MulOp, symbol_table): """ - Import tensor multiplication operation. - From PyTorch `aten.mul.Tensor` operator to MLIR TOSA `mul` operation. + Import tensor division operation. + From buddy graph ir's `DivOp` operator to MLIR TOSA `div` operation. """ def _inner_op(result_type, input1, input2): @@ -240,10 +274,10 @@ def _inner_op(result_type, input1, input2): return _gen_arith_binary_op(input1, input2, _inner_op) -def div_op(node, symbol_table): +def div_op(node: DivOp, symbol_table): """ Import tensor division operation. - From PyTorch `aten.div.Tensor` operator to MLIR TOSA `div` operation. + From buddy graph ir's `DivOp` operator to MLIR TOSA `div` operation. """ def _inner_op(result_type, input1, input2): @@ -260,10 +294,10 @@ def _inner_op(result_type, input1, input2): return _gen_arith_binary_op(input1, input2, _inner_op) -def tanh_op(node, symbol_table): +def tanh_op(node: TanhOp, symbol_table): """ Import elementwise tanh operation. - From PyTorch `aten.tanh.default` operator to MLIR TOSA `tanh` operation. + From buddy graph ir's `TanhOp` operator to MLIR TOSA `tanh` operation. """ input1 = symbol_table.get((str(node.args[0]), 0)) sizes = ir.RankedTensorType(input1.type).shape @@ -273,10 +307,10 @@ def tanh_op(node, symbol_table): return op -def exp_op(node, symbol_table): +def exp_op(node: ExpOp, symbol_table): """ Import elementwise exponential operation. - From PyTorch `aten.exp.default` operator to MLIR TOSA `exp` operation. + From buddy graph ir's `ExpOp` operator to MLIR TOSA `exp` operation. """ input1 = symbol_table.get((str(node.args[0]), 0)) sizes = ir.RankedTensorType(input1.type).shape @@ -286,10 +320,10 @@ def exp_op(node, symbol_table): return op -def rsqrt_op(node, symbol_table): +def rsqrt_op(node: RsqrtOp, symbol_table): """ Import elementwise reciprocal square root operation. - From PyTorch `aten.rsqrt.default` operator to MLIR TOSA `rsqrt` operation. + From buddy graph ir's `RsqrtOp` operator to MLIR TOSA `rsqrt` operation. """ input1 = symbol_table.get((str(node.args[0]), 0)) sizes = ir.RankedTensorType(input1.type).shape @@ -301,15 +335,11 @@ def rsqrt_op(node, symbol_table): return op -def amax_op(node, symbol_table): +def amax_op(node: AmaxOp, symbol_table): """ Import the amax operation. - From PyTorch `aten.amax.default` operator to MLIR TOSA `reduce_max` + From buddy graph ir's `AmaxOp` operator to MLIR TOSA `reduce_max` operation. 
- - Note: This conversion function returns the maximum value of each slice - of the input tensor in the given dimension(s). This is consistent - with PyTorch's `torch.amax` operator. """ input1 = symbol_table.get((str(node.args[0]), 0)) dim_val = node.args[1][0] @@ -321,10 +351,10 @@ def amax_op(node, symbol_table): return op -def reshape_op(node, symbol_table): +def reshape_op(node: ReshapeOp, symbol_table): """ Import the reshape operation. - From PyTorch `aten.reshape.default` operator to MLIR TOSA `reshape` + From buddy graph ir's `ReshapeOp` operator to MLIR TOSA `reshape` operation. Note: If the new shape contains one and only one `-1`, the size of the new @@ -362,34 +392,30 @@ def reshape_op(node, symbol_table): return op -def unsqueeze_op(node, symbol_table): +def unsqueeze_op(node: UnsqueezeOp, symbol_table): """ Import the unsqueeze operation. - From PyTorch `aten.unsqueeze.default` operator to MLIR TOSA `reshape` + From buddy graph ir's `UnsqueezeOp` operator to MLIR TOSA `reshape` operation. - - Note: "unsqueeze" means inserting a new dimension of size 1 at the specified - position. For more information, please refer to - https://pytorch.org/docs/stable/generated/torch.unsqueeze.html """ input_tensor = symbol_table.get((str(node.args[0]), 0)) dim = node.args[1] sizes = ir.RankedTensorType(input_tensor.type).shape - sizes.insert(dim, 1) + if dim == -1: + sizes.append(1) + else: + sizes.insert(dim, 1) new_shape_content = array.array("i", sizes) new_shape_content = memoryview(new_shape_content) op = tosa.ReshapeOp(input_tensor, new_shape_content) return op -def select_op(node, symbol_table): +def select_op(node: SelectOp, symbol_table): """ Import the select operation. - From PyTorch `aten.select.int` operator to MLIR TOSA `reshape` operation. - - Note: "select" means slicing the input tensor along the selected dimension - at the given index. For more information, please refer to - https://pytorch.org/docs/stable/generated/torch.select.html + From buddy graph ir's `SelectOp` operator to MLIR TOSA `reshape` + operation. """ input_tensor = symbol_table.get((str(node.args[0]), 0)) dim = node.args[1] @@ -416,14 +442,11 @@ def select_op(node, symbol_table): return op -def slice_op(node, symbol_table): +def slice_op(node: SliceOp, symbol_table): """ Import the slice operation. - From PyTorch `aten.slice.Tensor` operator to MLIR tensor `extract_slice` + From buddy graph ir's `SliceOp` operator to MLIR TOSA `extract_slice` operation. - - Note: "slice" means slicing the input tensor along the selected dimension - from a given start index to an end index. """ input_tensor = symbol_table.get((str(node.args[0]), 0)) dim = node.args[1] @@ -477,17 +500,19 @@ def slice_op(node, symbol_table): return op -def convert_element_type_op(node, symbol_table): +def convert_element_type_op(node: ConvertElementTypeOp, symbol_table): """ Import the element type conversion operation. - From PyTorch `prims.convert_element_type.default` operator to - MLIR TOSA `cast` operation. + From buddy graph ir's `ConvertElementTypeOp` operator to MLIR TOSA + `cast` operation. 
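+
+ Note: the mapping below currently covers f64, f32, and f16 floating-point
+ targets plus signless i32 and i1 (bool) integer targets.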
""" - # maintain a mapping of torch types and mlir types + # maintain a mapping of buddy dtype to mlir types types_mapping = { - torch.float64: ir.F64Type.get(), - torch.float32: ir.F32Type.get(), - torch.float16: ir.F16Type.get(), + TensorDType.Float64: ir.F64Type.get(), + TensorDType.Float32: ir.F32Type.get(), + TensorDType.Float16: ir.F16Type.get(), + TensorDType.Int32: ir.IntegerType.get_signless(32), + TensorDType.Bool: ir.IntegerType.get_signless(1), } input_tensor = symbol_table.get((str(node.args[0]), 0)) to_cast_type = types_mapping[node.args[1]] @@ -496,13 +521,13 @@ def convert_element_type_op(node, symbol_table): return tosa.CastOp(output_type, input_tensor) -def clone_op(node, symbol_table): +def clone_op(node: CloneOp, symbol_table): """ Import the clone operation. - From PyTorch `aten.clone.default` operator to MLIR TOSA `identity` + From buddy graph ir's `CloneOp` operator to MLIR TOSA `identity` operation. - Note: Since MLIR follow the SSA form, when using the `identity` operation, + Note: Since MLIR follows the SSA form, when using the `identity` operation, we actually deep-copies the original tensor. """ input_tensor = symbol_table.get((str(node.args[0]), 0)) @@ -513,13 +538,16 @@ def clone_op(node, symbol_table): return tosa.IdentityOp(output_type, input_tensor) -def var_mean_op(node, symbol_table): +def var_mean_op(node: VarMeanOp, symbol_table): """ Import the variance & mean operation. - From PyTorch `aten.var_mean.default` operator to two MLIR TOSA `mul` + From buddy graph ir's `VarMeanOp` operator to two MLIR TOSA `mul` operation. - Note: The conversion procedure can be splited into two steps: + Note: By now, this conversion function follows PyTorch's `var_mean` + semantic. + + The conversion procedure can be splited into two steps: 1. In the first part, we calculate the mean value along the given dimension(s) in `mean_dim_op` function. We first reduce the input tensor along the given dimension(s) using tosa's `reduce_sum` @@ -667,10 +695,10 @@ def var_dim_op( return var_op, mean_op -def permute_op(node, symbol_table): +def permute_op(node: PermuteOp, symbol_table): """ Import the permute operation. - From PyTorch `aten.permute.default` operator to MLIR TOSA `transpose` + From buddy graph ir's `PermuteOp` operator to MLIR TOSA `transpose` operation. """ input_tensor = symbol_table.get((str(node.args[0]), 0)) @@ -693,10 +721,10 @@ def permute_op(node, symbol_table): return permute_op -def embedding_op(node, symbol_table): +def embedding_op(node: EmbeddingOp, symbol_table): """ Import the embedding operation. - From PyTorch `aten.embedding.default` operator to MLIR TOSA `reshape` + From buddy graph ir's `EmbeddingOp` operator to MLIR TOSA `reshape` operation. Note: Althought this conversion function will finally return a `reshape` @@ -754,10 +782,10 @@ def embedding_op(node, symbol_table): return op -def expand_op(node, symbol_table) -> ir.Operation: +def expand_op(node: ExpandOp, symbol_table) -> ir.Operation: """ Import the expand operation. - From PyTorch `aten.expand.default` operator to MLIR TOSA `add` operation. + From buddy graph ir's `ExpandOp` operator to MLIR TOSA `add` operation. Note: This conversion is implemented using the broadcast machanism of TOSA `add` operation. We allocate a tensor with the shape to expand and @@ -787,11 +815,10 @@ def expand_op(node, symbol_table) -> ir.Operation: return op -def sum_op(node, symbol_table): +def sum_op(node: SumDimOp, symbol_table): """ Import the sum operation. 
- From PyTorch `aten.sum.dim_IntList` operator to MLIR TOSA `reduce_sum`
- operation.
+ From buddy graph ir's `SumDimOp` operator to MLIR TOSA `reduce_sum`
+ operation.
"""
input_tensor = symbol_table.get((str(node.args[0]), 0))
reduce_sum_dims = node.args[1]
@@ -813,40 +840,37 @@
return reduce_sum_op


-def t_op(node, symbol_table):
+def t_op(node: TOp, symbol_table):
"""
Import the tensor transpose operation.
- From PyTorch `aten.t.default` operator to MLIR TOSA `reduce_sum` operation.
+ From buddy graph ir's `TOp` operator to MLIR TOSA `transpose` operation.
"""
assert len(node.args) == 1
input1 = symbol_table.get((str(node.args[0]), 0))
- if input1 is None:
- return
+ assert input1 is not None
input_shape = list(ir.RankedTensorType(input1.type).shape)
- output_shape = list(node.meta["tensor_meta"].shape)
- if len(input_shape) == 2:
- perm_const_op = tosa.ConstOp(
- ir.DenseElementsAttr.get(memoryview(array.array("i", [1, 0])))
- )
- result_element_type = ir.RankedTensorType(input1.type).element_type
- permute_result_type = ir.RankedTensorType.get(
- output_shape, result_element_type
- )
- op = tosa.TransposeOp(
- permute_result_type, input1, perm_const_op.results[0]
- )
+ output_shape = list(node.tensor_meta["shape"])
+ assert len(input_shape) == 2, "Input tensor must be 2D"
+ perm_const_op = tosa.ConstOp(
+ ir.DenseElementsAttr.get(memoryview(array.array("i", [1, 0])))
+ )
+ result_element_type = ir.RankedTensorType(input1.type).element_type
+ permute_result_type = ir.RankedTensorType.get(
+ output_shape, result_element_type
+ )
+ op = tosa.TransposeOp(permute_result_type, input1, perm_const_op.results[0])
return op


-def transpose_op(node, symbol_table):
+def transpose_op(node: TransposeOp, symbol_table):
"""
Import the tensor permute operation based on input dims.
- From PyTorch `aten.transpose.int` operator to MLIR TOSA `reduce_sum`
+ From buddy graph ir's `TransposeOp` operator to MLIR TOSA `transpose`
operation.
"""
- assert len(node.args) == 3
+ assert len(node.args) == 3, "transpose expects exactly 3 arguments"
input1 = symbol_table.get((str(node.args[0]), 0))
if input1 is None:
return
@@ -857,7 +881,7 @@
temp = perm_list[dim1]
perm_list[dim1] = perm_list[dim2]
perm_list[dim2] = temp
- output_shape = list(node.meta["tensor_meta"].shape)
+ output_shape = list(node.tensor_meta["shape"])
perm_const_op = tosa.ConstOp(
ir.DenseElementsAttr.get(memoryview(array.array("i", perm_list)))
)
@@ -870,29 +894,352 @@
return op


+def maxpool2d_op(node: MaxPool2dOp, symbol_table):
+ """
+ Import the maxpool2d operation.
+ From Buddy MaxPool2dOp to MLIR TOSA `max_pool2d` operation.
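+
+ Note: tosa.max_pool2d expects NHWC layout, so when the node's layout is
+ NCHW the input is transposed to NHWC before pooling and the result is
+ transposed back to NCHW afterwards.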
+ """ + if len(node.args) == 5: + raise NotImplementedError + input1 = symbol_table.get((str(node.args[0]), 0)) + kernel = node.args[1] + stride = node.args[2] + if len(node.args) > 3: + pad = node.args[3] + else: + pad = [0 for _ in kernel] + dtype = node.tensor_meta["dtype"] + result_element_type = mlir_element_type_get(dtype) + if node._layout.find("NCHW") != -1: + perm_list = [0, 2, 3, 1] + perm_const_op = tosa.ConstOp( + ir.DenseElementsAttr.get(memoryview(array.array("i", perm_list))) + ) + out_shape = list(ir.RankedTensorType(input1.type).shape) + perm_shape = [] + perm_shape.append(out_shape[0]) + perm_shape.append(out_shape[2]) + perm_shape.append(out_shape[3]) + perm_shape.append(out_shape[1]) + permute_result_type = ir.RankedTensorType.get( + perm_shape, result_element_type + ) + input1 = tosa.TransposeOp( + permute_result_type, input1, perm_const_op.results[0] + ).result + out_shape = node.tensor_meta["shape"] + if len(pad) == 1: + pad = [pad[0]] * 4 + elif len(pad) == 2: + pad = [pad[0]] * 2 + [pad[1]] * 2 + kernel_attr = ir._denseI64ArrayAttr(kernel, None) + stride_attr = ir._denseI64ArrayAttr(stride, None) + pad_attr = ir._denseI64ArrayAttr(pad, None) + if node._layout.find("NCHW") != -1: + perm_shape = [] + perm_shape.append(out_shape[0]) + perm_shape.append(out_shape[2]) + perm_shape.append(out_shape[3]) + perm_shape.append(out_shape[1]) + out_shape = perm_shape + output = ir.RankedTensorType.get(out_shape, result_element_type) + op = tosa.MaxPool2dOp(output, input1, kernel_attr, stride_attr, pad_attr) + if node._layout.find("NCHW") != -1: + perm_list = [0, 3, 1, 2] + perm_const_op = tosa.ConstOp( + ir.DenseElementsAttr.get(memoryview(array.array("i", perm_list))) + ) + perm_shape = [] + perm_shape.append(out_shape[0]) + perm_shape.append(out_shape[3]) + perm_shape.append(out_shape[1]) + perm_shape.append(out_shape[2]) + permute_result_type = ir.RankedTensorType.get( + perm_shape, result_element_type + ) + op = tosa.TransposeOp( + permute_result_type, op.result, perm_const_op.results[0] + ) + return op + + +def convolution2d_op(node: Conv2dOp, symbol_table): + """ + Import the convolution operation. + From Buddy Conv2dOp to MLIR TOSA `conv2d` operation. 
+ """ + assert len(node.args) == 9 + input1 = symbol_table.get((str(node.args[0]), 0)) + weight = symbol_table.get((str(node.args[1]), 0)) + is_kernel_transposed = node.args[6] + dtype = node.tensor_meta["dtype"] + result_element_type = mlir_element_type_get(dtype) + if node._layout.find("NCHW") != -1: + perm_list = [0, 2, 3, 1] + perm_const_op = tosa.ConstOp( + ir.DenseElementsAttr.get(memoryview(array.array("i", perm_list))) + ) + out_shape = list(ir.RankedTensorType(input1.type).shape) + perm_shape = [] + perm_shape.append(out_shape[0]) + perm_shape.append(out_shape[2]) + perm_shape.append(out_shape[3]) + perm_shape.append(out_shape[1]) + permute_result_type = ir.RankedTensorType.get( + perm_shape, result_element_type + ) + input1 = tosa.TransposeOp( + permute_result_type, input1, perm_const_op.results[0] + ).result + if node._layout.find("FCHW") != -1: + perm_list = [0, 2, 3, 1] + perm_const_op = tosa.ConstOp( + ir.DenseElementsAttr.get(memoryview(array.array("i", perm_list))) + ) + out_shape = list(ir.RankedTensorType(weight.type).shape) + perm_shape = [] + perm_shape.append(out_shape[0]) + perm_shape.append(out_shape[2]) + perm_shape.append(out_shape[3]) + perm_shape.append(out_shape[1]) + permute_result_type = ir.RankedTensorType.get( + perm_shape, result_element_type + ) + weight = tosa.TransposeOp( + permute_result_type, weight, perm_const_op.results[0] + ).result + if is_kernel_transposed: + in_channels = list(ir.RankedTensorType(weight.type).shape)[0] + out_channels = list(ir.RankedTensorType(weight.type).shape)[1] + else: + in_channels = list(ir.RankedTensorType(weight.type).shape)[1] + out_channels = list(ir.RankedTensorType(weight.type).shape)[0] + if len(node._parents) == 2: + new_size_tensor_type = ir.RankedTensorType.get( + [out_channels], result_element_type + ) + element = mlir_element_attr_get(dtype, 0) + new_size_attr = ir.DenseElementsAttr.get_splat( + new_size_tensor_type, element + ) + bias_tensor = tosa.ConstOp(new_size_attr).results[0] + else: + bias_tensor = symbol_table.get((str(node.args[2]), 0)) + assert input1 != None and weight != None and bias_tensor != None + stride = node.args[3] + input_padding = node.args[4] + if len(input_padding) == 1: + input_padding = [input_padding[0]] * 4 + elif len(input_padding) == 2: + input_padding = [input_padding[0]] * 2 + [input_padding[1]] * 2 + dilation = node.args[5] + groups = node.args[8] + out_shape = node.tensor_meta["shape"] + if node._layout.find("NCHW") != -1: + perm_shape = [] + perm_shape.append(out_shape[0]) + perm_shape.append(out_shape[2]) + perm_shape.append(out_shape[3]) + perm_shape.append(out_shape[1]) + out_shape = perm_shape + output = ir.RankedTensorType.get(out_shape, result_element_type) + stride_attr = ir._denseI64ArrayAttr(stride, None) + assert groups == 1, 'tosa.conv2d only support one group' + if is_kernel_transposed: + if sum(input_padding) > 0 or sum(dilation) > len(dilation): + raise NotImplementedError + out_padding = node.args[7] + for i in range(len(out_padding), 4): + out_padding = [0] + out_padding + out_padding_attr = ir._denseI64ArrayAttr(out_padding, None) + out_shape_attr = ir._denseI64ArrayAttr(out_shape, None) + op = tosa.TransposeConv2DOp( + output, + input1, + weight, + bias_tensor, + out_padding_attr, + stride_attr, + out_shape_attr, + ) + else: + input_padding_attr = ir._denseI64ArrayAttr(input_padding, None) + dilation_attr = ir._denseI64ArrayAttr(dilation, None) + op = tosa.Conv2DOp( + output, + input1, + weight, + bias_tensor, + input_padding_attr, + stride_attr, + 
+            dilation_attr,
+        )
+    # Transpose the NHWC result back to the original NCHW layout.
+    if node._layout.find("NCHW") != -1:
+        perm_list = [0, 3, 1, 2]
+        perm_const_op = tosa.ConstOp(
+            ir.DenseElementsAttr.get(memoryview(array.array("i", perm_list)))
+        )
+        perm_shape = [out_shape[0], out_shape[3], out_shape[1], out_shape[2]]
+        permute_result_type = ir.RankedTensorType.get(
+            perm_shape, result_element_type
+        )
+        op = tosa.TransposeOp(
+            permute_result_type, op.result, perm_const_op.results[0]
+        )
+    return op
+
+
+def relu_op(node: ReluOp, symbol_table):
+    """
+    Import the tensor relu operation.
+    From Buddy ReluOp to MLIR TOSA `maximum` operation.
+    """
+    assert len(node.args) == 1
+    input1 = symbol_table.get((str(node.args[0]), 0))
+    if input1 is None:
+        return
+    output_shape = list(node.tensor_meta["shape"])
+    dtype = node.tensor_meta["dtype"]
+    # relu(x) is computed as maximum(x, 0) against a zero-splat constant.
+    element = mlir_element_attr_get(dtype, 0)
+    tensor_type = ir.RankedTensorType.get(output_shape, element.type)
+    attr = ir.DenseElementsAttr.get_splat(tensor_type, element)
+    zero_op = tosa.ConstOp(attr)
+    op = tosa.MaximumOp(tensor_type, input1, zero_op)
+
+    return op
+
+
+def iota_op(node: IotaOp, symbol_table):
+    """
+    Import the tensor iota operation.
+    From Buddy IotaOp to MLIR TOSA `ConstOp` operation.
+    """
+    assert len(node.args) == 1
+    output_shape = list(node.tensor_meta["shape"])
+    dtype = node.tensor_meta["dtype"]
+    start = node.kwargs["start"]
+    end = node.args[0]
+    step = node.kwargs["step"]
+    mlir_dtype = mlir_element_type_get(dtype)
+    tensor_type = ir.RankedTensorType.get(output_shape, mlir_dtype)
+    # The sequence is materialized at import time as a dense constant.
+    attr = ir.DenseElementsAttr.get(
+        numpy.arange(start, end, step),
+        type=tensor_type,
+    )
+    op = tosa.ConstOp(attr)
+
+    return op
+
+
+def sigmoid_op(node: SigmoidOp, symbol_table):
+    """
+    Import the tensor sigmoid operation.
+    From Buddy SigmoidOp to MLIR TOSA `SigmoidOp` operation.
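+
+    `tosa.sigmoid` computes 1 / (1 + exp(-x)) elementwise, so the node
+    maps one-to-one onto a single TOSA operation.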
+ """ + assert len(node.args) == 1 + input1 = symbol_table.get((str(node.args[0]), 0)) + if input1 is None: + return + output_shape = list(node.tensor_meta["shape"]) + dtype = node.tensor_meta["dtype"] + mlir_dtype = mlir_element_type_get(dtype) + tensor_type = ir.RankedTensorType.get(output_shape, mlir_dtype) + op = tosa.SigmoidOp(tensor_type, input1) + + return op + + +def reciprocal_op(node: ReciprocalOp, symbol_table): + input_tensor = symbol_table.get((str(node.args[0]), 0)) + return tosa.ReciprocalOp(input_tensor.type, input_tensor) + + +def mean_op(node: MeanOp, symbol_table): + input_tensor = symbol_table.get((str(node.args[0]), 0)) + keepdim = node.args[2] + dims = [x for x in node.args[1]] + if isinstance(dims, int): + dims = [dims] + + for dim_item_idx, _ in enumerate(dims): + if dims[dim_item_idx] < 0: + dims[dim_item_idx] += len( + ir.RankedTensorType(input_tensor.type).shape + ) + + reduce_sum_result = input_tensor + for dim_item in dims: + reduce_dim_attr = ir.IntegerAttr.get( + ir.IntegerType.get_signless(32), dim_item + ) + reduce_sum_op = tosa.ReduceSumOp(reduce_sum_result, reduce_dim_attr) + reduce_sum_result = reduce_sum_op.results[0] + + tensor_shp = ir.RankedTensorType(input_tensor.type).shape + dim_size = 1 + + for dim_item in dims: + dim_size *= tensor_shp[dim_item] + + denominator_const_op = tosa.ConstOp( + ir.DenseElementsAttr.get(memoryview(array.array("f", [dim_size]))) + ) + reciprocal_op = tosa.ReciprocalOp( + denominator_const_op.results[0].type, denominator_const_op + ) + + ret = tosa.MulOp( + reduce_sum_op.results[0].type, + reciprocal_op.results[0], + reduce_sum_op.results[0], + ir.IntegerAttr.get(ir.IntegerType.get_signless(8), 0), + ) + + if not keepdim: + result_shp = ir.RankedTensorType(ret.results[0].type).shape + result_shp = [siz for siz in result_shp if siz != 1] + ret = tosa.ReshapeOp( + ret.results[0], memoryview(array.array("i", result_shp)) + ) + + return ret + + ops_registry = { - "add.Tensor": add_op, - "mul.Tensor": mul_op, - "sub.Tensor": sub_op, - "sum.dim_IntList": sum_op, - "tanh.default": tanh_op, - "amax.default": amax_op, - "rsqrt.default": rsqrt_op, - "bmm.default": bmm_op, - "clone.default": clone_op, - "div.Tensor": div_op, - "exp.default": exp_op, - "expand.default": expand_op, - "var_mean.correction": var_mean_op, - "addmm.default": addmm_op, - "reshape.default": reshape_op, - "view.default": reshape_op, - "select.int": select_op, - "slice.Tensor": slice_op, - "embedding.default": embedding_op, - "convert_element_type.default": convert_element_type_op, - "permute.default": permute_op, - "unsqueeze.default": unsqueeze_op, - "t.default": t_op, - "transpose.int": transpose_op, + "AddOp": add_op, + "MulOp": mul_op, + "SubOp": sub_op, + "SumDimOp": sum_op, + "TanhOp": tanh_op, + "AmaxOp": amax_op, + "RsqrtOp": rsqrt_op, + "BatchMatmulOp": bmm_op, + "CloneOp": clone_op, + "DivOp": div_op, + "ExpOp": exp_op, + "ExpandOp": expand_op, + "VarMeanOp": var_mean_op, + "AddMMOp": addmm_op, + "ReshapeOp": reshape_op, + "ViewOp": reshape_op, + "SelectOp": select_op, + "SliceOp": slice_op, + "EmbeddingOp": embedding_op, + "ConvertElementTypeOp": convert_element_type_op, + "PermuteOp": permute_op, + "UnsqueezeOp": unsqueeze_op, + "TOp": t_op, + "TransposeOp": transpose_op, + "MaxPool2dOp": maxpool2d_op, + "Conv2dOp": convolution2d_op, + "ReluOp": relu_op, + "IotaOp": iota_op, + "SigmoidOp": sigmoid_op, + "ReciprocalOp": reciprocal_op, + "MeanOp": mean_op, } diff --git a/frontend/Python/ops/utils.py b/frontend/Python/ops/utils.py new file mode 
index 0000000000..337f5a6b49
--- /dev/null
+++ b/frontend/Python/ops/utils.py
@@ -0,0 +1,56 @@
+# ===- utils.py ----------------------------------------------------------------
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ===---------------------------------------------------------------------------
+#
+# Utilities for MLIR element types and attributes.
+#
+# ===---------------------------------------------------------------------------
+
+from typing import Dict
+import mlir.ir as ir
+
+from ..graph import TensorDType
+
+
+def mlir_element_type_get(type_name):
+    """
+    Get the MLIR element type based on the TensorDType enum type.
+    Args:
+        type_name: The TensorDType enum type.
+    """
+    match type_name:
+        case TensorDType.Float32:
+            return ir.F32Type.get()
+        case TensorDType.Int64:
+            return ir.IntegerType.get_signless(64)
+        case TensorDType.Bool:
+            return ir.IntegerType.get_signless(1)
+
+
+def mlir_element_attr_get(type_name, value):
+    """
+    Get the MLIR element attribute based on TensorDType enum type and value.
+    Args:
+        type_name: The TensorDType enum type.
+        value: The concrete value for the MLIR element attribute.
+    """
+    match type_name:
+        case TensorDType.Float32:
+            return ir.FloatAttr.get(ir.F32Type.get(), value)
+        case TensorDType.Int64:
+            return ir.IntegerAttr.get(ir.IntegerType.get_signless(64), value)
+        case TensorDType.Bool:
+            return ir.IntegerAttr.get(ir.IntegerType.get_signless(1), value)
+
diff --git a/requirements.txt b/requirements.txt
index 45d5b1fa36..606179eb74 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,3 +6,4 @@ sentencepiece == 0.1.99
 accelerate
 protobuf
 pybind11 == 2.11.1
+torchvision
diff --git a/tests/Python/test_addmm.py b/tests/Python/test_addmm.py
index cb4459f450..563c874462 100644
--- a/tests/Python/test_addmm.py
+++ b/tests/Python/test_addmm.py
@@ -22,8 +22,11 @@ def foo(x, y, z):
     aot_autograd_decomposition=inductor_decomp,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(in1, in2, in3)
+graphs = dynamo_compiler.importer(foo, in1, in2, in3)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
@@ -32,4 +35,3 @@ def foo(x, y, z):
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_amax.py b/tests/Python/test_amax.py
index 3759b352c7..81944a2c27 100644
--- a/tests/Python/test_amax.py
+++ b/tests/Python/test_amax.py
@@ -22,8 +22,11 @@ def foo(x, dim):
     aot_autograd_decomposition=inductor_decomp,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(in1, dim)
+graphs = dynamo_compiler.importer(foo, in1, dim)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
@@ -31,4 +34,3 @@ def foo(x, dim):
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_arange.py b/tests/Python/test_arange.py
index ac7fa3c45e..f7e1cd1c4f 100644
--- a/tests/Python/test_arange.py
+++ b/tests/Python/test_arange.py
@@ -2,10 +2,9 @@

 import torch
 import torch._dynamo as dynamo
-from torch._inductor.decomposition import decompositions as inductor_decomp
 from torch._functorch.aot_autograd import aot_autograd_decompositions

 from buddy.compiler.frontend import DynamoCompiler
-from buddy.compiler.ops import tosa
+from buddy.compiler.ops import linalg


 def foo(x):
@@ -16,12 +15,15 @@ def foo(x):

 # Initialize the dynamo compiler.
 dynamo_compiler = DynamoCompiler(
-    primary_registry=tosa.ops_registry,
+    primary_registry=linalg.ops_registry,
     aot_autograd_decomposition=aot_autograd_decompositions,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(in1)
+graphs = dynamo_compiler.importer(foo, in1)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
@@ -29,4 +31,3 @@ def foo(x):
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_arith_add.py b/tests/Python/test_arith_add.py
index 44db4609d7..9c6e9d3121 100644
--- a/tests/Python/test_arith_add.py
+++ b/tests/Python/test_arith_add.py
@@ -1,11 +1,10 @@
 # RUN: %PYTHON %s 2>&1 | FileCheck %s

 import torch
-import torch._dynamo as dynamo
 from torch._inductor.decomposition import decompositions as inductor_decomp

 from buddy.compiler.frontend import DynamoCompiler
-from buddy.compiler.ops import tosa
+from buddy.compiler.ops import linalg


 def foo(x, y):
@@ -17,12 +16,15 @@ def foo(x, y):

 # Initialize the dynamo compiler.
 dynamo_compiler = DynamoCompiler(
-    primary_registry=tosa.ops_registry,
+    primary_registry=linalg.ops_registry,
     aot_autograd_decomposition=inductor_decomp,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(in1, in2)
+graphs = dynamo_compiler.importer(foo, in1, in2)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
@@ -30,4 +32,3 @@ def foo(x, y):
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_arith_div.py b/tests/Python/test_arith_div.py
index afc222a154..cf5b29023b 100644
--- a/tests/Python/test_arith_div.py
+++ b/tests/Python/test_arith_div.py
@@ -5,7 +5,7 @@
 from torch._inductor.decomposition import decompositions as inductor_decomp

 from buddy.compiler.frontend import DynamoCompiler
-from buddy.compiler.ops import tosa
+from buddy.compiler.ops import linalg


 def foo(x, y):
@@ -17,12 +17,15 @@ def foo(x, y):

 # Initialize the dynamo compiler.
 dynamo_compiler = DynamoCompiler(
-    primary_registry=tosa.ops_registry,
+    primary_registry=linalg.ops_registry,
     aot_autograd_decomposition=inductor_decomp,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(in1, in2)
+graphs = dynamo_compiler.importer(foo, in1, in2)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
@@ -31,4 +34,3 @@ def foo(x, y):
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_arith_mul.py b/tests/Python/test_arith_mul.py
index 9dc4dfbfff..b22c6ebfda 100644
--- a/tests/Python/test_arith_mul.py
+++ b/tests/Python/test_arith_mul.py
@@ -5,7 +5,7 @@
 from torch._inductor.decomposition import decompositions as inductor_decomp

 from buddy.compiler.frontend import DynamoCompiler
-from buddy.compiler.ops import tosa
+from buddy.compiler.ops import linalg


 def foo(x, y):
@@ -13,21 +13,24 @@ def foo(x, y):

 in1 = torch.randn(10)
-in2 = torch.randn(10)
+in2 = 2

 # Initialize the dynamo compiler.
 dynamo_compiler = DynamoCompiler(
-    primary_registry=tosa.ops_registry,
+    primary_registry=linalg.ops_registry,
     aot_autograd_decomposition=inductor_decomp,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(in1, in2)
+graphs = dynamo_compiler.importer(foo, in1, in2)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
+# CHECK: %{{.*}} = arith.constant
 # CHECK: %{{.*}} = tosa.mul
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_arith_sub.py b/tests/Python/test_arith_sub.py
index 95b5475fc0..0f6238afa2 100644
--- a/tests/Python/test_arith_sub.py
+++ b/tests/Python/test_arith_sub.py
@@ -21,8 +21,11 @@ def foo(x, y):
     aot_autograd_decomposition=inductor_decomp,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(in1, in2)
+graphs = dynamo_compiler.importer(foo, in1, in2)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
@@ -30,4 +33,3 @@ def foo(x, y):
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_bmm.py b/tests/Python/test_bmm.py
index 403b0621b2..ec7c8b1601 100644
--- a/tests/Python/test_bmm.py
+++ b/tests/Python/test_bmm.py
@@ -5,7 +5,7 @@
 from torch._inductor.decomposition import decompositions as inductor_decomp

 from buddy.compiler.frontend import DynamoCompiler
-from buddy.compiler.ops import tosa
+from buddy.compiler.ops import linalg


 def foo(x, y):
@@ -17,17 +17,20 @@ def foo(x, y):

 # Initialize the dynamo compiler.
 dynamo_compiler = DynamoCompiler(
-    primary_registry=tosa.ops_registry,
+    primary_registry=linalg.ops_registry,
     aot_autograd_decomposition=inductor_decomp,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(in1, in2)
+graphs = dynamo_compiler.importer(foo, in1, in2)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
-# CHECK: %{{.*}} = tosa.matmul
+# CHECK: %{{.*}} = arith.constant
+# CHECK: %{{.*}} = linalg.batch_matmul
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_cat.py b/tests/Python/test_cat.py
index db9dacf11c..9c769ae656 100644
--- a/tests/Python/test_cat.py
+++ b/tests/Python/test_cat.py
@@ -6,7 +6,7 @@
 from torch._functorch.aot_autograd import aot_autograd_decompositions

 from buddy.compiler.frontend import DynamoCompiler
-from buddy.compiler.ops import tosa
+from buddy.compiler.ops import linalg


 def foo(x, y):
@@ -17,12 +17,15 @@ def foo(x, y):
 in2 = torch.ones([13, 13], dtype=torch.float32)
 # Initialize the dynamo compiler.
 dynamo_compiler = DynamoCompiler(
-    primary_registry=tosa.ops_registry,
+    primary_registry=linalg.ops_registry,
     aot_autograd_decomposition=aot_autograd_decompositions,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(in1, in2)
+graphs = dynamo_compiler.importer(foo, in1, in2)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
@@ -32,4 +35,3 @@ def foo(x, y):
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_clone.py b/tests/Python/test_clone.py
index 24fcd32254..3eabd7d647 100644
--- a/tests/Python/test_clone.py
+++ b/tests/Python/test_clone.py
@@ -5,7 +5,7 @@
 from torch._inductor.decomposition import decompositions as inductor_decomp

 from buddy.compiler.frontend import DynamoCompiler
-from buddy.compiler.ops import tosa
+from buddy.compiler.ops import linalg


 def foo(x):
@@ -16,17 +16,19 @@ def foo(x):

 # Initialize the dynamo compiler.
 dynamo_compiler = DynamoCompiler(
-    primary_registry=tosa.ops_registry,
+    primary_registry=linalg.ops_registry,
     aot_autograd_decomposition=inductor_decomp,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(in1)
+graphs = dynamo_compiler.importer(foo, in1)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
-# CHECK: %{{.*}} = tosa.identity
+# CHECK: %{{.*}} = tensor.extract_slice
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_convert_element_type.py b/tests/Python/test_convert_element_type.py
index 63cd1ddaea..ca88384633 100644
--- a/tests/Python/test_convert_element_type.py
+++ b/tests/Python/test_convert_element_type.py
@@ -13,7 +13,7 @@ def foo(x, to_cast_type):

 in1 = torch.randn(10).to(torch.float32)
-to_cast_type = torch.float16
+to_cast_type = torch.int32

 # Initialize the dynamo compiler.
 dynamo_compiler = DynamoCompiler(
@@ -21,8 +21,11 @@ def foo(x, to_cast_type):
     aot_autograd_decomposition=inductor_decomp,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(in1, to_cast_type)
+graphs = dynamo_compiler.importer(foo, in1, to_cast_type)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
@@ -30,4 +33,3 @@ def foo(x, to_cast_type):
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_convolution_default.py b/tests/Python/test_convolution_default.py
new file mode 100644
index 0000000000..fed1607c7a
--- /dev/null
+++ b/tests/Python/test_convolution_default.py
@@ -0,0 +1,42 @@
+# RUN: %PYTHON %s 2>&1 | FileCheck %s
+
+import torch
+import torch._dynamo as dynamo
+from torch._inductor.decomposition import decompositions as inductor_decomp
+
+from buddy.compiler.frontend import DynamoCompiler
+from buddy.compiler.ops import tosa
+
+
+class Convolution(torch.nn.Module):
+    def __init__(self, *args, **kwargs) -> None:
+        super().__init__(*args, **kwargs)
+        self.conv = torch.nn.Conv2d(3, 255, (5, 5), 3, 3, bias=False)
+
+    def forward(self, a):
+        return self.conv(a)
+
+
+model = Convolution()
+dynamo_compiler = DynamoCompiler(
+    primary_registry=tosa.ops_registry,
+    aot_autograd_decomposition=inductor_decomp,
+)
+
+in1 = torch.randn((1, 3, 640, 480))
+graphs = dynamo_compiler.importer(model, in1)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)
+# CHECK: module {
+# CHECK-LABEL: func.func @forward
+# CHECK: %{{.*}} = "tosa.const"
+# CHECK: %{{.*}} = tosa.transpose
+# CHECK: %{{.*}} = "tosa.const"()
+# CHECK: %{{.*}} = tosa.transpose
+# CHECK: %{{.*}} = tosa.conv2d
+# CHECK: %{{.*}} = tosa.transpose
+# CHECK: return %{{.*}}
+# CHECK: }
+# CHECK: }
diff --git a/tests/Python/test_embedding.py b/tests/Python/test_embedding.py
index ee76d2068d..484bb617b5 100644
--- a/tests/Python/test_embedding.py
+++ b/tests/Python/test_embedding.py
@@ -22,8 +22,11 @@ def foo(weight, indices):
 weight = torch.randn(10, 5)
 indices = torch.randint(10, (3, 3)).to(torch.int32)

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(weight, indices)
+graphs = dynamo_compiler.importer(foo, weight, indices)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
@@ -34,16 +37,29 @@ def foo(weight, indices):
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
-
 # test cast case
 weight = torch.randn(10, 5)
 indices = torch.randint(10, (3, 3)).to(torch.int64)
+graphs = dynamo_compiler.importer(foo, weight, indices)
+print(graphs)
+assert len(graphs) == 2
+graphs[0].lower_to_top_level_ir()
+print(graphs[0]._imported_module)
+
+# CHECK: module {
+# CHECK-LABEL: func.func @forward
+# CHECK: %{{.*}} = tosa.reshape
+# CHECK: %{{.*}} = tosa.reshape
+# CHECK: %{{.*}} = tosa.gather
+# CHECK: %{{.*}} = tosa.reshape
+# CHECK: return %{{.*}}
+# CHECK: }
+# CHECK: }

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(weight, indices)
+graphs[1].lower_to_top_level_ir()
+print(graphs[1]._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
@@ -54,5 +70,4 @@ def foo(weight, indices):
 # CHECK: %{{.*}} = tosa.reshape
 # CHECK: return %{{.*}}
 # CHECK: }
-# CHECK: }
-print(dynamo_compiler.imported_module)
+# CHECK: }
\ No newline at end of file
diff --git a/tests/Python/test_exp.py b/tests/Python/test_exp.py
index 3fcff43613..7519a999b3 100644
--- a/tests/Python/test_exp.py
+++ b/tests/Python/test_exp.py
@@ -20,8 +20,11 @@ def foo(x):
     aot_autograd_decomposition=inductor_decomp,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(in1)
+graphs = dynamo_compiler.importer(foo, in1)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
@@ -29,4 +32,3 @@ def foo(x):
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_full.py b/tests/Python/test_full.py
index 33cdc2c1d3..0a5f5888b1 100644
--- a/tests/Python/test_full.py
+++ b/tests/Python/test_full.py
@@ -5,7 +5,7 @@
 from torch._inductor.decomposition import decompositions as inductor_decomp

 from buddy.compiler.frontend import DynamoCompiler
-from buddy.compiler.ops import tosa
+from buddy.compiler.ops import linalg


 def foo(x, y):
@@ -17,12 +17,15 @@ def foo(x, y):

 # Initialize the dynamo compiler.
 dynamo_compiler = DynamoCompiler(
-    primary_registry=tosa.ops_registry,
+    primary_registry=linalg.ops_registry,
     aot_autograd_decomposition=inductor_decomp,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(in1, in2)
+graphs = dynamo_compiler.importer(foo, in1, in2)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
@@ -30,4 +33,3 @@ def foo(x, y):
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_index.py b/tests/Python/test_index.py
index da31095c1a..c21ce1a5f6 100644
--- a/tests/Python/test_index.py
+++ b/tests/Python/test_index.py
@@ -6,7 +6,7 @@
 from torch._functorch.aot_autograd import aot_autograd_decompositions

 from buddy.compiler.frontend import DynamoCompiler
-from buddy.compiler.ops import tosa
+from buddy.compiler.ops import linalg


 def foo(x, y):
@@ -17,12 +17,15 @@ def foo(x, y):
 in2 = torch.tensor([1])
 # Initialize the dynamo compiler.
 dynamo_compiler = DynamoCompiler(
-    primary_registry=tosa.ops_registry,
+    primary_registry=linalg.ops_registry,
     aot_autograd_decomposition=aot_autograd_decompositions,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(in1, in2)
+graphs = dynamo_compiler.importer(foo, in1, in2)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
@@ -31,4 +34,3 @@ def foo(x, y):
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_expand.py b/tests/Python/test_iota.py
similarity index 52%
rename from tests/Python/test_expand.py
rename to tests/Python/test_iota.py
index 37e9aca383..d4e9d3e566 100644
--- a/tests/Python/test_expand.py
+++ b/tests/Python/test_iota.py
@@ -8,25 +8,28 @@
 from buddy.compiler.ops import tosa


-def foo(x, new_size):
-    return torch.ops.aten.expand(x, new_size)
+class foo(torch.nn.Module):
+    def __init__(self, *args, **kwargs) -> None:
+        super().__init__(*args, **kwargs)

-x = torch.randn(1, 3)
-new_size = (6, 3)
+    def forward(self, a):
+        return torch.arange(a)

-# Initialize the dynamo compiler.
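+# torch.arange is captured as a Buddy IotaOp and materialized as a
+# "tosa.const" at import time, which is what the CHECK lines below expect.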
+
+model = foo()
 dynamo_compiler = DynamoCompiler(
     primary_registry=tosa.ops_registry,
     aot_autograd_decomposition=inductor_decomp,
 )
-
-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(x, new_size)
-
+in1 = 40
+graphs = dynamo_compiler.importer(model, in1)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)
 # CHECK: module {
 # CHECK-LABEL: func.func @forward
-# CHECK: %{{.*}} = tosa.add
-# CHECK: return %{{.*}} : tensor<6x3xf32>
+# CHECK: %{{.*}} = "tosa.const"
+# CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_lt.py b/tests/Python/test_lt.py
index a6f30b61cb..5cea5ce5f6 100644
--- a/tests/Python/test_lt.py
+++ b/tests/Python/test_lt.py
@@ -5,23 +5,26 @@
 from torch._inductor.decomposition import decompositions as inductor_decomp

 from buddy.compiler.frontend import DynamoCompiler
-from buddy.compiler.ops import tosa
+from buddy.compiler.ops import linalg


 def foo(x, y):
-    return torch.ops.aten.lt(x,y)
+    return torch.ops.aten.lt(x, y)


 in1 = torch.ones([13], dtype=torch.int64)
 in2 = torch.ones([13, 1], dtype=torch.int64)
 # Initialize the dynamo compiler.
 dynamo_compiler = DynamoCompiler(
-    primary_registry=tosa.ops_registry,
+    primary_registry=linalg.ops_registry,
     aot_autograd_decomposition=inductor_decomp,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(in1, in2)
+graphs = dynamo_compiler.importer(foo, in1, in2)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
@@ -30,4 +33,3 @@ def foo(x, y):
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_masked_fill.py b/tests/Python/test_masked_fill.py
index 3802b3de7a..3abbe88cd8 100644
--- a/tests/Python/test_masked_fill.py
+++ b/tests/Python/test_masked_fill.py
@@ -6,7 +6,7 @@
 from torch._functorch.aot_autograd import aot_autograd_decompositions

 from buddy.compiler.frontend import DynamoCompiler
-from buddy.compiler.ops import tosa
+from buddy.compiler.ops import linalg


 def foo(x, y, z):
@@ -18,12 +18,15 @@ def foo(x, y, z):
 in3 = 0
 # Initialize the dynamo compiler.
 dynamo_compiler = DynamoCompiler(
-    primary_registry=tosa.ops_registry,
+    primary_registry=linalg.ops_registry,
     aot_autograd_decomposition=aot_autograd_decompositions,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(in1, in2, in3)
+graphs = dynamo_compiler.importer(foo, in1, in2, in3)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
@@ -33,4 +36,3 @@ def foo(x, y, z):
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_max_pool2d.py b/tests/Python/test_max_pool2d.py
new file mode 100644
index 0000000000..eecfc73d93
--- /dev/null
+++ b/tests/Python/test_max_pool2d.py
@@ -0,0 +1,44 @@
+# RUN: %PYTHON %s 2>&1 | FileCheck %s
+
+import torch
+from torch._inductor.decomposition import decompositions as inductor_decomp
+
+from buddy.compiler.frontend import DynamoCompiler
+from buddy.compiler.ops import tosa
+
+
+class TestModule(torch.nn.Module):
+    def __init__(self, *args, **kwargs) -> None:
+        super().__init__(*args, **kwargs)
+        self.pool = torch.nn.MaxPool2d((5, 5), 3, (2, 2))
+
+    def forward(self, a):
+        return self.pool(a)
+
+
+model = TestModule()
+dynamo_compiler = DynamoCompiler(
+    primary_registry=tosa.ops_registry,
+    aot_autograd_decomposition=inductor_decomp,
+)
+
+in1 = torch.randn((1, 3, 640, 480))
+
+model_opt = torch.compile(model, backend=dynamo_compiler)
+assert torch.allclose(model_opt(in1), model(in1), equal_nan=True)
+
+graphs = dynamo_compiler.importer(model, in1)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)
+# CHECK: module {
+# CHECK-LABEL: func.func @forward
+# CHECK: %{{.*}} = "tosa.const"
+# CHECK: %{{.*}} = tosa.transpose
+# CHECK: %{{.*}} = tosa.max_pool2d
+# CHECK: %{{.*}} = "tosa.const"
+# CHECK: %{{.*}} = tosa.transpose
+# CHECK: return %{{.*}}
+# CHECK: }
+# CHECK: }
diff --git a/tests/Python/test_mean.py b/tests/Python/test_mean.py
index 781e494162..0595619d18 100644
--- a/tests/Python/test_mean.py
+++ b/tests/Python/test_mean.py
@@ -1,16 +1,14 @@
 # RUN: %PYTHON %s 2>&1 | FileCheck %s

 import torch
-import torch._dynamo as dynamo
 from torch._inductor.decomposition import decompositions as inductor_decomp
-from torch._functorch.aot_autograd import aot_autograd_decompositions

 from buddy.compiler.frontend import DynamoCompiler
 from buddy.compiler.ops import tosa


-def foo(x, y, z):
-    return torch.mean(x, y, z)
+def foo(x, y, keepdim):
+    return torch.mean(x, y, keepdim=keepdim)


 in1 = torch.ones([13, 13], dtype=torch.float32)
@@ -19,17 +17,25 @@

 # Initialize the dynamo compiler.
 dynamo_compiler = DynamoCompiler(
     primary_registry=tosa.ops_registry,
-    aot_autograd_decomposition=aot_autograd_decompositions,
+    aot_autograd_decomposition=inductor_decomp,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(in1, in2, in3)
+foo_mlir = torch.compile(foo, backend=dynamo_compiler)
+assert torch.allclose(
+    foo_mlir(in1, in2, keepdim=in3), foo(in1, in2, keepdim=in3), equal_nan=True
+)
+graphs = dynamo_compiler.importer(foo, in1, in2, in3)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
-# CHECK: %{{.*}} = arith.constant
-# CHECK: %{{.*}} = linalg.generic
+# CHECK: %{{.*}} = tosa.reduce_sum
+# CHECK: %{{.*}} = "tosa.const"
+# CHECK: %{{.*}} = tosa.reciprocal
+# CHECK: %{{.*}} = tosa.mul
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_mm.py b/tests/Python/test_mm.py
index 4440b4ad8c..4f7c41df3e 100644
--- a/tests/Python/test_mm.py
+++ b/tests/Python/test_mm.py
@@ -6,7 +6,7 @@
 from torch._functorch.aot_autograd import aot_autograd_decompositions

 from buddy.compiler.frontend import DynamoCompiler
-from buddy.compiler.ops import tosa
+from buddy.compiler.ops import linalg


 def foo(x, y):
@@ -17,12 +17,15 @@ def foo(x, y):
 in2 = torch.ones([13, 13], dtype=torch.float32)
 # Initialize the dynamo compiler.
 dynamo_compiler = DynamoCompiler(
-    primary_registry=tosa.ops_registry,
+    primary_registry=linalg.ops_registry,
     aot_autograd_decomposition=aot_autograd_decompositions,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(in1, in2)
+graphs = dynamo_compiler.importer(foo, in1, in2)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
@@ -31,4 +34,3 @@ def foo(x, y):
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_neg.py b/tests/Python/test_neg.py
index e2f9e6f3d7..78261085a4 100644
--- a/tests/Python/test_neg.py
+++ b/tests/Python/test_neg.py
@@ -6,7 +6,7 @@
 from torch._functorch.aot_autograd import aot_autograd_decompositions

 from buddy.compiler.frontend import DynamoCompiler
-from buddy.compiler.ops import tosa
+from buddy.compiler.ops import linalg


 def foo(x):
@@ -16,18 +16,20 @@ def foo(x):
 in1 = torch.ones([13, 13], dtype=torch.float32)
 # Initialize the dynamo compiler.
 dynamo_compiler = DynamoCompiler(
-    primary_registry=tosa.ops_registry,
+    primary_registry=linalg.ops_registry,
     aot_autograd_decomposition=aot_autograd_decompositions,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(in1)
+graphs = dynamo_compiler.importer(foo, in1)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
 # CHECK: %{{.*}} = tensor.empty
-# CHECK: %{{.*}} = linalg.generic
+# CHECK: %{{.*}} = linalg.negf
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_ones.py b/tests/Python/test_ones.py
index 7343fd1026..4af4ead36e 100644
--- a/tests/Python/test_ones.py
+++ b/tests/Python/test_ones.py
@@ -5,7 +5,7 @@
 from torch._inductor.decomposition import decompositions as inductor_decomp

 from buddy.compiler.frontend import DynamoCompiler
-from buddy.compiler.ops import tosa
+from buddy.compiler.ops import linalg


 def foo(x):
@@ -16,12 +16,15 @@ def foo(x):

 # Initialize the dynamo compiler.
 dynamo_compiler = DynamoCompiler(
-    primary_registry=tosa.ops_registry,
+    primary_registry=linalg.ops_registry,
     aot_autograd_decomposition=inductor_decomp,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(in1)
+graphs = dynamo_compiler.importer(foo, in1)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
@@ -29,4 +32,3 @@ def foo(x):
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_permute.py b/tests/Python/test_permute.py
index d260df3c2f..7f1aad3e10 100644
--- a/tests/Python/test_permute.py
+++ b/tests/Python/test_permute.py
@@ -21,8 +21,11 @@ def foo(x, y):
     aot_autograd_decomposition=inductor_decomp,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(x, perm)
+graphs = dynamo_compiler.importer(foo, x, perm)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
@@ -30,4 +33,3 @@ def foo(x, y):
 # CHECK: return %{{.*}} : tensor<4x3x2xf32>
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_pow.py b/tests/Python/test_pow.py
index cfc47feb1e..d671563832 100644
--- a/tests/Python/test_pow.py
+++ b/tests/Python/test_pow.py
@@ -6,7 +6,7 @@
 from torch._functorch.aot_autograd import aot_autograd_decompositions

 from buddy.compiler.frontend import DynamoCompiler
-from buddy.compiler.ops import tosa
+from buddy.compiler.ops import linalg


 def foo(x, y):
@@ -17,12 +17,15 @@ def foo(x, y):
 in2 = 2
 # Initialize the dynamo compiler.
 dynamo_compiler = DynamoCompiler(
-    primary_registry=tosa.ops_registry,
+    primary_registry=linalg.ops_registry,
     aot_autograd_decomposition=aot_autograd_decompositions,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(in1, in2)
+graphs = dynamo_compiler.importer(foo, in1, in2)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
@@ -32,4 +35,3 @@ def foo(x, y):
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_reciprocal.py b/tests/Python/test_reciprocal.py
new file mode 100644
index 0000000000..9c31fb8b5b
--- /dev/null
+++ b/tests/Python/test_reciprocal.py
@@ -0,0 +1,36 @@
+# RUN: %PYTHON %s 2>&1 | FileCheck %s
+
+import torch
+from torch._inductor.decomposition import decompositions as inductor_decomp
+
+from buddy.compiler.frontend import DynamoCompiler
+from buddy.compiler.ops import math
+
+
+def foo(x):
+    return torch.ops.aten.reciprocal(x)
+
+
+x = torch.randn(10, 3, 6)
+
+# Initialize the dynamo compiler.
+dynamo_compiler = DynamoCompiler(
+    primary_registry=math.ops_registry,
+    aot_autograd_decomposition=inductor_decomp,
+)
+
+foo_mlir = torch.compile(foo, backend=dynamo_compiler)
+assert torch.allclose(foo_mlir(x), foo(x), equal_nan=True)
+
+graphs = dynamo_compiler.importer(foo, x)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)
+
+# CHECK: module {
+# CHECK-LABEL: func.func @forward
+# CHECK: %{{.*}} = tosa.reciprocal
+# CHECK: return %{{.*}}
+# CHECK: }
+# CHECK: }
diff --git a/tests/Python/test_relu.py b/tests/Python/test_relu.py
new file mode 100644
index 0000000000..c6d6bc6aed
--- /dev/null
+++ b/tests/Python/test_relu.py
@@ -0,0 +1,36 @@
+# RUN: %PYTHON %s 2>&1 | FileCheck %s
+
+import torch
+import torch._dynamo as dynamo
+from torch._inductor.decomposition import decompositions as inductor_decomp
+
+from buddy.compiler.frontend import DynamoCompiler
+from buddy.compiler.ops import tosa
+
+
+class foo(torch.nn.Module):
+    def __init__(self, *args, **kwargs) -> None:
+        super().__init__(*args, **kwargs)
+
+    def forward(self, a):
+        return torch.relu(a)
+
+
+model = foo()
+dynamo_compiler = DynamoCompiler(
+    primary_registry=tosa.ops_registry,
+    aot_autograd_decomposition=inductor_decomp,
+)
+in1 = torch.randn((1, 3, 640, 480), device="cpu")
+graphs = dynamo_compiler.importer(model, in1)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)
+# CHECK: module {
+# CHECK-LABEL: func.func @forward
+# CHECK: %{{.*}} = "tosa.const"
+# CHECK: %{{.*}} = tosa.maximum
+# CHECK: return %{{.*}}
+# CHECK: }
+# CHECK: }
diff --git a/tests/Python/test_reshape.py b/tests/Python/test_reshape.py
index 56a194697e..989e0e4da5 100644
--- a/tests/Python/test_reshape.py
+++ b/tests/Python/test_reshape.py
@@ -21,8 +21,11 @@ def foo(x, new_shape):
     aot_autograd_decomposition=inductor_decomp,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(x, new_shape)
+graphs = dynamo_compiler.importer(foo, x, new_shape)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
@@ -30,4 +33,3 @@ def foo(x, new_shape):
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_rsqrt.py b/tests/Python/test_rsqrt.py
index 8ca0cf929a..370334d661 100644
--- a/tests/Python/test_rsqrt.py
+++ b/tests/Python/test_rsqrt.py
@@ -5,7 +5,7 @@
 from torch._inductor.decomposition import decompositions as inductor_decomp

 from buddy.compiler.frontend import DynamoCompiler
-from buddy.compiler.ops import tosa
+from buddy.compiler.ops import linalg


 def foo(x):
@@ -16,17 +16,20 @@ def foo(x):

 # Initialize the dynamo compiler.
 dynamo_compiler = DynamoCompiler(
-    primary_registry=tosa.ops_registry,
+    primary_registry=linalg.ops_registry,
     aot_autograd_decomposition=inductor_decomp,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(x)
+graphs = dynamo_compiler.importer(foo, x)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
-# CHECK: %{{.*}} = tosa.rsqrt
+# CHECK: %{{.*}} = tensor.empty()
+# CHECK: %{{.*}} = linalg.generic
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_rsub.py b/tests/Python/test_rsub.py
index fc945970c9..99843af0e7 100644
--- a/tests/Python/test_rsub.py
+++ b/tests/Python/test_rsub.py
@@ -6,28 +6,32 @@
 from torch._functorch.aot_autograd import aot_autograd_decompositions

 from buddy.compiler.frontend import DynamoCompiler
-from buddy.compiler.ops import tosa
+from buddy.compiler.ops import linalg


 def foo(x, y):
-    return y-x
+    return torch.ops.aten.rsub(x, y)


 in1 = torch.ones([13, 13], dtype=torch.float32)
-in2 = torch.ones([13, 13], dtype=torch.float32)
+in2 = 2
 # Initialize the dynamo compiler.
 dynamo_compiler = DynamoCompiler(
-    primary_registry=tosa.ops_registry,
+    primary_registry=linalg.ops_registry,
     aot_autograd_decomposition=aot_autograd_decompositions,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(in1, in2)
+graphs = dynamo_compiler.importer(foo, in1, in2)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
-# CHECK: %{{.*}} = tosa.sub
+# CHECK: %{{.*}} = arith.constant
+# CHECK: %{{.*}} = tensor.empty()
+# CHECK: %{{.*}} = linalg.generic
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_select.py b/tests/Python/test_select.py
index d94bd296a1..c54420a117 100644
--- a/tests/Python/test_select.py
+++ b/tests/Python/test_select.py
@@ -22,8 +22,11 @@ def foo(x, dim, index):
     aot_autograd_decomposition=inductor_decomp,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(x, dim, index)
+graphs = dynamo_compiler.importer(foo, x, dim, index)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
@@ -32,4 +35,3 @@ def foo(x, dim, index):
 # CHECK: return %{{.*}} : tensor<3x2xf32>
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_sigmoid.py b/tests/Python/test_sigmoid.py
new file mode 100644
index 0000000000..43f03cc11f
--- /dev/null
+++ b/tests/Python/test_sigmoid.py
@@ -0,0 +1,35 @@
+# RUN: %PYTHON %s 2>&1 | FileCheck %s
+
+import torch
+import torch._dynamo as dynamo
+from torch._inductor.decomposition import decompositions as inductor_decomp
+
+from buddy.compiler.frontend import DynamoCompiler
+from buddy.compiler.ops import tosa
+
+
+class foo(torch.nn.Module):
+    def __init__(self, *args, **kwargs) -> None:
+        super().__init__(*args, **kwargs)
+
+    def forward(self, a):
+        return torch.sigmoid(a)
+
+
+model = foo()
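+# torch.sigmoid is captured as a Buddy SigmoidOp and lowered to the
+# tosa.sigmoid operation, which the CHECK lines below verify.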
+dynamo_compiler = DynamoCompiler(
+    primary_registry=tosa.ops_registry,
+    aot_autograd_decomposition=inductor_decomp,
+)
+in1 = torch.randn((1, 3, 640, 480), device="cpu")
+graphs = dynamo_compiler.importer(model, in1)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)
+# CHECK: module {
+# CHECK-LABEL: func.func @forward
+# CHECK: %{{.*}} = tosa.sigmoid
+# CHECK: return %{{.*}}
+# CHECK: }
+# CHECK: }
diff --git a/tests/Python/test_silu.py b/tests/Python/test_silu.py
index dcd919ca53..2aa5047765 100644
--- a/tests/Python/test_silu.py
+++ b/tests/Python/test_silu.py
@@ -6,7 +6,7 @@
 from torch._functorch.aot_autograd import aot_autograd_decompositions

 from buddy.compiler.frontend import DynamoCompiler
-from buddy.compiler.ops import tosa
+from buddy.compiler.ops import linalg


 def foo(x):
@@ -16,12 +16,15 @@ def foo(x):
 in1 = torch.ones([13, 13], dtype=torch.float32)
 # Initialize the dynamo compiler.
 dynamo_compiler = DynamoCompiler(
-    primary_registry=tosa.ops_registry,
+    primary_registry=linalg.ops_registry,
     aot_autograd_decomposition=aot_autograd_decompositions,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(in1)
+graphs = dynamo_compiler.importer(foo, in1)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
@@ -30,4 +33,3 @@ def foo(x):
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_slice.py b/tests/Python/test_slice.py
index 61a8658e1b..acc0acaa25 100644
--- a/tests/Python/test_slice.py
+++ b/tests/Python/test_slice.py
@@ -5,7 +5,7 @@
 from torch._inductor.decomposition import decompositions as inductor_decomp

 from buddy.compiler.frontend import DynamoCompiler
-from buddy.compiler.ops import tosa
+from buddy.compiler.ops import linalg


 def foo(x, dim, start_idx, end_idx):
@@ -19,12 +19,15 @@ def foo(x, dim, start_idx, end_idx):

 # Initialize the dynamo compiler.
 dynamo_compiler = DynamoCompiler(
-    primary_registry=tosa.ops_registry,
+    primary_registry=linalg.ops_registry,
     aot_autograd_decomposition=inductor_decomp,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(x, dim, start_idx, end_idx)
+graphs = dynamo_compiler.importer(foo, x, dim, start_idx, end_idx)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
@@ -32,4 +35,3 @@ def foo(x, dim, start_idx, end_idx):
 # CHECK: return %{{.*}} : tensor<3x2x2xf32>
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_softmax.py b/tests/Python/test_softmax.py
index d5e656de76..eca5b2c600 100644
--- a/tests/Python/test_softmax.py
+++ b/tests/Python/test_softmax.py
@@ -6,7 +6,7 @@
 from torch._functorch.aot_autograd import aot_autograd_decompositions

 from buddy.compiler.frontend import DynamoCompiler
-from buddy.compiler.ops import tosa
+from buddy.compiler.ops import linalg


 def foo(x):
@@ -16,26 +16,22 @@ def foo(x):
 in1 = torch.ones([13, 13], dtype=torch.float32)
 # Initialize the dynamo compiler.
 dynamo_compiler = DynamoCompiler(
-    primary_registry=tosa.ops_registry,
+    primary_registry=linalg.ops_registry,
     aot_autograd_decomposition=aot_autograd_decompositions,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(in1)
+graphs = dynamo_compiler.importer(foo, in1)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
-# CHECK: %{{.*}} = tensor.empty
+# CHECK: %{{.*}} = arith.constant
 # CHECK: %{{.*}} = linalg.generic
-# CHECK: %{{.*}} = linalg.generic
-# CHECK: %{{.*}} = tensor.empty
-# CHECK: %{{.*}} = linalg.generic
-# CHECK: %{{.*}} = tensor.empty
-# CHECK: %{{.*}} = linalg.generic
-# CHECK: %{{.*}} = linalg.generic
-# CHECK: %{{.*}} = tensor.empty
+# CHECK: %{{.*}} = tensor.empty()
 # CHECK: %{{.*}} = linalg.generic
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_sqrt.py b/tests/Python/test_sqrt.py
new file mode 100644
index 0000000000..b929d11075
--- /dev/null
+++ b/tests/Python/test_sqrt.py
@@ -0,0 +1,36 @@
+# RUN: %PYTHON %s 2>&1 | FileCheck %s
+
+import torch
+from torch._inductor.decomposition import decompositions as inductor_decomp
+
+from buddy.compiler.frontend import DynamoCompiler
+from buddy.compiler.ops import math
+
+
+def foo(x):
+    return torch.ops.aten.sqrt(x)
+
+
+x = torch.randn(10, 3, 6)
+
+# Initialize the dynamo compiler.
+dynamo_compiler = DynamoCompiler(
+    primary_registry=math.ops_registry,
+    aot_autograd_decomposition=inductor_decomp,
+)
+
+foo_mlir = torch.compile(foo, backend=dynamo_compiler)
+assert torch.allclose(foo_mlir(x), foo(x), equal_nan=True)
+
+graphs = dynamo_compiler.importer(foo, x)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)
+
+# CHECK: module {
+# CHECK-LABEL: func.func @forward
+# CHECK: %{{.*}} = math.sqrt
+# CHECK: return %{{.*}}
+# CHECK: }
+# CHECK: }
diff --git a/tests/Python/test_squeeze.py b/tests/Python/test_squeeze.py
index f394ca8d72..e6b1b5c00e 100644
--- a/tests/Python/test_squeeze.py
+++ b/tests/Python/test_squeeze.py
@@ -6,7 +6,7 @@
 from torch._functorch.aot_autograd import aot_autograd_decompositions

 from buddy.compiler.frontend import DynamoCompiler
-from buddy.compiler.ops import tosa
+from buddy.compiler.ops import linalg


 def foo(x):
@@ -16,12 +16,15 @@ def foo(x):
 in1 = torch.ones([1, 13, 13], dtype=torch.float32)
 # Initialize the dynamo compiler.
 dynamo_compiler = DynamoCompiler(
-    primary_registry=tosa.ops_registry,
+    primary_registry=linalg.ops_registry,
     aot_autograd_decomposition=aot_autograd_decompositions,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(in1)
+graphs = dynamo_compiler.importer(foo, in1)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
@@ -30,4 +33,3 @@ def foo(x):
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_sum.py b/tests/Python/test_sum.py
index 713910f15c..e97f942095 100644
--- a/tests/Python/test_sum.py
+++ b/tests/Python/test_sum.py
@@ -22,8 +22,11 @@ def foo(x, dim):
     aot_autograd_decomposition=inductor_decomp,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(x, dim)
+graphs = dynamo_compiler.importer(foo, x, dim)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
@@ -31,4 +34,3 @@ def foo(x, dim):
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_t.py b/tests/Python/test_t.py
index 835bb4c2f2..09d44facc2 100644
--- a/tests/Python/test_t.py
+++ b/tests/Python/test_t.py
@@ -6,7 +6,7 @@
 from torch._functorch.aot_autograd import aot_autograd_decompositions

 from buddy.compiler.frontend import DynamoCompiler
-from buddy.compiler.ops import tosa
+from buddy.compiler.ops import linalg


 def foo(x):
@@ -16,18 +16,20 @@ def foo(x):
 in1 = torch.ones([13, 13], dtype=torch.float32)
 # Initialize the dynamo compiler.
 dynamo_compiler = DynamoCompiler(
-    primary_registry=tosa.ops_registry,
+    primary_registry=linalg.ops_registry,
     aot_autograd_decomposition=aot_autograd_decompositions,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(in1)
+graphs = dynamo_compiler.importer(foo, in1)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
-# CHECK: %{{.*}} = "tosa.const"
-# CHECK: %{{.*}} = tosa.transpose
+# CHECK: %{{.*}} = tensor.empty()
+# CHECK: %{{.*}} = linalg.transpose
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_tanh.py b/tests/Python/test_tanh.py
index b1875dfd51..b9ca6082cd 100644
--- a/tests/Python/test_tanh.py
+++ b/tests/Python/test_tanh.py
@@ -20,8 +20,11 @@ def foo(x):
     aot_autograd_decomposition=inductor_decomp,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(x)
+graphs = dynamo_compiler.importer(foo, x)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
@@ -29,4 +32,3 @@ def foo(x):
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_to_copy.py b/tests/Python/test_to_copy.py
index 9632d9f5cb..0b6c2ad22a 100644
--- a/tests/Python/test_to_copy.py
+++ b/tests/Python/test_to_copy.py
@@ -6,7 +6,7 @@
 from torch._functorch.aot_autograd import aot_autograd_decompositions

 from buddy.compiler.frontend import DynamoCompiler
-from buddy.compiler.ops import tosa
+from buddy.compiler.ops import linalg


 def foo(x):
@@ -16,12 +16,15 @@ def foo(x):
 in1 = torch.ones([13, 13], dtype=torch.bool)
 # Initialize the dynamo compiler.
 dynamo_compiler = DynamoCompiler(
-    primary_registry=tosa.ops_registry,
+    primary_registry=linalg.ops_registry,
     aot_autograd_decomposition=aot_autograd_decompositions,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(in1)
+graphs = dynamo_compiler.importer(foo, in1)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
@@ -30,4 +33,3 @@ def foo(x):
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_transpose.py b/tests/Python/test_transpose.py
index d7e71be8e0..9769604f30 100644
--- a/tests/Python/test_transpose.py
+++ b/tests/Python/test_transpose.py
@@ -3,7 +3,6 @@
 import torch
 import torch._dynamo as dynamo
 from torch._inductor.decomposition import decompositions as inductor_decomp
-from torch._functorch.aot_autograd import aot_autograd_decompositions

 from buddy.compiler.frontend import DynamoCompiler
 from buddy.compiler.ops import tosa
@@ -19,17 +18,19 @@ def foo(x, y, z):

 # Initialize the dynamo compiler.
 dynamo_compiler = DynamoCompiler(
     primary_registry=tosa.ops_registry,
-    aot_autograd_decomposition=aot_autograd_decompositions,
+    aot_autograd_decomposition=inductor_decomp,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(in1, in2, in3)
+graphs = dynamo_compiler.importer(foo, in1, in2, in3)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
-# CHECK: %{{.*}} = "tosa.const"
+# CHECK: %{{.*}} = "tosa.const"()
 # CHECK: %{{.*}} = tosa.transpose
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_unsqueeze.py b/tests/Python/test_unsqueeze.py
index 577354b9f5..5cb4ee5527 100644
--- a/tests/Python/test_unsqueeze.py
+++ b/tests/Python/test_unsqueeze.py
@@ -5,7 +5,7 @@
 from torch._inductor.decomposition import decompositions as inductor_decomp

 from buddy.compiler.frontend import DynamoCompiler
-from buddy.compiler.ops import tosa
+from buddy.compiler.ops import linalg


 def foo(x, dim):
@@ -17,12 +17,15 @@ def foo(x, dim):

 # Initialize the dynamo compiler.
 dynamo_compiler = DynamoCompiler(
-    primary_registry=tosa.ops_registry,
+    primary_registry=linalg.ops_registry,
     aot_autograd_decomposition=inductor_decomp,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(x, dim)
+graphs = dynamo_compiler.importer(foo, x, dim)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
@@ -30,4 +33,3 @@ def foo(x, dim):
 # CHECK: return %{{.*}} : tensor<1x10xf32>
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_var_mean.py b/tests/Python/test_var_mean.py
index eb7f254e47..eae1c99839 100644
--- a/tests/Python/test_var_mean.py
+++ b/tests/Python/test_var_mean.py
@@ -24,8 +24,11 @@ def foo_keepdim(x):
     aot_autograd_decomposition=inductor_decomp,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(x)
+graphs = dynamo_compiler.importer(foo, x)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
@@ -44,10 +47,33 @@ def foo_keepdim(x):
 # CHECK: return %{{.*}} : tensor, tensor
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)

-foo_keepdim_mlir = dynamo.optimize(dynamo_compiler)(foo_keepdim)
-foo_keepdim_mlir(x)
+graphs = dynamo_compiler.importer(foo_keepdim, x)
+assert len(graphs) == 2
+graphs[0].lower_to_top_level_ir()
+print(graphs[0]._imported_module)
+
+# CHECK: module {
+# CHECK-LABEL: func.func @forward
+# CHECK: %{{.*}} = tosa.reduce_sum
+# CHECK: %{{.*}} = "tosa.const"
+# CHECK: %{{.*}} = tosa.reciprocal
+# CHECK: %{{.*}} = tosa.mul
+# CHECK: %{{.*}} = tosa.sub
+# CHECK: %{{.*}} = tosa.mul
+# CHECK: %{{.*}} = tosa.reduce_sum
+# CHECK: %{{.*}} = "tosa.const"
+# CHECK: %{{.*}} = tosa.reciprocal
+# CHECK: %{{.*}} = tosa.mul
+# CHECK: %{{.*}} = tosa.reshape
+# CHECK: %{{.*}} = tosa.reshape
+# CHECK: return %{{.*}} : tensor, tensor
+# CHECK: }
+# CHECK: }
+
+graphs[1].lower_to_top_level_ir()
+print(graphs[1]._imported_module)
+
 # CHECK: module {
 # CHECK-LABEL: func.func @forward
 # CHECK: %{{.*}} = tosa.reduce_sum
@@ -63,4 +89,3 @@ def foo_keepdim(x):
 # CHECK: return %{{.*}} : tensor<1x1x1xf32>, tensor<1x1x1xf32>
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_view.py b/tests/Python/test_view.py
index 44db4609d7..31eacddc77 100644
--- a/tests/Python/test_view.py
+++ b/tests/Python/test_view.py
@@ -5,29 +5,31 @@
 from torch._inductor.decomposition import decompositions as inductor_decomp

 from buddy.compiler.frontend import DynamoCompiler
-from buddy.compiler.ops import tosa
+from buddy.compiler.ops import linalg


 def foo(x, y):
-    return x + y
+    return torch.ops.aten.view(x, y)


 in1 = torch.randn(10)
-in2 = torch.randn(10)
+in2 = (2, 5)

 # Initialize the dynamo compiler.
 dynamo_compiler = DynamoCompiler(
-    primary_registry=tosa.ops_registry,
+    primary_registry=linalg.ops_registry,
     aot_autograd_decomposition=inductor_decomp,
 )

-foo_mlir = dynamo.optimize(dynamo_compiler)(foo)
-foo_mlir(in1, in2)
+graphs = dynamo_compiler.importer(foo, in1, in2)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)

 # CHECK: module {
 # CHECK-LABEL: func.func @forward
-# CHECK: %{{.*}} = tosa.add
+# CHECK: %{{.*}} = tosa.reshape
 # CHECK: return %{{.*}}
 # CHECK: }
 # CHECK: }
-print(dynamo_compiler.imported_module)
diff --git a/tests/Python/test_where.py b/tests/Python/test_where.py
new file mode 100644
index 0000000000..5266f00b74
--- /dev/null
+++ b/tests/Python/test_where.py
@@ -0,0 +1,38 @@
+# RUN: %PYTHON %s 2>&1 | FileCheck %s
+
+import torch
+import torch._dynamo as dynamo
+from torch._inductor.decomposition import decompositions as inductor_decomp
+from torch._functorch.aot_autograd import aot_autograd_decompositions
+
+from buddy.compiler.frontend import DynamoCompiler
+from buddy.compiler.ops import linalg
+
+
+def foo(x, y, z):
+    return torch.where(x, y, z)
+
+
+in1 = torch.ones([13, 13], dtype=torch.bool)
+in2 = 0
+in3 = torch.ones([13, 13], dtype=torch.float32)
+# Initialize the dynamo compiler.
+dynamo_compiler = DynamoCompiler(
+    primary_registry=linalg.ops_registry,
+    aot_autograd_decomposition=aot_autograd_decompositions,
+)
+
+graphs = dynamo_compiler.importer(foo, in1, in2, in3)
+assert len(graphs) == 1
+graph = graphs[0]
+graph.lower_to_top_level_ir()
+print(graph._imported_module)
+
+# CHECK: module {
+# CHECK-LABEL: func.func @forward
+# CHECK: %{{.*}} = arith.constant
+# CHECK: %{{.*}} = tensor.empty
+# CHECK: %{{.*}} = linalg.generic
+# CHECK: return %{{.*}}
+# CHECK: }
+# CHECK: }
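+# The scalar operand is materialized as an arith.constant and the
+# element-wise select is emitted as a linalg.generic, as the CHECK
+# lines above verify.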