diff --git a/include/torch-mlir/Dialect/Torch/IR/GeneratedTorchOps.td b/include/torch-mlir/Dialect/Torch/IR/GeneratedTorchOps.td index 8a8a98853b1f..dce6018e1a7e 100644 --- a/include/torch-mlir/Dialect/Torch/IR/GeneratedTorchOps.td +++ b/include/torch-mlir/Dialect/Torch/IR/GeneratedTorchOps.td @@ -4595,6 +4595,34 @@ def Torch_AtenFakeQuantizePerTensorAffineOp : Torch_Op<"aten.fake_quantize_per_t }]; } +def Torch_AtenFakeQuantizePerTensorAffineCachemaskOp : Torch_Op<"aten.fake_quantize_per_tensor_affine_cachemask", [ + AllowsTypeRefinement, + HasValueSemantics, + ReadOnly + ]> { + let summary = "Generated op for `aten::fake_quantize_per_tensor_affine_cachemask : (Tensor, float, int, int, int) -> (Tensor, Tensor)`"; + let arguments = (ins + AnyTorchTensorType:$self, + Torch_FloatType:$scale, + Torch_IntType:$zero_point, + Torch_IntType:$quant_min, + Torch_IntType:$quant_max + ); + let results = (outs + AnyTorchOptionalTensorType:$output, + AnyTorchOptionalTensorType:$mask + ); + let hasCustomAssemblyFormat = 1; + let extraClassDefinition = [{ + ParseResult AtenFakeQuantizePerTensorAffineCachemaskOp::parse(OpAsmParser &parser, OperationState &result) { + return parseDefaultTorchOp(parser, result, 5, 2); + } + void AtenFakeQuantizePerTensorAffineCachemaskOp::print(OpAsmPrinter &printer) { + printDefaultTorchOp(printer, *this, 5, 2); + } + }]; +} + def Torch_AtenMaximumOp : Torch_Op<"aten.maximum", [ AllowsTypeRefinement, HasValueSemantics, diff --git a/lib/Dialect/Torch/Transforms/AbstractInterpLibrary.cpp b/lib/Dialect/Torch/Transforms/AbstractInterpLibrary.cpp index 7b4cf9c8fa39..408709816cb8 100644 --- a/lib/Dialect/Torch/Transforms/AbstractInterpLibrary.cpp +++ b/lib/Dialect/Torch/Transforms/AbstractInterpLibrary.cpp @@ -6328,6 +6328,12 @@ StringRef mlir::torch::Torch::getAbstractInterpLibrary() { " %0 = call @__torch__.torch.jit._shape_functions.unary(%arg0) : (!torch.list) -> !torch.list\n" " return %0 : !torch.list\n" " }\n" +" func.func @\"__torch_mlir_shape_fn.aten.fake_quantize_per_tensor_affine_cachemask\"(%arg0: !torch.list, %arg1: !torch.float, %arg2: !torch.int, %arg3: !torch.int, %arg4: !torch.int) -> !torch.tuple, list> {\n" +" %0 = call @__torch__.torch.jit._shape_functions.unary(%arg0) : (!torch.list) -> !torch.list\n" +" %1 = call @__torch__.torch.jit._shape_functions.unary(%arg0) : (!torch.list) -> !torch.list\n" +" %2 = torch.prim.TupleConstruct %0, %1 : !torch.list, !torch.list -> !torch.tuple, list>\n" +" return %2 : !torch.tuple, list>\n" +" }\n" " func.func @\"__torch_mlir_shape_fn.aten.sin\"(%arg0: !torch.list) -> !torch.list {\n" " %0 = call @__torch__.torch.jit._shape_functions.unary(%arg0) : (!torch.list) -> !torch.list\n" " return %0 : !torch.list\n" @@ -10189,6 +10195,31 @@ StringRef mlir::torch::Torch::getAbstractInterpLibrary() { " %0 = torch.prim.ListConstruct %int5, %int15, %int6, %int7 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list\n" " return %0 : !torch.list\n" " }\n" +" func.func @\"__torch_mlir_dtype_fn.aten.fake_quantize_per_tensor_affine_cachemask\"(%arg0: !torch.tuple, %arg1: !torch.float, %arg2: !torch.int, %arg3: !torch.int, %arg4: !torch.int) -> !torch.tuple {\n" +" %int11 = torch.constant.int 11\n" +" %int15 = torch.constant.int 15\n" +" %none = torch.constant.none\n" +" %str = torch.constant.str \"AssertionError: \"\n" +" %int1 = torch.constant.int 1\n" +" %0:2 = torch.prim.TupleUnpack %arg0 : !torch.tuple -> !torch.int, !torch.int\n" +" %1 = call @__torch__.torch_mlir.jit_ir_importer.build_tools.library_generator.is_float_dtype(%0#1) : (!torch.int) -> !torch.bool\n" +" torch.prim.If %1 -> () {\n" +" torch.prim.If.yield\n" +" } else {\n" +" torch.prim.RaiseException %str, %none : !torch.str, !torch.none\n" +" torch.prim.If.yield\n" +" }\n" +" %2 = torch.aten.ne.int %0#1, %int15 : !torch.int, !torch.int -> !torch.bool\n" +" torch.prim.If %2 -> () {\n" +" torch.prim.If.yield\n" +" } else {\n" +" torch.prim.RaiseException %str, %none : !torch.str, !torch.none\n" +" torch.prim.If.yield\n" +" }\n" +" %3 = torch.prim.TupleIndex %arg0, %int1 : !torch.tuple, !torch.int -> !torch.int\n" +" %4 = torch.prim.TupleConstruct %3, %int11 : !torch.int, !torch.int -> !torch.tuple\n" +" return %4 : !torch.tuple\n" +" }\n" " func.func @\"__torch_mlir_dtype_fn.aten.cosh\"(%arg0: !torch.tuple) -> !torch.int {\n" " %0:2 = torch.prim.TupleUnpack %arg0 : !torch.tuple -> !torch.int, !torch.int\n" " %1 = call @__torch__._get_dtype_of_floating_point_op(%0#1) : (!torch.int) -> !torch.int\n" diff --git a/lib/Dialect/Torch/Transforms/DecomposeComplexOps.cpp b/lib/Dialect/Torch/Transforms/DecomposeComplexOps.cpp index 0c15841603b2..a72e583fa9fa 100644 --- a/lib/Dialect/Torch/Transforms/DecomposeComplexOps.cpp +++ b/lib/Dialect/Torch/Transforms/DecomposeComplexOps.cpp @@ -8146,6 +8146,31 @@ class DecomposeAtenFakeQuantizePerTensorAffineOp }; } // namespace +namespace { +// Decompose aten.fake_quantize_per_tensor_affine_cachemask +// into aten.fake_quantize_per_tensor_affine +// when the second result is unused. +class DecomposeAtenFakeQuantizePerTensorAffineCachemaskOp + : public OpRewritePattern { +public: + using OpRewritePattern< + AtenFakeQuantizePerTensorAffineCachemaskOp>::OpRewritePattern; + LogicalResult matchAndRewrite(AtenFakeQuantizePerTensorAffineCachemaskOp op, + PatternRewriter &rewriter) const override { + if (!op->getResult(1).use_empty()) + return failure(); + + auto newOp = rewriter.create( + op.getLoc(), op->getResult(0).getType(), op.getSelf(), op.getScale(), + op.getZeroPoint(), op.getQuantMin(), op.getQuantMax()); + + rewriter.replaceAllUsesWith(op->getResult(0), newOp); + rewriter.eraseOp(op); + return success(); + } +}; +} // namespace + namespace { class DecomposeComplexOpsPass : public DecomposeComplexOpsBase { @@ -8375,6 +8400,8 @@ class DecomposeComplexOpsPass addPatternIfTargetOpIsIllegal(patterns); addPatternIfTargetOpIsIllegal(patterns); addPatternIfTargetOpIsIllegal(patterns); + addPatternIfTargetOpIsIllegal< + DecomposeAtenFakeQuantizePerTensorAffineCachemaskOp>(patterns); // More specific conv ops addPatternIfTargetOpIsIllegal(patterns); addPatternIfTargetOpIsIllegal(patterns); diff --git a/lib/Dialect/Torch/Transforms/LowerToBackendContract.cpp b/lib/Dialect/Torch/Transforms/LowerToBackendContract.cpp index fc56700f22eb..301cb8e809d7 100644 --- a/lib/Dialect/Torch/Transforms/LowerToBackendContract.cpp +++ b/lib/Dialect/Torch/Transforms/LowerToBackendContract.cpp @@ -460,6 +460,7 @@ static void markDecomposedOpsAsIllegal(MLIRContext *context, target.addIllegalOp(); target.addIllegalOp(); target.addIllegalOp(); + target.addIllegalOp(); target.addIllegalOp(); target.addIllegalOp(); target.addIllegalOp(); diff --git a/projects/pt1/e2e_testing/xfail_sets.py b/projects/pt1/e2e_testing/xfail_sets.py index ce5e1c5a7442..8617f1d79534 100644 --- a/projects/pt1/e2e_testing/xfail_sets.py +++ b/projects/pt1/e2e_testing/xfail_sets.py @@ -396,6 +396,7 @@ "ElementwiseRreluTrainStaticModule_basic", "ElementwiseToDtypeI64ToUI8Module_basic", "EqIntModule_basic", + "FakeQuantizePerTensorAffineCachemaskModule_basic", "FakeQuantizePerTensorAffineDynamicShapeModule_basic", "FakeQuantizePerTensorAffineModule_basic", "FakeQuantizePerTensorAffineRoundToEvenModule_basic", @@ -1055,6 +1056,7 @@ "EmptyStridedModule_basic", "EqIntModule_basic", "ExpandAsIntModule_basic", + "FakeQuantizePerTensorAffineCachemaskModule_basic", "FakeQuantizePerTensorAffineModule_basic", "FakeQuantizePerTensorAffineRoundToEvenModule_basic", "Fill_TensorFloat64WithFloat32Static_basic", @@ -2400,6 +2402,7 @@ "EmptyStridedSizeIntStrideModule_basic", "EqIntModule_basic", "ExponentialModule_basic", + "FakeQuantizePerTensorAffineCachemaskModule_basic", "FloatImplicitModule_basic", "GeFloatIntModule_basic", "GeFloatModule_basic", diff --git a/projects/pt1/python/torch_mlir/jit_ir_importer/build_tools/abstract_interp_lib_gen.py b/projects/pt1/python/torch_mlir/jit_ir_importer/build_tools/abstract_interp_lib_gen.py index d69f8dbc874e..8920de787d5e 100644 --- a/projects/pt1/python/torch_mlir/jit_ir_importer/build_tools/abstract_interp_lib_gen.py +++ b/projects/pt1/python/torch_mlir/jit_ir_importer/build_tools/abstract_interp_lib_gen.py @@ -118,6 +118,9 @@ def aten〇diagonal〡shape(self: List[int], offset: int = 0, dim1: int = 0, dim def aten〇fake_quantize_per_tensor_affine〡shape(self: List[int], scale: float, zero_point: int, quant_min: int, quant_max: int) -> List[int]: return upstream_shape_functions.unary(self) +def aten〇fake_quantize_per_tensor_affine_cachemask〡shape(self: List[int], scale: float, zero_point: int, quant_min: int, quant_max: int) -> Tuple[List[int], List[int]]: + return (upstream_shape_functions.unary(self), upstream_shape_functions.unary(self)) + def aten〇sin〡shape(self: List[int]) -> List[int]: return upstream_shape_functions.unary(self) @@ -2162,6 +2165,14 @@ def aten〇fake_quantize_per_tensor_affine〡dtype(self_rank_dtype: Tuple[int, i assert self_dtype != torch.bfloat16 return self_dtype +# note: fake_quantize_per_tensor_affine doesn't support "meta" device, use "cpu" instead. +@check_dtype_function(_check_tensors_with_the_same_dtype(num_of_tensors=1, tensor_device="cpu", scale=0.1, zero_point=0, quant_min=0, quant_max=255, error_types={torch.complex128, torch.complex64, torch.bfloat16, torch.int64, torch.int32, torch.int16, torch.int8, torch.uint8, torch.bool})) +def aten〇fake_quantize_per_tensor_affine_cachemask〡dtype(self_rank_dtype: Tuple[int, int], scale: float, zero_point: int, quant_min: int, quant_max: int) -> Tuple[int, int]: + self_rank, self_dtype = self_rank_dtype + assert is_float_dtype(self_dtype) + assert self_dtype != torch.bfloat16 + return (self_rank_dtype[1], torch.bool) + @check_dtype_function(_check_tensors_with_the_same_dtype(num_of_tensors=1)) def aten〇cosh〡dtype(self_rank_dtype: Tuple[int, int]) -> int: self_rank, self_dtype = self_rank_dtype diff --git a/projects/pt1/python/torch_mlir/jit_ir_importer/build_tools/torch_ods_gen.py b/projects/pt1/python/torch_mlir/jit_ir_importer/build_tools/torch_ods_gen.py index 4c574f8ba741..1ad3b09ee701 100644 --- a/projects/pt1/python/torch_mlir/jit_ir_importer/build_tools/torch_ods_gen.py +++ b/projects/pt1/python/torch_mlir/jit_ir_importer/build_tools/torch_ods_gen.py @@ -458,6 +458,9 @@ def emit_with_mutating_variants(key, **kwargs): emit( "aten::fake_quantize_per_tensor_affine : (Tensor, float, int, int, int) -> (Tensor)" ) + emit( + "aten::fake_quantize_per_tensor_affine_cachemask : (Tensor, float, int, int, int) -> (Tensor, Tensor)" + ) emit("aten::maximum : (Tensor, Tensor) -> (Tensor)") emit("aten::minimum : (Tensor, Tensor) -> (Tensor)") emit("aten::mish : (Tensor) -> (Tensor)") diff --git a/projects/pt1/python/torch_mlir_e2e_test/test_suite/quantized_models.py b/projects/pt1/python/torch_mlir_e2e_test/test_suite/quantized_models.py index 5114f78d5ca7..3c9c3073525b 100644 --- a/projects/pt1/python/torch_mlir_e2e_test/test_suite/quantized_models.py +++ b/projects/pt1/python/torch_mlir_e2e_test/test_suite/quantized_models.py @@ -181,3 +181,28 @@ def get_quantized_mlp(): @register_test_case(module_factory=get_quantized_mlp) def QuantizedMLP_basic(module, tu: TestUtils): module.forward(get_quant_model_input()) + + +# ============================================================================== + + +class FakeQuantizePerTensorAffineCachemaskModule(torch.nn.Module): + def __init__(self): + super().__init__() + + @export + @annotate_args( + [ + None, + ([6, 4], torch.float32, True), + ] + ) + def forward(self, a): + return torch.ops.aten.fake_quantize_per_tensor_affine_cachemask( + a, 2.0, 0, -128, 127 + )[0] + + +@register_test_case(module_factory=lambda: FakeQuantizePerTensorAffineCachemaskModule()) +def FakeQuantizePerTensorAffineCachemaskModule_basic(module, tu: TestUtils): + module.forward(tu.rand(6, 4))