diff --git a/backends/apple/mps/mps_preprocess.py b/backends/apple/mps/mps_preprocess.py index c8887cee4c..ce669d80e9 100644 --- a/backends/apple/mps/mps_preprocess.py +++ b/backends/apple/mps/mps_preprocess.py @@ -263,17 +263,13 @@ def preprocess( # noqa: C901 from typing import cast input_node = cast(torch.fx.Node, node.args[0]).meta["val"] - sizes = input_node.size() - dim0 = sizes[0] - dim1 = sizes[1] + weight_node = cast(torch.fx.Node, node.args[1]).meta["val"] groups = int(node.args[8]) - group_in_channels = dim1 - group_out_channels = int(dim0 / groups) # Convolution is depthwise if groups = input channels and output channel # is a positive multiple of input channels - is_depthwise_conv = (group_in_channels == 1) and ( - group_out_channels % group_in_channels == 0 + is_depthwise_conv = (groups > 1 and weight_node.size(1) == 1) and ( + input_node.dim() >= 4 and weight_node.dim() >= 4 ) if node.args[2] is None: diff --git a/backends/apple/mps/operations/ConvolutionOps.mm b/backends/apple/mps/operations/ConvolutionOps.mm index 029e91b404..df0f7a0c11 100644 --- a/backends/apple/mps/operations/ConvolutionOps.mm +++ b/backends/apple/mps/operations/ConvolutionOps.mm @@ -9,47 +9,81 @@ using namespace torch; PyMPSGraphTensor* -MPSGraphModule::conv2D(MPSGraphTensor* primaryTensor, MPSGraphTensor* secondaryTensor, - MPSGraphTensor* biasTensor, IntArrayRef stride, - IntArrayRef padding, IntArrayRef dilation, bool transpose, - IntArrayRef outputPadding, int64_t groups, bool is_depthwise) { +MPSGraphModule::conv2D( + MPSGraphTensor* primaryTensor, + MPSGraphTensor* secondaryTensor, + MPSGraphTensor* biasTensor, + IntArrayRef stride, + IntArrayRef padding, + IntArrayRef dilation, + bool transpose, + IntArrayRef outputPadding, + int64_t groups, + bool is_depthwise) { + TORCH_CHECK([primaryTensor.shape count] < 5, "ConvTranspose 3D is not supported on MPS delegate"); + TORCH_CHECK([primaryTensor dataType] == MPSDataTypeFloat32 || [primaryTensor dataType] == MPSDataTypeFloat16, "ConvTranspose 3D is not supported on MPS delegate"); + + // Handle 1D convolution. + bool isConv1D = ([secondaryTensor.shape count] == 3); + if (isConv1D) { + primaryTensor = [mpsGraph expandDimsOfTensor:primaryTensor + axis:2 + name:@"unsqueezeInput"]; + secondaryTensor = [mpsGraph expandDimsOfTensor:secondaryTensor + axis:2 + name:@"unsqueezeWeight"]; + if (stride.size() == 1) { + stride = IntArrayRef{1, stride[0]}; + padding = IntArrayRef{0, padding[0]}; + dilation = IntArrayRef{1, dilation[0]}; + outputPadding = IntArrayRef{0, outputPadding[0]}; + } + } if(is_depthwise){ - MPSGraphDepthwiseConvolution2DOpDescriptor* desc = [MPSGraphDepthwiseConvolution2DOpDescriptor - descriptorWithStrideInX:stride[0] - strideInY:stride[1] - dilationRateInX:dilation[0] - dilationRateInY:dilation[1] - paddingLeft:padding[1] - paddingRight:padding[1] - paddingTop:padding[0] - paddingBottom:padding[0] - paddingStyle:MPSGraphPaddingStyleExplicit - dataLayout:MPSGraphTensorNamedDataLayoutNCHW - weightsLayout:MPSGraphTensorNamedDataLayoutOIHW]; + MPSGraphDepthwiseConvolution3DOpDescriptor* depthWiseConv3dDescriptor = + [[MPSGraphDepthwiseConvolution3DOpDescriptor new] autorelease]; + depthWiseConv3dDescriptor.strides = + @[ @1, [[NSNumber alloc] initWithInteger:stride[0]], [[NSNumber alloc] initWithInteger:stride[1]] ]; + depthWiseConv3dDescriptor.dilationRates = + @[ @1, [[NSNumber alloc] initWithInteger:dilation[0]], [[NSNumber alloc] initWithInteger:dilation[1]] ]; - MPSGraphTensor* depthwiseConv2DTensor = [mpsGraph depthwiseConvolution2DWithSourceTensor:primaryTensor - weightsTensor:secondaryTensor - descriptor:desc - name:@"depthwiseConv2D"]; + depthWiseConv3dDescriptor.paddingStyle = MPSGraphPaddingStyleExplicit; + depthWiseConv3dDescriptor.paddingValues = @[ + @0, + @0, + [[NSNumber alloc] initWithInteger:padding[0]], + [[NSNumber alloc] initWithInteger:padding[0]], + [[NSNumber alloc] initWithInteger:padding[1]], + [[NSNumber alloc] initWithInteger:padding[1]] + ]; + depthWiseConv3dDescriptor.channelDimensionIndex = -3LL; + MPSGraphTensor* weightTransposeTensor = [mpsGraph transposeTensor:secondaryTensor + dimension:-3 + withDimension:-4 + name:nil]; + MPSGraphTensor* depthwiseConvTensor = [mpsGraph depthwiseConvolution3DWithSourceTensor:primaryTensor + weightsTensor:weightTransposeTensor + descriptor:depthWiseConv3dDescriptor + name:nil]; //Can be a nullptr if(biasTensor){ //Need to add correct dimension to bias to avoid broadcasting issues biasTensor = [mpsGraph expandDimsOfTensor:biasTensor axes:@[@0, @2, @3] name:nil]; - depthwiseConv2DTensor = [mpsGraph additionWithPrimaryTensor:depthwiseConv2DTensor + depthwiseConvTensor = [mpsGraph additionWithPrimaryTensor:depthwiseConvTensor secondaryTensor:biasTensor name:@"depthwiseConv2DWithBiasAdd"]; } - return depthwiseConv2DTensor; + return depthwiseConvTensor; } else { MPSGraphConvolution2DOpDescriptor* desc = [MPSGraphConvolution2DOpDescriptor - descriptorWithStrideInX:stride[0] - strideInY:stride[1] - dilationRateInX:dilation[0] - dilationRateInY:dilation[1] + descriptorWithStrideInX:stride[1] + strideInY:stride[0] + dilationRateInX:dilation[1] + dilationRateInY:dilation[0] groups:groups paddingLeft:padding[1] paddingRight:padding[1] @@ -64,7 +98,7 @@ descriptor:desc name:@"conv2D"]; - //Can be a nullptr + // Can be a nullptr if(biasTensor){ //Need to add correct dimension to bias to avoid broadcasting issues biasTensor = [mpsGraph expandDimsOfTensor:biasTensor @@ -74,6 +108,13 @@ secondaryTensor:biasTensor name:@"conv2DWithBiasAdd"]; } + + if (isConv1D) { + conv2DTensor = [mpsGraph squeezeTensor:conv2DTensor + axis:2 + name:@"squeeze"]; + } + return conv2DTensor; } } diff --git a/examples/apple/mps/executor_runner/mps_executor_runner.mm b/examples/apple/mps/executor_runner/mps_executor_runner.mm index 379bd42282..666a24c47a 100644 --- a/examples/apple/mps/executor_runner/mps_executor_runner.mm +++ b/examples/apple/mps/executor_runner/mps_executor_runner.mm @@ -12,11 +12,8 @@ * It uses the original bundled input data from the flatbuffer file. */ -#import -#import -#import - #include +#include #include @@ -31,6 +28,7 @@ #include #include #include +#include #include using namespace std::chrono; @@ -440,15 +438,10 @@ MemoryManager memory_manager( std::vector outputs(method->outputs_size()); status = method->get_outputs(outputs.data(), outputs.size()); ET_CHECK(status == Error::Ok); - for (EValue& output : outputs) { - // TODO(T159700776): This assumes that all outputs are fp32 tensors. Add - // support for other EValues and Tensor dtypes, and print tensors in a more - // readable way. - auto output_tensor = output.toTensor(); - auto data_output = output_tensor.const_data_ptr(); - for (size_t j = 0; j < output_tensor.numel(); ++j) { - ET_LOG(Info, "%f", data_output[j]); - } + // Print the first and last 100 elements of long lists of scalars. + std::cout << torch::executor::util::evalue_edge_items(100); + for (int i = 0; i < outputs.size(); ++i) { + std::cout << "Output " << i << ": " << outputs[i] << std::endl; } // Dump the profiling data to the specified file. diff --git a/examples/apple/mps/executor_runner/targets.bzl b/examples/apple/mps/executor_runner/targets.bzl index 21ee3373f9..48c4dc3fcf 100644 --- a/examples/apple/mps/executor_runner/targets.bzl +++ b/examples/apple/mps/executor_runner/targets.bzl @@ -15,6 +15,7 @@ def define_common_targets(): deps = [ "//executorch/backends/apple/mps/runtime:MPSBackend", "//executorch/runtime/executor:program", + "//executorch/extension/evalue_util:print_evalue", "//executorch/extension/data_loader:file_data_loader", "//executorch/kernels/portable:generated_lib_all_ops", "//executorch/extension/data_loader:file_data_loader",