Skip to content

Commit

Permalink
Add support for conv1D (fixes w2l) (#5)
Browse files Browse the repository at this point in the history
* Fix mps_executor_runner build when using cmake

* Add CI scripts to run supported executorch networks through MPS (#1)

* Add CI scripts to run supported executorch networks through MPS

* Fix CI

* Fix CI #2

* Don't specialize the executable for the current device (#3)

Co-authored-by: Denis Vieriu <104024078+DenisVieriu97@users.noreply.github.com>

* Update CI script to run test_mps (#4)

* Update CI script to run test_mps

* Update cmdline

* Add lint for mps

* Update lint script

* Update lint script

* Fix lint

* Fix lint

* Fix lint

* Fix lint

* Fix lint

* Fix lint

* Add support for conv1D (fixes w2l)

* Perf improvement - Map conv2D to depthwiseConv3D

* Add support for PyTorch style printing of output tensors

* Fix lint

* Remove unused headers

* Remove unused headers #2

---------

Co-authored-by: Grzegorz George Pawelczak <grzpawelczak@gmail.com>
  • Loading branch information
DenisVieriu97 and georgepaw committed Jan 19, 2024
1 parent bee46d9 commit e1c740d
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 47 deletions.
10 changes: 3 additions & 7 deletions backends/apple/mps/mps_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,17 +263,13 @@ def preprocess( # noqa: C901
from typing import cast

input_node = cast(torch.fx.Node, node.args[0]).meta["val"]
sizes = input_node.size()
dim0 = sizes[0]
dim1 = sizes[1]
weight_node = cast(torch.fx.Node, node.args[1]).meta["val"]
groups = int(node.args[8])
group_in_channels = dim1
group_out_channels = int(dim0 / groups)

# Convolution is depthwise if groups = input channels and output channel
# is a positive multiple of input channels
is_depthwise_conv = (group_in_channels == 1) and (
group_out_channels % group_in_channels == 0
is_depthwise_conv = (groups > 1 and weight_node.size(1) == 1) and (
input_node.dim() >= 4 and weight_node.dim() >= 4
)

if node.args[2] is None:
Expand Down
95 changes: 68 additions & 27 deletions backends/apple/mps/operations/ConvolutionOps.mm
Original file line number Diff line number Diff line change
Expand Up @@ -9,47 +9,81 @@
using namespace torch;

PyMPSGraphTensor*
MPSGraphModule::conv2D(MPSGraphTensor* primaryTensor, MPSGraphTensor* secondaryTensor,
MPSGraphTensor* biasTensor, IntArrayRef stride,
IntArrayRef padding, IntArrayRef dilation, bool transpose,
IntArrayRef outputPadding, int64_t groups, bool is_depthwise) {
MPSGraphModule::conv2D(
MPSGraphTensor* primaryTensor,
MPSGraphTensor* secondaryTensor,
MPSGraphTensor* biasTensor,
IntArrayRef stride,
IntArrayRef padding,
IntArrayRef dilation,
bool transpose,
IntArrayRef outputPadding,
int64_t groups,
bool is_depthwise) {
TORCH_CHECK([primaryTensor.shape count] < 5, "ConvTranspose 3D is not supported on MPS delegate");
TORCH_CHECK([primaryTensor dataType] == MPSDataTypeFloat32 || [primaryTensor dataType] == MPSDataTypeFloat16, "ConvTranspose 3D is not supported on MPS delegate");

// Handle 1D convolution.
bool isConv1D = ([secondaryTensor.shape count] == 3);
if (isConv1D) {
primaryTensor = [mpsGraph expandDimsOfTensor:primaryTensor
axis:2
name:@"unsqueezeInput"];
secondaryTensor = [mpsGraph expandDimsOfTensor:secondaryTensor
axis:2
name:@"unsqueezeWeight"];
if (stride.size() == 1) {
stride = IntArrayRef{1, stride[0]};
padding = IntArrayRef{0, padding[0]};
dilation = IntArrayRef{1, dilation[0]};
outputPadding = IntArrayRef{0, outputPadding[0]};
}
}

if(is_depthwise){
MPSGraphDepthwiseConvolution2DOpDescriptor* desc = [MPSGraphDepthwiseConvolution2DOpDescriptor
descriptorWithStrideInX:stride[0]
strideInY:stride[1]
dilationRateInX:dilation[0]
dilationRateInY:dilation[1]
paddingLeft:padding[1]
paddingRight:padding[1]
paddingTop:padding[0]
paddingBottom:padding[0]
paddingStyle:MPSGraphPaddingStyleExplicit
dataLayout:MPSGraphTensorNamedDataLayoutNCHW
weightsLayout:MPSGraphTensorNamedDataLayoutOIHW];
MPSGraphDepthwiseConvolution3DOpDescriptor* depthWiseConv3dDescriptor =
[[MPSGraphDepthwiseConvolution3DOpDescriptor new] autorelease];
depthWiseConv3dDescriptor.strides =
@[ @1, [[NSNumber alloc] initWithInteger:stride[0]], [[NSNumber alloc] initWithInteger:stride[1]] ];
depthWiseConv3dDescriptor.dilationRates =
@[ @1, [[NSNumber alloc] initWithInteger:dilation[0]], [[NSNumber alloc] initWithInteger:dilation[1]] ];

MPSGraphTensor* depthwiseConv2DTensor = [mpsGraph depthwiseConvolution2DWithSourceTensor:primaryTensor
weightsTensor:secondaryTensor
descriptor:desc
name:@"depthwiseConv2D"];
depthWiseConv3dDescriptor.paddingStyle = MPSGraphPaddingStyleExplicit;
depthWiseConv3dDescriptor.paddingValues = @[
@0,
@0,
[[NSNumber alloc] initWithInteger:padding[0]],
[[NSNumber alloc] initWithInteger:padding[0]],
[[NSNumber alloc] initWithInteger:padding[1]],
[[NSNumber alloc] initWithInteger:padding[1]]
];
depthWiseConv3dDescriptor.channelDimensionIndex = -3LL;
MPSGraphTensor* weightTransposeTensor = [mpsGraph transposeTensor:secondaryTensor
dimension:-3
withDimension:-4
name:nil];
MPSGraphTensor* depthwiseConvTensor = [mpsGraph depthwiseConvolution3DWithSourceTensor:primaryTensor
weightsTensor:weightTransposeTensor
descriptor:depthWiseConv3dDescriptor
name:nil];
//Can be a nullptr
if(biasTensor){
//Need to add correct dimension to bias to avoid broadcasting issues
biasTensor = [mpsGraph expandDimsOfTensor:biasTensor
axes:@[@0, @2, @3]
name:nil];
depthwiseConv2DTensor = [mpsGraph additionWithPrimaryTensor:depthwiseConv2DTensor
depthwiseConvTensor = [mpsGraph additionWithPrimaryTensor:depthwiseConvTensor
secondaryTensor:biasTensor
name:@"depthwiseConv2DWithBiasAdd"];
}

return depthwiseConv2DTensor;
return depthwiseConvTensor;
} else {
MPSGraphConvolution2DOpDescriptor* desc = [MPSGraphConvolution2DOpDescriptor
descriptorWithStrideInX:stride[0]
strideInY:stride[1]
dilationRateInX:dilation[0]
dilationRateInY:dilation[1]
descriptorWithStrideInX:stride[1]
strideInY:stride[0]
dilationRateInX:dilation[1]
dilationRateInY:dilation[0]
groups:groups
paddingLeft:padding[1]
paddingRight:padding[1]
Expand All @@ -64,7 +98,7 @@
descriptor:desc
name:@"conv2D"];

//Can be a nullptr
// Can be a nullptr
if(biasTensor){
//Need to add correct dimension to bias to avoid broadcasting issues
biasTensor = [mpsGraph expandDimsOfTensor:biasTensor
Expand All @@ -74,6 +108,13 @@
secondaryTensor:biasTensor
name:@"conv2DWithBiasAdd"];
}

if (isConv1D) {
conv2DTensor = [mpsGraph squeezeTensor:conv2DTensor
axis:2
name:@"squeeze"];
}

return conv2DTensor;
}
}
Expand Down
19 changes: 6 additions & 13 deletions examples/apple/mps/executor_runner/mps_executor_runner.mm
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,8 @@
* It uses the original bundled input data from the flatbuffer file.
*/

#import <Foundation/Foundation.h>
#import <MetalPerformanceShaders/MetalPerformanceShaders.h>
#import <MetalPerformanceShadersGraph/MetalPerformanceShadersGraph.h>

#include <memory>
#include <iostream>

#include <gflags/gflags.h>

Expand All @@ -31,6 +28,7 @@
#include <executorch/extension/data_loader/buffer_data_loader.h>
#include <executorch/runtime/core/result.h>
#include <executorch/runtime/platform/runtime.h>
#include <executorch/extension/evalue_util/print_evalue.h>

#include <chrono>
using namespace std::chrono;
Expand Down Expand Up @@ -440,15 +438,10 @@ MemoryManager memory_manager(
std::vector<EValue> outputs(method->outputs_size());
status = method->get_outputs(outputs.data(), outputs.size());
ET_CHECK(status == Error::Ok);
for (EValue& output : outputs) {
// TODO(T159700776): This assumes that all outputs are fp32 tensors. Add
// support for other EValues and Tensor dtypes, and print tensors in a more
// readable way.
auto output_tensor = output.toTensor();
auto data_output = output_tensor.const_data_ptr<float>();
for (size_t j = 0; j < output_tensor.numel(); ++j) {
ET_LOG(Info, "%f", data_output[j]);
}
// Print the first and last 100 elements of long lists of scalars.
std::cout << torch::executor::util::evalue_edge_items(100);
for (int i = 0; i < outputs.size(); ++i) {
std::cout << "Output " << i << ": " << outputs[i] << std::endl;
}

// Dump the profiling data to the specified file.
Expand Down
1 change: 1 addition & 0 deletions examples/apple/mps/executor_runner/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def define_common_targets():
deps = [
"//executorch/backends/apple/mps:mps",
"//executorch/runtime/executor:program",
"//executorch/extension/evalue_util:print_evalue",
"//executorch/extension/data_loader:file_data_loader",
"//executorch/kernels/portable:generated_lib_all_ops",
"//executorch/extension/data_loader:file_data_loader",
Expand Down

0 comments on commit e1c740d

Please sign in to comment.