From 72256ab6730a7af20096872a5cca3e3fec55d34b Mon Sep 17 00:00:00 2001
From: Jan-Frederik Schulte
Date: Wed, 24 May 2023 16:07:26 -0400
Subject: [PATCH 01/10] move new pytorch parser to clean branch to get rid of messed up git history

---
 .../quartus/passes/pooling_templates.py       |   2 +
 .../vivado/passes/pooling_templates.py        |   2 +
 hls4ml/converters/__init__.py                 |  12 +-
 hls4ml/converters/pytorch/convolution.py      |  64 +-
 hls4ml/converters/pytorch/core.py             |  91 ++-
 hls4ml/converters/pytorch/function.py         |  40 ++
 hls4ml/converters/pytorch/pooling.py          | 135 ++++
 hls4ml/converters/pytorch/reshape.py          |  29 +
 hls4ml/converters/pytorch_to_hls.py           | 274 +++++++--
 hls4ml/converters/utils.py                    | 114 ++++
 hls4ml/model/optimizer/__init__.py            |   1 +
 .../passes/convert_to_channels_last.py        | 105 ++++
 .../firmware/nnet_utils/nnet_pooling.h        |   6 +
 .../vivado/nnet_utils/nnet_pooling.h          |   8 +
 hls4ml/utils/config.py                        |  15 +-
 test/pytest/test_pytorch_api.py               | 574 ++++++++++++++++++
 16 files changed, 1382 insertions(+), 90 deletions(-)
 create mode 100644 hls4ml/converters/pytorch/function.py
 create mode 100644 hls4ml/converters/pytorch/pooling.py
 create mode 100644 hls4ml/converters/pytorch/reshape.py
 create mode 100644 hls4ml/model/optimizer/passes/convert_to_channels_last.py
 create mode 100644 test/pytest/test_pytorch_api.py

diff --git a/hls4ml/backends/quartus/passes/pooling_templates.py b/hls4ml/backends/quartus/passes/pooling_templates.py
index d2ebf114e7..9a3ee41923 100644
--- a/hls4ml/backends/quartus/passes/pooling_templates.py
+++ b/hls4ml/backends/quartus/passes/pooling_templates.py
@@ -18,6 +18,7 @@
     static const unsigned pad_left = {pad_left};
     static const unsigned pad_right = {pad_right};
+    static const bool count_pad = {count_pad};
     static const nnet::Pool_Op pool_op = nnet::{pool_op};
     typedef {accum_t.name} accum_t;
@@ -44,6 +45,7 @@
     static const unsigned pad_bottom = {pad_bottom};
     static const unsigned pad_left = {pad_left};
     static const unsigned pad_right = {pad_right};
+    static const bool count_pad = {count_pad};
     static const nnet::Pool_Op pool_op = nnet::{pool_op};
     typedef {accum_t.name} accum_t;
diff --git a/hls4ml/backends/vivado/passes/pooling_templates.py b/hls4ml/backends/vivado/passes/pooling_templates.py
index 9e4736b71b..77205a5df7 100644
--- a/hls4ml/backends/vivado/passes/pooling_templates.py
+++ b/hls4ml/backends/vivado/passes/pooling_templates.py
@@ -14,6 +14,7 @@
     static const unsigned pad_left = {pad_left};
     static const unsigned pad_right = {pad_right};
+    static const bool count_pad = {count_pad};
     static const unsigned stride_width = {stride_width};
     static const nnet::Pool_Op pool_op = nnet::{pool_op};
     static const nnet::conv_implementation implementation = nnet::conv_implementation::{implementation};
@@ -40,6 +41,7 @@
     static const unsigned pad_bottom = {pad_bottom};
     static const unsigned pad_left = {pad_left};
     static const unsigned pad_right = {pad_right};
+    static const bool count_pad = {count_pad};
     static const nnet::Pool_Op pool_op = nnet::{pool_op};
     static const nnet::conv_implementation implementation = nnet::conv_implementation::{implementation};
     static const unsigned reuse_factor = {reuse};
diff --git a/hls4ml/converters/__init__.py b/hls4ml/converters/__init__.py
index 7ebc543b80..b965a159ef 100644
--- a/hls4ml/converters/__init__.py
+++ b/hls4ml/converters/__init__.py
@@ -247,7 +247,7 @@ def convert_from_pytorch_model(
 
     Args:
         model: PyTorch model to convert.
-        input_shape (list): The shape of the input tensor.
+        input_shape (list): The shape of the input tensor. The first element is the batch size and needs to be None.
        output_dir (str, optional): Output directory of the generated HLS project. Defaults to 'my-hls-test'.
         project_name (str, optional): Name of the HLS project. Defaults to 'myproject'.
         input_data_tb (str, optional): String representing the path of input data in .npy or .dat format that will be
@@ -270,6 +270,16 @@ def convert_from_pytorch_model(
     Raises:
         Exception: If precision and reuse factor are not present in 'hls_config'.
 
+    Notes:
+        PyTorch uses the "channels_first" data format for its tensors, while hls4ml expects the "channels_last"
+        format used by Keras. By default, hls4ml will automatically add layers to the model which transpose the
+        inputs to the "channels_last" format. Note that this is not supported for the "io_stream" io_type, for
+        which the user has to transpose the input by hand before passing it to hls4ml. In that case the
+        "inputs_channel_last" argument of the "config_from_pytorch_model" function needs to be set to True. By
+        default, a layer transposing the output back to the "channels_first" data format is also added; the
+        "transpose_outputs" argument of "config_from_pytorch_model" can be set to False to keep the output in
+        "channels_last". As before, this will not work for io_stream.
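+
+    Example (illustrative only; the model variable and input shape here are assumptions,
+    mirroring the usage in test/pytest/test_pytorch_api.py):
+
+        from hls4ml.converters import convert_from_pytorch_model
+        from hls4ml.utils.config import config_from_pytorch_model
+
+        config = config_from_pytorch_model(model)
+        hls_model = convert_from_pytorch_model(
+            model, (None, 3, 32, 32), hls_config=config
+        )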
+
     Returns:
         ModelGraph: hls4ml model.
     """
diff --git a/hls4ml/converters/pytorch/convolution.py b/hls4ml/converters/pytorch/convolution.py
index 920686d0f0..c17de86e9d 100644
--- a/hls4ml/converters/pytorch/convolution.py
+++ b/hls4ml/converters/pytorch/convolution.py
@@ -1,10 +1,10 @@
-from hls4ml.converters.pytorch_to_hls import pytorch_handler
-from hls4ml.converters.utils import compute_padding_1d, compute_padding_2d, parse_data_format
+from hls4ml.converters.pytorch_to_hls import get_weights_data, pytorch_handler
+from hls4ml.converters.utils import compute_padding_1d_pytorch, compute_padding_2d_pytorch, parse_data_format
 
 
 @pytorch_handler('Conv1d')
-def parse_conv1d_layer(pytorch_layer, layer_name, input_shapes, data_reader, config):
-    assert 'Conv1d' in pytorch_layer.__class__.__name__
+def parse_conv1d_layer(operation, layer_name, input_names, input_shapes, node, class_object, data_reader, config):
+    assert 'Conv1d' in operation
 
     layer = {}
 
@@ -12,27 +12,35 @@ def parse_conv1d_layer(pytorch_layer, layer_name, input_shapes, data_reader, con
     layer['class_name'] = 'Conv1D'
     layer['data_format'] = 'channels_first'  # Pytorch default (can't change)
+    layer['weight_data'] = get_weights_data(data_reader, layer['name'], 'weight')
+    layer['bias_data'] = get_weights_data(data_reader, layer['name'], 'bias')
 
     # Input info
     (layer['in_width'], layer['n_chan']) = parse_data_format(
         input_shapes[0], 'channels_first'
     )  # Keras's default is channels_last
 
     # Additional parameters
-    layer['n_filt'] = pytorch_layer.out_channels
-    layer['filt_width'] = pytorch_layer.kernel_size[0]
-    layer['stride_width'] = pytorch_layer.stride[0]
-    layer['pad_left'] = layer['pad_right'] = pytorch_layer.padding[0]
-    layer['dilation'] = pytorch_layer.dilation[0]
+    layer['n_filt'] = class_object.out_channels
+    layer['filt_width'] = class_object.kernel_size[0]
+    layer['stride_width'] = class_object.stride[0]
+    layer['dilation'] = class_object.dilation[0]
 
-    if pytorch_layer.padding[0] == 0:  # No padding, i.e., 'VALID' padding in Keras/Tensorflow
+    if type(class_object.padding) is tuple:
+        padding = class_object.padding[0]
+    else:
+        padding = class_object.padding
+
+    if padding == 0:  # No padding, i.e., 'VALID' padding in Keras/Tensorflow
         layer['padding'] = 'valid'
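+        # Descriptive note (comment added for clarity): the 'valid'/'same' labels only
+        # mirror the Keras naming convention; any nonzero integer padding is labelled
+        # 'same' below, and the actual pad_left/pad_right amounts are later computed
+        # from the integer by compute_padding_1d_pytorch. For example (assumed values),
+        # padding=2 with kernel_size=3 yields padding = 'same' with pad_left = pad_right = 2.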
else: # Only 'valid' and 'same' padding are available in Keras layer['padding'] = 'same' # Ouput info - (layer['out_width'], _, _) = compute_padding_1d( - layer['padding'], layer['in_width'], layer['stride_width'], layer['filt_width'] + (layer['out_width'], pad_left, pad_right) = compute_padding_1d_pytorch( + padding, layer['in_width'], layer['stride_width'], layer['filt_width'], layer['dilation'] ) + layer['pad_left'] = pad_left + layer['pad_right'] = pad_right output_shape = [input_shapes[0][0], layer['n_filt'], layer['out_width']] # Channel first as default @@ -40,8 +48,8 @@ def parse_conv1d_layer(pytorch_layer, layer_name, input_shapes, data_reader, con @pytorch_handler('Conv2d') -def parse_conv2d_layer(pytorch_layer, layer_name, input_shapes, data_reader, config): - assert 'Conv2d' in pytorch_layer.__class__.__name__ +def parse_conv2d_layer(operation, layer_name, input_names, input_shapes, node, class_object, data_reader, config): + assert 'Conv2d' in operation layer = {} @@ -49,35 +57,39 @@ def parse_conv2d_layer(pytorch_layer, layer_name, input_shapes, data_reader, con layer['class_name'] = 'Conv2D' layer['data_format'] = 'channels_first' # Pytorch default (can't change) + layer['weight_data'] = get_weights_data(data_reader, layer['name'], 'weight') + layer['bias_data'] = get_weights_data(data_reader, layer['name'], 'bias') # Input info (layer['in_height'], layer['in_width'], layer['n_chan']) = parse_data_format( input_shapes[0], 'channels_first' ) # Keras's default is channels_last # Additional parameters - layer['n_filt'] = pytorch_layer.out_channels - layer['filt_height'] = pytorch_layer.kernel_size[0] - layer['filt_width'] = pytorch_layer.kernel_size[1] - layer['stride_height'] = pytorch_layer.stride[0] - layer['stride_width'] = pytorch_layer.stride[1] - layer['dilation'] = pytorch_layer.dilation[0] - layer['pad_top'] = layer['pad_bottom'] = pytorch_layer.padding[0] - layer['pad_left'] = layer['pad_right'] = pytorch_layer.padding[1] - - if all(x == 0 for x in pytorch_layer.padding): # No padding, i.e., 'VALID' padding in Keras/Tensorflow + layer['n_filt'] = class_object.out_channels + layer['filt_height'] = class_object.kernel_size[0] + layer['filt_width'] = class_object.kernel_size[1] + layer['stride_height'] = class_object.stride[0] + layer['stride_width'] = class_object.stride[1] + layer['dilation'] = class_object.dilation[0] + layer['pad_top'] = layer['pad_bottom'] = class_object.padding[0] + layer['pad_left'] = layer['pad_right'] = class_object.padding[1] + + if all(x == 0 for x in class_object.padding): # No padding, i.e., 'VALID' padding in Keras/Tensorflow layer['padding'] = 'valid' else: # Only 'valid' and 'same' padding are available in Keras layer['padding'] = 'same' # Ouput info - (layer['out_height'], layer['out_width'], _, _, _, _) = compute_padding_2d( - layer['padding'], + (layer['out_height'], layer['out_width'], _, _, _, _) = compute_padding_2d_pytorch( + class_object.padding, layer['in_height'], layer['in_width'], layer['stride_height'], layer['stride_width'], layer['filt_height'], layer['filt_width'], + class_object.dilation[0], + class_object.dilation[1], ) output_shape = [input_shapes[0][0], layer['n_filt'], layer['out_height'], layer['out_width']] diff --git a/hls4ml/converters/pytorch/core.py b/hls4ml/converters/pytorch/core.py index fbf1e377e2..cc19464d3f 100644 --- a/hls4ml/converters/pytorch/core.py +++ b/hls4ml/converters/pytorch/core.py @@ -1,23 +1,24 @@ -from hls4ml.converters.pytorch_to_hls import pytorch_handler +from 
hls4ml.converters.pytorch_to_hls import get_weights_data, pytorch_handler -# TODO: propagate use_bias info properly -# https://github.com/fastmachinelearning/hls4ml/issues/409 @pytorch_handler('Linear') -def parse_linear_layer(pytorch_layer, layer_name, input_shapes, data_reader, config): - assert 'Linear' in pytorch_layer.__class__.__name__ +def parse_linear_layer(operation, layer_name, input_names, input_shapes, node, class_object, data_reader, config): + assert 'Linear' in operation layer = {} layer['class_name'] = 'Dense' layer['name'] = layer_name - layer['n_in'] = pytorch_layer.in_features - layer['n_out'] = pytorch_layer.out_features + layer['weight_data'], layer['bias_data'] = get_weights_data(data_reader, layer['name'], ['weight', 'bias']) + if class_object is not None: + layer['n_in'] = class_object.in_features + layer['n_out'] = class_object.out_features + else: + raise Exception('parsing of torch.nn.functional.linear not supported yet, please use torch.nn.Linear class') # Handling whether bias is used or not - assert pytorch_layer.bias is not None, "PyTorch Linear with bias=False not yet supported" - if pytorch_layer.bias is None: + if class_object.bias is None: layer['use_bias'] = False else: layer['use_bias'] = True @@ -27,34 +28,63 @@ def parse_linear_layer(pytorch_layer, layer_name, input_shapes, data_reader, con return layer, output_shape -# TODO: propagate parametrized activation parameters -# https://github.com/fastmachinelearning/hls4ml/issues/409 -# activation_layers = ['LeakyReLU', 'ThresholdedReLU', 'ELU', 'PReLU', 'Softmax', 'ReLU'] -activation_layers = ['Softmax', 'ReLU'] +activation_layers = ['Softmax', 'ReLU', 'LeakyReLU', 'Threshold', 'ELU', 'PReLU', 'Sigmoid'] @pytorch_handler(*activation_layers) -def parse_activation_layer(pytorch_layer, layer_name, input_shapes, data_reader, config): +def parse_activation_layer(operation, layer_name, input_names, input_shapes, node, class_object, data_reader, config): layer = {} - layer['class_name'] = pytorch_layer.__class__.__name__ + layer['class_name'] = operation layer['activation'] = layer['class_name'] layer['name'] = layer_name - if layer['class_name'] == 'ReLU': - layer['class_name'] = 'Activation' + # if layer['class_name'] != 'Activation': + # layer['activation'] = layer['class_name'] + if node.op == "call_module": + if layer['class_name'] == 'ReLU' or layer['class_name'] == 'Sigmoid': + layer['class_name'] = 'Activation' + if layer['class_name'] == 'LeakyReLU': + layer['activ_param'] = class_object.negative_slope + if layer['class_name'] == 'ELU': + layer['activ_param'] = class_object.alpha + if layer['class_name'] == 'PReLU': + layer['alpha_data'] = get_weights_data(data_reader, layer['name'], 'weight') + if layer['class_name'] == 'Threshold': + layer['activ_param'] = class_object.threshold + layer['class_name'] = 'ThresholdedReLU' + layer['activation'] = 'ThresholdedReLU' + if layer['activ_param'] < 0: + raise Exception('negative threshold values not supported') + + if hasattr(node, "dim"): + layer['axis'] = class_object.dim + else: + if layer['class_name'] == 'ReLU' or layer['class_name'] == 'Sigmoid': + layer['class_name'] = 'Activation' + if layer['class_name'] == 'LeakyReLU': + layer['activ_param'] = node.kwargs["negative_slope"] + if layer['class_name'] == 'ELU': + layer['activ_param'] = node.kwargs["alpha"] + if layer['class_name'] == 'Threshold': + layer['activ_param'] = node.args[1] + if layer['activ_param'] < 0: + raise Exception('negative threshold values not supported') + layer['class_name'] = 
'ThresholdedReLU' + layer['activation'] = 'ThresholdedReLU' + if "dim" in node.kwargs: + layer['axis'] = node.kwargs["dim"] output_shape = input_shapes[0] - return layer, output_shape -batchnorm_layers = ['BatchNorm2d', 'BatchNorm1d'] +batchnorm_layers = ['BatchNorm2d', 'BatchNorm1d', 'Batch_norm'] @pytorch_handler(*batchnorm_layers) -def parse_batchnorm_layer(pytorch_layer, layer_name, input_shapes, data_reader, config): - assert 'BatchNorm' in pytorch_layer.__class__.__name__ +def parse_batchnorm_layer(operation, layer_name, input_names, input_shapes, node, class_object, data_reader, config): + assert 'BatchNorm' in operation layer = {} @@ -63,8 +93,23 @@ def parse_batchnorm_layer(pytorch_layer, layer_name, input_shapes, data_reader, layer['name'] = layer_name # batchnorm para - layer['epsilon'] = pytorch_layer.eps - layer['use_gamma'] = layer['use_beta'] = pytorch_layer.affine + if node.op == "call_module": + layer['epsilon'] = class_object.eps + layer['use_gamma'] = layer['use_beta'] = class_object.affine + + if layer['use_gamma']: + layer['gamma_data'] = get_weights_data(data_reader, layer['name'], 'weight') + else: + layer['gamma_data'] = 1 + + if layer['use_beta']: + layer['beta_data'] = get_weights_data(data_reader, layer['name'], 'bias') + else: + layer['beta_data'] = 0 + + layer['mean_data'], layer['variance_data'] = get_weights_data( + data_reader, layer['name'], ['running_mean', 'running_variance'] + ) in_size = 1 for dim in input_shapes[0][1:]: diff --git a/hls4ml/converters/pytorch/function.py b/hls4ml/converters/pytorch/function.py new file mode 100644 index 0000000000..89fc0ecde6 --- /dev/null +++ b/hls4ml/converters/pytorch/function.py @@ -0,0 +1,40 @@ +from hls4ml.converters.pytorch_to_hls import pytorch_handler + +merge_layers = ['Add', 'Subtract', 'Multiply', 'Average', 'Maximum', 'Minimum', 'Cat', 'Dot'] + + +@pytorch_handler(*merge_layers) +def parse_merge_layer(operation, layer_name, input_names, input_shapes, node, class_object, data_reader, config): + assert operation in merge_layers + + layer = {} + if operation == "Cat": + layer['class_name'] = 'Concatenate' + else: + layer['class_name'] = operation + layer['name'] = layer_name + + layer['op'] = operation + + if input_names is not None: + layer['inputs'] = input_names + + output_shape = input_shapes[0][:] + if layer['class_name'] == 'Concatenate': + rank = len(input_shapes[0][1:]) + if rank > 3: + raise Exception('ERROR: Concatenation of tensors with rank > 3 is not yet supported.') + layer['op'] = layer['class_name'].lower() + f'{rank}d' + layer['axis'] = int(node.args[1]) + output_shape[layer['axis']] += input_shapes[1][layer['axis']] + elif layer['class_name'] == 'Dot': + rank = len(input_shapes[0][1:]) + if rank > 1: + raise Exception('ERROR: Dot of tensors with rank > 1 is not yet supported.') + layer['op'] = layer['class_name'].lower() + f'{rank}d' + else: + layer['class_name'] = 'Merge' + if len(layer['inputs']) > 2: + raise Exception('ERROR: Merging more than two tensors is not yet supported.') + + return layer, output_shape diff --git a/hls4ml/converters/pytorch/pooling.py b/hls4ml/converters/pytorch/pooling.py new file mode 100644 index 0000000000..05fe9626f2 --- /dev/null +++ b/hls4ml/converters/pytorch/pooling.py @@ -0,0 +1,135 @@ +from hls4ml.converters.pytorch_to_hls import pytorch_handler +from hls4ml.converters.utils import compute_padding_1d_pytorch, compute_padding_2d_pytorch, parse_data_format + +pooling_layers = ['MaxPool1d', 'MaxPool2d', 'AvgPool1d', 'AvgPool2d'] + + 
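+# A sketch of the mapping performed below (illustrative assumption, not part of
+# the handler): nn.MaxPool2d(kernel_size=2, stride=2, padding=0) applied to an
+# input of shape (None, 16, 8, 8) is parsed into a 'MaxPooling2D' hls4ml layer
+# with pool_height = pool_width = 2, stride_height = stride_width = 2 and
+# padding = 'valid', giving the channels_first output shape (None, 16, 4, 4).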
+@pytorch_handler(*pooling_layers) +def parse_pooling_layer(operation, layer_name, input_names, input_shapes, node, class_object, data_reader, config): + assert 'Pool' in operation or 'pool' in operation + + layer = {} + + if operation == 'MaxPool1d': + layer['class_name'] = 'MaxPooling1D' + if operation == 'MaxPool2d': + layer['class_name'] = 'MaxPooling2D' + if operation == 'AvgPool1d': + layer['class_name'] = 'AveragePooling1D' + if operation == 'AvgPool2d': + layer['class_name'] = 'AveragePooling2D' + + layer['name'] = layer_name + layer['data_format'] = 'channels_first' # Pytorch default (can't change) + if node.op == "call_module" and "Avg" in operation: + if class_object.count_include_pad: + layer['count_pad'] = 'true' + else: + layer['count_pad'] = 'false' + else: + layer['count_pad'] = 'true' + + if int(layer['class_name'][-2]) == 1: + (layer['n_in'], layer['n_filt']) = parse_data_format(input_shapes[0], layer['data_format']) + if node.op == "call_module": + layer['pool_width'] = ( + class_object.kernel_size if not type(class_object.kernel_size) is tuple else class_object.kernel_size[0] + ) + layer['stride_width'] = class_object.stride if not type(class_object.stride) is tuple else class_object.stride[0] + + if type(class_object.padding) is tuple: + padding = class_object.padding[0] + else: + padding = class_object.padding + + else: + layer['pool_width'] = int(node.args[1]) + layer['stride_width'] = node.kwargs['stride'] if node.kwargs['stride'] is not None else int(node.args[1]) + padding = node.kwargs['padding'] + + if padding == 0: # No padding, i.e., 'VALID' padding in Keras/Tensorflow + layer['padding'] = 'valid' + else: # Only 'valid' and 'same' padding are available in Keras + layer['padding'] = 'same' + + (layer['n_out'], layer['pad_left'], layer['pad_right']) = compute_padding_1d_pytorch( + padding, layer['n_in'], layer['stride_width'], layer['pool_width'], 1 + ) + + if layer['data_format'] == 'channels_last': + output_shape = [input_shapes[0][0], layer['n_out'], layer['n_filt']] + elif layer['data_format'] == 'channels_first': + output_shape = [input_shapes[0][0], layer['n_filt'], layer['n_out']] + + elif int(layer['class_name'][-2]) == 2: + (layer['in_height'], layer['in_width'], layer['n_filt']) = parse_data_format(input_shapes[0], layer['data_format']) + + if node.op == "call_module": + if type(class_object.stride) is tuple: + layer['stride_height'] = class_object.stride[0] + layer['stride_width'] = class_object.stride[1] + else: + layer['stride_height'] = class_object.stride + layer['stride_width'] = class_object.stride + + if type(class_object.kernel_size) is tuple: + layer['pool_height'] = class_object.kernel_size[0] + layer['pool_width'] = class_object.kernel_size[1] + else: + layer['pool_height'] = class_object.kernel_size + layer['pool_width'] = class_object.kernel_size + + if type(class_object.padding) is tuple: + padding = class_object.padding + else: + padding = [class_object.padding, class_object.padding] + + else: + if type(node.kwargs['stride']) is tuple: + layer['stride_height'] = node.kwargs['stride'][0] + layer['stride_width'] = node.kwargs['stride'][1] + else: + layer['stride_height'] = node.kwargs['stride'] + layer['stride_width'] = node.kwargs['stride'] + if type(node.kwargs['kernel_size']) is tuple: + layer['pool_height'] = node.kwargs['kernel_size'][0] + layer['pool_width'] = node.kwargs['kernel_size'][1] + else: + layer['pool_height'] = node.kwargs['kernel_size'] + layer['pool_width'] = node.kwargs['kernel_size'] + + if 
type(node.kwargs['padding']) is tuple:
+                padding = node.kwargs['padding']
+            else:
+                padding = [node.kwargs['padding'], node.kwargs['padding']]
+
+        if all(x == 0 for x in padding):  # No padding, i.e., 'VALID' padding in Keras/Tensorflow
+            layer['padding'] = 'valid'
+        else:  # Only 'valid' and 'same' padding are available in Keras
+            layer['padding'] = 'same'
+
+        (
+            layer['out_height'],
+            layer['out_width'],
+            layer['pad_top'],
+            layer['pad_bottom'],
+            layer['pad_left'],
+            layer['pad_right'],
+        ) = compute_padding_2d_pytorch(
+            padding,
+            layer['in_height'],
+            layer['in_width'],
+            layer['stride_height'],
+            layer['stride_width'],
+            layer['pool_height'],
+            layer['pool_width'],
+            1,
+            1,
+        )
+
+        if layer['data_format'] == 'channels_last':
+            output_shape = [input_shapes[0][0], layer['out_height'], layer['out_width'], layer['n_filt']]
+        elif layer['data_format'] == 'channels_first':
+            output_shape = [input_shapes[0][0], layer['n_filt'], layer['out_height'], layer['out_width']]
+
+    return layer, output_shape
diff --git a/hls4ml/converters/pytorch/reshape.py b/hls4ml/converters/pytorch/reshape.py
new file mode 100644
index 0000000000..50b15dc47a
--- /dev/null
+++ b/hls4ml/converters/pytorch/reshape.py
@@ -0,0 +1,29 @@
+import numpy as np
+
+from hls4ml.converters.pytorch_to_hls import pytorch_handler
+
+reshape_layers = ['View']
+
+
+@pytorch_handler(*reshape_layers)
+def parse_reshape_layer(operation, layer_name, input_names, input_shapes, node, class_object, data_reader, config):
+    assert operation == 'View'
+
+    layer = {}
+    layer['class_name'] = 'Reshape'
+    layer['name'] = layer_name
+
+    layer['target_shape'] = [int(i) for i in node.args[1:]]
+    # View can have -1 as one of its dimensions,
+    # leaving it to us to deduce it from the other dimensions and the overall size
+    if -1 in layer['target_shape']:
+        size = np.prod(input_shapes[0][1:])
+        for i in range(0, len(layer['target_shape'])):
+            if layer['target_shape'][i] == -1:
+                cl = layer['target_shape'][:]
+                cl.remove(-1)
+                layer['target_shape'][i] = int(size / np.prod(cl))
+
+    output_shape = input_shapes[0][:1] + layer['target_shape']
+
+    return layer, output_shape
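+
+
+# Illustration of the -1 deduction above (shapes assumed for the example): for
+# input_shapes[0] = [None, 2, 6] and x.view(-1, 3), node.args[1:] gives
+# target_shape = [-1, 3]; size = 2 * 6 = 12, so the -1 resolves to 12 / 3 = 4
+# and the parsed output shape becomes [None, 4, 3].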
diff --git a/hls4ml/converters/pytorch_to_hls.py b/hls4ml/converters/pytorch_to_hls.py
index 676e7e380c..7a454a6c04 100644
--- a/hls4ml/converters/pytorch_to_hls.py
+++ b/hls4ml/converters/pytorch_to_hls.py
@@ -24,7 +24,7 @@ def get_weights_data(self, layer_name, var_name):
         Parameters
         ----------
         layer_name : string
-            layer's name in the ONNX model
+            layer's name in the Pytorch model
         var_name : string
             variable to be extracted
@@ -43,22 +43,35 @@ def get_weights_data(self, layer_name, var_name):
             'kernel': 'weight',
             # Batchnorm
             'gamma': 'weight',
+            # Activation
+            'alpha': 'weight',
             'beta': 'bias',
             'moving_mean': 'running_mean',
             'moving_variance': 'running_var',
         }
 
+        # Workaround for the naming scheme in nn.Sequential:
+        # have to remove the prefix we previously had to add to make sure the tensors are found
+        if 'layer_' in layer_name:
+            layer_name = layer_name.split('layer_')[-1]
+
         if var_name not in list(torch_paramap.keys()) + ['weight', 'bias']:
             raise Exception('Pytorch parameter not yet supported!')
 
         elif var_name in list(torch_paramap.keys()):
             var_name = torch_paramap[var_name]
 
-        data = (
-            self.state_dict[layer_name + '.' + var_name].numpy().transpose()
-        )  # Look at transpose when systhesis produce lousy results. Might need to remove it.
+        # if a layer is reused in the model, torch.FX will append a "_n" for the n-th use;
+        # have to snap that off to find the tensors
+        if layer_name.split("_")[-1].isdigit() and len(layer_name.split("_")) > 1:
+            layer_name = "_".join(layer_name.split("_")[:-1])
 
-        return data
+        if layer_name + '.' + var_name in self.state_dict:
+            data = self.state_dict[layer_name + '.' + var_name].numpy()
+            return data
+
+        else:
+            return None
 
 
 class PyTorchFileReader(PyTorchModelReader):  # Inherit get_weights_data method
@@ -82,6 +95,22 @@ def __init__(self, config):
 
         self.state_dict = self.torch_model.state_dict()
 
+        data = {}  # placeholder return value; this code path is currently non-functional
+
+        return data
+
+
+def get_weights_data(data_reader, layer_name, var_name):
+    if not isinstance(var_name, (list, tuple)):
+        var_name = [var_name]
+
+    data = [data_reader.get_weights_data(layer_name, var) for var in var_name]
+
+    if len(data) == 1:
+        return data[0]
+    else:
+        return (*data,)
+
 
 # ----------------------Layer handling---------------------
 layer_handlers = {}
@@ -106,6 +135,22 @@ def decorator(function):
     return decorator
 
 
+# map names of operations between torch.nn and torch.nn.functional
+layer_name_map = {
+    'relu': 'ReLU',
+    'leaky_relu': 'LeakyReLU',
+    'elu': 'ELU',
+    'prelu': 'PReLU',
+    'sigmoid': 'Sigmoid',
+    'layer_threshold': 'Threshold',
+    'softmax': 'Softmax',
+    'max_pool1d': 'MaxPool1d',
+    'max_pool2d': 'MaxPool2d',
+    'avg_pool1d': 'AvgPool1d',
+    'avg_pool2d': 'AvgPool2d',
+}
+
+
 # ----------------------------------------------------------------
 
 
@@ -132,16 +177,33 @@ def pytorch_to_hls(config):
     print('Interpreting Model ...')
 
     reader = PyTorchFileReader(config) if isinstance(config['PytorchModel'], str) else PyTorchModelReader(config)
-    input_shapes = [list(reader.input_shape)]
+    if type(reader.input_shape) is tuple:
+        input_shapes = [list(reader.input_shape)]
+    else:
+        input_shapes = list(reader.input_shape)
 
     model = reader.torch_model
 
+    # dict of layer objects in non-traced form for access later on
+    children = {c[0]: c[1] for c in model.named_children()}
+    # use symbolic_trace to get a full graph of the model
+    from torch.fx import symbolic_trace
+
+    traced_model = symbolic_trace(model)
+
     # Define layers to skip for conversion to HLS
     skip_layers = ['Dropout', 'Flatten', 'Sequential']
 
     # All supported layers
     supported_layers = get_supported_pytorch_layers() + skip_layers
 
+    input_layers = None
+
     # Output shape tracking
     output_shapes = {}
     output_shape = None
 
@@ -150,56 +212,190 @@ def pytorch_to_hls(config):
     print('Topology:')
     layer_counter = 0
 
-    # First add input layer
-    input_layer = {}
-    input_layer['name'] = 'input1'
-    input_layer['class_name'] = 'InputLayer'
-    input_layer['input_shape'] = input_shapes[0][1:]
-    layer_list.insert(0, input_layer)
-    print("Input Shape: ", input_shapes)
+    n_inputs = 0
+
+    for node in traced_model.graph.nodes:
+        # If part of a nn.Sequential, the node name will start with an "_", which messes up the parsing
+        if node.name[0] == "_":
+            node.name = 'layer' + node.name
+
+        if node.op == 'call_module':
+            # modules that are part of a torch.nn.Sequential with name 'name' have target names 'name.x',
+            # where x is an integer numbering the elements of the Sequential
+            if "." 
in node.target: + class_object = children[node.target.split(".")[0]][int(node.target.split(".")[1])] + else: + class_object = children[node.target] + + pytorch_class = class_object.__class__.__name__ - for layer_name, pytorch_layer in model.named_modules(): - pytorch_class = pytorch_layer.__class__.__name__ + if pytorch_class not in supported_layers: + raise Exception(f'Unsupported layer {pytorch_class}') - # First module is the whole model's class - if pytorch_class == model.__class__.__name__: - continue + if layer_counter != 0: + input_shapes = [output_shape] # In case there are multiple inputs - if pytorch_class not in supported_layers: - raise Exception(f'Unsupported layer {pytorch_class}') + layer_name = node.name - # If not the first layer then input shape is taken from last layer's output - if layer_counter != 0: - input_shapes = [output_shape] # In case there are multiple inputs + # Handle skipped layers + if pytorch_class in skip_layers: + if pytorch_class == 'Sequential': # Ignore the mother module's class name + continue - # Handle skipped layers - if pytorch_class in skip_layers: - if pytorch_class == 'Sequential': # Ignore the mother module's class name + if pytorch_class == 'Flatten': + output_shapes[layer_name] = [input_shapes[0][0], np.prod(input_shapes[0][1:])] + else: + output_shapes[layer_name] = input_shapes[0] continue - if pytorch_class == 'Flatten': - output_shapes[layer_name] = [input_shapes[0][0], np.prod(input_shapes[0][1:])] + # Increment the layer counter after initial screenings + if pytorch_class in supported_layers: + layer_counter += 1 + + # parse info from class object + input_names = tuple([str(i) for i in node.args]) + input_shapes = [output_shapes[str(i)] for i in node.args] + + # for Conv layers + if 'Conv' in pytorch_class: + if not class_object.padding_mode == 'zeros': + raise Exception('padding modes other than "zeros" not implemented yet') + if not class_object.groups == 1: + raise Exception('non-default options for groups not implemented yet') + + # Process the layer + layer, output_shape = layer_handlers[pytorch_class]( + pytorch_class, layer_name, input_names, input_shapes, node, class_object, reader, config + ) + + print( + 'Layer name: {}, layer type: {}, input shape: {}'.format( + layer['name'], + layer['class_name'], + input_shapes, + ) + ) + layer_list.append(layer) + + assert output_shape is not None + output_shapes[layer['name']] = output_shape + + layer_counter += 1 + + if node.op == 'placeholder': + # 'placeholder' indicates an input layer. 
Multiple inputs are supported
+
+            input_layer = {}
+            input_layer['name'] = node.name
+            input_layer['class_name'] = 'InputLayer'
+            input_layer['input_shape'] = input_shapes[n_inputs][1:]
+            layer_list.insert(n_inputs, input_layer)
+
+            output_shapes[input_layer['name']] = input_shapes[n_inputs]
+            n_inputs += 1
+
+            layer_counter += 1
+
+        if node.op == 'call_function':
+            # Function calls in the graph have to be transformed to layers known to hls4ml
+
+            # operations that appear repeatedly have '_n' appended to their name for the nth repetition
+            operation = node.name
+            if node.name.split("_")[-1].isdigit():
+                operation = "_".join(node.name.split("_")[:-1])
+
+            if operation in layer_name_map:
+                operation = layer_name_map[operation]
+
+            # only a limited number of functions are supported
+            if operation not in supported_layers:
+                raise Exception(f'Unsupported function {operation}')
+            if operation == "PReLU" or operation == "batch_norm" or operation == "conv1d" or operation == "conv2d":
+                raise Exception(
+                    f'Function {operation} cannot be parsed as torch.nn.functional. Use the torch.nn implementation instead'
+                )
+
+            layer_name = node.name
+
+            layer_counter += 1
+
+            input_names = tuple([str(node.args[0])])
+            if 'Cat' in operation:
+                input_names = tuple([str(i) for i in node.args[0]])
+
+            input_shapes = [list(output_shapes[str(i)]) for i in list(input_names)]
+
+            # Process the layer
+            layer, output_shape = layer_handlers[operation](
+                operation, layer_name, input_names, input_shapes, node, None, reader, config
+            )
+
+            print('Layer name: {}, layer type: {}, input shape: {}'.format(layer['name'], layer['class_name'], input_shapes))
+            layer_list.append(layer)
+
+            assert output_shape is not None
+            output_shapes[layer['name']] = output_shape
+
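+        # For illustration (assumed example, not part of the parser): tracing
+        #     def forward(self, x):
+        #         return torch.cat([x, x], dim=1).view(-1, 4)
+        # yields FX nodes with (op, name) pairs like ('placeholder', 'x'),
+        # ('call_function', 'cat'), ('call_method', 'view') and ('output', 'output'),
+        # which the branches of this loop translate into hls4ml layers.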
+        if node.op == 'get_attr':
+            # Deals with tensors that are member variables of the model class
+            # We insert these tensors as input layer nodes into the hls4ml model graph
+            if "." not in node.target:
+                obj = getattr(model, node.name)
             else:
-                output_shapes[layer_name] = input_shapes[0]
-            continue  # !!
+                obj = getattr(children[node.target.split('.')[0]], node.target.split('.')[1])
+
+            input_layer = {}
+            input_layer['name'] = node.name
+            input_layer['class_name'] = 'InputLayer'
+            input_layer['input_shape'] = [None] + list(obj.size())
+            layer_list.insert(n_inputs, input_layer)
+
+            output_shapes[input_layer['name']] = [None] + list(obj.size())
+            n_inputs += 1
 
-    # Increment the layer counter after initial screenings
-    if pytorch_class in supported_layers:
             layer_counter += 1
 
+        if node.op == 'call_method':
+            # Method calls in the graph have to be transformed to layers known to hls4ml
+
+            # operations that appear repeatedly have '_n' appended to their name for the nth repetition
+            operation = node.name
+            if node.name.split("_")[-1].isdigit():
+                operation = "_".join(node.name.split("_")[:-1])
+
+            if operation in layer_name_map:
+                operation = layer_name_map[operation]
+
+            # only a limited number of methods are supported
+            if operation not in supported_layers:
+                raise Exception(f'Unsupported method {operation}')
+
+            layer_name = node.name
+
+            layer_counter += 1
+
+            if 'View' in operation:
+                input_names = tuple([str(node.args[0])])
+            else:
+                input_names = tuple([str(i) for i in node.args])
+
-        # Process the layer
-        layer, output_shape = layer_handlers[pytorch_class](pytorch_layer, layer_name, input_shapes, reader, config)
+            # Process the layer
+            input_shapes = [list(output_shapes[str(i)]) for i in list(input_names)]
+
+            layer, output_shape = layer_handlers[operation](
+                operation, layer_name, input_names, input_shapes, node, None, reader, config
+            )
 
-        print('Layer name: {}, layer type: {}, input shape: {}'.format(layer['name'], layer['class_name'], input_shapes))
-        layer_list.append(layer)
+            print('Layer name: {}, layer type: {}, input shape: {}'.format(layer['name'], layer['class_name'], input_shapes))
+            layer_list.append(layer)
 
-        assert output_shape is not None
-        output_shapes[layer['name']] = output_shape
+            assert output_shape is not None
+            output_shapes[layer['name']] = output_shape
 
     #################
     # Generate HLS
     #################
     print('Creating HLS model')
-    hls_model = ModelGraph(config, reader, layer_list)
+    hls_model = ModelGraph(config, layer_list, inputs=input_layers)
     return hls_model
diff --git a/hls4ml/converters/utils.py b/hls4ml/converters/utils.py
index da0458417b..de0cf82b6e 100644
--- a/hls4ml/converters/utils.py
+++ b/hls4ml/converters/utils.py
@@ -132,3 +132,117 @@ def compute_padding_2d(pad_type, in_height, in_width, stride_height, stride_widt
         raise Exception(f'Unknown padding type: {pad_type}')
 
     return (out_height, out_width, pad_top, pad_bottom, pad_left, pad_right)
+
+
+def compute_padding_1d_pytorch(pad_type, in_size, stride, filt_size, dilation):
+
+    if isinstance(pad_type, str):
+        if pad_type.lower() == 'same':
+            n_out = int(
+                math.floor((float(in_size) + 2 - float(dilation) * (float(filt_size) - 1) - 1) / float(stride) + 1)
+            )  # from https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html
+            if in_size % stride == 0:
+                pad_along_size = max(filt_size - stride, 0)
+            else:
+                pad_along_size = max(filt_size - (in_size % stride), 0)
+            pad_right = pad_along_size // 2
+            pad_left = pad_along_size - pad_right
+        elif pad_type.lower() == 'valid':
+            n_out = int(
+                math.floor((float(in_size) - float(dilation) * (float(filt_size) - 1) - 1) / float(stride) + 1)
+            )  # from https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html
+            pad_left = 0
+            pad_right = 0
+        else:
+            raise Exception(f'Unknown padding type: {pad_type}')
+    else:
+        if pad_type > 0:
+            n_out = int(
+                math.floor(
(float(in_size) + 2 * pad_type - float(dilation) * (float(filt_size) - 1) - 1) / float(stride) + 1 + ) + ) # from https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html + pad_right = pad_type + pad_left = pad_type + else: + n_out = int( + math.floor((float(in_size) - float(dilation) * (float(filt_size) - 1) - 1) / float(stride) + 1) + ) # from https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html + pad_left = 0 + pad_right = 0 + + return (n_out, pad_left, pad_right) + + +def compute_padding_2d_pytorch( + pad_type, in_height, in_width, stride_height, stride_width, filt_height, filt_width, dilation_height, dilation_width +): + + if isinstance(pad_type, str): + + if pad_type.lower() == 'same': + # Height + out_height = int( + math.floor(float(in_height + 2 - dilation_height * (filt_height - 1) - 1) / float(stride_height) + 1) + ) + if in_height % stride_height == 0: + pad_along_height = max(filt_height - stride_height, 0) + else: + pad_along_height = max(filt_height - (in_height % stride_height), 0) + pad_bottom = pad_along_height // 2 + pad_top = pad_along_height - pad_bottom + pad_top = 1 + # Width + out_width = int( + math.floor(float(in_width + 2 - dilation_width * (filt_width - 1) - 1) / float(stride_width) + 1) + ) + if in_width % stride_width == 0: + pad_along_width = max(filt_width - stride_width, 0) + else: + pad_along_width = max(filt_width - (in_width % stride_width), 0) + pad_right = pad_along_width // 2 + pad_left = pad_along_width - pad_right + elif pad_type.lower() == 'valid': + out_height = int( + math.floor(float(in_height - dilation_height * (filt_height - 1) - 1) / float(stride_height) + 1) + ) + out_width = int(math.floor(float(in_width - dilation_width * (filt_width - 1) - 1) / float(stride_width) + 1)) + + pad_top = 0 + pad_bottom = 0 + pad_left = 0 + pad_right = 0 + else: + raise Exception(f'Unknown padding type: {pad_type}') + + else: + if pad_type[0] == 0 and pad_type[1] == 0: + out_height = int( + math.floor(float(in_height - dilation_height * (filt_height - 1) - 1) / float(stride_height) + 1) + ) + out_width = int(math.floor(float(in_width - dilation_width * (filt_width - 1) - 1) / float(stride_width) + 1)) + + pad_top = 0 + pad_bottom = 0 + pad_left = 0 + pad_right = 0 + + else: + # Height + pad_height = pad_type[0] + pad_width = pad_type[1] + out_height = int( + math.floor( + float(in_height + 2 * pad_height - dilation_height * (filt_height - 1) - 1) / float(stride_height) + 1 + ) + ) + pad_bottom = pad_height + pad_top = pad_height + # Width + out_width = int( + math.floor(float(in_width + 2 * pad_width - dilation_width * (filt_width - 1) - 1) / float(stride_width) + 1) + ) + pad_right = pad_width + pad_left = pad_width + + return (out_height, out_width, pad_top, pad_bottom, pad_left, pad_right) diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index f0d3b1c1cb..32aa2688cc 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -33,6 +33,7 @@ register_flow( 'convert', [ + 'channels_last_converter', 'fuse_bias_add', 'remove_useless_transpose', 'output_rounding_saturation_mode', diff --git a/hls4ml/model/optimizer/passes/convert_to_channels_last.py b/hls4ml/model/optimizer/passes/convert_to_channels_last.py new file mode 100644 index 0000000000..33f3fda0ef --- /dev/null +++ b/hls4ml/model/optimizer/passes/convert_to_channels_last.py @@ -0,0 +1,105 @@ +# Conversion of model from channels_first to channels_last data format +# Based on 
https://github.com/fastmachinelearning/qonnx/blob/
+# 12c96a3ded06beacab08e0f554e4ed014476c0aa/src/qonnx/transformation/channels_last.py
+
+from hls4ml.model.layers import Input
+from hls4ml.model.optimizer import OptimizerPass
+
+
+class ChannelsLastConverter(OptimizerPass):
+    '''Converts a model from channels_first to channels_last data format by transposing the weights of relevant layers
+    and adding a transpose layer for the inputs and outputs, if necessary'''
+
+    def match(self, node):
+        if not hasattr(node, 'channels_last_converted'):
+            return True
+
+    def transform(self, model, node):
+        # If this parameter has not been set, this model does not need to be converted
+        if 'InputsChannelLast' not in model.config.config['HLSConfig']['Model']:
+            node.channels_last_converted = True
+            return False
+        outshape = node.get_output_variable().shape
+
+        # if inputs are not yet transposed into channels_last, add transpose layer
+        if (
+            not model.config.config['HLSConfig']['Model']['InputsChannelLast']
+            and isinstance(node, Input)
+            and len(outshape) > 1
+        ):
+            # Add transpose for input layer
+            input = node.name
+            if len(outshape) == 2:
+                attributes = {'perm': [1, 0]}
+            else:
+                attributes = {'perm': [1, 2, 0]}
+
+            transpose_node = model.make_node(
+                'Transpose', f'transpose_input_for_{node.get_attr("name")}', attributes, [input]
+            )
+            transpose_node.set_attr('name', f'transpose_input_for_{node.get_attr("name")}')
+            transpose_node.channels_last_converted = True
+
+            model.insert_node(transpose_node)
+
+        if not isinstance(node, Input):
+            # Transpose weight tensors
+            tensors = ['weight', 'depthwise', 'pointwise', 'zero_bias', 'scale', 'recurrent_weight']
+            for tensor in tensors:
+                try:
+                    if len(node.get_weights(tensor).shape) == 2:
+                        weights_channels_last = node.get_weights(tensor).data.transpose()
+                        node.get_weights(tensor).data = weights_channels_last
+                    elif len(node.get_weights(tensor).shape) == 3:
+                        weights_channels_last = node.get_weights(tensor).data.transpose([2, 1, 0])
+                        node.get_weights(tensor).data = weights_channels_last
+                    elif len(node.get_weights(tensor).shape) == 4:
+                        weights_channels_last = node.get_weights(tensor).data.transpose([2, 3, 1, 0])
+                        node.get_weights(tensor).data = weights_channels_last
+                except KeyError:
+                    pass
+            try:
+                node.set_attr('data_format', 'channels_last')
+            except AttributeError:
+                pass
+
+            # Adjust output shape
+            outdims = node.get_output_variable().dim_names
+            if len(outshape) == 2:
+                shape = [outshape[1], outshape[0]]
+                dims = [outdims[1], outdims[0]]
+                node.add_output_variable(shape, dims)
+            elif len(outshape) == 3:
+                shape = [outshape[1], outshape[2], outshape[0]]
+                dims = [outdims[1], outdims[2], outdims[0]]
+                node.add_output_variable(shape, dims)
+
+            # add transpose for output layer
+            if (
+                node.get_attr("name") in model.outputs
+                and len(outshape) > 1
+                and model.config.config['HLSConfig']['Model']['TransposeOutputs']
+            ):
+                input = node.name
+                outshape = node.get_output_variable().shape
+                if len(outshape) == 2:
+                    attributes = {'perm': [1, 0]}
+                else:
+                    attributes = {'perm': [2, 0, 1]}
+
+                transpose_node = model.make_node(
+                    'Transpose', f'transpose_output_for_{node.get_attr("name")}', attributes, [input]
+                )
+                transpose_node.channels_last_converted = True
+
+                model.insert_node(transpose_node)
+        else:
+            input_shape = node.get_output_variable().shape
+            input_shape.append(input_shape.pop(0))
+            node.get_output_variable().shape = input_shape
+            dim_names = node.get_output_variable().dim_names
+            dim_names.append(dim_names.pop(0))
+
node.get_output_variable().dim_names = dim_names + + node.channels_last_converted = True + return True diff --git a/hls4ml/templates/quartus/firmware/nnet_utils/nnet_pooling.h b/hls4ml/templates/quartus/firmware/nnet_utils/nnet_pooling.h index 1f31a73292..bbfc0908ef 100644 --- a/hls4ml/templates/quartus/firmware/nnet_utils/nnet_pooling.h +++ b/hls4ml/templates/quartus/firmware/nnet_utils/nnet_pooling.h @@ -114,6 +114,7 @@ struct pooling1d_config { // Padding static const unsigned pad_left = 0; static const unsigned pad_right = 0; + static const bool count_pad = false; // Pooling function static const Pool_Op pool_op = Max; @@ -147,6 +148,8 @@ void pooling1d_cl(data_T data[CONFIG_T::n_in * CONFIG_T::n_filt], res_T res[CONF if (inp_col + pool_col < CONFIG_T::pad_left || inp_col + pool_col >= (padded_width - CONFIG_T::pad_right)) { // Add padding pool[pool_col] = pad_val(); + if (CONFIG_T::count_pad) + img_overlap++; } else { // Current element is from input image pool[pool_col] = data[(inp_col + pool_col - CONFIG_T::pad_left) * CONFIG_T::n_filt + filt]; @@ -208,6 +211,7 @@ struct pooling2d_config { static const unsigned pad_bottom = 0; static const unsigned pad_left = 0; static const unsigned pad_right = 0; + static const bool count_pad = false; // Pooling function static const Pool_Op pool_op = Max; @@ -256,6 +260,8 @@ void pooling2d_cl(data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_ inp_width + pool_row >= (padded_width - CONFIG_T::pad_right)) { // Add padding pool[pool_col * CONFIG_T::stride_width + pool_row] = pad_val(); + if (CONFIG_T::count_pad) + img_overlap++; } else { // Current element is from input image pool[pool_col * CONFIG_T::stride_width + pool_row] = diff --git a/hls4ml/templates/vivado/nnet_utils/nnet_pooling.h b/hls4ml/templates/vivado/nnet_utils/nnet_pooling.h index 16a0be0690..f7facf54a0 100644 --- a/hls4ml/templates/vivado/nnet_utils/nnet_pooling.h +++ b/hls4ml/templates/vivado/nnet_utils/nnet_pooling.h @@ -89,6 +89,7 @@ struct pooling1d_config { static const unsigned n_out = (n_in - pool_width) / stride_width + 1; static const unsigned pad_left = 0; static const unsigned pad_right = 0; + static const bool count_pad = false; // Pooling function static const Pool_Op pool_op = Max; }; @@ -122,6 +123,8 @@ void pooling1d_cl(data_T data[CONFIG_T::n_in * CONFIG_T::n_filt], res_T res[CONF if (ii + jj < CONFIG_T::pad_left || ii + jj >= (padded_width - CONFIG_T::pad_right)) { // Add padding pool[jj] = pad_val(); + if (CONFIG_T::count_pad) + img_overlap++; } else { pool[jj] = data[(ii + jj - CONFIG_T::pad_left) * CONFIG_T::n_filt + ff]; img_overlap++; @@ -179,6 +182,7 @@ struct pooling2d_config { static const unsigned pad_bottom = 0; static const unsigned pad_left = 0; static const unsigned pad_right = 0; + static const bool count_pad = false; // Pooling function static const Pool_Op pool_op = Max; // Reuse factor @@ -225,6 +229,8 @@ void pooling2d_cl(data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_ jj + ll < CONFIG_T::pad_left || jj + ll >= (padded_width - CONFIG_T::pad_right)) { // Add padding pool[kk * CONFIG_T::stride_width + ll] = pad_val(); + if (CONFIG_T::count_pad) + img_overlap++; } else { pool[kk * CONFIG_T::stride_width + ll] = data[(ii + kk - CONFIG_T::pad_top) * CONFIG_T::in_width * CONFIG_T::n_filt + @@ -284,6 +290,8 @@ void pooling2d_cf(data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_ jj + ll < CONFIG_T::pad_left || jj + ll >= (padded_width - CONFIG_T::pad_right)) { // Add padding pool[kk * CONFIG_T::stride_width + ll] = 
pad_val();
+                        if (CONFIG_T::count_pad)
+                            img_overlap++;
                     } else {
                         pool[kk * CONFIG_T::stride_width + ll] =
                             data[(ii + kk - CONFIG_T::pad_top) * CONFIG_T::in_width +
diff --git a/hls4ml/utils/config.py b/hls4ml/utils/config.py
index 0368d3276a..99c703aff6 100644
--- a/hls4ml/utils/config.py
+++ b/hls4ml/utils/config.py
@@ -242,7 +242,13 @@ def make_layer_config(layer):
 
 
 def config_from_pytorch_model(
-    model, granularity='model', backend=None, default_precision='ap_fixed<16,6>', default_reuse_factor=1
+    model,
+    granularity='model',
+    backend=None,
+    default_precision='ap_fixed<16,6>',
+    default_reuse_factor=1,
+    inputs_channel_last=False,
+    transpose_outputs=True,
 ):
     """Create an HLS conversion config given the PyTorch model.
 
@@ -264,6 +270,11 @@ def config_from_pytorch_model(
         backend(str, optional): Name of the backend to use
         default_precision (str, optional): Default precision to use. Defaults to 'fixed<16,6>'.
         default_reuse_factor (int, optional): Default reuse factor. Defaults to 1.
+        inputs_channel_last (bool, optional): Set to 'True' if the input to the model comes in the
+            'channels_last' format. Defaults to 'False'. If False, inputs will be transposed internally.
+        transpose_outputs (bool, optional): Set to 'False' if the output should not be transposed from
+            channels_last into channels_first data format. Defaults to 'True'. If False, outputs need
+            to be transposed manually.
 
     Raises:
         Exception: If PyTorch model has layers not supported by hls4ml.
@@ -277,6 +288,8 @@ def config_from_pytorch_model(
     model_config = {}
     model_config['Precision'] = default_precision
     model_config['ReuseFactor'] = default_reuse_factor
+    model_config['InputsChannelLast'] = inputs_channel_last
+    model_config['TransposeOutputs'] = transpose_outputs
     model_config['Strategy'] = 'Latency'
 
     config['Model'] = model_config
diff --git a/test/pytest/test_pytorch_api.py b/test/pytest/test_pytorch_api.py
new file mode 100644
index 0000000000..ff2bae2a43
--- /dev/null
+++ b/test/pytest/test_pytorch_api.py
@@ -0,0 +1,574 @@
+import math
+from pathlib import Path
+
+import numpy as np
+import pytest
+import torch
+import torch.nn as nn
+from torch.nn import AvgPool1d, AvgPool2d, MaxPool1d, MaxPool2d
+
+from hls4ml.converters import convert_from_pytorch_model
+from hls4ml.utils.config import config_from_pytorch_model
+
+test_root_path = Path(__file__).parent
+
+
+class LinearModel(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.linear = nn.Linear(1, 1)
+
+    def forward(self, x):
+        return self.linear(x)
+
+
+@pytest.mark.parametrize('backend', ['Vivado', 'Quartus'])
+@pytest.mark.parametrize('io_type', ['io_parallel', 'io_stream'])
+def test_linear(backend, io_type):
+    model = LinearModel()
+    model.eval()
+
+    X_input = np.random.rand(1)
+
+    pytorch_prediction = model(torch.Tensor(X_input)).detach().numpy()
+
+    config = config_from_pytorch_model(model)
+    output_dir = str(test_root_path / f'hls4mlprj_pytorch_api_linear_{backend}_{io_type}')
+
+    hls_model = convert_from_pytorch_model(
+        model, (None, 1), hls_config=config, output_dir=output_dir, backend=backend, io_type=io_type
+    )
+
+    hls_model.compile()
+
+    hls_prediction = hls_model.predict(X_input)
+
+    np.testing.assert_allclose(hls_prediction, pytorch_prediction, rtol=1e-2, atol=0.01)
+
+    from torch.fx import symbolic_trace
+
+    traced_model = symbolic_trace(model)
+
+    nNodes = 0
+    for _node in traced_model.graph.nodes:
+        nNodes += 1
+
+    assert nNodes - 1 == len(hls_model.get_layers())
+    assert list(hls_model.get_layers())[0].attributes['class_name'] == "InputLayer"
+
assert list(hls_model.get_layers())[1].attributes["class_name"] == "Dense" + assert list(hls_model.get_layers())[0].attributes['input_shape'] == [1] + assert list(hls_model.get_layers())[1].attributes['n_in'] == 1 + assert list(hls_model.get_layers())[1].attributes['n_out'] == 1 + + +# TODO: add ThresholdedReLU test when it can be made to pass +@pytest.mark.parametrize( + "activation_function", + [ + nn.ReLU(), + nn.LeakyReLU(negative_slope=1.0), + nn.ELU(alpha=1.0), + nn.PReLU(init=0.25), + nn.Sigmoid(), + nn.Threshold(threshold=1.0, value=0.0), + ], +) +@pytest.mark.parametrize('backend', ['Vivado', 'Quartus']) +@pytest.mark.parametrize('io_type', ['io_parallel', 'io_stream']) +def test_activations(activation_function, backend, io_type): + model = torch.nn.Sequential(nn.Linear(1, 1), activation_function).to() + model.eval() + + X_input = np.random.rand(1) + + pytorch_prediction = model(torch.Tensor(X_input)).detach().numpy() + + config = config_from_pytorch_model(model) + output_dir = str( + test_root_path / f'hls4mlprj_pytorch_api_activations_{activation_function.__class__.__name__}_{backend}_{io_type}' + ) + hls_model = convert_from_pytorch_model( + model, (None, 1), hls_config=config, output_dir=output_dir, backend=backend, io_type=io_type + ) + hls_model.compile() + + hls_prediction = hls_model.predict(X_input) + + np.testing.assert_allclose(hls_prediction, pytorch_prediction, rtol=1e-2, atol=0.01) + + from torch.fx import symbolic_trace + + traced_model = symbolic_trace(model) + + nNodes = 0 + for _node in traced_model.graph.nodes: + nNodes += 1 + + assert nNodes - 1 == len(hls_model.get_layers()) + + if activation_function.__class__.__name__ == 'ReLU' or activation_function.__class__.__name__ == 'Sigmoid': + assert list(hls_model.get_layers())[2].attributes['class_name'] == 'Activation' + elif activation_function.__class__.__name__ == 'Threshold': + assert list(hls_model.get_layers())[2].attributes['class_name'] == 'ThresholdedReLU' + else: + assert list(hls_model.get_layers())[2].attributes['class_name'] == activation_function.__class__.__name__ + + +class ReLuModel(nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return nn.functional.relu(x) + + +class LeakyReLuModel(nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return nn.functional.leaky_relu(x, negative_slope=1.0) + + +class EluModel(nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return nn.functional.elu(x, alpha=1.0) + + +class ThresholdModel(nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return nn.functional.threshold(x, threshold=1.0, value=0.0) + + +class SigmoidModel(nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return nn.functional.sigmoid(x) + + +@pytest.mark.parametrize( + "activation_function", + [ + ReLuModel(), + LeakyReLuModel(), + EluModel(), + SigmoidModel(), + ThresholdModel(), + ], +) +@pytest.mark.parametrize('backend', ['Vivado', 'Quartus']) +@pytest.mark.parametrize('io_type', ['io_parallel', 'io_stream']) +def test_activation_functionals(activation_function, backend, io_type): + model = activation_function + model.eval() + + X_input = np.random.rand(1) + + pytorch_prediction = model(torch.Tensor(X_input)).detach().numpy() + + config = config_from_pytorch_model(model) + output_dir = str(test_root_path / f'hls4mlprj_pytorch_api_activations_functional_relu_{backend}_{io_type}') + hls_model = convert_from_pytorch_model( + model, (None, 
1), hls_config=config, output_dir=output_dir, backend=backend, io_type=io_type + ) + hls_model.compile() + + hls_prediction = hls_model.predict(X_input) + + np.testing.assert_allclose(hls_prediction, pytorch_prediction, rtol=1e-2, atol=0.01) + + from torch.fx import symbolic_trace + + traced_model = symbolic_trace(model) + + nNodes = 0 + for _node in traced_model.graph.nodes: + nNodes += 1 + + assert nNodes - 1 == len(hls_model.get_layers()) + + +padds_options = [0, 1] + + +@pytest.mark.parametrize('padds', padds_options) +@pytest.mark.parametrize('backend', ['Vivado', 'Quartus']) +@pytest.mark.parametrize('io_type', ['io_parallel', 'io_stream']) +def test_conv1d(padds, backend, io_type): + n_in = 2 + n_out = 2 + kernel_size = 3 + size_in = 4 + + model = torch.nn.Sequential(nn.Conv1d(n_in, n_out, kernel_size, padding=padds), nn.ReLU()).to() + model.eval() + + X_input = np.random.rand(1, n_in, size_in) + pytorch_prediction = model(torch.Tensor(X_input)).detach().numpy() + + if io_type == 'io_stream': + X_input = np.ascontiguousarray(X_input.transpose(0, 2, 1)) + config = config_from_pytorch_model(model, inputs_channel_last=True, transpose_outputs=False) + else: + config = config_from_pytorch_model(model, inputs_channel_last=False, transpose_outputs=True) + + output_dir = str(test_root_path / f'hls4mlprj_pytorch_api_conv1d_{padds}_{backend}_{io_type}') + hls_model = convert_from_pytorch_model( + model, (None, n_in, size_in), hls_config=config, output_dir=output_dir, backend=backend, io_type=io_type + ) + hls_model.compile() + + from torch.fx import symbolic_trace + + traced_model = symbolic_trace(model) + nNodes = 0 + convNode = None + reluNode = None + for _node in traced_model.graph.nodes: + nNodes += 1 + if nNodes == 2: + convNode = _node + if nNodes == 3: + reluNode = _node + + if io_type == 'io_stream': + # Vivado inserts and additional layer for 'same' padding in io_stream + if backend == "Vivado" and padds == 1: + assert nNodes == len(hls_model.get_layers()) + else: + assert nNodes - 1 == len(hls_model.get_layers()) + else: + assert nNodes + 1 == len(hls_model.get_layers()) + + children = {c[0]: c[1] for c in model.named_children()} + class_object_conv = children[convNode.target] + class_object_relu = children[reluNode.target] + + out_width = int( + (size_in + 2 * padds - class_object_conv.dilation[0] * (class_object_conv.kernel_size[0] - 1) - 1) + / class_object_conv.stride[0] + + 1 + ) # following https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html + + if io_type == 'io_stream': + hls_prediction = np.transpose(np.reshape(hls_model.predict(X_input), (1, out_width, n_out)), (0, 2, 1)) + else: + hls_prediction = np.reshape(hls_model.predict(X_input), (1, n_out, out_width)) + # results are not very good at the moment + np.testing.assert_allclose(hls_prediction, pytorch_prediction, rtol=0, atol=5e-2) + + # if not (backend == 'Vivado' and io_type == 'io_stream' and padds == 1): + conv_index = 2 + act_index = 3 + if io_type == "io_stream" and not (backend == "Vivado" and padds == 1): + conv_index = 1 + act_index = 2 + assert list(hls_model.get_layers())[conv_index].attributes['name'] == 'layer' + convNode.name + assert list(hls_model.get_layers())[conv_index].attributes['class_name'] == 'Conv1D' + assert list(hls_model.get_layers())[act_index].attributes['activation'] == class_object_relu.__class__.__name__ + if io_type == "io_stream" and backend == "Vivado" and padds == 1: + assert list(hls_model.get_layers())[conv_index].attributes["in_width"] == size_in + 2 + else: + assert 
list(hls_model.get_layers())[conv_index].attributes["in_width"] == size_in + assert list(hls_model.get_layers())[conv_index].attributes['filt_width'] == class_object_conv.kernel_size[0] + assert list(hls_model.get_layers())[conv_index].attributes['n_chan'] == class_object_conv.in_channels + assert list(hls_model.get_layers())[conv_index].attributes['n_filt'] == class_object_conv.out_channels + assert list(hls_model.get_layers())[conv_index].attributes['stride_width'] == class_object_conv.stride[0] + if list(hls_model.get_layers())[conv_index].attributes['padding'] == 'valid': + padding = 0 + else: + padding = 1 + if io_type == "io_stream" and backend == "Vivado" and padds == 1: + padding = 1 + padds = 0 + + assert padding == class_object_conv.padding[0] + assert list(hls_model.get_layers())[conv_index].attributes['data_format'] == 'channels_last' + assert list(hls_model.get_layers())[conv_index].attributes["out_width"] == out_width + + pad_along_width = max((out_width - 1) * class_object_conv.stride[0] + class_object_conv.kernel_size[0] - size_in, 0) + pad_left = pad_along_width // 2 + pad_right = pad_along_width - pad_left + + if padds == 1: + assert list(hls_model.get_layers())[conv_index].attributes['pad_left'] == pad_left + assert list(hls_model.get_layers())[conv_index].attributes['pad_right'] == pad_right + elif padds == 0: + assert list(hls_model.get_layers())[conv_index].attributes['pad_left'] == 0 + assert list(hls_model.get_layers())[conv_index].attributes['pad_right'] == 0 + + +padds_options = [0, 1] + + +@pytest.mark.parametrize('padds', padds_options) +@pytest.mark.parametrize('backend', ['Vivado', 'Quartus']) +@pytest.mark.parametrize('io_type', ['io_parallel', 'io_stream']) +def test_conv2d(padds, backend, io_type): + n_in = 2 + n_out = 2 + kernel_size = 3 + size_in_width = 4 + size_in_height = 4 + + model = torch.nn.Sequential(nn.Conv2d(n_in, n_out, kernel_size, padding=padds), nn.ReLU()).to() + model.eval() + + X_input = np.random.rand(100, n_in, size_in_height, size_in_width) + pytorch_prediction = model(torch.Tensor(X_input)).detach().numpy() + + if io_type == 'io_stream': + X_input = np.ascontiguousarray(X_input.transpose(0, 2, 3, 1)) + config = config_from_pytorch_model(model, inputs_channel_last=True, transpose_outputs=False) + else: + config = config_from_pytorch_model(model, inputs_channel_last=False, transpose_outputs=True) + + output_dir = str(test_root_path / f'hls4mlprj_pytorch_api_conv2d_{padds}_{backend}_{io_type}') + hls_model = convert_from_pytorch_model( + model, + (None, n_in, size_in_height, size_in_width), + hls_config=config, + output_dir=output_dir, + backend=backend, + io_type=io_type, + ) + hls_model.compile() + + from torch.fx import symbolic_trace + + traced_model = symbolic_trace(model) + nNodes = 0 + convNode = None + reluNode = None + for _node in traced_model.graph.nodes: + nNodes += 1 + if nNodes == 2: + convNode = _node + if nNodes == 3: + reluNode = _node + # if io_type == 'io_stream': + # assert nNodes -1 == len(hls_model.get_layers()) + # else: + # assert nNodes == len(hls_model.get_layers()) + + children = {c[0]: c[1] for c in model.named_children()} + class_object_conv = children[convNode.target] + class_object_relu = children[reluNode.target] + + from hls4ml.converters.utils import compute_padding_2d + + padding = 'valid' if padds == 0 else 'same' + out_dims_hls = compute_padding_2d( + padding, + size_in_height, + size_in_width, + 1, + 1, + kernel_size, + kernel_size, + ) + + out_width = int( + ( + size_in_width + + 2 * 
class_object_conv.padding[1]
+            - class_object_conv.dilation[1] * (class_object_conv.kernel_size[1] - 1)
+            - 1
+        )
+        / class_object_conv.stride[1]
+        + 1
+    )  # following https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html
+    assert out_dims_hls[0] == out_width
+    out_height = int(
+        (
+            size_in_height
+            + 2 * class_object_conv.padding[0]
+            - class_object_conv.dilation[0] * (class_object_conv.kernel_size[0] - 1)
+            - 1
+        )
+        / class_object_conv.stride[0]
+        + 1
+    )  # following https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html
+    assert out_dims_hls[1] == out_height
+
+    if io_type == 'io_stream':
+        hls_prediction = np.transpose(
+            np.reshape(hls_model.predict(X_input), (100, out_height, out_width, n_out)), (0, 3, 1, 2)
+        )
+    else:
+        hls_prediction = np.reshape(hls_model.predict(X_input), (100, n_out, out_height, out_width))
+    # results are not very good at the moment
+    np.testing.assert_allclose(hls_prediction, pytorch_prediction, rtol=0, atol=5e-2)
+
+    if not (backend == 'Vivado' and io_type == 'io_stream' and padds == 1):
+        # Vivado inserts an additional layer for 'same' padding in io_stream
+        conv_index = 2
+        act_index = 3
+        if io_type == "io_stream":
+            conv_index = 1
+            act_index = 2
+        assert list(hls_model.get_layers())[conv_index].attributes['name'] == 'layer' + convNode.name
+        assert list(hls_model.get_layers())[conv_index].attributes['class_name'] == 'Conv2D'
+        assert list(hls_model.get_layers())[act_index].attributes['activation'] == class_object_relu.__class__.__name__
+        assert list(hls_model.get_layers())[conv_index].attributes["in_width"] == size_in_width
+        assert list(hls_model.get_layers())[conv_index].attributes["in_height"] == size_in_height
+        assert list(hls_model.get_layers())[conv_index].attributes['filt_width'] == class_object_conv.kernel_size[1]
+        assert list(hls_model.get_layers())[conv_index].attributes['filt_height'] == class_object_conv.kernel_size[0]
+        assert list(hls_model.get_layers())[conv_index].attributes['n_chan'] == class_object_conv.in_channels
+        assert list(hls_model.get_layers())[conv_index].attributes['n_filt'] == class_object_conv.out_channels
+        assert list(hls_model.get_layers())[conv_index].attributes['stride_width'] == class_object_conv.stride[1]
+        assert list(hls_model.get_layers())[conv_index].attributes['stride_height'] == class_object_conv.stride[0]
+        if list(hls_model.get_layers())[conv_index].attributes['padding'] == 'valid':
+            padding = 0
+        else:
+            padding = 1
+        assert padding == class_object_conv.padding[0]
+        assert list(hls_model.get_layers())[conv_index].attributes['data_format'] == 'channels_last'
+
+        pad_along_width = max(
+            (out_width - 1) * class_object_conv.stride[1] + class_object_conv.kernel_size[1] - size_in_width, 0
+        )
+        pad_along_height = max(
+            (out_height - 1) * class_object_conv.stride[0] + class_object_conv.kernel_size[0] - size_in_height, 0
+        )
+
+        pad_top = pad_along_height // 2
+        pad_bottom = pad_along_height - pad_top
+        pad_left = pad_along_width // 2
+        pad_right = pad_along_width - pad_left
+
+        if padds == 1:
+            assert list(hls_model.get_layers())[conv_index].attributes['pad_left'] == pad_left
+            assert list(hls_model.get_layers())[conv_index].attributes['pad_right'] == pad_right
+            assert list(hls_model.get_layers())[conv_index].attributes['pad_top'] == pad_top
+            assert list(hls_model.get_layers())[conv_index].attributes['pad_bottom'] == pad_bottom
+        elif padds == 0:
+            assert list(hls_model.get_layers())[conv_index].attributes['pad_left'] == 0
+            assert 
list(hls_model.get_layers())[conv_index].attributes['pad_right'] == 0 + assert list(hls_model.get_layers())[conv_index].attributes['pad_top'] == 0 + assert list(hls_model.get_layers())[conv_index].attributes['pad_bottom'] == 0 + + +padds_options = [0, 1] +pooling_layers = [MaxPool1d, MaxPool2d, AvgPool1d, AvgPool2d] + + +@pytest.mark.parametrize('pooling', pooling_layers) +@pytest.mark.parametrize('padds', padds_options) +@pytest.mark.parametrize('backend', ['Vivado', 'Quartus']) +def test_pooling(pooling, padds, backend): + assert '1d' in pooling.__name__ or '2d' in pooling.__name__ + + if '2d' in pooling.__name__: + n_in = 2 + size_in_height = 15 + size_in_width = 18 + else: + n_in = 2 + size_in_width = 121 + size_in_height = 0 + + input_shape = (1, n_in, size_in_height, size_in_width) if '2d' in pooling.__name__ else (1, n_in, size_in_width) + input_shape_forHLS = ( + (None, n_in, size_in_height, size_in_width) if '2d' in pooling.__name__ else (None, n_in, size_in_width) + ) + X_input = np.random.rand(*input_shape) + + model = torch.nn.Sequential(pooling(2, padding=padds)).to() + model.eval() + pytorch_prediction = model(torch.Tensor(X_input)).detach().numpy() + + config = config_from_pytorch_model(model) + output_dir = str(test_root_path / f'hls4mlprj_pytorch_api_pooling_{pooling.__name__}_padds_{padds}_backend_{backend}') + hls_model = convert_from_pytorch_model( + model, input_shape_forHLS, hls_config=config, output_dir=output_dir, backend=backend + ) + hls_model.compile() + + from torch.fx import symbolic_trace + + traced_model = symbolic_trace(model) + nNodes = 0 + poolNode = None + for _node in traced_model.graph.nodes: + nNodes += 1 + if nNodes == 2: + poolNode = _node + assert nNodes + 1 == len(hls_model.get_layers()) + children = {c[0]: c[1] for c in model.named_children()} + class_object_pool = children[poolNode.target] + + if "Max" in pooling.__name__: + out_height = int( + math.floor( + float(size_in_height + 2 * padds - class_object_pool.dilation * (class_object_pool.kernel_size - 1) - 1) + / float(class_object_pool.stride) + + 1 + ) + ) + out_width = int( + math.floor( + float(size_in_width + 2 * padds - class_object_pool.dilation * (class_object_pool.kernel_size - 1) - 1) + / float(class_object_pool.stride) + + 1 + ) + ) + else: + if '2d' in pooling.__name__: + out_height = int( + math.floor((size_in_height + 2 * padds - class_object_pool.kernel_size) / class_object_pool.stride + 1) + ) + out_width = int( + math.floor((size_in_width + 2 * padds - class_object_pool.kernel_size) / class_object_pool.stride + 1) + ) + else: + out_height = int( + math.floor((size_in_height + 2 * padds - class_object_pool.kernel_size[0]) / class_object_pool.stride[0] + 1) + ) + out_width = int( + math.floor((size_in_width + 2 * padds - class_object_pool.kernel_size[0]) / class_object_pool.stride[0] + 1) + ) + + if '2d' in pooling.__name__: + hls_prediction = np.reshape(hls_model.predict(X_input), (1, n_in, out_height, out_width)) + + else: + pred = hls_model.predict(X_input) + hls_prediction = np.reshape(pred, (1, n_in, out_width)) + + # results are not very good at the moment + np.testing.assert_allclose(hls_prediction, pytorch_prediction, rtol=0, atol=5e-2) + + # Verify correct parsing of layer + hls_pool = list(hls_model.get_layers())[-2] + if '2d' in pooling.__name__: + assert hls_pool.attributes['name'] == "layer" + poolNode.name + assert hls_pool.attributes['class_name'][-2] == str(2) + assert hls_pool.attributes['stride_height'] == class_object_pool.stride + assert 
hls_pool.attributes['stride_width'] == class_object_pool.stride
+        assert hls_pool.attributes['pool_height'] == class_object_pool.kernel_size
+        assert hls_pool.attributes['pool_width'] == class_object_pool.kernel_size
+        assert hls_pool.attributes['padding'] == ('valid' if class_object_pool.padding == 0 else 'same')
+
+    elif '1d' in pooling.__name__:
+        if "Max" in pooling.__name__:
+            assert hls_pool.attributes['name'] == "layer" + poolNode.name
+            assert hls_pool.attributes['class_name'][-2] == str(1)
+            assert hls_pool.attributes['pool_width'] == class_object_pool.kernel_size
+            assert hls_pool.attributes['stride_width'] == class_object_pool.stride
+            assert hls_pool.attributes['padding'] == ('valid' if class_object_pool.padding == 0 else 'same')
+
+        else:
+            assert hls_pool.attributes['name'] == "layer" + poolNode.name
+            assert hls_pool.attributes['class_name'][-2] == str(1)
+            assert hls_pool.attributes['pool_width'] == class_object_pool.kernel_size[0]
+            assert hls_pool.attributes['stride_width'] == class_object_pool.stride[0]
+            assert hls_pool.attributes['padding'] == ('valid' if class_object_pool.padding == 0 else 'same')

From 7fdd393ff0f11b39de6fcf3d531f83fdf7cecf38 Mon Sep 17 00:00:00 2001
From: Jan-Frederik Schulte
Date: Thu, 25 May 2023 08:36:33 -0400
Subject: [PATCH 02/10] run pre-commit

---
 hls4ml/converters/utils.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/hls4ml/converters/utils.py b/hls4ml/converters/utils.py
index de0cf82b6e..d1c9e050d5 100644
--- a/hls4ml/converters/utils.py
+++ b/hls4ml/converters/utils.py
@@ -135,7 +135,6 @@ def compute_padding_2d(pad_type, in_height, in_width, stride_height, stride_widt
 
 
 def compute_padding_1d_pytorch(pad_type, in_size, stride, filt_size, dilation):
-
     if isinstance(pad_type, str):
         if pad_type.lower() == 'same':
             n_out = int(
@@ -177,9 +176,7 @@ def compute_padding_2d_pytorch(
     pad_type, in_height, in_width, stride_height, stride_width, filt_height, filt_width, dilation_height, dilation_width
 ):
-
     if isinstance(pad_type, str):
-
         if pad_type.lower() == 'same':
             # Height
             out_height = int(

From 352d57b5f1a33bce6cbf48322973e2ae4a2eb737 Mon Sep 17 00:00:00 2001
From: Vladimir Loncar
Date: Sun, 11 Jun 2023 19:21:38 +0200
Subject: [PATCH 03/10] Style fixes (mostly)

---
 hls4ml/converters/__init__.py        |   2 +-
 hls4ml/converters/pytorch/core.py    |  14 ++--
 hls4ml/converters/pytorch/pooling.py |   6 +-
 hls4ml/converters/pytorch_to_hls.py  | 102 +++++++++------------------
 4 files changed, 44 insertions(+), 80 deletions(-)

diff --git a/hls4ml/converters/__init__.py b/hls4ml/converters/__init__.py
index b965a159ef..4bf139b673 100644
--- a/hls4ml/converters/__init__.py
+++ b/hls4ml/converters/__init__.py
@@ -246,7 +246,7 @@ def convert_from_pytorch_model(
     """Convert PyTorch model to hls4ml model based on the provided configuration.
 
     Args:
-        model: PyTorch model to conert.
+        model: PyTorch model to convert.
         input_shape (list): The shape of the input tensor. First element is the batch size, needs to be None
         output_dir (str, optional): Output directory of the generated HLS project. Defaults to 'my-hls-test'.
         project_name (str, optional): Name of the HLS project. Defaults to 'myproject'.
diff --git a/hls4ml/converters/pytorch/core.py b/hls4ml/converters/pytorch/core.py index cc19464d3f..792830821e 100644 --- a/hls4ml/converters/pytorch/core.py +++ b/hls4ml/converters/pytorch/core.py @@ -41,7 +41,7 @@ def parse_activation_layer(operation, layer_name, input_names, input_shapes, nod # if layer['class_name'] != 'Activation': # layer['activation'] = layer['class_name'] - if node.op == "call_module": + if node.op == 'call_module': if layer['class_name'] == 'ReLU' or layer['class_name'] == 'Sigmoid': layer['class_name'] = 'Activation' if layer['class_name'] == 'LeakyReLU': @@ -57,23 +57,23 @@ def parse_activation_layer(operation, layer_name, input_names, input_shapes, nod if layer['activ_param'] < 0: raise Exception('negative threshold values not supported') - if hasattr(node, "dim"): + if hasattr(node, 'dim'): layer['axis'] = class_object.dim else: if layer['class_name'] == 'ReLU' or layer['class_name'] == 'Sigmoid': layer['class_name'] = 'Activation' if layer['class_name'] == 'LeakyReLU': - layer['activ_param'] = node.kwargs["negative_slope"] + layer['activ_param'] = node.kwargs['negative_slope'] if layer['class_name'] == 'ELU': - layer['activ_param'] = node.kwargs["alpha"] + layer['activ_param'] = node.kwargs['alpha'] if layer['class_name'] == 'Threshold': layer['activ_param'] = node.args[1] if layer['activ_param'] < 0: raise Exception('negative threshold values not supported') layer['class_name'] = 'ThresholdedReLU' layer['activation'] = 'ThresholdedReLU' - if "dim" in node.kwargs: - layer['axis'] = node.kwargs["dim"] + if 'dim' in node.kwargs: + layer['axis'] = node.kwargs['dim'] output_shape = input_shapes[0] return layer, output_shape @@ -93,7 +93,7 @@ def parse_batchnorm_layer(operation, layer_name, input_names, input_shapes, node layer['name'] = layer_name # batchnorm para - if node.op == "call_module": + if node.op == 'call_module': layer['epsilon'] = class_object.eps layer['use_gamma'] = layer['use_beta'] = class_object.affine diff --git a/hls4ml/converters/pytorch/pooling.py b/hls4ml/converters/pytorch/pooling.py index 05fe9626f2..133178d7c8 100644 --- a/hls4ml/converters/pytorch/pooling.py +++ b/hls4ml/converters/pytorch/pooling.py @@ -21,7 +21,7 @@ def parse_pooling_layer(operation, layer_name, input_names, input_shapes, node, layer['name'] = layer_name layer['data_format'] = 'channels_first' # Pytorch default (can't change) - if node.op == "call_module" and "Avg" in operation: + if node.op == 'call_module' and 'Avg' in operation: if class_object.count_include_pad: layer['count_pad'] = 'true' else: @@ -31,7 +31,7 @@ def parse_pooling_layer(operation, layer_name, input_names, input_shapes, node, if int(layer['class_name'][-2]) == 1: (layer['n_in'], layer['n_filt']) = parse_data_format(input_shapes[0], layer['data_format']) - if node.op == "call_module": + if node.op == 'call_module': layer['pool_width'] = ( class_object.kernel_size if not type(class_object.kernel_size) is tuple else class_object.kernel_size[0] ) @@ -64,7 +64,7 @@ def parse_pooling_layer(operation, layer_name, input_names, input_shapes, node, elif int(layer['class_name'][-2]) == 2: (layer['in_height'], layer['in_width'], layer['n_filt']) = parse_data_format(input_shapes[0], layer['data_format']) - if node.op == "call_module": + if node.op == 'call_module': if type(class_object.stride) is tuple: layer['stride_height'] = class_object.stride[0] layer['stride_width'] = class_object.stride[1] diff --git a/hls4ml/converters/pytorch_to_hls.py b/hls4ml/converters/pytorch_to_hls.py index 7a454a6c04..ddddbc04c7 
100644 --- a/hls4ml/converters/pytorch_to_hls.py +++ b/hls4ml/converters/pytorch_to_hls.py @@ -6,7 +6,7 @@ class PyTorchModelReader: """ - Pytorch data reader to be used for extracting relevant information during conversion. + PyTorch reader to extract weights data. """ def __init__(self, config): @@ -15,26 +15,6 @@ def __init__(self, config): self.input_shape = config['InputShape'] def get_weights_data(self, layer_name, var_name): - """Get weights data from layers. - - The hls layer classes are based on Keras's default parameters. - Thus, this function will also need to account for some differences - between Keras and Pytorch terminology. - - Parameters - ---------- - layer_name : string - layer's name in the Pytorch model - var_name : string - variable to be extracted - - Returns - ------- - data : numpy array - extracted weights data - - """ - data = None # Parameter mapping from pytorch to keras @@ -63,8 +43,8 @@ def get_weights_data(self, layer_name, var_name): # if a layer is reused in the model, torch.FX will append a "_n" for the n-th use # have to snap that off to find the tensors - if layer_name.split("_")[-1].isdigit() and len(layer_name.split("_")) > 1: - layer_name = "_".join(layer_name.split("_")[:-1]) + if layer_name.split('_')[-1].isdigit() and len(layer_name.split('_')) > 1: + layer_name = '_'.join(layer_name.split('_')[:-1]) if layer_name + '.' + var_name in self.state_dict: data = self.state_dict[layer_name + '.' + var_name].numpy() @@ -95,10 +75,6 @@ def __init__(self, config): self.state_dict = self.torch_model.state_dict() - data = {} # this is just to shut up pre-commit, this function is broken somehow - - return data - def get_weights_data(data_reader, layer_name, var_name): if not isinstance(var_name, (list, tuple)): @@ -155,20 +131,16 @@ def decorator(function): def pytorch_to_hls(config): - """Convert Pytorch model to hls model from configuration. + """Convert PyTorch model to hls4ml ModelGraph. - Parameters - ---------- - config: dict - pytorch configuration from yaml file or passed through API. + Args: + config (dict): The conversion config - Returns - ------- - ModelGraph : hls4ml model object. + Raises: + Exception: On unsupported features of the model. - Notes - ----- - Only sequential pytorch models are supported for now. + Returns: + ModelGraph: hls4ml model object. """ # This is a list of dictionaries to hold all the layer info we need to generate HLS @@ -196,13 +168,7 @@ def pytorch_to_hls(config): # All supported layers supported_layers = get_supported_pytorch_layers() + skip_layers - # Map inputs of skipped and split (activation) layers - # inputs_map = {} - - input_layers = None - # output_layers = None - - # layer_config = None + input_layers = [] # Output shape tracking output_shapes = {} @@ -216,14 +182,14 @@ def pytorch_to_hls(config): for node in traced_model.graph.nodes: # If part of a nn.Sequntial, the node name will start with an "_" which messes up the parsing - if node.name[0] == "_": + if node.name[0] == '_': node.name = 'layer' + node.name if node.op == 'call_module': # modules that are part of a torch.nn.Sequential with name 'name' have target names 'name.x', # where x is an integer numbering the elements of the Sequential - if "." in node.target: - class_object = children[node.target.split(".")[0]][int(node.target.split(".")[1])] + if '.' 
in node.target: + class_object = children[node.target.split('.')[0]][int(node.target.split('.')[1])] else: class_object = children[node.target] @@ -253,15 +219,15 @@ def pytorch_to_hls(config): layer_counter += 1 # parse info from class object - input_names = tuple([str(i) for i in node.args]) + input_names = [str(i) for i in node.args] input_shapes = [output_shapes[str(i)] for i in node.args] # for Conv layers if 'Conv' in pytorch_class: if not class_object.padding_mode == 'zeros': - raise Exception('padding modes other than "zeros" not implemented yet') + raise Exception('Padding modes other than "zeros" not implemented yet') if not class_object.groups == 1: - raise Exception('non-default options for groups not implemented yet') + raise Exception('Non-default options for groups not implemented yet') # Process the layer layer, output_shape = layer_handlers[pytorch_class]( @@ -288,10 +254,11 @@ def pytorch_to_hls(config): input_layer = {} input_layer['name'] = node.name input_layer['class_name'] = 'InputLayer' - input_layer['input_shape'] = input_shapes[n_inputs][1:] + input_layer['input_shape'] = list(input_shapes[n_inputs][1:]) layer_list.insert(n_inputs, input_layer) output_shapes[input_layer['name']] = input_shapes[n_inputs] + input_layers.append(input_layer['name']) n_inputs += 1 layer_counter += 1 @@ -301,8 +268,8 @@ def pytorch_to_hls(config): # operations that appear repeatedly have '_n' appended to their name for the nth repetition operation = node.name - if node.name.split("_")[-1].isdigit(): - operation = "_".join(node.name.split("_")[:-1]) + if node.name.split('_')[-1].isdigit(): + operation = '_'.join(node.name.split('_')[:-1]) if operation in layer_name_map: operation = layer_name_map[operation] @@ -310,7 +277,7 @@ def pytorch_to_hls(config): # only a limited number of functions are supported if operation not in supported_layers: raise Exception(f'Unsupported function {operation}') - if operation == "PReLU" or operation == "batch_norm" or operation == "conv1d" or operation == "conv2d": + if operation == 'PReLU' or operation == 'batch_norm' or operation == 'conv1d' or operation == 'conv2d': raise Exception( f'Function {operation} cannot be parsed as torch.nn.functional. Use the torch.nn implementation instead' ) @@ -319,11 +286,8 @@ def pytorch_to_hls(config): layer_counter += 1 - input_names = tuple([str(node.args[0])]) - if 'Cat' in operation: - input_names = tuple([str(i) for i in node.args[0]]) - - input_shapes = [list(output_shapes[str(i)]) for i in list(input_names)] + input_names = [str(i) for i in node.all_input_nodes] + input_shapes = [list(output_shapes[str(i)]) for i in input_names] # Process the layer layer, output_shape = layer_handlers[operation]( @@ -339,7 +303,7 @@ def pytorch_to_hls(config): if node.op == 'get_attr': # Deals with tensors that are member variables of the model class # We insert these tensors are input layer nodes into the hls4ML model graph - if "." not in node.target: + if '.' 
not in node.target: obj = getattr(model, node.name) else: obj = getattr(children[node.target.split('.')[0], node.name]) @@ -351,6 +315,7 @@ def pytorch_to_hls(config): layer_list.insert(n_inputs, input_layer) output_shapes[input_layer['name']] = [None] + list(obj.size()) + input_layers.append(input_layer['name']) n_inputs += 1 layer_counter += 1 @@ -360,8 +325,8 @@ def pytorch_to_hls(config): # operations that appear repeatedly have '_n' appended to their name for the nth repetition operation = node.name - if node.name.split("_")[-1].isdigit(): - operation = "_".join(node.name.split("_")[:-1]) + if node.name.split('_')[-1].isdigit(): + operation = '_'.join(node.name.split('_')[:-1]) if operation in layer_name_map: operation = layer_name_map[operation] @@ -375,12 +340,12 @@ def pytorch_to_hls(config): layer_counter += 1 if 'View' in operation: - input_names = tuple([str(node.args[0])]) + input_names = [str(node.args[0])] else: - input_names = tuple([str(i) for i in node.args]) + input_names = [str(i) for i in node.args] # Process the layer - input_shapes = [list(output_shapes[str(i)]) for i in list(input_names)] + input_shapes = [list(output_shapes[str(i)]) for i in input_names] layer, output_shape = layer_handlers[operation]( operation, layer_name, input_names, input_shapes, node, None, reader, config @@ -392,9 +357,8 @@ def pytorch_to_hls(config): assert output_shape is not None output_shapes[layer['name']] = output_shape - ################# - # Generate HLS - ################# + if len(input_layers) == 0: + input_layers = None print('Creating HLS model') hls_model = ModelGraph(config, layer_list, inputs=input_layers) From fb036bc1e39cb0f08a5f52e5830f6a39e95117b3 Mon Sep 17 00:00:00 2001 From: Vladimir Loncar Date: Sun, 11 Jun 2023 19:22:55 +0200 Subject: [PATCH 04/10] Support Tanh activation --- hls4ml/converters/pytorch/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hls4ml/converters/pytorch/core.py b/hls4ml/converters/pytorch/core.py index 792830821e..7121dbe289 100644 --- a/hls4ml/converters/pytorch/core.py +++ b/hls4ml/converters/pytorch/core.py @@ -28,7 +28,7 @@ def parse_linear_layer(operation, layer_name, input_names, input_shapes, node, c return layer, output_shape -activation_layers = ['Softmax', 'ReLU', 'LeakyReLU', 'Threshold', 'ELU', 'PReLU', 'Sigmoid'] +activation_layers = ['Softmax', 'ReLU', 'LeakyReLU', 'Threshold', 'ELU', 'PReLU', 'Sigmoid', 'Tanh'] @pytorch_handler(*activation_layers) From d07ea4d43f58122ea190da7cbbf1bca5c650ae47 Mon Sep 17 00:00:00 2001 From: Vladimir Loncar Date: Sun, 11 Jun 2023 19:27:38 +0200 Subject: [PATCH 05/10] Support merging layers (cat, add, sub, etc) --- hls4ml/converters/pytorch/function.py | 40 ------------ hls4ml/converters/pytorch/merge.py | 61 +++++++++++++++++ hls4ml/model/layers.py | 8 +-- .../passes/convert_to_channels_last.py | 3 +- test/pytest/test_merge_pytorch.py | 65 +++++++++++++++++++ 5 files changed, 131 insertions(+), 46 deletions(-) delete mode 100644 hls4ml/converters/pytorch/function.py create mode 100644 hls4ml/converters/pytorch/merge.py create mode 100644 test/pytest/test_merge_pytorch.py diff --git a/hls4ml/converters/pytorch/function.py b/hls4ml/converters/pytorch/function.py deleted file mode 100644 index 89fc0ecde6..0000000000 --- a/hls4ml/converters/pytorch/function.py +++ /dev/null @@ -1,40 +0,0 @@ -from hls4ml.converters.pytorch_to_hls import pytorch_handler - -merge_layers = ['Add', 'Subtract', 'Multiply', 'Average', 'Maximum', 'Minimum', 'Cat', 'Dot'] - - 
-@pytorch_handler(*merge_layers) -def parse_merge_layer(operation, layer_name, input_names, input_shapes, node, class_object, data_reader, config): - assert operation in merge_layers - - layer = {} - if operation == "Cat": - layer['class_name'] = 'Concatenate' - else: - layer['class_name'] = operation - layer['name'] = layer_name - - layer['op'] = operation - - if input_names is not None: - layer['inputs'] = input_names - - output_shape = input_shapes[0][:] - if layer['class_name'] == 'Concatenate': - rank = len(input_shapes[0][1:]) - if rank > 3: - raise Exception('ERROR: Concatenation of tensors with rank > 3 is not yet supported.') - layer['op'] = layer['class_name'].lower() + f'{rank}d' - layer['axis'] = int(node.args[1]) - output_shape[layer['axis']] += input_shapes[1][layer['axis']] - elif layer['class_name'] == 'Dot': - rank = len(input_shapes[0][1:]) - if rank > 1: - raise Exception('ERROR: Dot of tensors with rank > 1 is not yet supported.') - layer['op'] = layer['class_name'].lower() + f'{rank}d' - else: - layer['class_name'] = 'Merge' - if len(layer['inputs']) > 2: - raise Exception('ERROR: Merging more than two tensors is not yet supported.') - - return layer, output_shape diff --git a/hls4ml/converters/pytorch/merge.py b/hls4ml/converters/pytorch/merge.py new file mode 100644 index 0000000000..1f1e11dcb7 --- /dev/null +++ b/hls4ml/converters/pytorch/merge.py @@ -0,0 +1,61 @@ +from hls4ml.converters.pytorch_to_hls import pytorch_handler + +concat_layers = ['cat', 'concat', 'concatenate'] + + +@pytorch_handler(*concat_layers) +def parse_concat_layer(operation, layer_name, input_names, input_shapes, node, class_object, data_reader, config): + assert operation in concat_layers + + layer = {} + layer['class_name'] = 'Concatenate' + layer['name'] = layer_name + layer['op'] = 'concatenate' + layer['inputs'] = input_names + + if len(layer['inputs']) > 2: + raise Exception('ERROR: Merging more than two tensors is not yet supported.') + + rank = len(input_shapes[0][1:]) + if rank > 3: + raise Exception('ERROR: Concatenation of tensors with rank > 3 is not yet supported.') + layer['op'] = layer['class_name'].lower() + f'{rank}d' + layer['axis'] = node.kwargs.get('dim', 0) + + output_shape = input_shapes[0][:] + output_shape[layer['axis']] += input_shapes[1][layer['axis']] + + return layer, output_shape + + +add_layers = ['add'] +multiply_layers = ['mul', 'multiply'] +subtract_layers = ['sub', 'subtract'] +min_layers = ['fmin', 'minimum'] +max_layers = ['fmax', 'maximum'] +merge_layers = [*add_layers, *multiply_layers, *subtract_layers, *min_layers, *max_layers] + + +@pytorch_handler(*merge_layers) +def parse_merge_layer(operation, layer_name, input_names, input_shapes, node, class_object, data_reader, config): + assert operation in merge_layers + + layer = {} + layer['class_name'] = 'Merge' + layer['name'] = layer_name + if operation in add_layers: + layer['op'] = 'add' + elif operation in multiply_layers: + layer['op'] = 'multiply' + elif operation in subtract_layers: + layer['op'] = 'subtract' + elif operation in min_layers: + layer['op'] = 'minimum' + elif operation in max_layers: + layer['op'] = 'maximum' + + layer['inputs'] = input_names + + output_shape = input_shapes[0][:] + + return layer, output_shape diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index f1c84aedaf..3154d62291 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -845,11 +845,11 @@ def initialize(self): inp1 = self.get_input_variable(self.inputs[0]) inp2 = 
self.get_input_variable(self.inputs[1]) if np.prod(inp2.shape) > np.prod(inp1.shape): - shape = inp2.shape - dims = inp2.dim_names + shape = inp2.shape.copy() + dims = inp2.dim_names.copy() else: - shape = inp1.shape - dims = inp1.dim_names + shape = inp1.shape.copy() + dims = inp1.dim_names.copy() self.add_output_variable(shape, dims) diff --git a/hls4ml/model/optimizer/passes/convert_to_channels_last.py b/hls4ml/model/optimizer/passes/convert_to_channels_last.py index 33f3fda0ef..22371eefc3 100644 --- a/hls4ml/model/optimizer/passes/convert_to_channels_last.py +++ b/hls4ml/model/optimizer/passes/convert_to_channels_last.py @@ -97,8 +97,7 @@ def transform(self, model, node): input_shape = node.get_output_variable().shape input_shape.append(input_shape.pop(0)) node.get_output_variable().shape = input_shape - dim_names = node.get_output_variable().dim_names - dim_names.append(dim_names.pop(0)) + dim_names = [f'N_INPUT_{i}_{node.index}' for i in range(1, len(input_shape) + 1)] node.get_output_variable().dim_names = dim_names node.channels_last_converted = True diff --git a/test/pytest/test_merge_pytorch.py b/test/pytest/test_merge_pytorch.py new file mode 100644 index 0000000000..2be7fdcd07 --- /dev/null +++ b/test/pytest/test_merge_pytorch.py @@ -0,0 +1,65 @@ +from pathlib import Path + +import numpy as np +import pytest +import torch +import torch.nn as nn + +import hls4ml + +test_root_path = Path(__file__).parent + + +class MergeModule(nn.Module): + def __init__(self, merge_op): + super().__init__() + self.op = getattr(torch, merge_op) + + def forward(self, x, y): + return self.op(x, y) + + +class ConcatModule(nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x, y): + # The default, dim=0, is the batch dimension, we set it to -1 to be equivalent to Keras's default + return torch.cat([x, y], dim=-1) + + +@pytest.mark.parametrize('merge_op', ['cat', 'add', 'mul', 'sub', 'minimum', 'maximum']) +@pytest.mark.parametrize('io_type', ['io_parallel', 'io_stream']) +@pytest.mark.parametrize('backend', ['Vivado', 'Vitis', 'Quartus']) +def test_merge(merge_op, io_type, backend): + input_shape = (3, 10, 10) + input_shape_cl = input_shape[1:] + input_shape[:1] + + if merge_op == 'cat': # Meow! 
+ model = ConcatModule() + else: + model = MergeModule(merge_op) + model.eval() + + batch_input_shape = (None,) + input_shape + config = hls4ml.utils.config_from_pytorch_model( + model, default_precision='ap_fixed<32,16>', inputs_channel_last=True, transpose_outputs=False + ) + output_dir = str(test_root_path / f'hls4mlprj_merge_pytorch_{merge_op}_{backend}_{io_type}') + hls_model = hls4ml.converters.convert_from_pytorch_model( + model, + [batch_input_shape, batch_input_shape], + hls_config=config, + output_dir=output_dir, + io_type=io_type, + backend=backend, + ) + hls_model.compile() + + X_input1 = np.random.rand(100, *input_shape_cl) + X_input2 = np.random.rand(100, *input_shape_cl) + + pytorch_prediction = model(torch.Tensor(X_input1), torch.Tensor(X_input2)).detach().numpy() + hls_prediction = hls_model.predict([X_input1, X_input2]).reshape(pytorch_prediction.shape) + + np.testing.assert_allclose(hls_prediction, pytorch_prediction, rtol=0, atol=0.001) From 287c5f2a43c7c196431336d9d54abbb6489c5ce0 Mon Sep 17 00:00:00 2001 From: Vladimir Loncar Date: Sun, 11 Jun 2023 19:33:46 +0200 Subject: [PATCH 06/10] Support Linear op over multidimensional input --- hls4ml/converters/pytorch/core.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hls4ml/converters/pytorch/core.py b/hls4ml/converters/pytorch/core.py index 7121dbe289..e45e3c9fae 100644 --- a/hls4ml/converters/pytorch/core.py +++ b/hls4ml/converters/pytorch/core.py @@ -23,7 +23,8 @@ def parse_linear_layer(operation, layer_name, input_names, input_shapes, node, c else: layer['use_bias'] = True - output_shape = [input_shapes[0][0], layer['n_out']] + output_shape = input_shapes[0][:] + output_shape[-1] = layer['n_out'] return layer, output_shape From 67bf9a5967557193eb53d34522c290cdd63e18b3 Mon Sep 17 00:00:00 2001 From: Vladimir Loncar Date: Thu, 15 Jun 2023 12:57:12 +0200 Subject: [PATCH 07/10] Properly handle boolean attributes in templates --- hls4ml/backends/template.py | 14 ++++++++++---- hls4ml/model/layers.py | 2 ++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/hls4ml/backends/template.py b/hls4ml/backends/template.py index 4ce6c5c8e6..5da9bce04a 100644 --- a/hls4ml/backends/template.py +++ b/hls4ml/backends/template.py @@ -26,6 +26,14 @@ def format(self, node): def get_name(self): return self.name + def _default_params(self, node): + params = {} + params.update(node.attributes) + # Convert all bool attributes to lowercase strings + params = {key: str(val).lower() if type(val) == bool else val for key, val in params.items()} + + return params + class LayerConfigTemplate(Template): def __init__(self, layer_class): @@ -37,8 +45,7 @@ def __init__(self, layer_class): super().__init__(name, layer_class, 'config_cpp') def _default_config_params(self, layer): - params = {} - params.update(layer.attributes) + params = self._default_params(layer) params['iotype'] = layer.model.config.get_config_value('IOType') params['reuse'] = layer.get_attr('reuse_factor') @@ -59,8 +66,7 @@ def __init__(self, layer_class, include_header=None): self.include_header = include_header def _default_function_params(self, layer): - params = {} - params.update(layer.attributes) + params = self._default_params(layer) params['config'] = f'config{layer.index}' params['input_t'] = layer.get_input_variable().type.name params['output_t'] = layer.get_output_variable().type.name diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index 3154d62291..e0ef7736e4 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ 
-626,6 +626,7 @@ class Pooling1D(Layer): Attribute('stride_width'), Attribute('pad_left'), Attribute('pad_right'), + Attribute('count_pad', value_type=bool, default=False), ChoiceAttribute('pool_op', ['Max', 'Average'], configurable=False), ] @@ -655,6 +656,7 @@ class Pooling2D(Layer): Attribute('pad_bottom'), Attribute('pad_left'), Attribute('pad_right'), + Attribute('count_pad', value_type=bool, default=False), ChoiceAttribute('pool_op', ['Max', 'Average'], configurable=False), ] From 49dabdf3d82cc06c79aaa1928421209a7afff646 Mon Sep 17 00:00:00 2001 From: Vladimir Loncar Date: Thu, 15 Jun 2023 21:52:24 +0200 Subject: [PATCH 08/10] Change axis when transforming to channels_last --- .../passes/convert_to_channels_last.py | 18 +++++++++++++++++- test/pytest/test_merge_pytorch.py | 19 +++++++++++++------ 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/hls4ml/model/optimizer/passes/convert_to_channels_last.py b/hls4ml/model/optimizer/passes/convert_to_channels_last.py index 22371eefc3..c37ee5f68c 100644 --- a/hls4ml/model/optimizer/passes/convert_to_channels_last.py +++ b/hls4ml/model/optimizer/passes/convert_to_channels_last.py @@ -2,7 +2,7 @@ # Based on https://github.com/fastmachinelearning/qonnx/blob/ # 12c96a3ded06beacab08e0f554e4ed014476c0aa/src/qonnx/transformation/channels_last.py -from hls4ml.model.layers import Input +from hls4ml.model.layers import Concatenate, Input from hls4ml.model.optimizer import OptimizerPass @@ -62,6 +62,22 @@ def transform(self, model, node): except AttributeError: pass + # Adjust axis of operation + if isinstance(node, Concatenate): + old_axis = node.get_attr('axis') + if len(outshape) == 2: + if old_axis == -1 or old_axis == 2: + node.set_attr('axis', 1) + else: + node.set_attr('axis', 2) + elif len(outshape) == 3: + if old_axis == 3 or old_axis == -1: + node.set_attr('axis', 1) + elif old_axis == 2 or old_axis == -2: + node.set_attr('axis', 2) # Not required, but left for clarity + else: + node.set_attr('axis', 3) + # Adjust output shape outdims = node.get_output_variable().dim_names if len(outshape) == 2: diff --git a/test/pytest/test_merge_pytorch.py b/test/pytest/test_merge_pytorch.py index 2be7fdcd07..17aa4d075e 100644 --- a/test/pytest/test_merge_pytorch.py +++ b/test/pytest/test_merge_pytorch.py @@ -24,8 +24,9 @@ def __init__(self): super().__init__() def forward(self, x, y): - # The default, dim=0, is the batch dimension, we set it to -1 to be equivalent to Keras's default - return torch.cat([x, y], dim=-1) + # In this test the shape will be (batch, 3, 10, 10), but since we test with channels_last data format, this + # will be equivalent to the Keras default of concatenation along the last axis (axis=-1) + return torch.cat([x, y], dim=1) @pytest.mark.parametrize('merge_op', ['cat', 'add', 'mul', 'sub', 'minimum', 'maximum']) @@ -33,7 +34,6 @@ def forward(self, x, y): @pytest.mark.parametrize('backend', ['Vivado', 'Vitis', 'Quartus']) def test_merge(merge_op, io_type, backend): input_shape = (3, 10, 10) - input_shape_cl = input_shape[1:] + input_shape[:1] if merge_op == 'cat': # Meow! 
model = ConcatModule() @@ -56,10 +56,17 @@ def test_merge(merge_op, io_type, backend): ) hls_model.compile() - X_input1 = np.random.rand(100, *input_shape_cl) - X_input2 = np.random.rand(100, *input_shape_cl) + X_input1 = np.random.rand(100, *input_shape) + X_input2 = np.random.rand(100, *input_shape) + + X_input1_cl = np.ascontiguousarray(np.transpose(X_input1, axes=[0, 2, 3, 1])) + X_input2_cl = np.ascontiguousarray(np.transpose(X_input2, axes=[0, 2, 3, 1])) pytorch_prediction = model(torch.Tensor(X_input1), torch.Tensor(X_input2)).detach().numpy() - hls_prediction = hls_model.predict([X_input1, X_input2]).reshape(pytorch_prediction.shape) + hls_prediction = hls_model.predict([X_input1_cl, X_input2_cl]) + + output_shape = pytorch_prediction.shape + output_shape_cl = [output_shape[0], output_shape[2], output_shape[3], output_shape[1]] + hls_prediction = np.transpose(hls_prediction.reshape(output_shape_cl), axes=[0, 3, 1, 2]) np.testing.assert_allclose(hls_prediction, pytorch_prediction, rtol=0, atol=0.001) From 578017889fb23b306a7a84670936d2f659fdd6d1 Mon Sep 17 00:00:00 2001 From: Vladimir Loncar Date: Thu, 15 Jun 2023 23:11:04 +0200 Subject: [PATCH 09/10] Reorganize code to make Jan happy --- .../passes/convert_to_channels_last.py | 51 +++++++++---------- 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/hls4ml/model/optimizer/passes/convert_to_channels_last.py b/hls4ml/model/optimizer/passes/convert_to_channels_last.py index c37ee5f68c..cef4d947d1 100644 --- a/hls4ml/model/optimizer/passes/convert_to_channels_last.py +++ b/hls4ml/model/optimizer/passes/convert_to_channels_last.py @@ -20,29 +20,32 @@ def transform(self, model, node): node.channels_last_converted = True return False outshape = node.get_output_variable().shape - # if inputs are not yet transposed into channels_last, add transpose layer - if ( - not model.config.config['HLSConfig']['Model']['InputsChannelLast'] - and isinstance(node, Input) - and len(outshape) > 1 - ): - # Add transpose for input layer - input = node.name - if len(outshape) == 2: - attributes = {'perm': [1, 0]} - else: - attributes = {'perm': [1, 2, 0]} - transpose_node = model.make_node( - 'Transpose', f'transpose_input_for_{node.get_attr("name")}', attributes, [input] - ) - transpose_node.set_attr('name', f'transpose_input_for_{node.get_attr("name")}') - transpose_node.channels_last_converted = True + if isinstance(node, Input): + # if inputs are not yet transposed into channels_last, add transpose layer + if not model.config.config['HLSConfig']['Model']['InputsChannelLast'] and len(outshape) > 1: + # Add transpose for input layer + input = node.name + if len(outshape) == 2: + attributes = {'perm': [1, 0]} + else: + attributes = {'perm': [1, 2, 0]} - model.insert_node(transpose_node) + transpose_node = model.make_node( + 'Transpose', f'transpose_input_for_{node.get_attr("name")}', attributes, [input] + ) + transpose_node.set_attr('name', f'transpose_input_for_{node.get_attr("name")}') + transpose_node.channels_last_converted = True - if not isinstance(node, Input): - # Transpose tensors tensors + model.insert_node(transpose_node) + else: + input_shape = node.get_output_variable().shape + input_shape.append(input_shape.pop(0)) + node.get_output_variable().shape = input_shape + dim_names = [f'N_INPUT_{i}_{node.index}' for i in range(1, len(input_shape) + 1)] + node.get_output_variable().dim_names = dim_names + else: + # Transpose weight tensors tensors = ['weight', 'depthwise', 'pointwise', 'zero_bias', 'scale', 'recurrent_weight'] for tensor 
in tensors:
+                try:
@@ -89,7 +92,7 @@ def transform(self, model, node):
                 dims = [outdims[1], outdims[2], outdims[0]]
                 node.add_output_variable(shape, dims)
 
-            # add transpose for output layer
+            # Add transpose for output layer
             if (
                 node.get_attr("name") in model.outputs
                 and len(outshape) > 1
@@ -109,12 +112,6 @@ def transform(self, model, node):
                     transpose_node.channels_last_converted = True
 
                     model.insert_node(transpose_node)
-            else:
-                input_shape = node.get_output_variable().shape
-                input_shape.append(input_shape.pop(0))
-                node.get_output_variable().shape = input_shape
-                dim_names = [f'N_INPUT_{i}_{node.index}' for i in range(1, len(input_shape) + 1)]
-                node.get_output_variable().dim_names = dim_names
 
         node.channels_last_converted = True
         return True

From 6f28a410b5e2c0c35f5a66dbd64adada1a464165 Mon Sep 17 00:00:00 2001
From: Vladimir Loncar
Date: Thu, 15 Jun 2023 23:37:48 +0200
Subject: [PATCH 10/10] Fix count_pad type in pytorch converter

---
 hls4ml/converters/pytorch/pooling.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/hls4ml/converters/pytorch/pooling.py b/hls4ml/converters/pytorch/pooling.py
index 133178d7c8..3076f1f38a 100644
--- a/hls4ml/converters/pytorch/pooling.py
+++ b/hls4ml/converters/pytorch/pooling.py
@@ -23,11 +23,11 @@ def parse_pooling_layer(operation, layer_name, input_names, input_shapes, node,
     layer['data_format'] = 'channels_first'  # Pytorch default (can't change)
     if node.op == 'call_module' and 'Avg' in operation:
         if class_object.count_include_pad:
-            layer['count_pad'] = 'true'
+            layer['count_pad'] = True
         else:
-            layer['count_pad'] = 'false'
+            layer['count_pad'] = False
     else:
-        layer['count_pad'] = 'true'
+        layer['count_pad'] = True
 
     if int(layer['class_name'][-2]) == 1:
         (layer['n_in'], layer['n_filt']) = parse_data_format(input_shapes[0], layer['data_format'])
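
For reference, a minimal end-to-end sketch of how the converter flow introduced by this patch series is driven from user code. The toy model, input shape, output directory, and backend choice below are illustrative assumptions for this note, not values fixed by the patches:

    import numpy as np
    import torch
    import torch.nn as nn

    import hls4ml

    # Small channels_first PyTorch model, similar to the ones in the tests above
    model = torch.nn.Sequential(nn.Conv1d(2, 2, 3, padding=1), nn.ReLU())
    model.eval()

    # For io_parallel, hls4ml can insert the channels_first -> channels_last
    # transpose of the input and transpose the model output back afterwards
    config = hls4ml.utils.config_from_pytorch_model(model, inputs_channel_last=False, transpose_outputs=True)

    hls_model = hls4ml.converters.convert_from_pytorch_model(
        model,
        (None, 2, 4),  # the first element (the batch size) must be None
        hls_config=config,
        output_dir='my-hls-test',
        backend='Vivado',
        io_type='io_parallel',
    )
    hls_model.compile()

    X_input = np.random.rand(1, 2, 4)
    hls_prediction = hls_model.predict(X_input)

As in the tests above, io_stream users instead transpose the input to channels_last themselves and pass inputs_channel_last=True, since the automatic transpose layers are not supported for that io_type.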