Merge pull request #23614 from Abdurrahheem:lstm_layout_attribute
LSTM ONNX Layout Attribute Support #23614 

### Explanation

This PR contains the changes necessary to support the `layout` attribute. The attribute is present in [ONNX](https://github.com/onnx/onnx/blob/main/docs/Operators.md#lstm) and [Torch](https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html#lstm) (in Torch it is named `batch_first=True`). When `layout = 1`, the input to the LSTM layer is expected to have the batch dimension first, `[batch_size, sequence_length, features]`, as opposed to the default `layout = 0`, `[sequence_length, batch_size, features]`.
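
Below is a minimal usage sketch of the new behaviour from the C++ side. The model file name `lstm_layout1.onnx` is a hypothetical placeholder for any ONNX LSTM exported with `layout = 1`; shapes are example values.

```cpp
#include <opencv2/dnn.hpp>
#include <iostream>

int main()
{
    // Hypothetical model: an ONNX LSTM exported with layout = 1.
    cv::dnn::Net net = cv::dnn::readNetFromONNX("lstm_layout1.onnx");

    // With layout = 1 the input blob is batch-first:
    // [batch_size, sequence_length, features].
    const int shape[] = {4, 10, 16};
    cv::Mat x(3, shape, CV_32F, cv::Scalar(0.5f));

    net.setInput(x);
    cv::Mat y = net.forward(); // output is batch-first as well
    std::cout << "output dims: " << y.dims << std::endl;
    return 0;
}
```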

### Test Data

Test data and the data generator for this PR are located in [#1063](opencv/opencv_extra#1063).

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
Abdurrahheem authored May 17, 2023
1 parent d2618bf commit d2143bc
Showing 3 changed files with 74 additions and 8 deletions.
56 changes: 50 additions & 6 deletions modules/dnn/src/layers/recurrent_layers.cpp
@@ -113,12 +113,19 @@ class LSTMLayerImpl CV_FINAL : public LSTMLayer
MatShape outTailShape; //shape of single output sample
MatShape outTsShape; //shape of N output samples

enum layout_t : int {
SEQ_BATCH_HID = 0,
BATCH_SEQ_HID = 1
};

bool useTimestampDim;
bool produceCellOutput;
float forgetBias, cellClip;
bool useCellClip, usePeephole;
bool reverse; // If true, go in negative direction along the time axis
bool bidirectional; // If true, produces both forward and reversed directions along time axis
layout_t layout; // If layout == BATCH_SEQ_HID, uses batch_size x seq_length x num_hidden for input and output
// else uses seq_length x batch_size x num_hidden

ActivationFunction f_activation;
ActivationFunction g_activation;
@@ -198,6 +205,7 @@ class LSTMLayerImpl CV_FINAL : public LSTMLayer
}
}
}
layout = (layout_t) params.get<int>("layout", SEQ_BATCH_HID);
useTimestampDim = params.get<bool>("use_timestamp_dim", true);
produceCellOutput = params.get<bool>("produce_cell_output", false);
forgetBias = params.get<float>("forget_bias", 0.0f);
@@ -291,8 +299,13 @@ class LSTMLayerImpl CV_FINAL : public LSTMLayer
if (useTimestampDim)
{
CV_Assert(inp0.size() >= 2 && total(inp0, 2) == _numInp);
_numSamples = inp0[1];
outResShape.push_back(inp0[0]);
if (layout == SEQ_BATCH_HID) {
_numSamples = inp0[1];
outResShape.push_back(inp0[0]);
} else {
_numSamples = inp0[0];
outResShape.push_back(inp0[1]);
}
}
else
{
@@ -349,8 +362,13 @@ class LSTMLayerImpl CV_FINAL : public LSTMLayer
if (useTimestampDim)
{
CV_Assert(inp0.dims >= 2 && (int)inp0.total(2) == numInp);
numTimeStamps = inp0.size[0];
numSamples = inp0.size[1];
if (layout == SEQ_BATCH_HID) {
numTimeStamps = inp0.size[0];
numSamples = inp0.size[1];
} else {
numTimeStamps = inp0.size[1];
numSamples = inp0.size[0];
}
}
else
{
@@ -383,6 +401,21 @@ class LSTMLayerImpl CV_FINAL : public LSTMLayer
outputs_arr.getMatVector(output);
internals_arr.getMatVector(internals);

if (layout == BATCH_SEQ_HID) {
// swap axes 0 and 1 of input x
cv::Mat tmp;
// Since the Python input is 4-dimensional and the C++ input is 3-dimensional,
// we need to process each case differently
if (input[0].dims == 4) {
CV_Assert(input[0].size[3] == 1);
cv::transposeND(input[0], {1, 0, 2, 3}, tmp); // back to seq_len, batch_size, hidden_size format
} else {
cv::transposeND(input[0], {1, 0, 2}, tmp); // back to seq_len, batch_size, hidden_size format
}
input[0] = tmp;
}

Mat cOut = produceCellOutput ? output[0].clone() : Mat();
const bool needYcTransform = !originalBlobs.empty(); // if the producer is onnx
const int numDirs = 1 + static_cast<int>(bidirectional);
@@ -599,7 +632,12 @@ class LSTMLayerImpl CV_FINAL : public LSTMLayer
cInternal.copyTo(cOutTs.rowRange(curRowRange));
}
}

// transpose to match batch-first output
if (layout == BATCH_SEQ_HID) {
cv::Mat tmp;
cv::transposeND(output[0], {1, 0, 2}, tmp);
output[0] = tmp;
}
if (needYcTransform && produceCellOutput)
{
fixCellState(cOut, numDirs);
@@ -618,7 +656,13 @@ class LSTMLayerImpl CV_FINAL : public LSTMLayer

// permute to {0, 2, 1, 3};
cv::Mat newCellState;
cv::transposeND(cOut, {0, 2, 1, 3}, newCellState);
// transpose to match batch-first output
if (layout == BATCH_SEQ_HID) {
cv::transposeND(cOut, {2, 0, 1, 3}, newCellState);
} else {
cv::transposeND(cOut, {0, 2, 1, 3}, newCellState);
}
cOut = newCellState;

if (numDirs == 1)
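
As a self-contained reference, the axis permutation used above can be checked with `cv::transposeND` directly; a short sketch with example shapes (requires OpenCV >= 4.6, where `transposeND` is available):

```cpp
#include <opencv2/core.hpp>
#include <iostream>

int main()
{
    // Batch-first input: [batch_size, seq_length, input_size] = [4, 10, 16]
    const int shape[] = {4, 10, 16};
    cv::Mat batchFirst(3, shape, CV_32F, cv::Scalar(0));

    // The same permutation the layer applies for layout == BATCH_SEQ_HID:
    // axes {1, 0, 2} swap dimensions 0 and 1.
    cv::Mat seqFirst;
    cv::transposeND(batchFirst, {1, 0, 2}, seqFirst);

    // Prints "10 x 4 x 16": back to [seq_length, batch_size, input_size]
    std::cout << seqFirst.size[0] << " x " << seqFirst.size[1]
              << " x " << seqFirst.size[2] << std::endl;
    return 0;
}
```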
12 changes: 10 additions & 2 deletions modules/dnn/src/onnx/onnx_importer.cpp
@@ -1637,8 +1637,16 @@ void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodePr
CV_Assert(shapeIt != outShapes.end());
const MatShape x_shape = shapeIt->second;

const int seq_length = x_shape[0];
const int batch_size = x_shape[1];
// if layout is 1, swap the batch and sequence dims
const int layout = layerParams.get<int>("layout", 0);
int batch_size, seq_length;
if (layout == 1) {
batch_size = x_shape[0];
seq_length = x_shape[1];
} else {
seq_length = x_shape[0];
batch_size = x_shape[1];
}
const int input_size = x_shape[2];
const int hidden_size = layerParams.get<int>("hidden_size");
const int num_directions = constBlobs[lstm_proto.input(1)].size[0];
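
The importer change only affects which positions of `x_shape` are read as batch and sequence. A standalone sketch of that bookkeeping (illustration only, not OpenCV API; example shapes):

```cpp
#include <array>
#include <cstdio>

// Mirrors parseLSTM above: given the 3D ONNX input shape X and the layout
// attribute, recover (seq_length, batch_size).
static std::array<int, 2> seqAndBatch(const std::array<int, 3>& x_shape, int layout)
{
    if (layout == 1)
        return {x_shape[1], x_shape[0]}; // X is [batch, seq, input]
    return {x_shape[0], x_shape[1]};     // X is [seq, batch, input] (default)
}

int main()
{
    const std::array<int, 3> batchFirst = {8, 20, 32}; // exported with layout = 1
    const auto sb = seqAndBatch(batchFirst, 1);
    std::printf("seq_length=%d batch_size=%d\n", sb[0], sb[1]); // seq_length=20 batch_size=8
    return 0;
}
```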
14 changes: 14 additions & 0 deletions modules/dnn/test/test_onnx_importer.cpp
@@ -1393,6 +1393,20 @@ TEST_P(Test_ONNX_layers, LSTM_init_h0_c0)
applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA);
testONNXModels("lstm_init_h0_c0", npy, 0, 0, false, false, 3);
}
// epsilon is larger because the ONNX output does not match torch/opencv exactly
TEST_P(Test_ONNX_layers, LSTM_layout_seq)
{
if (backend == DNN_BACKEND_CUDA)
applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA);
testONNXModels("lstm_layout_0", npy, 0.005, 0.005, false, false, 3);
}
// epsilon is larger because the ONNX output does not match torch/opencv exactly
TEST_P(Test_ONNX_layers, LSTM_layout_batch)
{
if (backend == DNN_BACKEND_CUDA)
applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA);
testONNXModels("lstm_layout_1", npy, 0.005, 0.005, false, false, 3);
}

TEST_P(Test_ONNX_layers, Pad2d_Unfused)
{
