Commit
Implement multi-dimensional reduction and refactor cuTENSOR support (#…
tbennun authored Mar 21, 2024
1 parent f3172ac commit 811af60
Showing 20 changed files with 691 additions and 90 deletions.
1 change: 1 addition & 0 deletions ReleaseNotes.txt
@@ -6,6 +6,7 @@ Support for new training algorithms:
Support for new network structures:

Support for new layers:
- Multi-dimensional reduction (requires cuTENSOR)

Python front-end:

55 changes: 55 additions & 0 deletions ci_test/unit_tests/test_unit_layer_multidim_reduction.py
@@ -0,0 +1,55 @@
import lbann
import numpy as np
import test_util
import pytest


@test_util.lbann_test(check_gradients=True)
def test_multidim_reduction():
if not lbann.has_feature('CUTENSOR'):
pytest.skip('Test requires LBANN to be built with cuTENSOR')

# Prepare reference output
np.random.seed(20240228)
shape = [25, 3, 4, 5, 6]
x = np.random.rand(*shape).astype(np.float32)
ref = x.sum(axis=(2, 4))

tester = test_util.ModelTester()

x = tester.inputs(x)
reference = tester.make_reference(ref)

# Test layer
# Note that the axes here differ from the NumPy reference: the layer
# indexes per-sample dimensions, so the mini-batch dimension is excluded
# and NumPy axes (2, 4) map to layer axes (1, 3).
y = lbann.MultiDimReduction(x, axes=(1, 3))

# Set test loss
tester.set_loss(lbann.MeanSquaredError(y, reference))
tester.set_check_gradients_tensor(lbann.Square(y))
return tester


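# Gradient checking is disabled here: only the 'sum' mode currently
# supports backpropagation.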
@test_util.lbann_test(check_gradients=False)
def test_multidim_reduction_max():
if not lbann.has_feature('CUTENSOR'):
pytest.skip('Test requires LBANN to be built with cuTENSOR')

# Prepare reference output
np.random.seed(20240228)
shape = [25, 3, 4, 5, 6]
x = np.random.rand(*shape).astype(np.float32)
ref = x.max(axis=(3, 1))

tester = test_util.ModelTester()

x = tester.inputs(x)
reference = tester.make_reference(ref)

# Test layer
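# axes=(2, 0) correspond to NumPy axes (3, 1) above, as the mini-batch
# dimension is excluded.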
y = lbann.MultiDimReduction(x, axes=(2, 0), mode='max')

# Set test loss
tester.set_loss(lbann.MeanSquaredError(y, reference))
return tester
36 changes: 36 additions & 0 deletions docs/layers/transform_layers.rst
@@ -28,6 +28,7 @@
:ref:`Hadamard`, "Entry-wise tensor product"
:ref:`IdentityZero`, "Identity/zero function if layer is unfrozen/frozen."
:ref:`InTopK`, "One-hot vector indicating top-k entries"
:ref:`MultiDimReduction`, "Reduce one or more dimensions of a tensor"
:ref:`Pooling`, "Traverses the spatial dimensions of a data tensor
with a sliding window and applies a reduction operation"
:ref:`Reduction`, "Reduce tensor to scalar"
@@ -369,6 +370,41 @@ Arguments:
________________________________________


.. _MultiDimReduction:

----------------------------------------
MultiDimReduction
----------------------------------------

The :python:`MultiDimReduction` layer reduces one or more dimensions of the
given tensor. The reduction operation is selected with the ``mode``
argument.

It expects one input tensor of order N and a list of up to N axes to
reduce, each in the range [0, N-1], given with respect to the input tensor
dimensions (not including the mini-batch dimension). Therefore, passing
``axes=[0,1,2]`` for a rank-3 tensor reduces the entire tensor to a
scalar.
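
A minimal usage sketch (assuming an input tensor ``x`` with per-sample
shape ``(3, 4, 5, 6)``, as in the unit tests):

.. code-block:: python

   # Sum-reduce the second and fourth per-sample dimensions;
   # each sample's output has shape (3, 5).
   y = lbann.MultiDimReduction(x, axes=(1, 3))

   # Max-reduce the first per-sample dimension (note that only 'sum'
   # currently supports backpropagation).
   z = lbann.MultiDimReduction(x, axes=(0,), mode='max')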

Arguments:

:mode:

(``string``, optional) Reduction operation

Options: ``sum`` (default), ``product``, ``max``, or ``min``.
Only ``sum`` is currently supported for backpropagation.

:axes:

(list of ``uint32``) Tensor dimensions to reduce

Given with respect to the input tensor dimensions, not including the
mini-batch dimension

:ref:`Back to Top<transform-layers>`

________________________________________


.. _Pooling:

----------------------------------------
1 change: 1 addition & 0 deletions include/lbann/layers/transform/CMakeLists.txt
@@ -55,6 +55,7 @@ set_full_path(THIS_DIR_HEADERS
tessellate.hpp
scatter.hpp
gather.hpp
multidim_reduction.hpp
)

if (LBANN_HAS_DISTCONV AND LBANN_HAS_NVSHMEM)
148 changes: 148 additions & 0 deletions include/lbann/layers/transform/multidim_reduction.hpp
@@ -0,0 +1,148 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
// Produced at the Lawrence Livermore National Laboratory.
// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
//
// LLNL-CODE-697807.
// All rights reserved.
//
// This file is part of LBANN: Livermore Big Artificial Neural Network
// Toolkit. For details, see http://software.llnl.gov/LBANN or
// https://github.com/LLNL/LBANN.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you
// may not use this file except in compliance with the License. You may
// obtain a copy of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.
////////////////////////////////////////////////////////////////////////////////

#ifndef LBANN_LAYER_MULTIDIM_REDUCTION_HPP_INCLUDED
#define LBANN_LAYER_MULTIDIM_REDUCTION_HPP_INCLUDED

#include <sstream>
#include <unordered_map>
#include <vector>

#include "lbann/layers/data_type_layer.hpp"

namespace lbann {

enum class multidim_reduction_mode
{
INVALID,
SUM,
PRODUCT,
MAX,
MIN,
};

/** @brief Reduce one or more dimensions of a tensor.
 *
 *  The axes to reduce are given with respect to the input tensor
 *  dimensions, not including the mini-batch dimension.
 */
template <typename TensorDataType, data_layout Layout, El::Device Device>
class multidim_reduction_layer : public data_type_layer<TensorDataType>
{
private:
/** Reduction dimensions. */
std::vector<unsigned int> m_axes;

/** Reduction mode. */
multidim_reduction_mode m_mode;

/** Tensor modes (dimension labels) for the input and output tensors. */
std::vector<int32_t> m_input_modes, m_output_modes;

public:
multidim_reduction_layer(
const std::vector<unsigned int>& axes = {},
multidim_reduction_mode mode = multidim_reduction_mode::SUM);

multidim_reduction_layer* copy() const override
{
return new multidim_reduction_layer(*this);
}

/** @name Serialization */
///@{

template <typename ArchiveT>
void serialize(ArchiveT& ar);

///@}

std::string get_type() const override { return "multidim reduction"; }
data_layout get_data_layout() const override { return Layout; }
El::Device get_device_allocation() const override { return Device; }
bool can_run_inplace() const override { return false; }
int get_backprop_requirements() const override { return ERROR_SIGNALS; }

description get_description() const override
{
auto desc = data_type_layer<TensorDataType>::get_description();
std::string mode_str;
switch (m_mode) {
case multidim_reduction_mode::SUM:
mode_str = "sum";
break;
case multidim_reduction_mode::PRODUCT:
mode_str = "product";
break;
case multidim_reduction_mode::MAX:
mode_str = "max";
break;
case multidim_reduction_mode::MIN:
mode_str = "min";
break;
case multidim_reduction_mode::INVALID:
default:
mode_str = "invalid";
}
desc.add("Mode", mode_str);

std::stringstream axes_str;
for (size_t i = 0; i < m_axes.size(); ++i) {
axes_str << m_axes[i];
if (i != m_axes.size() - 1) {
axes_str << ", ";
}
}
desc.add("Axes", axes_str.str());

return desc;
}

protected:
/** Add layer specific data to prototext */
void write_specific_proto(lbann_data::Layer& proto) const final;

void setup_dims() override;

void fp_compute() override;

void bp_compute() override;
};

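// In translation units that do not instantiate the layer themselves,
// declare extern template instantiations for every supported data type
// and device; the definitions are compiled separately.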
#ifndef LBANN_MULTIDIM_REDUCTION_LAYER_INSTANTIATE
#define PROTO_DEVICE(T, Device) \
extern template class multidim_reduction_layer<T, \
data_layout::DATA_PARALLEL, \
Device>; \
extern template class multidim_reduction_layer<T, \
data_layout::MODEL_PARALLEL, \
Device>
#include "lbann/macros/instantiate_device.hpp"
#undef PROTO_DEVICE
#endif // LBANN_MULTIDIM_REDUCTION_LAYER_INSTANTIATE

} // namespace lbann

#endif // LBANN_LAYER_MULTIDIM_REDUCTION_HPP_INCLUDED
1 change: 1 addition & 0 deletions include/lbann/layers/transform/transform_builders.hpp
@@ -46,6 +46,7 @@ LBANN_DEFINE_LAYER_BUILDER(gaussian);
LBANN_DEFINE_LAYER_BUILDER(hadamard);
LBANN_DEFINE_LAYER_BUILDER(identity_zero);
LBANN_DEFINE_LAYER_BUILDER(in_top_k);
LBANN_DEFINE_LAYER_BUILDER(multidim_reduction);
LBANN_DEFINE_LAYER_BUILDER(permute);
LBANN_DEFINE_LAYER_BUILDER(pooling);
LBANN_DEFINE_LAYER_BUILDER(reduction);
5 changes: 5 additions & 0 deletions include/lbann/utils/CMakeLists.txt
@@ -79,6 +79,7 @@ set_full_path(THIS_DIR_HEADERS
sync_info_helpers.hpp
system_info.hpp
tensor.hpp
tensor_dims_utils.hpp
tensor_impl.hpp
timer.hpp
trainer_file_utils.hpp
@@ -114,6 +115,10 @@ if (LBANN_HAS_CNPY)
list(APPEND THIS_DIR_HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/cnpy_utils.hpp")
endif ()

if (LBANN_HAS_CUTENSOR)
list(APPEND THIS_DIR_HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/cutensor_support.hpp")
endif ()

# Add the subdirectories
add_subdirectory(threads)
add_subdirectory(impl)