Commit
Implement multi-dimensional reduction and refactor cuTENSOR support (#…
tbennun authored Mar 21, 2024
1 parent f3172ac commit 811af60
Showing 20 changed files with 691 additions and 90 deletions.
1 change: 1 addition & 0 deletions ReleaseNotes.txt
@@ -6,6 +6,7 @@ Support for new training algorithms:
Support for new network structures:

Support for new layers:
- Multi-dimensional reduction (requires cuTENSOR)

Python front-end:

55 changes: 55 additions & 0 deletions ci_test/unit_tests/test_unit_layer_multidim_reduction.py
@@ -0,0 +1,55 @@
import lbann
import numpy as np
import test_util
import pytest


@test_util.lbann_test(check_gradients=True)
def test_multidim_reduction():
if not lbann.has_feature('CUTENSOR'):
pytest.skip('Test requires LBANN to be built with cuTENSOR')

# Prepare reference output
np.random.seed(20240228)
shape = [25, 3, 4, 5, 6]
x = np.random.rand(*shape).astype(np.float32)
ref = x.sum(axis=(2, 4))

tester = test_util.ModelTester()

x = tester.inputs(x)
reference = tester.make_reference(ref)

# Test layer
# Note that the axes here differ from the NumPy reference: the layer
# indexes per-sample dimensions, so the mini-batch dimension is excluded
# and NumPy axes (2, 4) map to layer axes (1, 3).
y = lbann.MultiDimReduction(x, axes=(1, 3))

# Set test loss
tester.set_loss(lbann.MeanSquaredError(y, reference))
tester.set_check_gradients_tensor(lbann.Square(y))
return tester


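# Gradient checking is disabled here: only the 'sum' mode currently
# supports backpropagation.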
@test_util.lbann_test(check_gradients=False)
def test_multidim_reduction_max():
if not lbann.has_feature('CUTENSOR'):
pytest.skip('Test requires LBANN to be built with cuTENSOR')

# Prepare reference output
np.random.seed(20240228)
shape = [25, 3, 4, 5, 6]
x = np.random.rand(*shape).astype(np.float32)
ref = x.max(axis=(3, 1))

tester = test_util.ModelTester()

x = tester.inputs(x)
reference = tester.make_reference(ref)

# Test layer
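# axes=(2, 0) correspond to NumPy axes (3, 1) above, as the mini-batch
# dimension is excluded.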
y = lbann.MultiDimReduction(x, axes=(2, 0), mode='max')

# Set test loss
tester.set_loss(lbann.MeanSquaredError(y, reference))
return tester
36 changes: 36 additions & 0 deletions docs/layers/transform_layers.rst
@@ -28,6 +28,7 @@
:ref:`Hadamard`, "Entry-wise tensor product"
:ref:`IdentityZero`, "Identity/zero function if layer is unfrozen/frozen."
:ref:`InTopK`, "One-hot vector indicating top-k entries"
:ref:`MultiDimReduction`, "Reduce one or more dimensions of a tensor"
:ref:`Pooling`, "Traverses the spatial dimensions of a data tensor
with a sliding window and applies a reduction operation"
:ref:`Reduction`, "Reduce tensor to scalar"
@@ -369,6 +370,41 @@ Arguments:
________________________________________


.. _MultiDimReduction:

----------------------------------------
MultiDimReduction
----------------------------------------

The :python:`MultiDimReduction` layer reduces one or more dimensions of the
given tensor. The reduction operation is selected with the ``mode``
argument.

It expects one input tensor of order N and a list of up to N axes to
reduce, each in the range [0, N-1], given with respect to the input tensor
dimensions (not including the mini-batch dimension). Therefore, passing
``axes=[0,1,2]`` for a rank-3 tensor reduces the entire tensor to a
scalar.
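
A minimal usage sketch (assuming an input tensor ``x`` with per-sample
shape ``(3, 4, 5, 6)``, as in the unit tests):

.. code-block:: python

   # Sum-reduce the second and fourth per-sample dimensions;
   # each sample's output has shape (3, 5).
   y = lbann.MultiDimReduction(x, axes=(1, 3))

   # Max-reduce the first per-sample dimension (note that only 'sum'
   # currently supports backpropagation).
   z = lbann.MultiDimReduction(x, axes=(0,), mode='max')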

Arguments:

:mode:

(``string``, optional) Reduction operation

Options: ``sum`` (default), ``product``, ``max``, or ``min``.
Only ``sum`` is currently supported for backpropagation.

:axes:

(list of ``uint32``) Tensor dimensions to reduce

Given with respect to the input tensor dimensions, not including the
mini-batch dimension

:ref:`Back to Top<transform-layers>`

________________________________________


.. _Pooling:

----------------------------------------
1 change: 1 addition & 0 deletions include/lbann/layers/transform/CMakeLists.txt
@@ -55,6 +55,7 @@ set_full_path(THIS_DIR_HEADERS
tessellate.hpp
scatter.hpp
gather.hpp
multidim_reduction.hpp
)

if (LBANN_HAS_DISTCONV AND LBANN_HAS_NVSHMEM)
148 changes: 148 additions & 0 deletions include/lbann/layers/transform/multidim_reduction.hpp
@@ -0,0 +1,148 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
// Produced at the Lawrence Livermore National Laboratory.
// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
//
// LLNL-CODE-697807.
// All rights reserved.
//
// This file is part of LBANN: Livermore Big Artificial Neural Network
// Toolkit. For details, see http://software.llnl.gov/LBANN or
// https://github.com/LLNL/LBANN.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you
// may not use this file except in compliance with the License. You may
// obtain a copy of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.
////////////////////////////////////////////////////////////////////////////////

#ifndef LBANN_LAYER_MULTIDIM_REDUCTION_HPP_INCLUDED
#define LBANN_LAYER_MULTIDIM_REDUCTION_HPP_INCLUDED

#include <sstream>
#include <unordered_map>
#include <vector>

#include "lbann/layers/data_type_layer.hpp"

namespace lbann {

enum class multidim_reduction_mode
{
INVALID,
SUM,
PRODUCT,
MAX,
MIN,
};

/** @brief Reduce one or more dimensions of a tensor.
 *
 *  The axes to reduce are given with respect to the input tensor
 *  dimensions, not including the mini-batch dimension.
 */
template <typename TensorDataType, data_layout Layout, El::Device Device>
class multidim_reduction_layer : public data_type_layer<TensorDataType>
{
private:
/** Reduction dimensions. */
std::vector<unsigned int> m_axes;

/** Reduction mode. */
multidim_reduction_mode m_mode;

/** Tensor modes (dimension labels) for the input and output tensors. */
std::vector<int32_t> m_input_modes, m_output_modes;

public:
multidim_reduction_layer(
const std::vector<unsigned int>& axes = {},
multidim_reduction_mode mode = multidim_reduction_mode::SUM);

multidim_reduction_layer* copy() const override
{
return new multidim_reduction_layer(*this);
}

/** @name Serialization */
///@{

template <typename ArchiveT>
void serialize(ArchiveT& ar);

///@}

std::string get_type() const override { return "multidim reduction"; }
data_layout get_data_layout() const override { return Layout; }
El::Device get_device_allocation() const override { return Device; }
bool can_run_inplace() const override { return false; }
int get_backprop_requirements() const override { return ERROR_SIGNALS; }

description get_description() const override
{
auto desc = data_type_layer<TensorDataType>::get_description();
std::string mode_str;
switch (m_mode) {
case multidim_reduction_mode::SUM:
mode_str = "sum";
break;
case multidim_reduction_mode::PRODUCT:
mode_str = "product";
break;
case multidim_reduction_mode::MAX:
mode_str = "max";
break;
case multidim_reduction_mode::MIN:
mode_str = "min";
break;
case multidim_reduction_mode::INVALID:
default:
mode_str = "invalid";
}
desc.add("Mode", mode_str);

std::stringstream axes_str;
for (size_t i = 0; i < m_axes.size(); ++i) {
axes_str << m_axes[i];
if (i != m_axes.size() - 1) {
axes_str << ", ";
}
}
desc.add("Axes", axes_str.str());

return desc;
}

protected:
/** Add layer specific data to prototext */
void write_specific_proto(lbann_data::Layer& proto) const final;

void setup_dims() override;

void fp_compute() override;

void bp_compute() override;
};

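// In translation units that do not instantiate the layer themselves,
// declare extern template instantiations for every supported data type
// and device; the definitions are compiled separately.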
#ifndef LBANN_MULTIDIM_REDUCTION_LAYER_INSTANTIATE
#define PROTO_DEVICE(T, Device) \
extern template class multidim_reduction_layer<T, \
data_layout::DATA_PARALLEL, \
Device>; \
extern template class multidim_reduction_layer<T, \
data_layout::MODEL_PARALLEL, \
Device>
#include "lbann/macros/instantiate_device.hpp"
#undef PROTO_DEVICE
#endif // LBANN_MULTIDIM_REDUCTION_LAYER_INSTANTIATE

} // namespace lbann

#endif // LBANN_LAYER_MULTIDIM_REDUCTION_HPP_INCLUDED
1 change: 1 addition & 0 deletions include/lbann/layers/transform/transform_builders.hpp
@@ -46,6 +46,7 @@ LBANN_DEFINE_LAYER_BUILDER(gaussian);
LBANN_DEFINE_LAYER_BUILDER(hadamard);
LBANN_DEFINE_LAYER_BUILDER(identity_zero);
LBANN_DEFINE_LAYER_BUILDER(in_top_k);
LBANN_DEFINE_LAYER_BUILDER(multidim_reduction);
LBANN_DEFINE_LAYER_BUILDER(permute);
LBANN_DEFINE_LAYER_BUILDER(pooling);
LBANN_DEFINE_LAYER_BUILDER(reduction);
5 changes: 5 additions & 0 deletions include/lbann/utils/CMakeLists.txt
@@ -79,6 +79,7 @@ set_full_path(THIS_DIR_HEADERS
sync_info_helpers.hpp
system_info.hpp
tensor.hpp
tensor_dims_utils.hpp
tensor_impl.hpp
timer.hpp
trainer_file_utils.hpp
@@ -114,6 +115,10 @@ if (LBANN_HAS_CNPY)
list(APPEND THIS_DIR_HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/cnpy_utils.hpp")
endif ()

if (LBANN_HAS_CUTENSOR)
list(APPEND THIS_DIR_HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/cutensor_support.hpp")
endif ()

# Add the subdirectories
add_subdirectory(threads)
add_subdirectory(impl)