[Semi-auto] add elementwise spmd rule for auto parallel (PaddlePaddle#54373)

* add some basic functions
* add elementwise rule for auto parallel
* add unit test for elementwise rule
* fix the lib name in spmd rule test cmake file
* fix some bugs
* add unit tests for elementwise spmd rule in python
* bug fix
* delete cpp unit test for elementwise spmd rule (use python ut now)
* add cpp unit test for elementwise rule
* use concrete op name in unit test
* fix typo
* fix code style
* delete cpp unit test
* add more details in log
1 parent 6e30620 · commit b187994
Showing 9 changed files with 668 additions and 22 deletions.
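In the sharding model this commit targets, an elementwise op maps each dimension of each input to an einsum-style axis letter; inputs that share a letter must agree on which process-mesh dimension shards that axis, and the output inherits the merged mapping. A minimal standalone sketch of that merge idea (toy code, not the Paddle API: merge_dims_mappings and its conflict handling are simplified stand-ins, while the real rule merges via ShardingMergeForTensors, as the .cc diff below shows):

// Toy sketch of elementwise sharding merge (illustrative only, not Paddle API).
// dims_mapping[i] = process-mesh dimension that shards tensor axis i, or -1
// if that axis is replicated.
#include <cassert>
#include <cstdint>
#include <iostream>
#include <vector>

// Merge the dims mappings of same-rank elementwise inputs axis by axis: an
// axis is sharded if any input shards it; conflicting shardings are rejected
// here, whereas the real rule resolves them during merging.
std::vector<int64_t> merge_dims_mappings(
    const std::vector<std::vector<int64_t>>& mappings) {
  std::vector<int64_t> merged(mappings[0].size(), -1);
  for (const auto& m : mappings) {
    for (size_t i = 0; i < m.size(); ++i) {
      if (m[i] == -1) continue;
      assert(merged[i] == -1 || merged[i] == m[i]);
      merged[i] = m[i];
    }
  }
  return merged;
}

int main() {
  // x: [batch, hidden] sharded along batch on mesh dim 0; y: fully replicated.
  std::vector<std::vector<int64_t>> mappings = {{0, -1}, {-1, -1}};
  for (int64_t d : merge_dims_mappings(mappings)) std::cout << d << " ";
  std::cout << "\n";  // prints: 0 -1  (output stays sharded along batch)
  return 0;
}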
@@ -1,9 +1,10 @@
+add_subdirectory(spmd_rules)
+
 cc_library(
   op_dist_attr
   SRCS dist_attr.cc
   DEPS phi auto_parallel_proto proto_desc)

-cc_library(auto_parallel DEPS op_dist_attr spmd_rule)
+cc_library(auto_parallel DEPS op_dist_attr spmd_rules)

 add_subdirectory(test)
-add_subdirectory(spmd_rules)
paddle/fluid/distributed/auto_parallel/spmd_rules/CMakeLists.txt (7 changes: 4 additions & 3 deletions)
@@ -1,5 +1,6 @@
-file(GLOB SPMD_SRCS "*.cc")
+file(GLOB spmd_srcs *.cc)
+
 cc_library(
-  spmd_rule
-  SRCS ${SPMD_SRCS}
+  spmd_rules
+  SRCS ${spmd_srcs}
   DEPS phi)
paddle/fluid/distributed/auto_parallel/spmd_rules/elementwise_spmd_rule.cc (140 additions & 0 deletions)
@@ -0,0 +1,140 @@
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/distributed/auto_parallel/spmd_rules/elementwise_spmd_rule.h"
#include "paddle/phi/core/distributed/auto_parallel/utils.h"

namespace paddle {
namespace distributed {
namespace auto_parallel {
using phi::distributed::auto_parallel::str_join;

std::pair<std::vector<TensorDistAttr>, std::vector<TensorDistAttr>>
ElementwiseSPMDRule::InferForward(
    const std::vector<DistTensorSpec>& input_specs,
    const paddle::framework::AttributeMap& attrs) {
  // step0: Verify Input Args Based on Elementwise Logic
  int64_t ninputs = input_specs.size();
  PADDLE_ENFORCE_GT(
      ninputs,
      0,
      phi::errors::InvalidArgument("The size of InputSpec in elementwise must "
                                   "be greater than 0, but got [%d].",
                                   ninputs));
  VerifySpecs(input_specs, "elementwise");

  // step1: Build Einsum Notation
  std::string alphabet = "abcdefghijklmnopqrstuvwxyz";
  std::vector<std::string> input_axes_vec;
  int64_t max_ndim = 0;
  for (int64_t i = 0; i < ninputs; ++i) {
    int64_t ndim = input_specs[i].shape().size();
    if (ndim > max_ndim) {
      max_ndim = ndim;
    }
  }

  // get the einsum notation for each input, dealing with broadcast
  std::vector<int64_t> broadcast_axis_count(max_ndim, 0);
  for (int64_t i = 0; i < ninputs; ++i) {
    std::vector<int64_t> shape = input_specs[i].shape();
    int64_t ndim = shape.size();
    int64_t start_dim = max_ndim - ndim;
    std::string axes_notation = GetBroadcastAxes(ndim, max_ndim, alphabet);
    if (ninputs > 1) {
      for (int64_t idim = 0; idim < max_ndim; idim++) {
        // deal with the broadcast axes: record the number of
        // inputs that broadcast at each axis
        if (idim < start_dim) {
          broadcast_axis_count[idim] += 1;
        } else if (shape[idim - start_dim] == 1) {
          broadcast_axis_count[idim] += 1;
          // mark the broadcast axis with the special character '1'
          axes_notation[idim - start_dim] = '1';
        }
      }
    }
    input_axes_vec.emplace_back(axes_notation);
  }

  // get the einsum notation for the output
  std::string output_axes = GetBroadcastAxes(max_ndim, max_ndim, alphabet);
  for (int64_t idim = 0; idim < max_ndim; idim++) {
    // if all inputs broadcast at this dimension,
    // mark this axis in the output as broadcast
    if (broadcast_axis_count[idim] == ninputs) {
      output_axes[idim] = '1';
    }
  }

  // step2: Sharding Propagation
  // step2.1: merge input shardings
  std::vector<std::pair<std::string, std::vector<int64_t>>> axes_sharding_info;
  axes_sharding_info = GetAxesDimsMappingPair(input_axes_vec, input_specs);
  std::unordered_map<std::string, int64_t> axis_to_dim_map =
      ShardingMergeForTensors(axes_sharding_info);

  // step2.2: infer output dims mapping from merged input dims mapping
  std::vector<int64_t> output_dims_mapping =
      GetDimsMappingForAxes(output_axes, axis_to_dim_map);

  // initialize the output dist_attr's process_mesh, batch_dim and dynamic dims
  // with the input dist_attr.
  TensorDistAttr output_dist_attr =
      CopyTensorDistAttrForOutput(input_specs[0].dist_attr());
  output_dist_attr.set_dims_mapping(output_dims_mapping);

  std::vector<TensorDistAttr> new_input_dist_attrs;
  std::vector<TensorDistAttr> output_dist_attrs;

  // step2.3: update the inputs' dims mapping with the merged one.
  for (int64_t i = 0; i < ninputs; i++) {
    const DistTensorSpec& spec = input_specs[i];
    TensorDistAttr dist_attr(spec.dist_attr());
    std::vector<int64_t> new_dims_mapping =
        GetDimsMappingForAxes(input_axes_vec[i], axis_to_dim_map);
    dist_attr.set_dims_mapping(new_dims_mapping);
    new_input_dist_attrs.emplace_back(dist_attr);
  }

  // step2.4: handle input tensor partial (TODO)
  VLOG(4) << "ElementwiseSPMDRule InferForward:";
  for (int64_t i = 0; i < ninputs; i++) {
    VLOG(4) << "Input" << std::to_string(i) << " shape: ["
            << str_join(input_specs[i].shape()) << "] "
            << "src_dims_mapping: [" << str_join(input_specs[i].dims_mapping())
            << "] "
            << "dst_dims_mapping: ["
            << str_join(new_input_dist_attrs[i].dims_mapping()) << "]";
  }
  VLOG(4) << "Output dims_mapping: [" + str_join(output_dims_mapping) + "]\n\n";

  output_dist_attrs.emplace_back(output_dist_attr);
  return {new_input_dist_attrs, output_dist_attrs};
}

std::pair<std::vector<TensorDistAttr>, std::vector<TensorDistAttr>>
ElementwiseSPMDRule::InferBackward(
    const std::vector<DistTensorSpec>& output_specs,
    const paddle::framework::AttributeMap& attrs) {
  PADDLE_THROW(phi::errors::Unimplemented(
      "InferBackward of ElementwiseSPMDRule is NOT implemented yet."));

  return {};
}

}  // namespace auto_parallel
}  // namespace distributed
}  // namespace paddle
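Step 1 above right-aligns each input's notation against the highest-rank input and rewrites size-1 (or missing) dimensions to the special character '1', so that broadcast axes never take part in the sharding merge; an output axis becomes '1' only when every input broadcasts there. A self-contained trace of just that notation step (illustrative: broadcast_axes is a simplified stand-in for the GetBroadcastAxes helper, whose definition is not part of this diff):

// Standalone trace of the einsum-notation building in step1 (illustrative).
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// Simplified stand-in for GetBroadcastAxes: the letters for the trailing
// `ndim` axes out of `max_ndim` axes.
std::string broadcast_axes(int64_t ndim, int64_t max_ndim,
                           const std::string& alphabet) {
  return alphabet.substr(max_ndim - ndim, ndim);
}

int main() {
  const std::string alphabet = "abcdefghijklmnopqrstuvwxyz";
  const std::vector<std::vector<int64_t>> shapes = {{8, 1, 16}, {16}};
  int64_t max_ndim = 0;
  for (const auto& s : shapes)
    max_ndim = std::max<int64_t>(max_ndim, s.size());

  std::vector<int64_t> broadcast_axis_count(max_ndim, 0);
  std::vector<std::string> notations;
  for (const auto& shape : shapes) {
    const int64_t ndim = shape.size();
    const int64_t start_dim = max_ndim - ndim;
    std::string axes = broadcast_axes(ndim, max_ndim, alphabet);
    for (int64_t idim = 0; idim < max_ndim; ++idim) {
      if (idim < start_dim) {
        broadcast_axis_count[idim] += 1;  // axis absent in this input
      } else if (shape[idim - start_dim] == 1) {
        broadcast_axis_count[idim] += 1;  // size-1 axis: broadcast
        axes[idim - start_dim] = '1';
      }
    }
    notations.push_back(axes);
  }

  std::string output_axes = broadcast_axes(max_ndim, max_ndim, alphabet);
  for (int64_t idim = 0; idim < max_ndim; ++idim)
    if (broadcast_axis_count[idim] == static_cast<int64_t>(shapes.size()))
      output_axes[idim] = '1';

  for (const auto& n : notations) std::cout << n << "\n";  // a1c, then c
  std::cout << output_axes << "\n";                        // a1c
  return 0;
}

For shapes [8, 1, 16] and [16] this yields input notations "a1c" and "c"; the middle axis keeps its broadcast marker in the output because both inputs broadcast there.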
paddle/fluid/distributed/auto_parallel/spmd_rules/elementwise_spmd_rule.h (40 additions & 0 deletions)
@@ -0,0 +1,40 @@
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <iterator>
#include <map>
#include <string>
#include <vector>

#include "paddle/fluid/distributed/auto_parallel/spmd_rules/common.h"

namespace paddle {
namespace distributed {
namespace auto_parallel {

class ElementwiseSPMDRule : public SPMDRuleBase {
 public:
  std::pair<std::vector<TensorDistAttr>, std::vector<TensorDistAttr>>
  InferForward(const std::vector<DistTensorSpec>& input_specs,
               const paddle::framework::AttributeMap& attrs) override;

  std::pair<std::vector<TensorDistAttr>, std::vector<TensorDistAttr>>
  InferBackward(const std::vector<DistTensorSpec>& output_specs,
                const paddle::framework::AttributeMap& attrs) override;
};
}  // namespace auto_parallel
}  // namespace distributed
}  // namespace paddle
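Downstream of the merge, GetDimsMappingForAxes (used in steps 2.2 and 2.3 of elementwise_spmd_rule.cc above) only needs to look each axis letter up in the merged axis-to-mesh-dim map, treating the broadcast marker '1' as replicated. Its real signature lives in common.h, which is not shown in this commit, so the version below is a guessed-at simplification for illustration:

// Simplified sketch of a GetDimsMappingForAxes-style lookup (illustrative,
// not the common.h implementation).
#include <cstdint>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

std::vector<int64_t> dims_mapping_for_axes(
    const std::string& axes,
    const std::unordered_map<std::string, int64_t>& axis_to_dim) {
  std::vector<int64_t> mapping;
  for (char axis : axes) {
    if (axis == '1') {
      mapping.push_back(-1);  // broadcast axis: always replicated
    } else {
      mapping.push_back(axis_to_dim.at(std::string(1, axis)));
    }
  }
  return mapping;
}

int main() {
  // Merged map from the earlier trace: axis 'a' sharded on mesh dim 0.
  const std::unordered_map<std::string, int64_t> axis_to_dim = {{"a", 0},
                                                                {"c", -1}};
  for (int64_t d : dims_mapping_for_axes("a1c", axis_to_dim))
    std::cout << d << " ";  // prints: 0 -1 -1
  std::cout << "\n";
  return 0;
}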