/*!
* Copyright (c) 2019 by Contributors
* \file autodiff_integration.cc
* \brief Integration with autodiff for TVM tensor expressions.
*/
#include <tvm/relay/op.h>
#include <tvm/operation.h>
#include <tvm/autodiff.h>
#include <topi/broadcast.h>
#include "./type_relations.h"
#include "./op_common.h"
#include "../../op/op_util.h"

namespace tvm {
namespace relay {

/*! \brief Attributes for the automatically generated gradient operation. */
struct AutogeneratedGradientAttrs : public tvm::AttrsNode<AutogeneratedGradientAttrs> {
Op original_op;
Attrs original_attrs;
Type original_out_type;
TVM_DECLARE_ATTRS(AutogeneratedGradientAttrs, "relay.attrs.AutogeneratedGradientAttrs") {
TVM_ATTR_FIELD(original_op)
.describe("The original operation.");
TVM_ATTR_FIELD(original_attrs)
.describe("The attributes of the original operation.");
TVM_ATTR_FIELD(original_out_type).set_default(Type(nullptr))
.describe("The type of the original expression.");
}
};
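
// Illustrative example (hypothetical shapes): if the original op maps
// (Tensor[(n, m)], Tensor[(m,)]) to Tensor[(n,)], then the gradient op takes a single
// tuple argument of type (Tensor[(n, m)], Tensor[(m,)], Tensor[(n,)]), i.e. the original
// inputs followed by the adjoint, and the relation below assigns the output type
// (Tensor[(n, m)], Tensor[(m,)]), one gradient per original input.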
bool AutogeneratedGradientRel(const Array<Type>& types,
                              int num_inputs,
                              const Attrs& attrs,
                              const TypeReporter& reporter) {
const AutogeneratedGradientAttrs* real_attrs = attrs.as<AutogeneratedGradientAttrs>();
CHECK(real_attrs != nullptr) << "Attrs are null or have an invalid type.";
// There are just two types: the type of the input tuple and the type of the output tuple.
CHECK(types.size() == 2) << "The size of the types array must be 2, not " << types.size();
const auto* tuple_type = types[0].as<TupleTypeNode>();
CHECK(tuple_type != nullptr) << "The input must be a tuple, not " << types[0];
// The input tuple contains the original inputs and the last item is the adjoint
// for the output of the original operation.
  Array<Type> input_types(tuple_type->fields.begin(), tuple_type->fields.end() - 1);
  // The output of the gradient operation is a tuple containing values of the same types
  // as the original inputs.
reporter->Assign(types[1], TupleTypeNode::make(input_types));
return true;
}

Array<Tensor> AutogeneratedGradientCompute(const Attrs& attrs,
                                           const Array<Tensor>& inputs,
                                           const Type& out_type,
                                           const Target& target) {
static auto fcompute = Op::GetAttr<FTVMCompute>("FTVMCompute");
const AutogeneratedGradientAttrs* real_attrs = attrs.as<AutogeneratedGradientAttrs>();
CHECK(real_attrs != nullptr);
// We need the type of the original output to pass it to the
// FTVMCompute of the original operation.
Type original_out_type = real_attrs->original_out_type;
  // The `inputs` array contains both the original inputs and the adjoint, in flattened
  // form. In general, the adjoint may consist of several tensors, so we need to know the
  // number of output tensors of the original operation.
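  // Illustrative example (hypothetical op): for an original op with two inputs and one
  // output, `inputs` is {x1, x2, g}, so inputs.size() == 3 and num_orig_outputs == 1;
  // the first two tensors are the original inputs and the last one is the adjoint.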
size_t num_orig_outputs = 1;
// NOTE: Here we assume that there are no nested tuples
if (const auto* tuple_type = original_out_type.as<TupleTypeNode>()) {
num_orig_outputs = tuple_type->fields.size();
  } else if (const auto* out_tuple_type = out_type.as<TupleTypeNode>()) {
    // Guess the number of outputs of the original op: this gradient node produces one
    // output per original input, so subtracting that count from inputs.size() leaves
    // the number of adjoints, which equals the number of original outputs.
    num_orig_outputs = inputs.size() - out_tuple_type->fields.size();
}
CHECK(inputs.size() >= num_orig_outputs);
// If the original output type hasn't been preserved, try to reconstruct it using the
// number of original outputs.
if (!original_out_type.defined()) {
Array<Type> fields;
    for (auto it = inputs.end() - num_orig_outputs; it != inputs.end(); ++it) {
fields.push_back(TensorTypeNode::make((*it)->shape, (*it)->dtype));
}
if (num_orig_outputs == 1) {
      // If there is a single output, the output type is probably just a tensor,
      // not a single-element tuple.
original_out_type = fields[0];
} else {
original_out_type = TupleTypeNode::make(fields);
}
}
  Array<Tensor> original_inputs(inputs.begin(), inputs.end() - num_orig_outputs);
  Array<Tensor> adjoints(inputs.end() - num_orig_outputs, inputs.end());
  // In theory, the inputs might contain duplicate tensors, which automatic differentiation
  // would not handle correctly, so we create new placeholders and substitute the real
  // inputs back later.
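  // Illustrative example (hypothetical call): for add(x, x), `inputs` contains the same
  // tensor twice; differentiating with respect to two distinct placeholders keeps the
  // gradient slots separate, and the placeholders are substituted back with `x` afterwards.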
Array<Tensor> input_placeholders;
std::unordered_map<Tensor, Tensor> placeholders_to_inputs;
for (const Tensor& input : original_inputs) {
Tensor place =
tvm::PlaceholderOpNode::make(input->op->name, input->shape, input->dtype).output(0);
input_placeholders.push_back(place);
placeholders_to_inputs[place] = input;
}
Array<Tensor> forward =
fcompute[real_attrs->original_op](real_attrs->original_attrs, input_placeholders,
original_out_type, target);
CHECK(forward.size() == adjoints.size());
  // If there are multiple outputs, we have to propagate gradients from all of them and
  // add up the results. Note that this may be suboptimal; in the future we might want to
  // make the Differentiate function accept arrays of outputs.
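  // Concretely (illustrative notation): with outputs y_1, ..., y_k and adjoints
  // g_1, ..., g_k, the gradient with respect to input x_j accumulates as
  // sum_i (dy_i/dx_j) * g_i, which the loop below computes with topi::add.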
Array<Tensor> res;
for (size_t i = 0; i < forward.size(); ++i) {
Array<Tensor> part =
tvm::ir::Differentiate(forward[i], input_placeholders, adjoints[i])->result;
part = tvm::op::ReplaceTensorRecursively(part, placeholders_to_inputs);
if (i == 0) {
res = part;
} else {
for (size_t j = 0; j < res.size(); ++j) {
res.Set(j, topi::add(res[j], part[j]));
}
}
}
return res;
}

RELAY_REGISTER_OP("autogenerated_gradient")
.describe(R"doc(Gradients for any specified operation, generated using automatic
differentiation of tensor expressions.

- **input**: A tuple of the form `(x1, ..., xn, g)` where `x1, ..., xn` are the inputs of
  the original operation, and `g` is the gradient of the loss with respect to the output
  of the original operation.
- **out**: A tuple of the form `(g1, ..., gn)` containing the gradients of the loss with
  respect to the inputs of the original operation.
)doc")
.set_num_inputs(1)
.add_argument("input", "Tuple", "A tuple containing the original inputs and the adjoint.")
.set_attrs_type_key("relay.attrs.AutogeneratedGradientAttrs")
.add_type_rel("AutogeneratedGradient", AutogeneratedGradientRel)
.set_attr<FTVMCompute>("FTVMCompute", AutogeneratedGradientCompute)
.set_attr<TOpPattern>("TOpPattern", kOpaque)
.set_attr<FTVMSchedule>("FTVMSchedule",
  [](const Attrs& attrs, const Array<Tensor>& outs, const Target& target) {
    Array<tvm::Operation> out_ops;
    for (const auto& t : outs) {
      out_ops.push_back(t->op);
    }
    return create_schedule(out_ops);
  });
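
// Illustrative example (hypothetical op): for a call f(x1, x2) whose output adjoint is g,
// the closure returned below expresses the gradients as
//   %grad = autogenerated_gradient((x1, x2, g))
//   [%grad.0, %grad.1]  // gradients with respect to x1 and x2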
FPrimalGradient AutogeneratedFPrimalGradient(const Op& op) {
return [op](const Expr& orig, const Expr& adjoint) -> Array<Expr> {
const CallNode* call = orig.as<CallNode>();
CHECK(call != nullptr);
auto attrs = make_node<AutogeneratedGradientAttrs>();
attrs->original_op = op;
attrs->original_attrs = call->attrs;
if (call->checked_type_.defined()) {
attrs->original_out_type = call->checked_type();
}
Array<Expr> args_in_tuple = call->args;
args_in_tuple.push_back(adjoint);
Array<Expr> args = {TupleNode::make(args_in_tuple)};
auto grad_call = CallNode::make(Op::Get("autogenerated_gradient"), args, Attrs(attrs));
Array<Expr> res;
for (size_t i = 0; i < call->args.size(); ++i) {
res.push_back(TupleGetItemNode::make(grad_call, i));
}
return res;
};
}

/*! \brief Automatically generate a primal gradient for the given operation. */
void AutogeneratePrimalGradient(const std::string& op_name, int plevel = 100) {
OpRegistry& opreg = relay::OpRegistry::Registry()->__REGISTER_OR_GET__(op_name);
Op op = opreg.op();
opreg.set_attr<FPrimalGradient>("FPrimalGradient", AutogeneratedFPrimalGradient(op), plevel);
}

/*! \brief Automatically generate primal gradients for all operations in the registry. */
void AutogeneratePrimalGradientForAll(int plevel = 5) {
for (const OpRegistry* opreg : relay::OpRegistry::Registry()->List()) {
AutogeneratePrimalGradient(opreg->op()->name, plevel);
}
}
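
// Note (an assumption about registry defaults): plevel = 5 for bulk autogeneration is
// presumably low enough that hand-written FPrimalGradient registrations take precedence,
// while AutogeneratePrimalGradient uses plevel = 100 to deliberately override them.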
TVM_REGISTER_API("relay._ir_pass.AutogeneratePrimalGradient")
.set_body([](tvm::TVMArgs args, tvm::TVMRetValue *ret) {
AutogeneratePrimalGradient(args[0]);
});
TVM_REGISTER_API("relay._ir_pass.AutogeneratePrimalGradientForAll")
.set_body([](tvm::TVMArgs args, tvm::TVMRetValue *ret) {
AutogeneratePrimalGradientForAll();
});
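
// Illustrative usage (hypothetical op name) through the packed-function registry:
//   const tvm::runtime::PackedFunc* f =
//       tvm::runtime::Registry::Get("relay._ir_pass.AutogeneratePrimalGradient");
//   (*f)("nn.dense");
// The same functions are also reachable from Python via the relay._ir_pass module.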
} // namespace relay
} // namespace tvm