
[Large Tensor] Add LT support for NN optimizers and 1 activation function (apache#17444)

* fix hard sigmoid

* change int i to index_t i for all Kernel Map functions

* fix lint

* size_t index_t fix
ChaiBapchya authored and anirudh2290 committed May 29, 2020
1 parent c28d8e4 commit 02492c2
Showing 2 changed files with 43 additions and 40 deletions.
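Why this matters, in brief: with 32-bit `int` indices, a tensor holding more than 2^31 - 1 elements cannot be addressed past `INT_MAX`, so per-element kernels mis-index or never reach the tail. A minimal standalone sketch, not code from this commit (`index_t` below is a stand-in for mshadow's 64-bit `index_t` when large-tensor support is enabled):

```cpp
#include <climits>
#include <cstdint>
#include <cstdio>

using index_t = std::int64_t;  // stand-in for mshadow's index_t with int64 tensor size

int main() {
  // A float tensor of shape (2, 2^30) holds 2^31 elements -- one more than
  // INT_MAX -- so an `int` loop index cannot reach its last element.
  const index_t num_elements = index_t(2) * (index_t(1) << 30);
  std::printf("elements = %lld, INT_MAX = %d, fits in int? %s\n",
              static_cast<long long>(num_elements), INT_MAX,
              num_elements <= INT_MAX ? "yes" : "no");
  return 0;
}
```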
79 changes: 41 additions & 38 deletions src/operator/optimizer_op-inl.h
@@ -225,10 +225,10 @@ struct MultiSGDKernelParam {
template <typename MPDType, bool has_momentum, bool has_mixed_precision>
struct MultiSGDKernel {
template<typename DType>
-MSHADOW_XINLINE static void Map(int i, const MultiSGDKernelParam<DType, MPDType>& param,
+MSHADOW_XINLINE static void Map(index_t i, const MultiSGDKernelParam<DType, MPDType>& param,
const OpReqType req) {
for (int index = 0; index < param.count; ++index) {
-if ((size_t)i < param.sizes[index]) {
+if (i < static_cast<index_t>(param.sizes[index])) {
MPDType w = has_mixed_precision ? param.weights32[index][i] :
MPDType(param.weights[index][i]);
MPDType mom = has_momentum ? param.mom[index][i] : MPDType(0);
@@ -381,7 +381,7 @@ inline void MultiSGDMomUpdate(const nnvm::NodeAttrs& attrs,

struct SGDKernel {
template<typename DType>
-MSHADOW_XINLINE static void Map(int i, DType* out_data, const DType* weight_data,
+MSHADOW_XINLINE static void Map(index_t i, DType* out_data, const DType* weight_data,
const DType* grad_data, const DType param_clip_gradient,
const DType param_lr, const DType param_wd, const DType param_rescale_grad,
const OpReqType req) {
@@ -429,9 +429,9 @@ struct SGDDnsRspKernel<req, gpu> {
// IType is row sparse idx type
// i is the ith element in row sparse gradient
template<typename DType, typename IType>
-MSHADOW_XINLINE static void Map(int i, const index_t row_length, DType* out, const DType* weight,
-const IType* grad_idx, const DType *grad_val,
-const DType clip_gradient, const DType lr,
+MSHADOW_XINLINE static void Map(index_t i, const index_t row_length, DType* out,
+const DType* weight, const IType* grad_idx,
+const DType *grad_val, const DType clip_gradient, const DType lr,
const DType wd, const DType rescale_grad) {
using nnvm::dim_t;
using namespace mshadow_op;
@@ -457,9 +457,9 @@ struct SGDDnsRspKernel<req, cpu> {
// IType is row sparse idx type
// i is the ith row in row sparse gradient
template<typename DType, typename IType>
-MSHADOW_XINLINE static void Map(int i, const index_t row_length, DType* out, const DType* weight,
-const IType* grad_idx, const DType *grad_val,
-const DType clip_gradient, const DType lr,
+MSHADOW_XINLINE static void Map(index_t i, const index_t row_length, DType* out,
+const DType* weight, const IType* grad_idx,
+const DType *grad_val, const DType clip_gradient, const DType lr,
const DType wd, const DType rescale_grad) {
for (index_t j = 0; j < row_length; j++) {
index_t data_i = grad_idx[i] * row_length + j;
@@ -600,10 +600,11 @@ struct SGDMomParam : public dmlc::Parameter<SGDMomParam> {

struct SGDMomKernel {
template<typename DType>
-MSHADOW_XINLINE static void Map(int i, DType* out_data, DType* mom_data, const DType* weight_data,
-const DType* grad_data, const DType param_clip_gradient, const DType param_momentum,
-const DType param_lr, const DType param_wd, const DType param_rescale_grad,
-const OpReqType req) {
+MSHADOW_XINLINE static void Map(index_t i, DType* out_data, DType* mom_data,
+const DType* weight_data, const DType* grad_data,
+const DType param_clip_gradient, const DType param_momentum,
+const DType param_lr, const DType param_wd,
+const DType param_rescale_grad, const OpReqType req) {
if (param_clip_gradient >= 0.0f) {
mom_data[i] = param_momentum*mom_data[i]
- param_lr*param_wd*weight_data[i]
@@ -654,7 +655,7 @@ inline bool MP_InferType(const nnvm::NodeAttrs& attrs,

struct MP_SGDKernel {
template<typename DType>
-MSHADOW_XINLINE static void Map(int i, DType* out_data, const DType* weight_data,
+MSHADOW_XINLINE static void Map(index_t i, DType* out_data, const DType* weight_data,
const DType* grad_data, float* weight32, const float param_clip_gradient,
const float param_lr, const float param_wd, const float param_rescale_grad,
const OpReqType req) {
@@ -698,7 +699,7 @@ inline void MP_SGDUpdate(const nnvm::NodeAttrs& attrs,

struct MP_SGDMomKernel {
template<typename DType>
-MSHADOW_XINLINE static void Map(int i, DType* out_data, float* mom_data,
+MSHADOW_XINLINE static void Map(index_t i, DType* out_data, float* mom_data,
const DType* weight_data, const DType* grad_data, float* weight32,
const float param_clip_gradient, const float param_momentum, const float param_lr,
const float param_wd, const float param_rescale_grad, const OpReqType req) {
@@ -749,7 +750,7 @@ struct SGDMomDnsRspDnsKernel;
template<int req>
struct SGDMomDnsRspDnsKernel<req, cpu> {
template<typename DType, typename IType>
-MSHADOW_XINLINE static void Map(int i, index_t row_length, DType* out_data,
+MSHADOW_XINLINE static void Map(index_t i, index_t row_length, DType* out_data,
DType* mom_data, const DType* weight_data, const IType* grad_idx,
const DType* grad_data, const DType clip_gradient, const DType momentum,
const DType lr, const DType wd, const DType rescale_grad) {
@@ -776,7 +777,7 @@ struct SGDMomDnsRspDnsKernel<req, cpu> {
template<int req>
struct SGDMomDnsRspDnsKernel<req, gpu> {
template<typename DType, typename IType>
-MSHADOW_XINLINE static void Map(int i, index_t row_length, DType* out_data,
+MSHADOW_XINLINE static void Map(index_t i, index_t row_length, DType* out_data,
DType* mom_data, const DType* weight_data, const IType* grad_idx,
const DType* grad_data, const DType clip_gradient, const DType momentum,
const DType lr, const DType wd, const DType rescale_grad) {
@@ -1060,7 +1061,7 @@ struct NAGMomParam : public dmlc::Parameter<NAGMomParam> {

struct NAGMomKernel {
template<typename DType>
-MSHADOW_XINLINE static void Map(int i, DType* out_data, DType* mom_data,
+MSHADOW_XINLINE static void Map(index_t i, DType* out_data, DType* mom_data,
const DType* weight_data, const DType* grad_data,
const DType param_clip_gradient, const DType param_momentum,
const DType param_lr, const DType param_wd,
@@ -1107,7 +1108,7 @@ inline void NAGMomUpdate(const nnvm::NodeAttrs& attrs,

struct MP_NAGMomKernel {
template<typename DType>
-MSHADOW_XINLINE static void Map(int i, DType* out_data,
+MSHADOW_XINLINE static void Map(index_t i, DType* out_data,
float* mom_data, const DType* weight_data,
const DType* grad_data, float* weight32,
const float param_clip_gradient,
@@ -1204,7 +1205,7 @@ struct FTMLParam : public dmlc::Parameter<FTMLParam> {

struct FTMLKernel {
template<typename DType>
-MSHADOW_XINLINE static void Map(int i, DType* out, DType* weight, DType* grad,
+MSHADOW_XINLINE static void Map(index_t i, DType* out, DType* weight, DType* grad,
DType* d, DType* v, DType* z, const DType lr, const DType beta1,
const DType beta2, const DType epsilon, const DType t,
const DType wd, const DType rescale_grad, const DType clip_grad,
@@ -1291,7 +1292,7 @@ struct AdamParam : public dmlc::Parameter<AdamParam> {

struct AdamUpdateKernel {
template<typename DType>
-MSHADOW_XINLINE static void Map(int i, DType* out_data,
+MSHADOW_XINLINE static void Map(index_t i, DType* out_data,
DType* mean_data, DType* var_data, const DType* weight_data, const DType* grad_data,
const DType clip_gradient, const DType rescale_grad,
const DType beta1, const DType beta2,
@@ -1350,7 +1351,7 @@ struct AdamDnsRspDnsKernel;
template<int req>
struct AdamDnsRspDnsKernel<req, cpu> {
template<typename DType, typename IType>
-MSHADOW_XINLINE static void Map(int i, const nnvm::dim_t row_length, DType* out_data,
+MSHADOW_XINLINE static void Map(index_t i, const nnvm::dim_t row_length, DType* out_data,
DType* mean_data, DType* var_data, const DType* weight_data, const IType* grad_idx,
const DType* grad_data, const DType clip_gradient, const DType beta1, const DType beta2,
const DType lr, const DType wd, const DType epsilon, const DType rescale_grad) {
@@ -1383,7 +1384,7 @@ struct AdamDnsRspDnsKernel<req, cpu> {
template<int req>
struct AdamDnsRspDnsKernel<req, gpu> {
template<typename DType, typename IType>
-MSHADOW_XINLINE static void Map(int i, const nnvm::dim_t row_length, DType* out_data,
+MSHADOW_XINLINE static void Map(index_t i, const nnvm::dim_t row_length, DType* out_data,
DType* mean_data, DType* var_data, const DType* weight_data, const IType* grad_idx,
const DType* grad_data, const DType clip_gradient, const DType beta1, const DType beta2,
const DType lr, const DType wd, const DType epsilon, const DType rescale_grad) {
@@ -1620,7 +1621,7 @@ struct LambUpdatePhaseTwoParam : public dmlc::Parameter<LambUpdatePhaseTwoParam>

struct LambUpdatePhaseOneKernel {
template<typename DType>
-MSHADOW_XINLINE static void Map(int i, DType* out_data,
+MSHADOW_XINLINE static void Map(index_t i, DType* out_data,
DType* mean_data, DType* var_data, const DType* weight_data, const DType* grad_data,
const DType clip_gradient, const DType rescale_grad,
const DType beta1, const DType beta1_t, const DType beta2, const DType beta2_t,
@@ -1704,7 +1705,7 @@ inline bool LambUpdatePhaseTwoShape(const nnvm::NodeAttrs& attrs,

struct LambUpdatePhaseTwoKernel {
template<typename DType>
-MSHADOW_XINLINE static void Map(int i, DType* out_data,
+MSHADOW_XINLINE static void Map(index_t i, DType* out_data,
const DType* weight_data, const DType* g,
const DType* r1, const DType* r2,
DType lr, const DType lower_bound,
@@ -1771,7 +1772,7 @@ inline bool MPLambPhaseOneType(const nnvm::NodeAttrs& attrs,

struct MPLambUpdatePhaseOneKernel {
template<typename DType>
-MSHADOW_XINLINE static void Map(int i, float* out_data,
+MSHADOW_XINLINE static void Map(index_t i, float* out_data,
float* mean_data, float* var_data, const DType* weight_data,
const DType* grad_data, const float* weight32_data,
const float clip_gradient, const float rescale_grad,
@@ -1861,7 +1862,7 @@ inline bool MPLambUpdatePhaseTwoShape(const nnvm::NodeAttrs& attrs,

struct MPLambUpdatePhaseTwoKernel {
template<typename DType>
-MSHADOW_XINLINE static void Map(int i, DType* out_data,
+MSHADOW_XINLINE static void Map(index_t i, DType* out_data,
const DType* weight_data, const float* g,
const float* r1, const float* r2, const float* weight32_data,
float lr, const float lower_bound,
@@ -1952,7 +1953,7 @@ struct RMSPropAlexParam : public dmlc::Parameter<RMSPropAlexParam> {

struct RMSPropAlexUpdateKernel {
template<typename DType>
-MSHADOW_XINLINE static void Map(int i, DType* out_data,
+MSHADOW_XINLINE static void Map(index_t i, DType* out_data,
DType* state_n_data, DType* state_g_data, DType* delta_data,
const DType* weight_data, const DType* grad_data,
const DType clip_gradient, const DType rescale_grad,
@@ -2051,7 +2052,7 @@ struct RMSPropParam : public dmlc::Parameter<RMSPropParam> {

struct RMSPropUpdateKernel {
template<typename DType>
-MSHADOW_XINLINE static void Map(int i,
+MSHADOW_XINLINE static void Map(index_t i,
DType* out_data, DType* state_n_data,
const DType* weight_data, const DType* grad_data,
const DType clip_gradient, const DType rescale_grad,
@@ -2132,7 +2133,7 @@ struct FtrlParam : public dmlc::Parameter<FtrlParam> {

struct FtrlUpdateKernel {
template<typename DType>
-MSHADOW_XINLINE static void Map(int i, DType* out_data,
+MSHADOW_XINLINE static void Map(index_t i, DType* out_data,
DType* n_data, DType* z_data, const DType* weight_data, const DType* grad_data,
const DType clip_gradient, const DType rescale_grad,
const DType beta, const DType lamda1,
@@ -2185,7 +2186,7 @@ inline void FtrlUpdate(const nnvm::NodeAttrs& attrs,
template<int req>
struct FtrlDnsRspDnsKernel {
template<typename DType, typename IType>
-MSHADOW_XINLINE static void Map(int i, const nnvm::dim_t row_length, DType* out_data,
+MSHADOW_XINLINE static void Map(index_t i, const nnvm::dim_t row_length, DType* out_data,
DType* z_data, DType* n_data, const DType* weight_data, const IType* grad_idx,
const DType* grad_data, const DType clip_gradient, const DType lamda1, const DType beta,
const DType lr, const DType wd, const DType rescale_grad) {
@@ -2343,7 +2344,7 @@ struct SignSGDParam : public dmlc::Parameter<SignSGDParam> {

struct SignSGDKernel {
template<typename DType>
-MSHADOW_XINLINE static void Map(int i, DType* out_data, const DType* weight_data,
+MSHADOW_XINLINE static void Map(index_t i, DType* out_data, const DType* weight_data,
const DType* grad_data, const DType param_clip_gradient,
const DType param_lr, const DType param_wd, const DType param_rescale_grad,
const OpReqType req) {
@@ -2411,10 +2412,12 @@ struct SignumParam : public dmlc::Parameter<SignumParam> {

struct SignumKernel {
template<typename DType>
-MSHADOW_XINLINE static void Map(int i, DType* out_data, DType* mom_data, const DType* weight_data,
-const DType* grad_data, const DType param_clip_gradient, const DType param_momentum,
-const DType param_lr, const DType param_wd, const DType param_rescale_grad,
-const DType param_wd_lh, const OpReqType req) {
+MSHADOW_XINLINE static void Map(index_t i, DType* out_data, DType* mom_data,
+const DType* weight_data, const DType* grad_data,
+const DType param_clip_gradient, const DType param_momentum,
+const DType param_lr, const DType param_wd,
+const DType param_rescale_grad, const DType param_wd_lh,
+const OpReqType req) {
if (param_clip_gradient >= 0.0f) {
mom_data[i] = param_momentum*mom_data[i]
- (1-param_momentum)*param_wd*weight_data[i]
@@ -2506,7 +2509,7 @@ struct AdagradDnsRspDnsKernel;
template<>
struct AdagradDnsRspDnsKernel<cpu> {
template<typename DType, typename IType>
-MSHADOW_XINLINE static void Map(int i, index_t row_length, DType* out_data,
+MSHADOW_XINLINE static void Map(index_t i, index_t row_length, DType* out_data,
DType* state_data, const DType* weight_data, const IType* grad_idx,
const DType* grad_data, const DType clip_gradient, const DType epsilon,
const DType lr, const DType rescale_grad) {
@@ -2533,7 +2536,7 @@ struct AdagradDnsRspDnsKernel<cpu> {
template<>
struct AdagradDnsRspDnsKernel<gpu> {
template<typename DType, typename IType>
-MSHADOW_XINLINE static void Map(int i, index_t row_length, DType* out_data,
+MSHADOW_XINLINE static void Map(index_t i, index_t row_length, DType* out_data,
DType* state_data, const DType* weight_data, const IType* grad_idx,
const DType* grad_data, const DType clip_gradient, const DType epsilon,
const DType lr, const DType rescale_grad) {
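For orientation, these `Map` functions are not called directly; they are driven per element by MXNet's `Kernel<OP, xpu>::Launch` machinery, so the index parameter must match the launcher's 64-bit loop variable or indices past 2^31 - 1 are narrowed at the call boundary. A simplified, hypothetical sketch of that dispatch pattern (names and structure loosely modeled on `src/operator/mxnet_op.h`, not the actual implementation):

```cpp
#include <cstdint>

using index_t = std::int64_t;  // stand-in for mshadow's index_t

// Hypothetical CPU launcher: calls OP::Map once per element. If OP::Map
// still declared `int i`, every call would silently narrow the 64-bit
// loop index even though the loop below counts in 64 bits.
template <typename OP, typename... Args>
void LaunchCpu(index_t N, Args... args) {
  for (index_t i = 0; i < N; ++i) {
    OP::Map(i, args...);
  }
}

// Example kernel in the style of the optimizer kernels above.
struct ScaleKernel {
  static void Map(index_t i, float* out, const float* in, float lr) {
    out[i] = in[i] * lr;
  }
};

int main() {
  float in[4] = {1.f, 2.f, 3.f, 4.f};
  float out[4] = {0.f};
  LaunchCpu<ScaleKernel>(4, out, in, 0.5f);
  return 0;
}
```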
4 changes: 2 additions & 2 deletions src/operator/tensor/elemwise_unary_op.h
@@ -495,7 +495,7 @@ struct HardSigmoidParam : public dmlc::Parameter<HardSigmoidParam> {
template<int req>
struct hard_sigmoid_forward {
template<typename DType>
-MSHADOW_XINLINE static void Map(int i, DType* out_data, const DType* in_data,
+MSHADOW_XINLINE static void Map(index_t i, DType* out_data, const DType* in_data,
const real_t alpha, const real_t beta) {
DType result = DType(alpha * in_data[i] + beta);
result = (DType(1) < result) ? DType(1) : result;
@@ -507,7 +507,7 @@ struct hard_sigmoid_forward {
template<int req>
struct hard_sigmoid_backward {
template<typename DType>
-MSHADOW_XINLINE static void Map(int i, DType* in_grad, const DType* in_data,
+MSHADOW_XINLINE static void Map(index_t i, DType* in_grad, const DType* in_data,
const DType* out_grad, const real_t alpha, const real_t beta) {
DType out_val = DType(alpha) * in_data[i] + DType(beta);
DType grad = (out_val > DType(0) && out_val < DType(1)) ?
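For reference, the math the two hard-sigmoid kernels above implement: the forward pass computes y = clip(alpha * x + beta, 0, 1), and the backward pass passes alpha * out_grad through only where the pre-clip value lies strictly inside (0, 1). A standalone float-only sketch (the values alpha = 0.2 and beta = 0.5 are assumptions taken from HardSigmoidParam's defaults, not part of this diff):

```cpp
#include <cstdio>

// Forward: y = clip(alpha * x + beta, 0, 1).
float hard_sigmoid(float x, float alpha, float beta) {
  float y = alpha * x + beta;
  if (y > 1.f) y = 1.f;
  if (y < 0.f) y = 0.f;
  return y;
}

// Backward: gradient is alpha inside the linear region, zero where clipped.
float hard_sigmoid_grad(float x, float out_grad, float alpha, float beta) {
  float y = alpha * x + beta;
  return (y > 0.f && y < 1.f) ? alpha * out_grad : 0.f;
}

int main() {
  const float xs[] = {-4.f, 0.f, 4.f};  // clipped low, linear, clipped high
  for (float x : xs) {
    std::printf("x=%5.1f  y=%4.2f  grad=%4.2f\n",
                x, hard_sigmoid(x, 0.2f, 0.5f),
                hard_sigmoid_grad(x, 1.f, 0.2f, 0.5f));
  }
  return 0;
}
```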
