From 49df2a784be8dabda85f82620ff4601ce113d332 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Mon, 25 Dec 2017 20:17:27 +0800 Subject: [PATCH] refine gradient function --- paddle/operators/cos_sim_op.h | 101 +++++++++++----------------------- 1 file changed, 33 insertions(+), 68 deletions(-) diff --git a/paddle/operators/cos_sim_op.h b/paddle/operators/cos_sim_op.h index e96592ab28ad3..cd5c703c303f4 100644 --- a/paddle/operators/cos_sim_op.h +++ b/paddle/operators/cos_sim_op.h @@ -13,7 +13,6 @@ limitations under the License. */ #pragma once -#include "paddle/framework/eigen.h" #include "paddle/framework/op_registry.h" #include "paddle/operators/elementwise_op_function.h" @@ -21,16 +20,9 @@ namespace paddle { namespace operators { using Tensor = framework::Tensor; -template -using EigenMatrix = framework::EigenMatrix; -template -using EigenVector = framework::EigenVector; template static void ForEachZip(IT1 begin1, IT1 last1, IT2 begin2, Callback callback) { - // This method could be implemented in CUDA for (; begin1 < last1; ++begin1, ++begin2) { callback(*begin1, *begin2); } @@ -66,15 +58,8 @@ struct CosSimFunctor { x_norm_[x_offset] = xx; y_norm_[y_offset] = yy; z_[x_offset] = xy / (xx * yy); - } else { + } else { // This can be wrote in a better way. auto* y = y_; - // if (yy == -1) { - // yy = 0; - // for (size_t i = 0; i < cols_; ++i) { - // yy += y[i] * y[i]; - // } - // y_norm[0] = sqrt(yy); - // } for (size_t i = 0; i < cols_; ++i) { xx += x[i] * x[i]; yy += y[i] * y[i]; // only need @@ -144,22 +129,25 @@ struct CosSimGradFunctor { dx_(dx), cols_(static_cast(cols)) {} - void operator()(const T& x_norm, const T& y_norm) const { + inline void operator()(const T& x_norm, const T& y_norm) const { size_t x_offset = &x_norm - x_norm_; size_t y_offset = &y_norm - y_norm_; auto x_norm_square = x_norm_[x_offset] * x_norm_[x_offset]; - // auto y_norm_square = y_norm_[y_offset] * y_norm_[y_offset]; auto xy_norm_prod = x_norm_[x_offset] * y_norm_[y_offset]; auto dz = dz_[x_offset]; + auto z = z_[x_offset]; auto* dx = dx_ + cols_ * x_offset; auto* x = x_ + cols_ * x_offset; + auto* y = y_ + cols_ * y_offset; - auto z = z_[x_offset]; + auto reciprocal_xy_norm_prod = 1 / xy_norm_prod; + auto reciprocal_x_norm_square = 1 / x_norm_square; for (size_t i = 0; i < cols_; ++i) { - dx[i] = dz * (y[i] / xy_norm_prod - z * x[i] / x_norm_square); + dx[i] = dz * (y[i] * reciprocal_xy_norm_prod - + z * x[i] * reciprocal_x_norm_square); } } @@ -173,10 +161,10 @@ struct CosSimGradFunctor { const size_t cols_; }; -template +template struct CosSimDxFunctor { CosSimDxFunctor(const T* x_norm, const T* y_norm, const T* x, const T* y, - const T* z, const T* dz, T* dx, int cols) + const T* z, const T* dz, T* dx, T* dy, int cols) : x_norm_(x_norm), y_norm_(y_norm), x_(x), @@ -184,58 +172,34 @@ struct CosSimDxFunctor { z_(z), dz_(dz), dx_(dx), - cols_(static_cast(cols)) {} - - void operator()(const T& x_norm, const T& y_norm) const { - size_t x_offset = &x_norm - x_norm_; - - auto x_norm_square = x_norm_[x_offset] * x_norm_[x_offset]; - auto xy_norm_prod = x_norm_[x_offset] * y_norm_[0]; - auto dz = dz_[x_offset]; - auto z = z_[x_offset]; - - auto* dx = dx_ + cols_ * x_offset; - auto* x = x_ + cols_ * x_offset; - - for (size_t i = 0; i < cols_; ++i) { - dx[i] = dz * (y_[i] / xy_norm_prod - z * x[i] / x_norm_square); - } - } - - const T* x_norm_; - const T* y_norm_; - const T* x_; - const T* y_; - const T* z_; - const T* dz_; - T* dx_; - const size_t cols_; -}; - -template -struct CosSimDyFunctor { - CosSimDyFunctor(const T* x_norm, const T* y_norm, const T* x, const T* y, - const T* z, const T* dz, T* dy, int cols) - : x_norm_(x_norm), - y_norm_(y_norm), - x_(x), - y_(y), - z_(z), - dz_(dz), dy_(dy), cols_(static_cast(cols)) {} - void operator()(const T& x_norm, const T& y_norm) const { + inline void operator()(const T& x_norm, const T& y_norm) const { size_t x_offset = &x_norm - x_norm_; - auto y_norm_square = y_norm_[0] * y_norm_[0]; auto xy_norm_prod = x_norm_[x_offset] * y_norm_[0]; auto dz = dz_[x_offset]; auto z = z_[x_offset]; auto* x = x_ + cols_ * x_offset; + auto reciprocal_xy_norm_prod = 1 / xy_norm_prod; - for (size_t i = 0; i < cols_; ++i) { - dy_[i] += dz * (x[i] / xy_norm_prod - z * y_[i] / y_norm_square); + if (Dx) { + auto x_norm_square = x_norm_[x_offset] * x_norm_[x_offset]; + auto* dx = dx_ + cols_ * x_offset; + auto* x = x_ + cols_ * x_offset; + auto reciprocal_x_norm_square = 1 / x_norm_square; + for (size_t i = 0; i < cols_; ++i) { + dx[i] = dz * (y_[i] * reciprocal_xy_norm_prod - + z * x[i] * reciprocal_x_norm_square); + } + } else { + auto y_norm_square = y_norm_[0] * y_norm_[0]; + auto reciprocal_y_norm_square = 1 / y_norm_square; + for (size_t i = 0; i < cols_; ++i) { + dy_[i] += dz * (x[i] * reciprocal_xy_norm_prod - + z * y_[i] * reciprocal_y_norm_square); + } } } @@ -245,6 +209,7 @@ struct CosSimDyFunctor { const T* y_; const T* z_; const T* dz_; + T* dx_; T* dy_; const size_t cols_; }; @@ -287,17 +252,17 @@ class CosSimGradKernel : public framework::OpKernel { } } else { if (out_grad_x) { - CosSimDxFunctor functor( + CosSimDxFunctor functor( in_x_norm->data(), in_y_norm->data(), in_x->data(), in_y->data(), in_z->data(), in_grad_z->data(), - out_grad_x->mutable_data(context.GetPlace()), cols); + out_grad_x->mutable_data(context.GetPlace()), nullptr, cols); ForEachZip(in_x_norm->data(), in_x_norm->data() + rows_x, in_y_norm->data(), functor); } if (out_grad_y) { - CosSimDyFunctor functor( + CosSimDxFunctor functor( in_x_norm->data(), in_y_norm->data(), in_x->data(), - in_y->data(), in_z->data(), in_grad_z->data(), + in_y->data(), in_z->data(), in_grad_z->data(), nullptr, out_grad_y->mutable_data(context.GetPlace()), cols); ForEachZip(in_x_norm->data(), in_x_norm->data() + rows_x, in_y_norm->data(), functor);