Skip to content

Commit

Permalink
[src] Cosmetic changes to natural-gradient code (#3108)
Browse files Browse the repository at this point in the history
  • Loading branch information
danpovey authored Mar 14, 2019
1 parent 633e61c commit 8cafd32
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 16 deletions.
24 changes: 12 additions & 12 deletions src/nnet3/natural-gradient-online.cc
Original file line number Diff line number Diff line change
Expand Up @@ -119,26 +119,26 @@ void OnlineNaturalGradient::InitDefault(int32 D) {
t_ = 0;
}

void OnlineNaturalGradient::Init(const CuMatrixBase<BaseFloat> &R0) {
int32 D = R0.NumCols();
void OnlineNaturalGradient::Init(const CuMatrixBase<BaseFloat> &X0) {
int32 D = X0.NumCols();
// for locking reasons it's better to use a different object.
OnlineNaturalGradient this_copy(*this);
this_copy.InitDefault(D);
this_copy.t_ = 1; // Prevent recursion to Init() again.

CuMatrix<BaseFloat> R0_copy(R0.NumRows(), R0.NumCols(), kUndefined);
CuMatrix<BaseFloat> X0_copy(X0.NumRows(), X0.NumCols(), kUndefined);
// 'num_init_iters' is the number of iterations with the same data from a pseudorandom
// start. This is a faster way of starting than doing eigenvalue
// decomposition.
//
// Note: we only do three iterations of initialization if we have enough data
// that it's reasonably possible to estimate the subspace of dimension
// this_copy.rank_. If we don't have more than that many rows in our initial
// minibatch R0, we just do one iteration... this gives us almost exactly
// (barring small effects due to epsilon_ > 0) the row subspace of R0 after
// minibatch X0, we just do one iteration... this gives us almost exactly
// (barring small effects due to epsilon_ > 0) the row subspace of X0 after
// one iteration anyway.
int32 num_init_iters;
if (R0.NumRows() <= this_copy.rank_)
if (X0.NumRows() <= this_copy.rank_)
num_init_iters = 1;
else
num_init_iters = 3;
Expand All @@ -147,8 +147,8 @@ void OnlineNaturalGradient::Init(const CuMatrixBase<BaseFloat> &R0) {
// initialize.
for (int32 i = 0; i < num_init_iters; i++) {
BaseFloat scale;
R0_copy.CopyFromMat(R0);
this_copy.PreconditionDirections(&R0_copy, &scale);
X0_copy.CopyFromMat(X0);
this_copy.PreconditionDirections(&X0_copy, &scale);
}
rank_ = this_copy.rank_;
W_t_.Swap(&this_copy.W_t_);
Expand Down Expand Up @@ -197,7 +197,7 @@ void OnlineNaturalGradient::PreconditionDirections(
t_ += 1;
}

void OnlineNaturalGradient::ReorthogonalizeXt1(
void OnlineNaturalGradient::ReorthogonalizeRt1(
const VectorBase<BaseFloat> &d_t1,
BaseFloat rho_t1,
CuMatrixBase<BaseFloat> *W_t1,
Expand All @@ -214,7 +214,7 @@ void OnlineNaturalGradient::ReorthogonalizeXt1(
ComputeEt(d_t1, beta_t1, &e_t1, &sqrt_e_t1, &inv_sqrt_e_t1);

temp_O->SymAddMat2(1.0, *W_t1, kNoTrans, 0.0);
// O_t = E_t^{-0.5} W_t W_t^T E_t^{-0.5}
// O_{t+1} = E_{t+1}^{-0.5} W_{t+1} W_{t+1}^T E_{t+1}^{-0.5}
Matrix<BaseFloat> O_mat(*temp_O);
SpMatrix<BaseFloat> O(O_mat, kTakeLower);
for (int32 i = 0; i < R; i++) {
Expand Down Expand Up @@ -439,7 +439,7 @@ void OnlineNaturalGradient::PreconditionDirectionsInternal(
if (self_debug_) {
KALDI_WARN << "Reorthogonalizing.";
}
ReorthogonalizeXt1(d_t1,
ReorthogonalizeRt1(d_t1,
rho_t1,
&W_t1,
&J_t,
Expand Down Expand Up @@ -510,7 +510,7 @@ void OnlineNaturalGradient::ComputeWt1(int32 N,
// B_t = J_t + (1-\eta)/(\eta/N) (D_t + \rho_t I) W_t
J_t->AddDiagVecMat(1.0, w_t_coeff_gpu, W_t, kNoTrans, 1.0);

// A_t = (\eta/N) E_{t+1}^{0.5} C_t^{-0.5} U_t^T E_t^{-0.5} B_t
// A_t = (\eta/N) E_{t+1}^{0.5} C_t^{-0.5} U_t^T E_t^{-0.5}
Matrix<BaseFloat> A_t(U_t, kTrans);
for (int32 i = 0; i < R; i++) {
BaseFloat i_factor = (eta / N) * sqrt_e_t1(i) * inv_sqrt_c_t(i);
Expand Down
8 changes: 4 additions & 4 deletions src/nnet3/natural-gradient-online.h
Original file line number Diff line number Diff line change
Expand Up @@ -375,8 +375,8 @@ namespace nnet3 {
* Initialization *
Now, a note on what we do on time t = 0, i.e. for the first minibatch. We
initialize X_0 to the top R eigenvectors of 1/N X_0 X_0^T, where N is the
minibatch size (num-rows of R0). If L is the corresponding RxR diagonal
initialize R_0 to the top R eigenvectors of 1/N X_0 X_0^T, where N is the
minibatch size (num-rows of X0). If L is the corresponding RxR diagonal
matrix of eigenvalues, then we will set D_0 = L - \rho_0 I. We set \rho_0
to ensure that
tr(F_0) = 1/N tr(X_0 X_0^T),
Expand Down Expand Up @@ -457,7 +457,7 @@ class OnlineNaturalGradient {
not.
*/
void PreconditionDirections(CuMatrixBase<BaseFloat> *R,
void PreconditionDirections(CuMatrixBase<BaseFloat> *X,
BaseFloat *scale);


Expand Down Expand Up @@ -515,7 +515,7 @@ class OnlineNaturalGradient {
// This function is called if C_t has high condition number; it makes sure
// that R_{t+1} is orthogonal. See the section in the extended comment above
// on "keeping R_t orthogonal".
void ReorthogonalizeXt1(const VectorBase<BaseFloat> &d_t1,
void ReorthogonalizeRt1(const VectorBase<BaseFloat> &d_t1,
BaseFloat rho_t1,
CuMatrixBase<BaseFloat> *W_t1,
CuMatrixBase<BaseFloat> *temp_W,
Expand Down

0 comments on commit 8cafd32

Please sign in to comment.