From 810ece8fe0044b6ca74e75ec283ad21deaa84eab Mon Sep 17 00:00:00 2001
From: Yang Zhang
Date: Fri, 3 Apr 2020 17:59:41 +0800
Subject: [PATCH] Fix learning rate scaling bug

This bug is quite peculiar and hard to track down. When the learning rate
for a parameter is scaled via `param_attr` and a learning rate scheduler is
used, `append_optimizer_op` errors out complaining that the `LearningRate`
input is null.

It turns out learning rate scaling is done in `_create_param_lr`, which
basically adds a scale op. The problem is that the op is appended to
`orig_prog` (since the `global_learning_rate()` variable lives in it), so
the resulting scaled learning rate variable cannot be found in `train_prog`.

The reason it worked previously without lr scaling is that `clone()` creates
a variable with the same name as the `global_learning_rate()` variable, and
that same-named variable is the one used in `append_optimizer_op`.
---
 model.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/model.py b/model.py
index dea21bb9832940..53b9bd5f4f817d 100644
--- a/model.py
+++ b/model.py
@@ -410,7 +410,8 @@ def _make_program(self, mode):
                 and self.model._optimizer._learning_rate_map:
             # HACK workaround learning rate map issue
             lr_var = self.model._optimizer._learning_rate_map[self._orig_prog]
-            self.model._optimizer._learning_rate_map[prog] = lr_var
+            new_lr_var = prog.global_block().vars[lr_var.name]
+            self.model._optimizer._learning_rate_map[prog] = new_lr_var
 
         losses = []
         metrics = []
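
Note: a minimal sketch of the name-based lookup the fix relies on, assuming the
paddle.fluid 1.x static-graph API; the program and variable names below are
illustrative, not taken from this patch:

    import paddle.fluid as fluid

    # Create a learning rate variable in the original program, roughly as a
    # lr scheduler would.
    orig_prog = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(orig_prog, startup_prog):
        lr_var = fluid.layers.create_global_var(
            shape=[1], value=0.01, dtype='float32',
            persistable=True, name='learning_rate')

    # clone() copies variables by name into the new program; the original
    # Variable object itself still belongs to orig_prog.
    train_prog = orig_prog.clone()

    # Re-resolve the variable by name in the clone, mirroring the fix above.
    new_lr_var = train_prog.global_block().vars[lr_var.name]

    print(new_lr_var is lr_var)            # False: distinct Variable objects
    print(new_lr_var.name == lr_var.name)  # True: same name in both programs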