From b473bd5288b5cf102ca6ef0900f55b32b7faf1f9 Mon Sep 17 00:00:00 2001 From: Roy Sadaka Date: Sun, 18 Oct 2020 21:41:00 +0300 Subject: [PATCH] * v0.2.2 * Added predict method to trainer * Added State.PREDICT enum * Added Phase.PREDICT_BEGIN and Phase.PREDICT_END enums * Added CollectOutputs callback * Added predict example * Moving some elements from nn.functional to nn * Added sample count to save/load model --- CHANGELOG.txt | 12 ++ README.md | 75 +++++++--- examples/basic/train.py | 2 +- examples/data_loader/train.py | 3 +- examples/multiple_inputs/model.py | 1 - examples/scheduler_step_on_batch_end/train.py | 4 +- examples/train_evaluate_predict/train.py | 67 +++++++++ examples/utils.py | 10 +- lpd/callbacks/__init__.py | 4 +- lpd/callbacks/callback_base.py | 6 +- lpd/callbacks/collect_outputs.py | 39 ++++++ lpd/enums/phase.py | 3 + lpd/enums/state.py | 1 + lpd/extensions/custom_layers.py | 12 +- lpd/trainer.py | 131 ++++++++++++++---- main.py | 5 +- setup.py | 2 +- 17 files changed, 309 insertions(+), 68 deletions(-) create mode 100644 examples/train_evaluate_predict/train.py create mode 100644 lpd/callbacks/collect_outputs.py diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 6a84a71..c834d81 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,6 +1,18 @@ Change Log ========== + +0.2.2 (18/10/2020) +----------------- +* Added predict method to trainer +* Added State.PREDICT enum +* Added Phase.PREDICT_BEGIN and Phase.PREDICT_END enums +* Added CollectOutputs callback +* Added predict example +* Moving some elements from nn.functional to nn +* Added sample count to save/load model + + 0.2.1 (15/10/2020) ----------------- * Fix in Tensorboard metrics output diff --git a/README.md b/README.md index 5fbe7e7..f938a54 100644 --- a/README.md +++ b/README.md @@ -29,17 +29,18 @@ A Fast, Flexible Trainer with Callbacks and Extensions for PyTorch from lpd.trainer import Trainer from lpd.enums import Phase, State, MonitorType, MonitorMode, StatsType from lpd.callbacks import StatsPrint, ModelCheckPoint, Tensorboard, EarlyStopping, SchedulerStep + from lpd.extensions.custom_schedulers import KerasDecay from lpd.extensions.custom_metrics import binary_accuracy_with_logits from lpd.utils.torch_utils import get_gpu_device_if_available from lpd.utils.general_utils import seed_all - seed_all(seed=42) + seed_all(seed=42) # because its the answer to life and the universe device = get_gpu_device_if_available() # with fallback to CPU if GPU not avilable - model = TestModel(config, num_embeddings).to(device) #this is your model class, and its being sent to the relevant device + model = TestModel(config, num_embeddings).to(device) # this is your model class, and its being sent to the relevant device optimizer = optim.SGD(params=model.parameters()) - scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=5, verbose=True) - loss_func = nn.BCEWithLogitsLoss().to(device) #this is your loss class, already sent to the relevant device + scheduler = KerasDecay(optimizer, decay=0.01, last_step=-1) # decay scheduler using keras formula + loss_func = nn.BCEWithLogitsLoss().to(device) # this is your loss class, already sent to the relevant device metric_name_to_func = {'acc':binary_accuracy_with_logits} # add as much metrics as you like # you can use some of the defined callbacks, or you can create your own @@ -73,6 +74,11 @@ A Fast, Flexible Trainer with Callbacks and Extensions for PyTorch trainer.evaluate(test_data_loader, test_steps) ``` +### Making predictions +```python + predictions = 
trainer.predict(data_loader, steps) +``` + ## TrainerStats ``Trainer`` tracks stats for `train/validate/test` and you can access them in your custom callbacks or any other place that has access to your trainer. @@ -131,25 +137,29 @@ Evaluation phases and states will behave as follow State.EXTERNAL Phase.TEST_END ``` -With phases and states, you have full control over the timing of your callbacks, -### SchedulerStep Callback - -Will invoke ``step()`` on your scheduler. -For example, SchedulerStep callback to control your scheduler, -but only at the end of every batch, and only when in train state (as opposed to validation and test) -then define your SchedulerStep callback like so: +Predict phases and states will behave as follow ```python - from lpd.callbacks import SchedulerStep - from lpd.enums import Phase, State - SchedulerStep(apply_on_phase=Phase.BATCH_END, apply_on_states=State.TRAIN) -``` -In case you need it on validation state as well, pass a list for ``apply_on_states`` like so: -```python - SchedulerStep(apply_on_phase=Phase.BATCH_END, apply_on_states=[State.TRAIN, State.VAL]) + State.EXTERNAL + Phase.PREDICT_BEGIN + State.PREDICT + # batches loop: + Phase.BATCH_BEGIN + # batch + Phase.BATCH_END + State.EXTERNAL + Phase.PREDICT_END ``` +With phases and states, you have full control over the timing of your callbacks, + +### StatsPrint Callback +Below is an output example for ``StatsPrint`` callback that will print an epoch summary at the end of every epoch + +![EpochSummary](https://raw.githubusercontent.com/RoySadaka/ReposMedia/main/lpd/images/epoch_summary.png) + + ### ModelCheckPoint Callback Saving a checkpoint when a monitored loss/metric has improved. The callback will save the model, optimizer, scheduler, and epoch number. @@ -182,6 +192,23 @@ epochs, and stop the trainer in that case monitor_mode=MonitorMode.MIN) ``` +### SchedulerStep Callback + +Will invoke ``step()`` on your scheduler. 
+ +For example, SchedulerStep callback to control your scheduler, +but only at the end of every batch, and only when in train state (as opposed to validation and test) +then define your SchedulerStep callback like so: +```python + from lpd.callbacks import SchedulerStep + from lpd.enums import Phase, State + SchedulerStep(apply_on_phase=Phase.BATCH_END, apply_on_states=State.TRAIN) +``` +In case you need it on validation state as well, pass a list for ``apply_on_states`` like so: +```python + SchedulerStep(apply_on_phase=Phase.BATCH_END, apply_on_states=[State.TRAIN, State.VAL]) +``` + ### Tensorboard Callback Will export the loss and the metrics at a given phase and state, in a format that can be viewed on Tensorboard @@ -192,13 +219,15 @@ Will export the loss and the metrics at a given phase and state, in a format tha ``` +### CollectOutputs Callback +In case you want to collect the outputs of any given state during training +```python + CollectOutputs(apply_on_phase=Phase.BATCH_END, apply_on_states=State.VAL) +``` +CollectOutputs is automatically used in ``trainer.predict(...)`` to collect the predictions -### StatsPrint Callback -Below is an output example for ``StatsPrint`` callback that will print an epoch summary at the end of every epoch - -![EpochSummary](https://raw.githubusercontent.com/RoySadaka/ReposMedia/main/lpd/images/epoch_summary.png) -You can also create custom callbacks +### Create your custom callbacks ```python from lpd.enums import Phase, State diff --git a/examples/basic/train.py b/examples/basic/train.py index 96c1b59..b0453a3 100644 --- a/examples/basic/train.py +++ b/examples/basic/train.py @@ -17,7 +17,7 @@ def get_parameters(): # N is batch size; D_in is input dimension; # H is hidden dimension; D_out is output dimension. 
N, D_in, H, D_out = 64, 1000, 100, 10 - num_epochs = 50 + num_epochs = 10 data_loader = eu.examples_data_generator(N, D_in, D_out) data_loader_steps = 100 return N, D_in, H, D_out, num_epochs, data_loader, data_loader_steps diff --git a/examples/data_loader/train.py b/examples/data_loader/train.py index 4e58917..21ed3ad 100644 --- a/examples/data_loader/train.py +++ b/examples/data_loader/train.py @@ -6,7 +6,6 @@ import torch as T import torch.nn as nn import torch.optim as optim -import torch.nn.functional as F from torch.utils.data import Dataset, DataLoader from lpd.trainer import Trainer @@ -88,7 +87,7 @@ def __init__(self, D_in, H, D_out, num_embeddings, embedding_dim): embedding_dim=embedding_dim) # nn.init.uniform_(self.embedding_layer.weight, a=-0.05, b=0.05) # I PREFER THE INIT THAT TensorFlow DO FOR Embedding - self.dense = Dense(embedding_dim, H, use_bias=True, activation=F.relu) + self.dense = Dense(embedding_dim, H, use_bias=True, activation=nn.ReLU()) self.dense_out = Dense(H, D_out, use_bias=True, activation=None) def forward(self, x): # (batch, D_in) diff --git a/examples/multiple_inputs/model.py b/examples/multiple_inputs/model.py index 2adbb51..8bd9760 100644 --- a/examples/multiple_inputs/model.py +++ b/examples/multiple_inputs/model.py @@ -1,6 +1,5 @@ import torch as T import torch.nn as nn -import torch.nn.functional as F import torch.optim as optim from lpd.trainer import Trainer diff --git a/examples/scheduler_step_on_batch_end/train.py b/examples/scheduler_step_on_batch_end/train.py index e897d7a..30a6d90 100644 --- a/examples/scheduler_step_on_batch_end/train.py +++ b/examples/scheduler_step_on_batch_end/train.py @@ -13,7 +13,7 @@ def get_parameters(): # N is batch size; D_in is input dimension; # H is hidden dimension; D_out is output dimension. N, D_in, H, D_out = 64, 1000, 100, 10 - num_epochs = 50 + num_epochs = 10 data_loader = eu.examples_data_generator(N, D_in, D_out) data_loader_steps = 100 return N, D_in, H, D_out, num_epochs, data_loader, data_loader_steps @@ -40,7 +40,7 @@ def get_trainer(N, D_in, H, D_out, num_epochs, data_loader, data_loader_steps): # NOTICE!!! WE USE verbose=1 TO SEE THE PRINTS FOR THIS EXAMPLE, BUT YOU MIGHT PREFER TO USE verbose=0 or verbose=2 # BECAUSE ON BATCH LEVEL IT WILL PRINT A LOT callbacks = [ - SchedulerStep(apply_on_phase=Phase.BATCH_END, apply_on_states=State.TRAIN, verbose=1), #CAN ALSO BE apply_on_states=[State.TRAIN] + SchedulerStep(apply_on_phase=Phase.BATCH_END, apply_on_states=State.TRAIN, verbose=1), #CAN ALSO BE IN FORM OF ARRAY - apply_on_states=[State.TRAIN] StatsPrint(apply_on_phase=Phase.EPOCH_END) ] diff --git a/examples/train_evaluate_predict/train.py b/examples/train_evaluate_predict/train.py new file mode 100644 index 0000000..ecbff00 --- /dev/null +++ b/examples/train_evaluate_predict/train.py @@ -0,0 +1,67 @@ +import torch as T +import torch.nn as nn +import torch.optim as optim +from lpd.trainer import Trainer +from lpd.callbacks import StatsPrint +from lpd.extensions.custom_schedulers import DoNothingToLR +import lpd.utils.torch_utils as tu +import lpd.utils.general_utils as gu +import examples.utils as eu + +def get_parameters(): + # N is batch size; D_in is input dimension; + # H is hidden dimension; D_out is output dimension. 
+ N, D_in, H, D_out = 8, 1000, 100, 10 + num_epochs = 5 + data_loader = eu.examples_data_generator(N, D_in, D_out) + data_loader_steps = 100 + return N, D_in, H, D_out, num_epochs, data_loader, data_loader_steps + +def get_trainer(N, D_in, H, D_out, num_epochs, data_loader, data_loader_steps): + + device = tu.get_gpu_device_if_available() + + model = eu.get_basic_model(D_in, H, D_out).to(device) + + loss_func = nn.MSELoss(reduction='sum').to(device) + + optimizer = optim.Adam(model.parameters(), lr=1e-4) + + scheduler = DoNothingToLR() #CAN ALSO USE scheduler=None, BUT DoNothingToLR IS MORE EXPLICIT + + metric_name_to_func = None # THIS EXAMPLE DOES NOT USE METRICS, ONLY LOSS + + callbacks = [ + StatsPrint() + ] + + trainer = Trainer(model=model, + device=device, + loss_func=loss_func, + optimizer=optimizer, + scheduler=scheduler, + metric_name_to_func=metric_name_to_func, + train_data_loader=data_loader, + val_data_loader=data_loader, + train_steps=data_loader_steps, + val_steps=data_loader_steps, + num_epochs=num_epochs, + callbacks=callbacks, + name='Train-Evaluate-Predict-Example') + return trainer + +def run(): + gu.seed_all(42) # BECAUSE ITS THE ANSWER TO LIFE AND THE UNIVERSE + + N, D_in, H, D_out, num_epochs, data_loader, data_loader_steps = get_parameters() + + trainer = get_trainer(N, D_in, H, D_out, num_epochs, data_loader, data_loader_steps) + + trainer.summary() + + trainer.train() + + trainer.evaluate(data_loader, data_loader_steps) + + data_generator_for_predictions = eu.examples_prediction_data_generator(data_loader, data_loader_steps) + predictions = trainer.predict(data_generator_for_predictions, data_loader_steps) \ No newline at end of file diff --git a/examples/utils.py b/examples/utils.py index 075e0ab..0459ea6 100644 --- a/examples/utils.py +++ b/examples/utils.py @@ -1,6 +1,5 @@ import torch as T import torch.nn as nn -import torch.nn.functional as F from lpd.extensions.custom_layers import Dense def examples_data_generator(N, D_in, D_out, binary_out=False): @@ -13,9 +12,16 @@ def examples_data_generator(N, D_in, D_out, binary_out=False): while True: yield x, y #YIELD THE SAME X,y every time +def examples_prediction_data_generator(origin_data_loader, steps): + for x,y in origin_data_loader: + steps -= 1 + yield x + if steps == 0: + break + def get_basic_model(D_in, H, D_out): return nn.Sequential( - Dense(D_in, H, use_bias=True, activation=F.relu), + Dense(D_in, H, use_bias=True, activation=nn.ReLU()), Dense(H, D_out, use_bias=True, activation=None) ) \ No newline at end of file diff --git a/lpd/callbacks/__init__.py b/lpd/callbacks/__init__.py index 4bdb587..7df4eee 100644 --- a/lpd/callbacks/__init__.py +++ b/lpd/callbacks/__init__.py @@ -1,9 +1,11 @@ +from lpd.callbacks.callback_base import CallbackBase +from lpd.callbacks.callback_monitor import CallbackMonitor, CallbackMonitorResult from lpd.callbacks.stats_print import StatsPrint from lpd.callbacks.model_checkpoint import ModelCheckPoint from lpd.callbacks.tensorboard import Tensorboard from lpd.callbacks.early_stopping import EarlyStopping from lpd.callbacks.scheduler_step import SchedulerStep from lpd.callbacks.callback_context import CallbackContext -from lpd.callbacks.callback_monitor import CallbackMonitor, CallbackMonitorResult +from lpd.callbacks.collect_outputs import CollectOutputs \ No newline at end of file diff --git a/lpd/callbacks/callback_base.py b/lpd/callbacks/callback_base.py index 32c712b..f38fbc3 100644 --- a/lpd/callbacks/callback_base.py +++ b/lpd/callbacks/callback_base.py @@ -82,10 
+82,12 @@ def _validations(self): Phase.TRAIN_END:{None, State.EXTERNAL}, Phase.EPOCH_BEGIN:{None, State.EXTERNAL}, Phase.EPOCH_END:{None, State.EXTERNAL}, - Phase.BATCH_BEGIN:{None, State.TRAIN, State.VAL, State.TEST}, - Phase.BATCH_END:{None, State.TRAIN, State.VAL, State.TEST}, + Phase.BATCH_BEGIN:{None, State.TRAIN, State.VAL, State.TEST, State.PREDICT}, + Phase.BATCH_END:{None, State.TRAIN, State.VAL, State.TEST, State.PREDICT}, Phase.TEST_BEGIN:{None, State.EXTERNAL}, Phase.TEST_END:{None, State.EXTERNAL}, + Phase.PREDICT_BEGIN:{None, State.EXTERNAL}, + Phase.PREDICT_END:{None, State.EXTERNAL}, } if self.apply_on_states is not None: diff --git a/lpd/callbacks/collect_outputs.py b/lpd/callbacks/collect_outputs.py new file mode 100644 index 0000000..de1525f --- /dev/null +++ b/lpd/callbacks/collect_outputs.py @@ -0,0 +1,39 @@ +from lpd.enums import Phase, State, MonitorType, MonitorMode, StatsType +from lpd.callbacks.callback_base import CallbackBase +from lpd.callbacks.callback_context import CallbackContext +from lpd.callbacks.callback_monitor import CallbackMonitor, CallbackMonitorResult +from typing import Union, List, Optional, Dict + +class CollectOutputs(CallbackBase): + """ + This callback will collect outputs per each state, (it is currently used in trainer.predict() method.) + It will collect the numpy outputs in the defined states to a dictionary. + + Methods: + get_outputs_for_state - for a given state, returns the collected outputs + + Args: + apply_on_phase - see in CallbackBase + apply_on_states - see in CallbackBase + """ + + def __init__(self, + apply_on_phase: Phase=Phase.BATCH_END, + apply_on_states: Union[State, List[State]]=None): + super(CollectOutputs, self).__init__(apply_on_phase, apply_on_states) + self.state_to_outputs = {} + + def get_outputs_for_state(self, state: State): + return self.state_to_outputs[state] + + def __call__(self, callback_context: CallbackContext): + c = callback_context #READABILITY DOWN THE ROAD + state = c.trainer_state + + if self.should_apply_on_state(c): + + if state not in self.state_to_outputs: + self.state_to_outputs[state] = [] + + last_outputs = c.trainer.get_last_outputs() + self.state_to_outputs[state].append(last_outputs) diff --git a/lpd/enums/phase.py b/lpd/enums/phase.py index 27c2969..5710429 100644 --- a/lpd/enums/phase.py +++ b/lpd/enums/phase.py @@ -10,5 +10,8 @@ class Phase(Enum): BATCH_END = auto() TEST_BEGIN = auto() TEST_END = auto() + PREDICT_BEGIN = auto() + PREDICT_END = auto() + def __str__(self): return self.name \ No newline at end of file diff --git a/lpd/enums/state.py b/lpd/enums/state.py index 521d8ca..e5d4ed6 100644 --- a/lpd/enums/state.py +++ b/lpd/enums/state.py @@ -12,5 +12,6 @@ class State(Enum): TRAIN = auto() VAL = auto() TEST = auto() + PREDICT = auto() def __str__(self): return self.name \ No newline at end of file diff --git a/lpd/extensions/custom_layers.py b/lpd/extensions/custom_layers.py index f5c987b..a562413 100644 --- a/lpd/extensions/custom_layers.py +++ b/lpd/extensions/custom_layers.py @@ -1,6 +1,5 @@ import torch as T import torch.nn as nn -import torch.nn.functional as F import math class MatMul2D(nn.Module): @@ -53,7 +52,7 @@ class Attention(nn.Module): Args: key_dim - as defined in the paper, the number of expected features in the encoder inputs - use_query_dense - weather to pass q input into another Dense layer, mostly used in Usage (2), to + use_query_dense - whether to pass q input into another Dense layer, mostly used in Usage (2), to run q into a transformation that will 
transform it into the vector space of k and v name - optional, any string to describe this layer """ @@ -65,8 +64,9 @@ def __init__(self, key_dim, use_query_dense=False, name=None): self.use_query_dense = use_query_dense self.name = name if name else 'attention' #LAYERS - self.mat_mul2d = MatMul2D(transpose_b=False, name = f'{self.name}__MatMul2D') - self.mat_mul2d_t = MatMul2D(transpose_b=True, name = f'{self.name}__MatMul2DT') + self.mat_mul2d = MatMul2D(transpose_b=False, name = f'{self.name}__MatMul2D') + self.mat_mul2d_t = MatMul2D(transpose_b=True, name = f'{self.name}__MatMul2DT') + self.softmax_last_dim = nn.Softmax(dim=-1) if self.use_query_dense: # SOMETIMES WE WANT TO GO THROUGH ANOTHER TRANSFORMATION BEFORE RUNNING THE QUERY, # FOR EXAMPLE, WHEN THIS IS USED AS A STANDALONE LAYER @@ -93,7 +93,7 @@ def forward(self, q,k,v, mask = None): mask_ready = T.log(mask) # (batch, 1, num_elements) scores += mask_ready # (batch, ?, num_elements) (+= is doing broadcasting) - attention_weights = F.softmax(scores, dim=-1) # (batch, ?, num_elements) + attention_weights = self.softmax_last_dim(scores) # (batch, ?, num_elements) attention_output = self.mat_mul2d(attention_weights, v) # (batch, ?, key_dim) return attention_output # (batch, ?, key_dim) @@ -163,7 +163,7 @@ def __init__(self, in_dim, self.name = name if name else 'transformer_encoder__feed_forward' #LAYERS - self.hidden_dense = Dense(in_dim=self.in_dim, out_dim=self.out_dim * self.expansion_rate, use_bias=True, activation=F.relu, name = f'{self.name}__Hidden-Dense') + self.hidden_dense = Dense(in_dim=self.in_dim, out_dim=self.out_dim * self.expansion_rate, use_bias=True, activation=nn.ReLU(), name = f'{self.name}__Hidden-Dense') self.output_dense = Dense(in_dim=self.out_dim * self.expansion_rate, out_dim=self.out_dim, use_bias=True, activation=None, name = f'{self.name}__Out-Dense') self.dropout = nn.Dropout(p=self.drop_out_proba) diff --git a/lpd/trainer.py b/lpd/trainer.py index d8f122a..fd6f337 100644 --- a/lpd/trainer.py +++ b/lpd/trainer.py @@ -1,6 +1,6 @@ import torch as T from tqdm import tqdm -from lpd.callbacks import CallbackContext +from lpd.callbacks import CallbackContext, CollectOutputs from lpd.enums import State, Phase from lpd.trainer_stats import TrainerStats import lpd.utils.file_utils as fu @@ -68,7 +68,6 @@ def __init__(self, model, self.sample_count_in_epoch = 0 self.iteration = 0 self.iteration_in_epoch = 0 - self._stopped = False self.state = State.EXTERNAL self.phase = Phase.IDLE @@ -79,6 +78,9 @@ def __init__(self, model, self.test_stats = TrainerStats(self.metric_name_to_func) self.test_last_loss_object = None + self._stopped = False + self._last_outputs = None + def _train_handler(self, loss, batch_size): self.sample_count += batch_size self.sample_count_in_epoch += batch_size @@ -96,6 +98,9 @@ def _val_handler(self, loss, batch_size): def _test_handler(self, loss, batch_size): self.test_last_loss_object = loss + def _predict_handler(self, loss, batch_size): + pass + def _labels_handler(self, labels): return labels.to(self.device) @@ -106,20 +111,32 @@ def _inputs_handler(self, inputs): #SINGLE INPUT return [inputs.to(self.device)] - def _get_tqdm_description(self): - if self.state == State.TEST: - return f'[{self.state}]' + def _get_tqdm_description(self, loop, stats): + if self.state == State.TEST or self.state == State.PREDICT: + desc = f'[{self.state}]' elif self.state == State.VAL: - return f'[Val epoch {self.epoch}/{self.num_epochs}]' + desc = f'[Val epoch {self.epoch}/{self.num_epochs}]' else: 
#TRAIN - return f'[Train epoch {self.epoch}/{self.num_epochs}]' + desc = f'[Train epoch {self.epoch}/{self.num_epochs}]' - def _fwd_pass_base(self, data_loader, steps, state_handler, stats): + loop.set_description(desc) + if self.state != State.PREDICT: + loop.set_postfix(loss=stats.get_loss(), metrics=stats.get_metrics()) + + def _prepare_next_batch(self, batch): + if self.state == State.PREDICT: + inputs,labels = batch, T.zeros(len(batch)) #FAKE LABELS FOR CODE CONSISTENCY, NO ACTUAL USE TO THEM + else: + inputs,labels = batch + return inputs,labels + + def _fwd_pass_base(self, data_loader, steps, state_handler, stats, loss_f): stats.reset() loop = tqdm(data_loader, total=steps-1) - self.sample_count_in_epoch = 0 # CAN BE INVOKED ON ALL STATES - self.iteration_in_epoch = 0 # CAN BE INVOKED ON ALL STATES - for inputs,labels in loop: + self.sample_count_in_epoch = 0 + self.iteration_in_epoch = 0 + for batch in loop: + inputs,labels = self._prepare_next_batch(batch) steps -= 1 self.phase = Phase.BATCH_BEGIN @@ -129,7 +146,8 @@ def _fwd_pass_base(self, data_loader, steps, state_handler, stats): y = self._labels_handler(labels) batch_size = len(y) outputs = self.model(*x) - loss = self.loss_func(outputs, y) + self._last_outputs = outputs + loss = loss_f(outputs, y) stats.add_loss(loss, batch_size) stats.add_metrics(outputs, y, batch_size) state_handler(loss, batch_size) @@ -137,8 +155,7 @@ def _fwd_pass_base(self, data_loader, steps, state_handler, stats): self.phase = Phase.BATCH_END self._invoke_callbacks() - loop.set_description(self._get_tqdm_description()) - loop.set_postfix(loss=stats.get_loss(), metrics=stats.get_metrics()) + self._get_tqdm_description(loop, stats) if self._stopped: break @@ -146,13 +163,29 @@ def _fwd_pass_base(self, data_loader, steps, state_handler, stats): if steps == 0: break + def _fwd_pass_predict(self, predict_data_loader, predict_steps): + if self._stopped: + return + + with T.no_grad(): + self.model.eval() #MARK STATUS AS EVAL + self._fwd_pass_base(predict_data_loader, + predict_steps, + self._predict_handler, + stats=TrainerStats({}), # NO STATS + loss_f=lambda outputs, y: T.Tensor([0])) # DO NOTHING LOSS + def _fwd_pass_test(self, test_data_loader, test_steps): if self._stopped: return with T.no_grad(): self.model.eval() #MARK STATUS AS EVAL - self._fwd_pass_base(test_data_loader, test_steps, self._test_handler, self.test_stats) + self._fwd_pass_base(test_data_loader, + test_steps, + self._test_handler, + self.test_stats, + loss_f=self.loss_func) def _fwd_pass_val(self): if self._stopped or self.val_data_loader is None or self.val_steps == 0: @@ -160,13 +193,22 @@ def _fwd_pass_val(self): with T.no_grad(): self.model.eval() #MARK STATUS AS EVAL - self._fwd_pass_base(self.val_data_loader, self.val_steps, self._val_handler, self.val_stats) + self._fwd_pass_base(self.val_data_loader, + self.val_steps, + self._val_handler, + self.val_stats, + loss_f=self.loss_func) def _fwd_pass_train(self): if self._stopped: return + self.model.train() #MARK STATUS AS TRAIN - self._fwd_pass_base(self.train_data_loader, self.train_steps, self._train_handler, self.train_stats) + self._fwd_pass_base(self.train_data_loader, + self.train_steps, + self._train_handler, + self.train_stats, + loss_f=self.loss_func) def _invoke_callbacks(self): if self._stopped: @@ -199,6 +241,7 @@ def save_trainer(self, dir_path, file_name, msg='', verbose=1): 'epoch': self.epoch, 'num_epochs': self.num_epochs, 'iteration': self.iteration, + 'sample_count': self.sample_count, 'train_stats': 
self.train_stats,
             'val_stats': self.val_stats,
             'test_stats': self.test_stats
@@ -239,11 +282,18 @@ def load_trainer(dir_path,
                           callbacks=checkpoint['callbacks'],
                           name=checkpoint['name'])
 
-        trainer.epoch = checkpoint['epoch']
-        trainer.iteration = checkpoint['iteration']
-        trainer.train_stats = checkpoint['train_stats']
-        trainer.val_stats = checkpoint['val_stats']
-        trainer.test_stats = checkpoint['test_stats']
+        if 'epoch' in checkpoint:
+            trainer.epoch = checkpoint['epoch']
+        if 'iteration' in checkpoint:
+            trainer.iteration = checkpoint['iteration']
+        if 'sample_count' in checkpoint:
+            trainer.sample_count = checkpoint['sample_count']
+        if 'train_stats' in checkpoint:
+            trainer.train_stats = checkpoint['train_stats']
+        if 'val_stats' in checkpoint:
+            trainer.val_stats = checkpoint['val_stats']
+        if 'test_stats' in checkpoint:
+            trainer.test_stats = checkpoint['test_stats']
 
         return trainer
 
@@ -266,14 +316,19 @@ def summary(self):
         print('')
         print('optimizer', type(self.optimizer))
         print('')
-        pytorch_total_params = sum(p.numel() for p in self.model.parameters())
-        pytorch_total_params_requires_grad = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
-        print('pytorch_total_params', pytorch_total_params)
-        print('pytorch_total_params_requires_grad', pytorch_total_params_requires_grad)
+        total_params = sum(p.numel() for p in self.model.parameters())
+        total_params_requires_grad = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
+
+        print(f'Total params: {total_params}')
+        print(f'Trainable params: {total_params_requires_grad}')
+        print(f'Non-trainable params: {total_params - total_params_requires_grad}')
 
     def stop(self):
         self._stopped = True
 
+    def get_last_outputs(self):
+        return self._last_outputs.detach().cpu().numpy()
+
     def train(self):
         self._stopped = False
         self.state = State.EXTERNAL
@@ -313,5 +368,29 @@ def evaluate(self, test_data_loader, test_steps):
         self._invoke_callbacks()
         self.phase = Phase.IDLE
 
+    def predict(self, inputs_data_loader, steps):
+        """
+        Returns numpy array(s) of the current trainer model's predictions.
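+
+        A minimal usage sketch (assuming a data loader that yields inputs only,
+        e.g. examples.utils.examples_prediction_data_generator):
+            predictions = trainer.predict(inputs_data_loader, steps)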
+ """ + + # ADD COLLECT OUTPUTS CALLBACK + collect_outputs = CollectOutputs(apply_on_phase=Phase.BATCH_END, apply_on_states=State.PREDICT) + self.callbacks.append(collect_outputs) + + self._stopped = False + self.phase = Phase.PREDICT_BEGIN + self._invoke_callbacks() + self.state = State.PREDICT + self._fwd_pass_predict(inputs_data_loader, steps) + self.state = State.EXTERNAL + self.phase = Phase.PREDICT_END + self._invoke_callbacks() + self.phase = Phase.IDLE + + # REMOVE COLLECT OUTPUTS CALLBACK + self.callbacks.pop() + + return collect_outputs.get_outputs_for_state(State.PREDICT) + diff --git a/main.py b/main.py index 17338bf..e2f6367 100644 --- a/main.py +++ b/main.py @@ -5,6 +5,8 @@ import examples.save_and_load.train as save_and_load_example import examples.keras_decay_scheduler.train as keras_decay_scheduler_example import examples.accumulate_grads.train as accumulate_grads_example +import examples.train_evaluate_predict.train as train_evaluate_predict_example + basic_example.run() scheduler_step_on_batch_end_example.run() @@ -12,4 +14,5 @@ data_loader_example.run() save_and_load_example.run() keras_decay_scheduler_example.run() -accumulate_grads_example.run() \ No newline at end of file +accumulate_grads_example.run() +train_evaluate_predict_example.run() \ No newline at end of file diff --git a/setup.py b/setup.py index 8dc84c0..590ac82 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ setup( name='lpd', - version='0.2.1', + version='0.2.2', description='A Fast, Flexible Trainer with Callbacks and Extensions for PyTorch', long_description_content_type='text/markdown', long_description=README_md,