Create CosineWarmupScheduler class in benchmarks (#1047)
Add a scheduler.py file to lightly/utils containing useful classes and methods for scheduling training with cosine decay. Add a test and reformat code.
Ra1Nik authored Jan 27, 2023
1 parent b4fd2b6 commit 1c08371
Showing 18 changed files with 263 additions and 272 deletions.
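The new lightly/utils/scheduler.py module itself is not part of the diff shown below. As a rough guide, the following is a minimal sketch of what CosineWarmupScheduler could look like, reconstructed from the scale_lr helpers it replaces in the benchmark models; the constructor arguments (optimizer, warmup_epochs, max_epochs) follow the calls in imagenette_benchmark.py, while the LambdaLR base class and the rest are assumptions rather than the actual implementation from this commit.

import math

import torch


class CosineWarmupScheduler(torch.optim.lr_scheduler.LambdaLR):
    # Sketch only: linear warmup for warmup_epochs epochs, then cosine decay
    # until max_epochs. This mirrors the scale_lr methods removed from the
    # benchmark models in the diff below.
    def __init__(self, optimizer, warmup_epochs, max_epochs):
        self.warmup_epochs = warmup_epochs
        self.max_epochs = max_epochs
        super().__init__(optimizer, self.scale_lr)

    def scale_lr(self, epoch):
        if epoch < self.warmup_epochs:
            # Ramp the learning-rate factor linearly from 0 to 1.
            return epoch / self.warmup_epochs
        # Cosine decay of the factor from 1 to 0 over the remaining epochs.
        return 0.5 * (
            1.0
            + math.cos(
                math.pi
                * (epoch - self.warmup_epochs)
                / (self.max_epochs - self.warmup_epochs)
            )
        )

In the benchmarks it is then returned from configure_optimizers alongside the optimizer, e.g. return [optim], [scheduler.CosineWarmupScheduler(optim, self.warmup_epochs, max_epochs)].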
163 changes: 38 additions & 125 deletions docs/source/getting_started/benchmarks/imagenette_benchmark.py
@@ -70,6 +70,7 @@
from lightly.models.modules import masked_autoencoder
from lightly.models import utils
from lightly.utils import BenchmarkModule
from lightly.utils import scheduler
from pytorch_lightning.loggers import TensorBoardLogger
from pl_bolts.optimizers.lars import LARS

@@ -155,10 +156,7 @@

# Collate function passing geometrical transformation for VICRegL
vicregl_collate_fn = lightly.data.VICRegLCollateFunction(
global_crop_size=128,
local_crop_size=64,
global_grid_size=4,
local_grid_size=2
global_crop_size=128, local_crop_size=64, global_grid_size=4, local_grid_size=2
)

normalize_transform = torchvision.transforms.Normalize(
@@ -236,7 +234,6 @@ def get_data_loaders(batch_size: int, model):

return dataloader_train_ssl, dataloader_train_kNN, dataloader_test


class MocoModel(BenchmarkModule):
def __init__(self, dataloader_kNN, num_classes):
super().__init__(dataloader_kNN, num_classes)
@@ -302,8 +299,8 @@ def configure_optimizers(self):
momentum=0.9,
weight_decay=5e-4,
)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, max_epochs)
return [optim], [scheduler]
cosine_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, max_epochs)
return [optim], [cosine_scheduler]


class SimCLRModel(BenchmarkModule):
@@ -335,8 +332,8 @@ def configure_optimizers(self):
optim = torch.optim.SGD(
self.parameters(), lr=6e-2 * lr_factor, momentum=0.9, weight_decay=5e-4
)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, max_epochs)
return [optim], [scheduler]
cosine_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, max_epochs)
return [optim], [cosine_scheduler]


class SimSiamModel(BenchmarkModule):
@@ -374,8 +371,8 @@ def configure_optimizers(self):
momentum=0.9,
weight_decay=5e-4,
)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, max_epochs)
return [optim], [scheduler]
cosine_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, max_epochs)
return [optim], [cosine_scheduler]


class BarlowTwinsModel(BenchmarkModule):
@@ -411,8 +408,8 @@ def configure_optimizers(self):
optim = torch.optim.SGD(
self.parameters(), lr=6e-2 * lr_factor, momentum=0.9, weight_decay=5e-4
)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, max_epochs)
return [optim], [scheduler]
cosine_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, max_epochs)
return [optim], [cosine_scheduler]


class BYOLModel(BenchmarkModule):
@@ -475,8 +472,8 @@ def configure_optimizers(self):
momentum=0.9,
weight_decay=5e-4,
)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, max_epochs)
return [optim], [scheduler]
cosine_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, max_epochs)
return [optim], [cosine_scheduler]


class NNCLRModel(BenchmarkModule):
@@ -517,8 +514,8 @@ def configure_optimizers(self):
momentum=0.9,
weight_decay=5e-4,
)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, max_epochs)
return [optim], [scheduler]
cosine_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, max_epochs)
return [optim], [cosine_scheduler]


class SwaVModel(BenchmarkModule):
@@ -571,8 +568,8 @@ def configure_optimizers(self):
lr=1e-3 * lr_factor,
weight_decay=1e-6,
)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, max_epochs)
return [optim], [scheduler]
cosine_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, max_epochs)
return [optim], [cosine_scheduler]


class DINOModel(BenchmarkModule):
@@ -627,8 +624,8 @@ def configure_optimizers(self):
momentum=0.9,
weight_decay=5e-4,
)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, max_epochs)
return [optim], [scheduler]
cosine_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, max_epochs)
return [optim], [cosine_scheduler]


class DCL(BenchmarkModule):
@@ -660,8 +657,8 @@ def configure_optimizers(self):
optim = torch.optim.SGD(
self.parameters(), lr=6e-2 * lr_factor, momentum=0.9, weight_decay=5e-4
)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, max_epochs)
return [optim], [scheduler]
cosine_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, max_epochs)
return [optim], [cosine_scheduler]


class DCLW(BenchmarkModule):
@@ -693,8 +690,8 @@ def configure_optimizers(self):
optim = torch.optim.SGD(
self.parameters(), lr=6e-2 * lr_factor, momentum=0.9, weight_decay=5e-4
)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, max_epochs)
return [optim], [scheduler]
cosine_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, max_epochs)
return [optim], [cosine_scheduler]


class MAEModel(BenchmarkModule):
@@ -771,23 +768,8 @@ def configure_optimizers(self):
weight_decay=0.05,
betas=(0.9, 0.95),
)
cosine_with_warmup_scheduler = torch.optim.lr_scheduler.LambdaLR(
optim, self.scale_lr
)
return [optim], [cosine_with_warmup_scheduler]

def scale_lr(self, epoch):
if epoch < self.warmup_epochs:
return epoch / self.warmup_epochs
else:
return 0.5 * (
1.0
+ math.cos(
math.pi
* (epoch - self.warmup_epochs)
/ (max_epochs - self.warmup_epochs)
)
)
cosine_scheduler = scheduler.CosineWarmupScheduler(optim, self.warmup_epochs, max_epochs)
return [optim], [cosine_scheduler]


class MSNModel(BenchmarkModule):
@@ -859,23 +841,8 @@ def configure_optimizers(self):
weight_decay=0.05,
betas=(0.9, 0.95),
)
cosine_with_warmup_scheduler = torch.optim.lr_scheduler.LambdaLR(
optim, self.scale_lr
)
return [optim], [cosine_with_warmup_scheduler]

def scale_lr(self, epoch):
if epoch < self.warmup_epochs:
return epoch / self.warmup_epochs
else:
return 0.5 * (
1.0
+ math.cos(
math.pi
* (epoch - self.warmup_epochs)
/ (max_epochs - self.warmup_epochs)
)
)
cosine_scheduler = scheduler.CosineWarmupScheduler(optim, self.warmup_epochs, max_epochs)
return [optim], [cosine_scheduler]


from sklearn.cluster import KMeans
@@ -982,8 +949,8 @@ def configure_optimizers(self):
momentum=0.9,
weight_decay=1e-6,
)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, max_epochs)
return [optim], [scheduler]
cosine_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, max_epochs)
return [optim], [cosine_scheduler]


class SimMIMModel(BenchmarkModule):
@@ -1049,23 +1016,8 @@ def configure_optimizers(self):
weight_decay=0.05,
betas=(0.9, 0.999),
)
cosine_with_warmup_scheduler = torch.optim.lr_scheduler.LambdaLR(
optim, self.scale_lr
)
return [optim], [cosine_with_warmup_scheduler]

def scale_lr(self, epoch):
if epoch < self.warmup_epochs:
return epoch / self.warmup_epochs
else:
return 0.5 * (
1.0
+ math.cos(
math.pi
* (epoch - self.warmup_epochs)
/ (max_epochs - self.warmup_epochs)
)
)
cosine_scheduler = scheduler.CosineWarmupScheduler(optim, self.warmup_epochs, max_epochs)
return [optim], [cosine_scheduler]


class VICRegModel(BenchmarkModule):
@@ -1098,21 +1050,8 @@ def configure_optimizers(self):
weight_decay=1e-4,
momentum=0.9,
)
scheduler = torch.optim.lr_scheduler.LambdaLR(optim, self.scale_lr)
return [optim], [scheduler]

def scale_lr(self, epoch):
if epoch < self.warmup_epochs:
return epoch / self.warmup_epochs
else:
return 0.5 * (
1.0
+ math.cos(
math.pi
* (epoch - self.warmup_epochs)
/ (max_epochs - self.warmup_epochs)
)
)
cosine_scheduler = scheduler.CosineWarmupScheduler(optim, self.warmup_epochs, max_epochs)
return [optim], [cosine_scheduler]


class VICRegLModel(BenchmarkModule):
@@ -1129,7 +1068,7 @@ def __init__(self, dataloader_kNN, num_classes):
self.projection_head = heads.BarlowTwinsProjectionHead(512, 2048, 2048)
self.local_projection_head = heads.VicRegLLocalProjectionHead(512, 128, 128)
self.average_pool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
self.criterion = lightly.loss.VICRegLLoss(alpha=0.75, num_matches=(16,4))
self.criterion = lightly.loss.VICRegLLoss(alpha=0.75, num_matches=(16, 4))
self.backbone = nn.Sequential(self.train_backbone, self.average_pool)
self.warmup_epochs = 40 if max_epochs >= 800 else 20

@@ -1163,21 +1102,8 @@ def configure_optimizers(self):
weight_decay=1e-4,
momentum=0.9,
)
scheduler = torch.optim.lr_scheduler.LambdaLR(optim, self.scale_lr)
return [optim], [scheduler]

def scale_lr(self, epoch):
if epoch < self.warmup_epochs:
return epoch / self.warmup_epochs
else:
return 0.5 * (
1.0
+ math.cos(
math.pi
* (epoch - self.warmup_epochs)
/ (max_epochs - self.warmup_epochs)
)
)
cosine_scheduler = scheduler.CosineWarmupScheduler(optim, self.warmup_epochs, max_epochs)
return [optim], [cosine_scheduler]


class TiCoModel(BenchmarkModule):
@@ -1210,7 +1136,7 @@ def forward_momentum(self, x):

def training_step(self, batch, batch_index):
(x0, x1), _, _ = batch
momentum = utils.cosine_schedule(self.current_epoch, 10, 0.996, 1)
momentum = scheduler.cosine_schedule(self.current_epoch, 10, 0.996, 1)
utils.update_momentum(self.backbone, self.backbone_momentum, m=momentum)
utils.update_momentum(
self.projection_head, self.projection_head_momentum, m=momentum
@@ -1229,21 +1155,8 @@ def configure_optimizers(self):
weight_decay=1e-4,
momentum=0.9,
)
scheduler = torch.optim.lr_scheduler.LambdaLR(optim, self.scale_lr)
return [optim], [scheduler]

def scale_lr(self, epoch):
if epoch < self.warmup_epochs:
return epoch / self.warmup_epochs
else:
return 0.5 * (
1.0
+ math.cos(
math.pi
* (epoch - self.warmup_epochs)
/ (max_epochs - self.warmup_epochs)
)
)
cosine_scheduler = scheduler.CosineWarmupScheduler(optim, self.warmup_epochs, max_epochs)
return [optim], [cosine_scheduler]


models = [
2 changes: 1 addition & 1 deletion examples/pytorch/byol.py
@@ -11,7 +11,7 @@
from lightly.data import SimCLRCollateFunction
from lightly.loss import NegativeCosineSimilarity
from lightly.models.modules import BYOLProjectionHead, BYOLPredictionHead
from lightly.models.utils import cosine_schedule
from lightly.utils.scheduler import cosine_schedule
from lightly.models.utils import deactivate_requires_grad
from lightly.models.utils import update_momentum

2 changes: 1 addition & 1 deletion examples/pytorch/dino.py
@@ -11,7 +11,7 @@
from lightly.data import DINOCollateFunction
from lightly.loss import DINOLoss
from lightly.models.modules import DINOProjectionHead
from lightly.models.utils import cosine_schedule
from lightly.utils.scheduler import cosine_schedule
from lightly.models.utils import deactivate_requires_grad
from lightly.models.utils import update_momentum

2 changes: 1 addition & 1 deletion examples/pytorch/moco.py
@@ -11,7 +11,7 @@
from lightly.data import MoCoCollateFunction
from lightly.loss import NTXentLoss
from lightly.models.modules import MoCoProjectionHead
from lightly.models.utils import cosine_schedule
from lightly.utils.scheduler import cosine_schedule
from lightly.models.utils import deactivate_requires_grad
from lightly.models.utils import update_momentum

2 changes: 1 addition & 1 deletion examples/pytorch/tico.py
@@ -11,7 +11,7 @@
from lightly.data import SimCLRCollateFunction
from lightly.loss.tico_loss import TiCoLoss
from lightly.models.modules.heads import TiCoProjectionHead
from lightly.models.utils import cosine_schedule
from lightly.utils.scheduler import cosine_schedule
from lightly.models.utils import deactivate_requires_grad
from lightly.models.utils import update_momentum

2 changes: 1 addition & 1 deletion examples/pytorch_lightning/byol.py
@@ -12,7 +12,7 @@
from lightly.data import SimCLRCollateFunction
from lightly.loss import NegativeCosineSimilarity
from lightly.models.modules import BYOLProjectionHead, BYOLPredictionHead
from lightly.models.utils import cosine_schedule
from lightly.utils.scheduler import cosine_schedule
from lightly.models.utils import deactivate_requires_grad
from lightly.models.utils import update_momentum

2 changes: 1 addition & 1 deletion examples/pytorch_lightning/dino.py
@@ -13,7 +13,7 @@
from lightly.data import DINOCollateFunction
from lightly.loss import DINOLoss
from lightly.models.modules import DINOProjectionHead
from lightly.models.utils import cosine_schedule
from lightly.utils.scheduler import cosine_schedule
from lightly.models.utils import deactivate_requires_grad
from lightly.models.utils import update_momentum

2 changes: 1 addition & 1 deletion examples/pytorch_lightning/moco.py
@@ -12,7 +12,7 @@
from lightly.data import MoCoCollateFunction
from lightly.loss import NTXentLoss
from lightly.models.modules import MoCoProjectionHead
from lightly.models.utils import cosine_schedule
from lightly.utils.scheduler import cosine_schedule
from lightly.models.utils import deactivate_requires_grad
from lightly.models.utils import update_momentum

2 changes: 1 addition & 1 deletion examples/pytorch_lightning/tico.py
@@ -8,7 +8,7 @@
from lightly.data import SimCLRCollateFunction
from lightly.loss.tico_loss import TiCoLoss
from lightly.models.modules.heads import TiCoProjectionHead
from lightly.models.utils import cosine_schedule
from lightly.utils.scheduler import cosine_schedule
from lightly.models.utils import deactivate_requires_grad
from lightly.models.utils import update_momentum

2 changes: 1 addition & 1 deletion examples/pytorch_lightning_distributed/byol.py
@@ -13,7 +13,7 @@
from lightly.loss import NegativeCosineSimilarity
from lightly.models.modules import BYOLProjectionHead
from lightly.models.modules.heads import BYOLPredictionHead
from lightly.models.utils import cosine_schedule
from lightly.utils.scheduler import cosine_schedule
from lightly.models.utils import deactivate_requires_grad
from lightly.models.utils import update_momentum

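In the example scripts only the import location changes: cosine_schedule moves from lightly.models.utils to lightly.utils.scheduler. A minimal usage sketch, assuming the function keeps the (step, max_steps, start_value, end_value) call pattern seen in the TiCo benchmark above:

from lightly.utils.scheduler import cosine_schedule

# Hypothetical values for illustration: ramp the EMA momentum from 0.996
# towards 1.0 over the first 10 epochs, as in the TiCo benchmark's training_step.
current_epoch = 3
momentum = cosine_schedule(current_epoch, 10, 0.996, 1)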