# 6_GoogLeNet.py
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import helper
"""
In this file, we will implement GoogleNet architecture for CIFAR10 dataset. GoogleNet was introduced by Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens, and Zbigniew Wojna in 2014.
It was the winning architecture of ImageNet Large Scale Visual Recognition Challenge (ILSVRC) 2014.
Motivation::
1. Increase Depth and Width Without Overfitting: it aimed to deepen and widen the network while including regularization techniques to control overfitting.
2. Reduce Parameter Count : This was achieved through the use of 1x1 convolutions to perform dimensionality reduction before applying larger convolutions.
Conclusion::
This network was perhaps the pioneer in demonstrating a clear demarcation between the stem (initial data capture), the body (data interpretation), and the head (outcome prediction) within a CNN framework.
This architectural blueprint has continued to be a mainstay in the construction of intricate networks to this day: the stem is composed of the initial two or three convolutional layers that process the image, distilling basic features from it.
This is succeeded by a sequence of convolutional blocks that form the body.
Architecture::
1. Follows a fixed number of Inception blocks
2. then, a global average pooling layer for classification
Inception block:
|-> 1x1 Convolution -> Relu -> |
input -> |-> 1x1 Convolution -> 3x3 Convolution -> Relu -> | -> Concatenate -> output
|-> 1x1 Convolution -> 5x5 Convolution -> Relu -> |
|-> 3x3 MaxPool -> 1x1 Convolution -> Relu -> |
"""
# Load the CIFAR10 data; images are resized to 224x224 so they match the input size
# the original GoogLeNet stem was designed for.
batch_size = 64
torch.manual_seed(0)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data, input_shape = helper.load_dataset_image_data("CIFAR10", batch_size, resize=(224, 224))
train_loader, valid_loader, test_loader = data
class InceptionBlock(nn.Module):
    """Inception block: four parallel branches concatenated along the channel dimension."""

    def __init__(self, in_channels, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, out_1x1pool):
        super(InceptionBlock, self).__init__()
        # Branch 1: 1x1 convolution
        self.branch1 = nn.Sequential(
            nn.Conv2d(in_channels, out_1x1, kernel_size=1),
            nn.ReLU(inplace=True)
        )
        # Branch 2: 1x1 reduction followed by a 3x3 convolution
        self.branch2 = nn.Sequential(
            nn.Conv2d(in_channels, red_3x3, kernel_size=1),
            nn.Conv2d(red_3x3, out_3x3, kernel_size=3, padding=1),
            nn.ReLU(inplace=True)
        )
        # Branch 3: 1x1 reduction followed by a 5x5 convolution
        self.branch3 = nn.Sequential(
            nn.Conv2d(in_channels, red_5x5, kernel_size=1),
            nn.Conv2d(red_5x5, out_5x5, kernel_size=5, padding=2),
            nn.ReLU(inplace=True)
        )
        # Branch 4: 3x3 max pooling followed by a 1x1 convolution
        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            nn.Conv2d(in_channels, out_1x1pool, kernel_size=1),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        # All branches preserve the spatial size, so their outputs can be
        # concatenated along the channel dimension.
        branch1 = self.branch1(x)
        branch2 = self.branch2(x)
        branch3 = self.branch3(x)
        branch4 = self.branch4(x)
        return torch.cat([branch1, branch2, branch3, branch4], 1)
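# Quick sanity check (illustrative only, not part of training): the block
# concatenates the four branches along the channel dimension, so its output
# channel count is out_1x1 + out_3x3 + out_5x5 + out_1x1pool, while the spatial
# size is unchanged. For the first block used below:
#
#   _blk = InceptionBlock(192, 64, 96, 128, 16, 32, 32)
#   assert _blk(torch.randn(1, 192, 28, 28)).shape == (1, 256, 28, 28)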
class GoogLeNet(nn.Module):
    def __init__(self, in_channels, num_classes=10):
        super(GoogLeNet, self).__init__()
        # Stem: plain convolutions that distill low-level features (output: 192 channels)
        self.stem = nn.Sequential(
            nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            nn.Conv2d(64, 192, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )
        # Body: stacked Inception blocks; the trailing comment gives each block's output channels
        self.inception3a = InceptionBlock(192, 64, 96, 128, 16, 32, 32)      # -> 256
        self.inception3b = InceptionBlock(256, 128, 128, 192, 32, 96, 64)    # -> 480
        # Stateless pooling layer, reused between the Inception stages
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.inception4a = InceptionBlock(480, 192, 96, 208, 16, 48, 64)     # -> 512
        self.inception4b = InceptionBlock(512, 160, 112, 224, 24, 64, 64)    # -> 512
        self.inception4c = InceptionBlock(512, 128, 128, 256, 24, 64, 64)    # -> 512
        self.inception4d = InceptionBlock(512, 112, 144, 288, 32, 64, 64)    # -> 528
        self.inception4e = InceptionBlock(528, 256, 160, 320, 32, 128, 128)  # -> 832
        self.inception5a = InceptionBlock(832, 256, 160, 320, 32, 128, 128)  # -> 832
        self.inception5b = InceptionBlock(832, 384, 192, 384, 48, 128, 128)  # -> 1024
        # Head: global average pooling, dropout and a fully connected classifier
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(0.4)
        self.fc = nn.Linear(1024, num_classes)

    def forward(self, x):
        x = self.stem(x)
        x = self.inception3a(x)
        x = self.inception3b(x)
        x = self.maxpool(x)
        x = self.inception4a(x)
        x = self.inception4b(x)
        x = self.inception4c(x)
        x = self.inception4d(x)
        x = self.inception4e(x)
        x = self.maxpool(x)
        x = self.inception5a(x)
        x = self.inception5b(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        x = self.fc(x)
        return x
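# Optional shape check (illustrative, not needed for training): with 224x224 inputs
# the spatial size shrinks 224 -> 28 (stem) -> 14 -> 7 -> 1 (global average pool),
# and the flattened feature vector has 1024 channels before the classifier.
#
#   _logits = GoogLeNet(3, 10)(torch.randn(2, 3, 224, 224))
#   assert _logits.shape == (2, 10)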
model = GoogLeNet(3, 10).to(device)
helper.model_summary(model, input_shape)
# Hyperparameters
learning_rate = 0.001
epochs = 1
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# Train network
trainer = helper.Trainer(model, optimizer, criterion, lr=learning_rate, max_epochs=epochs)
trainer.train(train_loader, valid_loader)
trainer.plot("NiN")
trainer.test(test_loader)