-
Notifications
You must be signed in to change notification settings - Fork 78
/
Copy pathimageNet_distilled.py
151 lines (128 loc) · 7.11 KB
/
imageNet_distilled.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import os
import torch
import torchvision
import cnn_models.conv_forward_model as convForwModel
import cnn_models.help_fun as cnn_hf
import datasets
import model_manager
# GPUs this process may use. CUDA_VISIBLE_DEVICES is read as a comma-separated
# list; when the variable is not set at all, fall back to every device torch
# reports instead of aborting with a KeyError.
_visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES')
if _visible_devices is None:
    cuda_devices = [str(index) for index in range(torch.cuda.device_count())]
else:
    cuda_devices = _visible_devices.split(',')
print('CUDA_VISIBLE_DEVICES: {} for a total of {} GPUs'.format(cuda_devices, len(cuda_devices)))

# Number of bits for this run; the NUM_BITS environment variable overrides
# the default of 4.
NUM_BITS = int(os.environ.get('NUM_BITS', 4))
print('Number of bits in training: {}'.format(NUM_BITS))
# Root folder handed to the datasets module, and root folder for saved models.
datasets.BASE_DATA_FOLDER = '...'
SAVED_MODELS_FOLDER = '...'

USE_CUDA = torch.cuda.is_available()
# Never report fewer than one device, so the batch-size divisibility check
# further down cannot divide by zero.
NUM_GPUS = max(1, len(cuda_devices))

# Create both folders when missing. An already-existing folder is fine; any
# other failure (e.g. permissions) is raised instead of silently ignored.
for _folder in (datasets.BASE_DATA_FOLDER, SAVED_MODELS_FOLDER):
    os.makedirs(_folder, exist_ok=True)
epochsToTrainImageNet = 90
imageNet12modelsFolder = os.path.join(SAVED_MODELS_FOLDER, 'imagenet12_new')

# The manager file name embeds the bit width, so each NUM_BITS value uses its
# own manager file.
imagenet_manager = model_manager.ModelManager(
    'model_manager_imagenet_distilled_New{}bits.tst'.format(NUM_BITS),
    'model_manager', create_new_model_manager=False)

# Print last and best prediction accuracy for every model that has completed
# at least one training run.
for x in imagenet_manager.list_models():
    if imagenet_manager.get_num_training_runs(x) < 1:
        continue
    # Load the metadata once per model instead of once per reported figure.
    _accuracies = imagenet_manager.load_metadata(x)[1]['predictionAccuracy']
    s = '{}; Last prediction acc: {}, Best prediction acc: {}'.format(
        x, _accuracies[-1], max(_accuracies))
    print(s)

# Create the models folder when missing; real failures are no longer hidden.
os.makedirs(imageNet12modelsFolder, exist_ok=True)
# NOTE(review): batch_size was read here without ever being assigned in this
# script, which raises NameError. It is now taken from the optional BATCH_SIZE
# environment variable (same pattern as NUM_BITS). The default of 256 is an
# assumption -- confirm the value intended for these experiments.
batch_size = int(os.environ.get('BATCH_SIZE', 256))
print('Batch size: {}'.format(batch_size))
if batch_size % NUM_GPUS != 0:
    raise ValueError('Batch size: {} must be a multiple of the number of gpus:{}'.format(batch_size, NUM_GPUS))

# ImageNet data source; the two positional arguments are path placeholders.
imageNet12 = datasets.ImageNet12('...',
                                 '...',
                                 type_of_data_augmentation='extended', already_scaled=False,
                                 pin_memory=True)
# Training batches are shuffled, test batches are not.
train_loader = imageNet12.getTrainLoader(batch_size, shuffle=True)
test_loader = imageNet12.getTestLoader(batch_size, shuffle=False)
# # Teacher model
# resnet152 = torchvision.models.resnet152(True) #already trained
# if USE_CUDA:
# resnet152 = resnet152.cuda()
# if NUM_GPUS > 1:
# resnet152 = torch.nn.parallel.DataParallel(resnet152)
# Plain ResNet-18 run, registered as 'resnet18_normal_fullprecision'.
model_name = 'resnet18_normal_fullprecision'
model_path = os.path.join(imageNet12modelsFolder, model_name)

resnet18 = torchvision.models.resnet18(False)  # not pre-trained, 11.7 million parameters
if USE_CUDA:
    resnet18 = resnet18.cuda()
if NUM_GPUS > 1:
    # Wrap in DataParallel when more than one device is configured.
    resnet18 = torch.nn.parallel.DataParallel(resnet18)

# Register the model with the manager the first time this script sees it.
if model_name not in imagenet_manager.saved_models:
    imagenet_manager.add_new_model(
        model_name, model_path,
        arguments_creator_function={'loaded_from': 'torchvision_models'})

# Keyword arguments forwarded to convForwModel.train_model by the manager.
_baseline_train_args = {
    'epochs_to_train': epochsToTrainImageNet,
    'learning_rate_style': 'imagenet',
    'initial_learning_rate': 0.1,
    'weight_decayL2': 1e-4,
    'start_epoch': 0,
    'print_every': 30,
}
imagenet_manager.train_model(
    resnet18,
    model_name=model_name,
    train_function=convForwModel.train_model,
    arguments_train_function=_baseline_train_args,
    train_loader=train_loader,
    test_loader=test_loader)
#distilled
# resnet18_distilled = torchvision.models.resnet18(False) #not pre-trained, 11.7 million parameters
# if USE_CUDA:
# resnet18_distilled = resnet18_distilled.cuda()
# if NUM_GPUS > 1:
# resnet18_distilled = torch.nn.parallel.DataParallel(resnet18_distilled)
# model_name = 'resnet18_distilled'
# model_path = os.path.join(imageNet12modelsFolder, model_name)
#
# if not model_name in imagenet_manager.saved_models:
# imagenet_manager.add_new_model(model_name, model_path,
# arguments_creator_function={'loaded_from':'torchvision_models'})
# imagenet_manager.train_model(resnet18_distilled, model_name=model_name,
# train_function=convForwModel.train_model,
# arguments_train_function={'epochs_to_train': epochsToTrainImageNet,
# 'teacher_model': resnet34,
# 'learning_rate_style': 'imagenet',
# 'initial_learning_rate': initial_lr,
# 'weight_decayL2':1e-4,
# 'use_distillation_loss':True,
# 'start_epoch':start_epoch,
# 'print_every':100},
# train_loader=train_loader, test_loader=test_loader)
#quantized distilled
# bits_to_try = [NUM_BITS]
#
# for numBit in bits_to_try:
# resnet18_quant_distilled = torchvision.models.resnet18(False) #not pre-trained, 11.7 million parameters
# if USE_CUDA:
# resnet18_quant_distilled = resnet18_quant_distilled.cuda()
# if NUM_GPUS > 1:
# resnet18_quant_distilled = torch.nn.parallel.DataParallel(resnet18_quant_distilled)
# model_name = 'resnet18_quant_distilled_{}bits'.format(numBit)
# model_path = os.path.join(imageNet12modelsFolder, model_name)
#
# if not model_name in imagenet_manager.saved_models:
# imagenet_manager.add_new_model(model_name, model_path,
# arguments_creator_function={'loaded_from':'torchvision_models'})
#
# imagenet_manager.train_model(resnet18_quant_distilled, model_name=model_name,
# train_function=convForwModel.train_model,
# arguments_train_function={'epochs_to_train': epochsToTrainImageNet,
# 'learning_rate_style': 'imagenet',
# 'initial_learning_rate': 0.1,
# 'use_nesterov':True,
# 'initial_momentum':0.9,
# 'weight_decayL2':1e-4,
# 'start_epoch': 0,
# 'print_every':30,
# 'use_distillation_loss':True,
# 'teacher_model': resnet152,
# 'quantizeWeights':True,
# 'numBits':numBit,
# 'bucket_size':256,
# 'quantize_first_and_last_layer': False},
# train_loader=train_loader, test_loader=test_loader)