-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrain.py
129 lines (109 loc) · 4.24 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import numpy as np
import datetime
import tensorflow as tf
import json, sys, os
from os import path
import time
import shutil
import matplotlib
import importlib
import argparse
from tqdm import tqdm
from utils import *
from data import *
from comp_graph import *
# ---------------------------------------------------------------------------
# Script setup: environment, command-line arguments, seeding, output
# directories, dataset, computation graph and learning-rate schedulers.
# Everything defined here is module-level state used by train()/eval() and
# the epoch loop further down.
# ---------------------------------------------------------------------------

# os settings
sys.path.append(os.getcwd() + '/..')  # make the parent directory importable
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # quieten TensorFlow's native logging

# Parse cmdline args
parser = argparse.ArgumentParser(description='Image Classification')
parser.add_argument('--logdir', default='../../data/cifar-100-logs/', type=str,
                    help='root directory for experiment logs')
parser.add_argument('--seed', default=0, type=int,
                    help='random seed for numpy and TensorFlow')
parser.add_argument('--exp_id', default='sl.vgg16_nobn_l2', type=str,
                    help='module under saved_params providing generate_params()')
parser.add_argument('--gpu', default=-1, type=int,
                    help='GPU id exported as CUDA_VISIBLE_DEVICES; '
                         '-1 leaves the environment untouched')
parser.add_argument('--modeldir', default='../../data/cifar-100-models/', type=str,
                    help='root directory for saved checkpoints')
args = parser.parse_args()

# GPU settings
if args.gpu > -1:
    print("GPU COMPATIBLE RUN...")
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)

# Print experiment details
print('Booting with exp params: {}...'.format(args.exp_id))
time.sleep(2)

# Experiment parameters
mod = importlib.import_module('saved_params.' + args.exp_id)
params = mod.generate_params()

# set seed
params['train']['seed'] = args.seed
np.random.seed(args.seed)
tf.set_random_seed(args.seed)

# Take a single timestamp so the log and model directories of one run cannot
# end up labelled with different minutes.
run_started = datetime.datetime.now()

# target log dir
log_dir = os.path.join(args.logdir, run_started.strftime('[%m_%d_%H_%M]') + args.exp_id)
print('Experiment Logs will be written at {}'.format(log_dir))
logger = LogWriter(log_dir, 'main.log')

# model save log dir
model_dir = os.path.join(args.modeldir, args.exp_id, run_started.strftime('%y-%m-%d-%H-%M'))
# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(model_dir, exist_ok=True)

# load dataset
dataset = load_dataset(params)
train_loader, test_loader = dataset['train'], dataset['test']

# build model
ph, graph, graph_vars, targets = build_image_classfication_model(params)
saver = tf.train.Saver(var_list=graph_vars)

# Learning-rate schedules: a per-epoch multi-step schedule plus a
# per-iteration warm-up spanning params['train']['warmup'] epochs.
iter_per_epoch = params['train']['iter_per_epoch']
train_scheduler = MultiStepLR(params['train']['milestone'], params['train']['gamma'])
warmup_scheduler = WarmupLR(iter_per_epoch * params['train']['warmup'])
def train(ph, graph, targets, epoch, data_loader, train_scheduler,
          warmup_scheduler, debug=False):
    """Run one training epoch and log the accumulated metrics.

    Relies on the module-level ``params``, ``sess`` and ``logger``.

    Args:
        ph: Mapping of placeholders; the entries 'x', 'y', 'lr_decay' and
            'is_training' are fed on every step.
        graph: Not used in this function; kept for the existing call sites.
        targets: Mapping whose 'train' entry is handed to ``sess.run``.
        epoch: Index of the current epoch.
        data_loader: Object whose ``next_batch(batch_size)`` returns an
            ``(x, y)`` pair.
        train_scheduler: Its ``step()`` is called once per epoch and yields
            the base learning-rate factor.
        warmup_scheduler: Its ``step()`` is called once per iteration while
            ``epoch`` is below ``params['train']['warmup']``.
        debug: When true, print the factor fed for every batch.
    """
    train_cfg = params['train']
    epoch_lr = train_scheduler.step()
    metrics = {}
    print('Epoch {}: lr decay = {}'.format(epoch, epoch_lr))

    for step in range(train_cfg['iter_per_epoch']):
        # Warm-up epochs scale the base factor by the warm-up schedule;
        # afterwards the base factor is fed unchanged.
        in_warmup = epoch < train_cfg['warmup']
        step_lr = epoch_lr * warmup_scheduler.step() if in_warmup else epoch_lr

        if debug:
            print('Epoch {} Batch {}: Learning Decay = {}'.format(epoch, step, step_lr))

        batch_x, batch_y = data_loader.next_batch(train_cfg['batch_size'])
        feed = {
            ph['x']: batch_x,
            ph['y']: batch_y,
            ph['lr_decay']: step_lr,
            ph['is_training']: True,
        }
        outputs = sess.run(targets['train'], feed_dict=feed)
        update_loss(outputs, metrics)

    # Report once per epoch, to stdout and to the experiment log.
    print_log('train', epoch, metrics)
    logger.print(epoch, 'train', metrics)
def eval(ph, graph, targets, epoch, domain, data_loader):
    """Evaluate the model on one data split and log the accumulated metrics.

    Relies on the module-level ``params``, ``sess`` and ``logger``.
    NOTE: the name shadows the builtin ``eval``; it is kept because callers
    use it.

    Args:
        ph: Mapping of placeholders; the entries 'x', 'y' and 'is_training'
            are fed on every step.
        graph: Not used in this function; kept for the existing call sites.
        targets: Mapping whose 'eval' entry is handed to ``sess.run``.
        epoch: Index of the current epoch, used for logging only.
        domain: Key into ``params`` selecting the split's 'iter_per_epoch'
            and 'batch_size'; also used as the log tag.
        data_loader: Object whose ``next_batch(batch_size)`` returns an
            ``(x, y)`` pair.

    Returns:
        ``np.mean`` of the values accumulated under 'acc_loss'.
    """
    domain_cfg = params[domain]
    metrics = {}

    for _ in range(domain_cfg['iter_per_epoch']):
        batch_x, batch_y = data_loader.next_batch(domain_cfg['batch_size'])
        feed = {
            ph['x']: batch_x,
            ph['y']: batch_y,
            ph['is_training']: False,
        }
        outputs = sess.run(targets['eval'], feed_dict=feed)
        update_loss(outputs, metrics)

    # Report once per call, to stdout and to the experiment log.
    print_log(domain, epoch, metrics)
    logger.print(epoch, domain, metrics)
    return np.mean(metrics['acc_loss'])
# ---------------------------------------------------------------------------
# Session creation and the epoch loop: train, evaluate on the test split,
# track the best score seen so far and checkpoint periodically.
# ---------------------------------------------------------------------------

# allow_growth lets TensorFlow claim GPU memory on demand rather than
# reserving the whole device up front.
gpu_options = tf.GPUOptions(allow_growth=True)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=True))
try:
    sess.run(tf.global_variables_initializer())

    max_valid_acc = 0
    for epoch in range(params['train']['num_epoches']):
        train(ph, graph, targets, epoch, train_loader,
              train_scheduler, warmup_scheduler)
        valid_acc = eval(ph, graph, targets, epoch, 'test', test_loader)
        if valid_acc > max_valid_acc:
            max_valid_acc = valid_acc
        print('== Max Test Accuracy: {}'.format(max_valid_acc))

        if epoch % params['train']['save_interval'] == 0:
            # Saver.save does not promise to create a missing parent
            # directory, so make the per-epoch folder explicitly.
            ckpt_dir = os.path.join(model_dir, 'epoch{}'.format(epoch))
            os.makedirs(ckpt_dir, exist_ok=True)
            saver.save(sess, os.path.join(ckpt_dir, 'vgg.ckpt'))
finally:
    # Release the session's resources even if training is interrupted.
    sess.close()