-- call_DynComputeEmbeddingsRobust_2.lua
-- Compute latent space embeddings using trained image autoencoder
-- and export to .csv file.
-- BioHPC Environment configurations
-- module add cuda80; module add torch; module add cudnn/5.1.5;
-- or Singularity container:
-- singularity pull shub://andrewjUTSW/openLCH:latest
-- singularity exec --nv openLCH_latest.sif /bin/bash -c '<code>'
-- th ./call_DynComputeEmbeddingsRobust_2.lua \
-- -autoencoder output/autoencoder_eval.t7 \
-- -imsize 256 \
-- -dataProvider DynDataProviderRobust_2 \
-- -imPathFile imageList3.txt \
-- -batchSize 100 \
-- -batchSizeLoad 20000 \
-- -miniBatchSizeLoad 2500 \
-- -gpu 2 \
-- -useParallel 1 \
-- -numThreads 3 \
-- -embeddingFile output/embeddings_sampleTest.csv
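--
-- imPathFile format (hypothetical example; one image path per line):
-- /project/data/cell_0001.png
-- /project/data/cell_0002.png
-- ...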
local optim = require 'optim'
local gnuplot = require 'gnuplot'
local image = require 'image'
local cuda = pcall(require, 'cutorch') -- Use CUDA if available
local hasCudnn, cudnn = pcall(require, 'cudnn') -- Use cuDNN if available
require 'dpnn'
require 'paths'
require 'imtools_LCH'
require 'utils'
require 'nn'
require 'torchx'
require 'cunn' -- https://github.com/soumith/cudnn.torch/issues/129
require 'debugRepl'
log = require 'log'
------
cmd = torch.CmdLine()
-- Input Image config
cmd:option('-t7ImageDataOptsfile', '', 't7 opts file for loading image data')
cmd:option('-imPathFile', '', 'text file with image paths per line')
cmd:option('-dataProvider', 'DynDataProviderRobust_2', 'data provider object')
cmd:option('-imsize', 256, 'dataset image size; images of a different size are rescaled')
cmd:option('-useParallel', 0, 'attempt to load images with parallel threads')
cmd:option('-numThreads', 10, 'number of threads for parallel image loading')
-- autoencoder and output
cmd:option('-autoencoder', '', 'autoencoder t7 file to load')
cmd:option('-embeddingFile', 'embeddings.csv', 'path to the output embedding .csv file (text-readable)')
-- Compute config
cmd:option('-gpu', 1, 'Which GPU device to use')
cmd:option('-batchSize', 100, 'batch size for processing AAE embeddings on the GPU (CUDA)')
cmd:option('-batchSizeLoad', 10000, 'batch size for pre-loading images from disk into memory')
cmd:option('-miniBatchSizeLoad', 2500, 'mini batch size for DynDataProvider Parallel loading images into RAM')
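-- Note on the three batch sizes: -batchSizeLoad images are read from disk per
-- outer-loop iteration, -miniBatchSizeLoad is the chunk size the parallel loader
-- uses when staging images into RAM, and -batchSize is the per-forward-pass
-- batch sent to the GPU.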
-- Image Pre-processing config (feeds into the dataloader)
cmd:option('-lpf', 0, 'Low-pass filter, Gaussian blur sigma')
cmd:option('-gaussSigmaIn', 0, 'Gaussian sigma (as a percentage of image size) for masking/multiplying the training images')
cmd:option('-lcn', 0, 'Local Contrast Normalization performed on all images')
cmd:option('-lpfKernelSize', 55, 'Low-pass Filter, Gaussian Blur kernel Size')
cmd:option('-illumShift', 0, 'illumination shift in images')
opts = cmd:parse(arg)
print(opts)
cutorch.setDevice(opts.gpu)
ef = opts.embeddingFile;
log.outfile = string.sub(ef, 1, #ef-4) .. '.log'
log.level = 'trace'
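-- The log path is derived from -embeddingFile by swapping the '.csv' suffix for
-- '.log' (e.g. output/embeddings_sampleTest.csv -> output/embeddings_sampleTest.log).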
log.trace('Initializing DataProvider')
if opts.t7ImageDataOptsfile ~= '' then
optsData = torch.load(opts.t7ImageDataOptsfile)
log.trace('==========[LOAD DATASET from OPTS t7 file]=========')
DataProvider = require(optsData.dataProvider)
data = DataProvider.create(optsData)
else
print('==========[READING IMAGES from image path list file]====\n')
local tpaths = {}
tpaths['train'] = utils.readlines_from(opts.imPathFile)
print('Number of image paths for training: ' .. #tpaths['train'])
print('==========[LOAD DATASET]=============================\n')
opts.paths = tpaths
DataProvider = require(opts.dataProvider)
data = DataProvider.create(opts)
end
log.trace('==========[SUCCESS]===================================')
log.trace('set up logger')
collectgarbage()
collectgarbage()
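-- (collectgarbage is called twice on purpose: the first pass runs finalizers,
--  the second frees what they released, a common Lua/Torch idiom)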
log.trace('==========[LOAD AUTOENCODER]==========================')
autoencoder = nil
print(opts.autoencoder)
autoencoder = torch.load(opts.autoencoder)
log.trace('=========[DONE loading AUTOENCODER]===================')
autoencoder:clearState()
autoencoder:evaluate()
collectgarbage()
log.trace('Converting to cuda')
autoencoder:cuda()
autoencoder:evaluate()
log.trace('==========[SUCCESS!]================================\n')
log.trace('==========[verifying AUTOENCODER ...]==================')
local xTest = data:getImages(torch.LongTensor{1,1}, nil, nil, nil, opts.gaussSigmaIn, opts.lcn, opts.lpf, opts.lpfKernelSize, opts.illumShift)
local xHat_auto = autoencoder:forward(xTest:cuda())
local codes = autoencoder.modules[1].output
local xHat_auto2 = autoencoder.modules[2]:forward(codes)
local nZ = codes:size(2)
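-- Convention used here: modules[1] is the encoder (its output is the latent
-- code) and modules[2] is the decoder; nZ is the latent dimensionality, i.e.
-- the number of columns written per image in the output .csv.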
log.trace('==========[DONE verifying AUTOENCODER...]=============')
log.trace('==========[Begin Computing EMBEDDINGS]================')
ndat = #data.train.paths
local indices1 = torch.linspace(1,ndat,ndat):long():split(opts.batchSizeLoad)
tcodes = {}
xTest = nil
codes = nil
-- Pre-allocated memory for embeddings
local embeddings = torch.zeros(ndat, nZ);
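-- (one row per image, one column per latent dimension; filled in-place below)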
local i = 1;
for t1,v1 in ipairs(indices1) do
collectgarbage()
collectgarbage()
-- First load big batch batchSizeLoad into memory
log.trace('=========[ batch # '.. i .. ' ]=========')
log.trace('=========[loading big batch # ' .. opts.batchSizeLoad .. ' into memory]=========')
local x1, x_out1
sys.tic()
if opts.useParallel == 1 then
log.warn('Attempting parallel data load')
x1, x_out1 = data:getImagesParallel(v1, nil, nil, opts.miniBatchSizeLoad, nil, opts.gaussSigmaIn, opts.lcn, opts.lpf, opts.lpfKernelSize, opts.illumShift)
else
x1, x_out1 = data:getImages(v1, nil, nil, nil, opts.gaussSigmaIn, opts.lcn, opts.lpf, opts.lpfKernelSize, opts.illumShift)
end
local dataloadTime = sys.toc()
log.trace('***************** DATA LOAD TOTAL time: ' .. dataloadTime..' *********************')
log.trace('=========[DONE]=========')
local indicesG = torch.linspace(torch.min(v1), torch.max(v1), v1:size(1)):long():split(opts.batchSize)
local indices = torch.linspace(1, x1:size(1), x1:size(1)):long():split(opts.batchSize)
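-- indicesG holds absolute row indices into the full embeddings tensor, while
-- indices holds positions within the chunk x1 currently in memory; the two are
-- walked in lockstep via j below.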
local j = 1;
log.trace('=========[Start << ' .. i .. ' >> mini-batch embeddings]=========')
for t,v in ipairs(indices) do
autoencoder:evaluate()
local start = torch.min(v);
local stop = torch.max(v);
local startG = torch.min(indicesG[j]);
local stopG = torch.max(indicesG[j]);
local x = x1:index(1, v)
local s = sys.tic()
if x:size(1) == 1 then
x = torch.cat(x, x, 1) -- duplicate the lone image so the batch dimension is > 1 (some CUDA modules fail on singleton batches); only codes[1] is copied out below
local xHat = autoencoder:forward(x:cuda())
local codes = autoencoder.modules[1].output
embeddings:sub(startG, stopG, 1, nZ):copy(codes[1])
else
sys.tic()
local xHat = autoencoder:forward(x:cuda())
local codes = autoencoder.modules[1].output
embeddings:sub(startG, stopG, 1, nZ):copy(codes)
end
local GPUtime = sys.toc();
if j % 20 == 0 then
-- debugRepl()
log.trace(' Start (absolute) : '..startG .. ' ')
log.trace(' Stop (absolute) : '..stopG.. ' ')
log.trace(' Start (batch) : '..start.. ' ')
log.trace(' Stop (batch) : '..stop.. ' ')
log.trace('GPU time:' .. GPUtime)
xlua.progress(j, #indices);
end
j = j + 1;
end
log.trace('=========[DONE with mini-batch embeddings]=========')
log.trace('=========^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^=========')
log.trace('***************** DATA LOAD TOTAL time: ' .. dataloadTime .. ' *********************')
log.trace(' ')
xlua.progress(i, #indices1); i = i + 1;
log.trace(' ')
collectgarbage()
collectgarbage()
end
log.trace('=========[writing out embeddings to .csv file...]=========')
utils.csv_write_tensor(opts.embeddingFile, embeddings, ',') -- comma-separated
log.trace('=========[COMPLETE]=========')
collectgarbage()
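-- A minimal sketch (not part of this script) for reading the exported
-- embeddings back into a Tensor for downstream analysis; it assumes a comma
-- separator and no header row, matching the csv_write_tensor call above:
-- local rows = {}
-- for line in io.lines(opts.embeddingFile) do
--   local row = {}
--   for v in string.gmatch(line, '[^,]+') do row[#row + 1] = tonumber(v) end
--   rows[#rows + 1] = row
-- end
-- local emb = torch.Tensor(rows) -- ndat x nZ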