diff --git a/data/base_dataset.py b/data/base_dataset.py
index a061a05edb0..3968a25cc0e 100644
--- a/data/base_dataset.py
+++ b/data/base_dataset.py
@@ -32,8 +32,12 @@ def get_transform(opt):
         transform_list.append(transforms.RandomHorizontalFlip())
 
     transform_list += [transforms.ToTensor(),
-                       transforms.Normalize((0.5, 0.5, 0.5),
-                                            (0.5, 0.5, 0.5))]
+                       # Normalize with fixed statistics is wrong here: the fake
+                       # samples are not normalized like this, yet they go through the same network.
+                       # transforms.Normalize((0.5, 0.5, 0.5),
+                       #                      (0.5, 0.5, 0.5)),
+                       lambda x: (x - x.min()) / (x.max() - x.min()) * 2 - 1,  # -> [-1., 1.]
+                       ]
     return transforms.Compose(transform_list)
 
 def __scale_width(img, target_width):
diff --git a/models/cycle_gan_model.py b/models/cycle_gan_model.py
index b3c52c7f630..97be1c05572 100644
--- a/models/cycle_gan_model.py
+++ b/models/cycle_gan_model.py
@@ -199,6 +199,20 @@ def get_current_visuals(self):
         return OrderedDict([('real_A', real_A), ('fake_B', fake_B), ('rec_A', rec_A),
                             ('real_B', real_B), ('fake_A', fake_A), ('rec_B', rec_B)])
 
+    def forward_external(self, x, direction):
+        # run one generator on an external batch; returns numpy image(s)
+        isBatch = x.size(0) > 1
+        if direction == 'AtoB':
+            real_A = Variable(x, volatile=True)
+            fake_B = self.netG_A.forward(real_A)
+            return util.tensor2im(fake_B.data, batch=isBatch)
+        elif direction == 'BtoA':
+            real_B = Variable(x, volatile=True)
+            fake_A = self.netG_B.forward(real_B)
+            return util.tensor2im(fake_A.data, batch=isBatch)
+
+        raise ValueError('`direction` must be "AtoB" or "BtoA"')
+
     def save(self, label):
         self.save_network(self.netG_A, 'G_A', label, self.gpu_ids)
         self.save_network(self.netD_A, 'D_A', label, self.gpu_ids)
diff --git a/models/networks.py b/models/networks.py
index 12da13bd49e..13048ede468 100644
--- a/models/networks.py
+++ b/models/networks.py
@@ -131,6 +131,20 @@ def __call__(self, input, target_is_real):
 # downsampling/upsampling operations.
 # Code and idea originally from Justin Johnson's architecture.
 # https://github.com/jcjohnson/fast-neural-style/
+
+class Printer(nn.Module):
+    def __init__(self, text='', only_size=True):
+        super(Printer, self).__init__()
+        self.only_size = only_size
+        self.text = text
+    def forward(self, x):
+        print(self.text, end=' ')
+        if self.only_size:
+            print(x.size())
+        else:
+            print(x)
+        return x
+
 class ResnetGenerator(nn.Module):
     def __init__(self, input_nc, output_nc, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False, n_blocks=6, gpu_ids=[], padding_type='reflect'):
         assert(n_blocks >= 0)
@@ -149,9 +163,17 @@ def __init__(self, input_nc, output_nc, ngf=64, norm_layer=nn.BatchNorm2d, use_d
         for i in range(n_downsampling):
             mult = 2**i
             model += [nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3,
-                                stride=2, padding=1),
+                                stride=1, padding=1),
+                      nn.MaxPool2d(2),
                       norm_layer(ngf * mult * 2),
-                      nn.ReLU(True)]
+                      nn.ReLU(True),
+                      # Printer('downsample %d' % mult)
+                      ]
+            # model += [nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3,
+            #                     stride=2, padding=1),
+            #           norm_layer(ngf * mult * 2),
+            #           nn.ReLU(True),
+            #           Printer('downsample %d' % mult)]
 
         mult = 2**n_downsampling
         for i in range(n_blocks):
@@ -159,11 +181,22 @@ def __init__(self, input_nc, output_nc, ngf=64, norm_layer=nn.BatchNorm2d, use_d
 
         for i in range(n_downsampling):
             mult = 2**(n_downsampling - i)
-            model += [nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2),
-                                         kernel_size=3, stride=2,
-                                         padding=1, output_padding=1),
-                      norm_layer(int(ngf * mult / 2)),
-                      nn.ReLU(True)]
+            model += [
+                nn.UpsamplingBilinear2d(scale_factor=2),
+                # nn.Upsample(scale_factor=2, mode='nearest'),
+                nn.Conv2d(ngf * mult, int(ngf * mult / 2), 3, padding=1),
+                norm_layer(int(ngf * mult / 2)),
+                nn.ReLU(True),
+                # Printer('upsample %d' % mult)
+            ]
+
+            # model += [nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2),
+            #                              kernel_size=3, stride=2,
+            #                              padding=1, output_padding=1),
+            #           norm_layer(int(ngf * mult / 2)),
+            #           nn.ReLU(True)]
+            #
+
         model += [nn.ReflectionPad2d(3)]
         model += [nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)]
         model += [nn.Tanh()]
diff --git a/options/test_options.py b/options/test_options.py
index 6b79860fd50..33ba441b261 100644
--- a/options/test_options.py
+++ b/options/test_options.py
@@ -4,6 +4,7 @@ class TestOptions(BaseOptions):
     def initialize(self):
         BaseOptions.initialize(self)
+        self.parser.add_argument('--input_video', type=str, help='input video path')
         self.parser.add_argument('--ntest', type=int, default=float("inf"), help='# of test examples.')
         self.parser.add_argument('--results_dir', type=str, default='./results/', help='saves results here.')
         self.parser.add_argument('--aspect_ratio', type=float, default=1.0, help='aspect ratio of result images')
diff --git a/test-video.py b/test-video.py
new file mode 100644
index 00000000000..3ae76410d04
--- /dev/null
+++ b/test-video.py
@@ -0,0 +1,76 @@
+import cv2
+import time
+import os
+import sys
+import torch as th
+from PIL import Image
+from torchvision import transforms
+from options.test_options import TestOptions
+from data.data_loader import CreateDataLoader
+from models.models import create_model
+from util.visualizer import Visualizer
+from pdb import set_trace as st
+from util import html
+
+
+opt = TestOptions().parse()
+opt.nThreads = 1   # test code only supports nThreads = 1
+opt.batchSize = 1  # test code only supports batchSize = 1
+opt.serial_batches = True  # no shuffle
+opt.no_flip = True  # no flip
+
+# open the input video and an XVID writer for the translated output
+print(opt.input_video)
+video_capture = cv2.VideoCapture(opt.input_video)
+W = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH))
+H = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
+W, H = 640, 480
+# W, H = 128, 128
+# W, H = 256, 256
+length = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
+fourcc = cv2.VideoWriter_fourcc(*'XVID')
+out_video = cv2.VideoWriter(opt.name + '.avi', fourcc, 20.0, (W, H))
+
+
+model = create_model(opt)
+BUFFER = 14  # frames translated per forward pass
+
+# test
+it = 0
+while True:
+    it += 1
+    t = time.time()
+    x = []
+    for b in range(BUFFER):
+        ret, frame = video_capture.read()
+        if not ret:
+            break
+        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        img = Image.fromarray(frame)
+
+        T = transforms.Compose([
+            transforms.Scale([W, H]),  # (w, h) for old torchvision Scale; Resize in newer versions expects (h, w)
+            transforms.ToTensor(),
+            # lambda x: x * 2. - 1.
+        ])
+        x += [T(img)[None]]
+    if len(x) == 0: break
+    x = th.cat(x, 0)
+    if len(opt.gpu_ids) > 0:
+        x = x.cuda(opt.gpu_ids[0])
+    # note: the unary minus inverts the uint8 frames (values wrap mod 256)
+    y = -model.forward_external(x, 'BtoA')
+    for frame in y:
+        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
+        out_video.write(frame)
+
+    print('processed frames: %4d  FPS: %5.2f' % (
+        it * BUFFER, BUFFER / (time.time() - t)))
+
+    if not ret:
+        break
+
+
+out_video.release()
+video_capture.release()
+print("Ended!")
diff --git a/util/util.py b/util/util.py
index 781239f7ce7..f35c2fe6f04 100644
--- a/util/util.py
+++ b/util/util.py
@@ -9,9 +9,13 @@
 
 # Converts a Tensor into a Numpy array
 # |imtype|: the desired type of the converted numpy array
-def tensor2im(image_tensor, imtype=np.uint8):
-    image_numpy = image_tensor[0].cpu().float().numpy()
-    image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0
+def tensor2im(image_tensor, imtype=np.uint8, batch=False):
+    if batch:
+        image_numpy = image_tensor.cpu().float().numpy()
+        image_numpy = (np.transpose(image_numpy, (0, 2, 3, 1)) + 1) / 2.0 * 255.0
+    else:
+        image_numpy = image_tensor[0].cpu().float().numpy()
+        image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0
     return image_numpy.astype(imtype)
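
Note on the get_transform() change in data/base_dataset.py: the per-image min-max lambda stretches every input to exactly [-1, 1], while the commented-out Normalize maps [0, 1] to [-1, 1] only for images that actually span the full [0, 1] range. A minimal standalone sketch of the difference (not part of the diff; `x` stands in for a transforms.ToTensor() output):

    import torch

    x = torch.rand(3, 8, 8) * 0.5 + 0.25  # a ToTensor()-style image that does not span [0, 1]

    fixed = (x - 0.5) / 0.5                               # fixed statistics, as in Normalize
    minmax = (x - x.min()) / (x.max() - x.min()) * 2 - 1  # per-image, as in the lambda

    print(fixed.min().item(), fixed.max().item())    # about -0.5 and 0.5
    print(minmax.min().item(), minmax.max().item())  # exactly -1.0 and 1.0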
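Note on the ResnetGenerator changes in models/networks.py: strided convolutions become stride-1 convolutions followed by MaxPool2d on the way down, and ConvTranspose2d becomes bilinear upsampling followed by a plain convolution on the way up; resize-then-convolve is a common remedy for the checkerboard artifacts transposed convolutions can introduce. A shape-only standalone sketch of the two upsampling variants (channel sizes are illustrative, not the generator's):

    import torch
    import torch.nn as nn

    x = torch.randn(1, 64, 16, 16)

    # transposed-convolution path (the removed lines)
    deconv = nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2,
                                padding=1, output_padding=1)

    # resize-then-convolve path (the added lines)
    resize_conv = nn.Sequential(
        nn.UpsamplingBilinear2d(scale_factor=2),
        nn.Conv2d(64, 32, kernel_size=3, padding=1),
    )

    print(deconv(x).size())       # (1, 32, 32, 32)
    print(resize_conv(x).size())  # (1, 32, 32, 32)

Both paths double the spatial resolution, so the swap is drop-in for the generator; only the learned weights differ.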
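Note on the tensor2im() change in util/util.py: with batch=True the whole (N, C, H, W) tensor is converted at once to an (N, H, W, C) uint8 array, which is what the frame loop in test-video.py iterates over. A quick standalone shape check (assumes the repo is on the Python path and a tanh-range input):

    import torch
    from util.util import tensor2im

    fake = torch.tanh(torch.randn(14, 3, 480, 640))  # stand-in for a generator output buffer

    print(tensor2im(fake).shape)              # (480, 640, 3): first image only
    print(tensor2im(fake, batch=True).shape)  # (14, 480, 640, 3): every frame

One caveat: forward_external() only sets batch when x.size(0) > 1, so a final buffer holding a single frame comes back as (H, W, 3), and the write loop in test-video.py would then iterate over rows instead of frames.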