Commit 3000f6d (0 parents). Showing 29 changed files with 1,983 additions and 0 deletions.
@@ -0,0 +1,2 @@
# RUBi : Reducing Unimodal Biases for Visual Question Answering
@@ -0,0 +1,3 @@
block.bootstrap.pytorch
pytorch_pretrained_bert
h5py
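These three dependencies (the block.bootstrap.pytorch experiment framework, the pytorch_pretrained_bert tokenizer package, and h5py for reading HDF5 feature files) can be installed in the usual way, e.g. with pip install -r requirements.txt.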
Empty file.
@@ -0,0 +1 @@
__version__ = '0.0.0'
Empty file.
@@ -0,0 +1,135 @@
from bootstrap.lib.options import Options
from block.datasets.tdiuc import TDIUC
from block.datasets.vrd import VRD
from block.datasets.vg import VG
from block.datasets.vqa_utils import ListVQADatasets
from .vqa2 import VQA2
from .vqacp2 import VQACP2


def factory(engine=None):
    opt = Options()['dataset']

    dataset = {}
    if opt.get('train_split', None):
        dataset['train'] = factory_split(opt['train_split'])
    if opt.get('eval_split', None):
        dataset['eval'] = factory_split(opt['eval_split'])

    return dataset


def factory_split(split):
    opt = Options()['dataset']
    shuffle = ('train' in split)

    if opt['name'] == 'vqacp2':
        assert split in ['train', 'val', 'test']
        samplingans = (opt['samplingans'] and split == 'train')

        dataset = VQACP2(
            dir_data=opt['dir'],
            split=split,
            batch_size=opt['batch_size'],
            nb_threads=opt['nb_threads'],
            pin_memory=Options()['misc']['cuda'],
            shuffle=shuffle,
            nans=opt['nans'],
            minwcount=opt['minwcount'],
            nlp=opt['nlp'],
            proc_split=opt['proc_split'],
            samplingans=samplingans,
            dir_rcnn=opt['dir_rcnn'],
            dir_cnn=opt.get('dir_cnn', None),
            dir_vgg16=opt.get('dir_vgg16', None),
        )

    elif opt['name'] == 'vqacpv2-with-testdev':
        assert split in ['train', 'val', 'test']
        samplingans = (opt['samplingans'] and split == 'train')
        dataset = VQACP2(
            dir_data=opt['dir'],
            split=split,
            batch_size=opt['batch_size'],
            nb_threads=opt['nb_threads'],
            pin_memory=Options()['misc']['cuda'],
            shuffle=shuffle,
            nans=opt['nans'],
            minwcount=opt['minwcount'],
            nlp=opt['nlp'],
            proc_split=opt['proc_split'],
            samplingans=samplingans,
            dir_rcnn=opt['dir_rcnn'],
            dir_cnn=opt.get('dir_cnn', None),
            dir_vgg16=opt.get('dir_vgg16', None),
            has_testdevset=True,
        )

    elif opt['name'] == 'vqa2':
        assert split in ['train', 'val', 'test']
        samplingans = (opt['samplingans'] and split == 'train')

        if opt['vg']:
            assert opt['proc_split'] == 'trainval'

            # trainvalset
            vqa2 = VQA2(
                dir_data=opt['dir'],
                split='train',
                nans=opt['nans'],
                minwcount=opt['minwcount'],
                nlp=opt['nlp'],
                proc_split=opt['proc_split'],
                samplingans=samplingans,
                dir_rcnn=opt['dir_rcnn'])

            vg = VG(
                dir_data=opt['dir_vg'],
                split='train',
                nans=10000,
                minwcount=0,
                nlp=opt['nlp'],
                dir_rcnn=opt['dir_rcnn_vg'])

            vqa2vg = ListVQADatasets(
                [vqa2, vg],
                split='train',
                batch_size=opt['batch_size'],
                nb_threads=opt['nb_threads'],
                pin_memory=Options()['misc.cuda'],
                shuffle=shuffle)

            if split == 'train':
                dataset = vqa2vg
            else:
                dataset = VQA2(
                    dir_data=opt['dir'],
                    split=split,
                    batch_size=opt['batch_size'],
                    nb_threads=opt['nb_threads'],
                    pin_memory=Options()['misc.cuda'],
                    shuffle=False,
                    nans=opt['nans'],
                    minwcount=opt['minwcount'],
                    nlp=opt['nlp'],
                    proc_split=opt['proc_split'],
                    samplingans=samplingans,
                    dir_rcnn=opt['dir_rcnn'])
                dataset.sync_from(vqa2vg)

        else:
            dataset = VQA2(
                dir_data=opt['dir'],
                split=split,
                batch_size=opt['batch_size'],
                nb_threads=opt['nb_threads'],
                pin_memory=Options()['misc.cuda'],
                shuffle=shuffle,
                nans=opt['nans'],
                minwcount=opt['minwcount'],
                nlp=opt['nlp'],
                proc_split=opt['proc_split'],
                samplingans=samplingans,
                dir_rcnn=opt['dir_rcnn'],
                dir_cnn=opt.get('dir_cnn', None),
            )

    return dataset
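Note that factory() and factory_split() take no dataset arguments: everything is read from the global Options() registry that block.bootstrap.pytorch populates from a YAML file. A minimal sketch of the option values these functions expect, written as a plain Python dict; the values are illustrative assumptions, not taken from the repository's configs:

options = {
    'dataset': {
        'name': 'vqacp2',          # selects the VQACP2 branch of factory_split
        'dir': 'data/vqacp2',      # where the raw/processed data lives
        'train_split': 'train',    # factory() builds dataset['train'] from this
        'eval_split': 'val',       # factory() builds dataset['eval'] from this
        'batch_size': 256,
        'nb_threads': 4,
        'nans': 3000,              # number of answer classes kept
        'minwcount': 0,            # min occurrences for a word to enter the vocab
        'nlp': 'mcb',              # tokenization scheme
        'proc_split': 'train',
        'samplingans': True,       # sample train targets from the answer counts
        'dir_rcnn': 'data/coco/extract_rcnn',
    },
    'misc': {'cuda': True},        # read by factory_split for pin_memory
}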
@@ -0,0 +1,192 @@
import os
import csv
import copy
import json
import torch
import numpy as np
from os import path as osp
from bootstrap.lib.logger import Logger
from bootstrap.lib.options import Options
from block.datasets.vqa_utils import AbstractVQA
from copy import deepcopy
import random
import tqdm
import h5py


class VQA2(AbstractVQA):

    def __init__(self,
            dir_data='data/vqa2',
            split='train',
            batch_size=10,
            nb_threads=4,
            pin_memory=False,
            shuffle=False,
            nans=1000,
            minwcount=10,
            nlp='mcb',
            proc_split='train',
            samplingans=False,
            dir_rcnn='data/coco/extract_rcnn',
            adversarial=False,
            dir_cnn=None
            ):
        super(VQA2, self).__init__(
            dir_data=dir_data,
            split=split,
            batch_size=batch_size,
            nb_threads=nb_threads,
            pin_memory=pin_memory,
            shuffle=shuffle,
            nans=nans,
            minwcount=minwcount,
            nlp=nlp,
            proc_split=proc_split,
            samplingans=samplingans,
            has_valset=True,
            has_testset=True,
            has_answers_occurence=True,
            do_tokenize_answers=False)

        self.dir_rcnn = dir_rcnn
        self.dir_cnn = dir_cnn
        self.load_image_features()
        # to activate manually in a visualization context (notebook)
        self.load_original_annotation = True

        if 'bert' in Options()['model.network.name']:
            from pytorch_pretrained_bert import BertTokenizer
            self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    def add_rcnn_to_item(self, item):
        path_rcnn = os.path.join(self.dir_rcnn, '{}.pth'.format(item['image_name']))
        item_rcnn = torch.load(path_rcnn)
        item['visual'] = item_rcnn['pooled_feat']
        item['coord'] = item_rcnn['rois']
        item['norm_coord'] = item_rcnn.get('norm_rois', None)
        item['nb_regions'] = item['visual'].size(0)
        return item

    def add_cnn_to_item(self, item):
        image_name = item['image_name']
        if image_name in self.image_names_to_index_train:
            index = self.image_names_to_index_train[image_name]
            image = torch.tensor(self.image_features_train['att'][index])
        elif image_name in self.image_names_to_index_val:
            index = self.image_names_to_index_val[image_name]
            image = torch.tensor(self.image_features_val['att'][index])
        # (2048, 14, 14) -> (196, 2048); reshape is needed because permute
        # returns a non-contiguous tensor that .view() would reject
        image = image.permute(1, 2, 0).reshape(196, 2048)
        item['visual'] = image
        return item

    def load_image_features(self):
        if self.dir_cnn:
            filename_train = os.path.join(self.dir_cnn, 'trainset.hdf5')
            filename_val = os.path.join(self.dir_cnn, 'valset.hdf5')
            Logger()(f"Opening file {filename_train}, {filename_val}")
            self.image_features_train = h5py.File(filename_train, 'r', swmr=True)
            self.image_features_val = h5py.File(filename_val, 'r', swmr=True)
            # load the image-name -> hdf5-row mappings that accompany the files
            with open(os.path.join(self.dir_cnn, 'trainset.txt'), 'r') as f:
                self.image_names_to_index_train = {}
                for i, line in enumerate(f):
                    self.image_names_to_index_train[line.strip()] = i
            with open(os.path.join(self.dir_cnn, 'valset.txt'), 'r') as f:
                self.image_names_to_index_val = {}
                for i, line in enumerate(f):
                    self.image_names_to_index_val[line.strip()] = i

    def __getitem__(self, index):
        item = {}
        item['index'] = index

        # Process question (word tokens)
        question = self.dataset['questions'][index]
        if self.load_original_annotation:
            item['original_question'] = question

        item['question_id'] = question['question_id']

        if 'bert' in Options()['model.network.name']:
            tokenized_text = self.tokenizer.tokenize(question['question'])
            indexed_tokens = self.tokenizer.convert_tokens_to_ids(tokenized_text)
            tokens_tensor = torch.tensor(indexed_tokens)
            item['question'] = tokens_tensor
            item['lengths'] = torch.LongTensor([len(tokenized_text)])
        else:
            #question['question_wids'] = question['question_wids'][:26]
            item['question'] = torch.tensor(question['question_wids'], dtype=torch.long)
            item['lengths'] = torch.tensor([len(question['question_wids'])], dtype=torch.long)
        item['image_name'] = question['image_name']

        # Process object, attribute and relational features
        if self.dir_rcnn:
            item = self.add_rcnn_to_item(item)
        elif self.dir_cnn:
            item = self.add_cnn_to_item(item)

        # Process answer if it exists
        if 'annotations' in self.dataset:
            annotation = self.dataset['annotations'][index]
            if self.load_original_annotation:
                item['original_annotation'] = annotation
            if 'train' in self.split and self.samplingans:
                # sample the target answer proportionally to its annotator count
                proba = annotation['answers_count']
                proba = proba / np.sum(proba)
                item['answer_id'] = int(np.random.choice(annotation['answers_id'], p=proba))
            else:
                item['answer_id'] = annotation['answer_id']
            item['class_id'] = torch.tensor([item['answer_id']], dtype=torch.long)
            item['answer'] = annotation['answer']
            item['question_type'] = annotation['question_type']
        else:
            # test split: flag the questions that belong to test-dev
            if item['question_id'] in self.is_qid_testdev:
                item['is_testdev'] = True
            else:
                item['is_testdev'] = False

        # if Options()['model.network.name'] == 'xmn_net':
        #     num_feat = 36
        #     relation_mask = np.zeros((num_feat, num_feat))
        #     boxes = item['coord']
        #     for i in range(num_feat):
        #         for j in range(i+1, num_feat):
        #             # if there is no overlap between two bounding boxes
        #             if boxes[0,i]>boxes[2,j] or boxes[0,j]>boxes[2,i] or boxes[1,i]>boxes[3,j] or boxes[1,j]>boxes[3,i]:
        #                 pass
        #             else:
        #                 relation_mask[i,j] = relation_mask[j,i] = 1
        #     relation_mask = torch.from_numpy(relation_mask).byte()
        #     item['relation_mask'] = relation_mask

        return item

    def download(self):
        dir_zip = osp.join(self.dir_raw, 'zip')
        os.system('mkdir -p '+dir_zip)
        dir_ann = osp.join(self.dir_raw, 'annotations')
        os.system('mkdir -p '+dir_ann)
        os.system('wget http://visualqa.org/data/mscoco/vqa/v2_Questions_Train_mscoco.zip -P '+dir_zip)
        os.system('wget http://visualqa.org/data/mscoco/vqa/v2_Questions_Val_mscoco.zip -P '+dir_zip)
        os.system('wget http://visualqa.org/data/mscoco/vqa/v2_Questions_Test_mscoco.zip -P '+dir_zip)
        os.system('wget http://visualqa.org/data/mscoco/vqa/v2_Annotations_Train_mscoco.zip -P '+dir_zip)
        os.system('wget http://visualqa.org/data/mscoco/vqa/v2_Annotations_Val_mscoco.zip -P '+dir_zip)
        os.system('unzip '+osp.join(dir_zip, 'v2_Questions_Train_mscoco.zip')+' -d '+dir_ann)
        os.system('unzip '+osp.join(dir_zip, 'v2_Questions_Val_mscoco.zip')+' -d '+dir_ann)
        os.system('unzip '+osp.join(dir_zip, 'v2_Questions_Test_mscoco.zip')+' -d '+dir_ann)
        os.system('unzip '+osp.join(dir_zip, 'v2_Annotations_Train_mscoco.zip')+' -d '+dir_ann)
        os.system('unzip '+osp.join(dir_zip, 'v2_Annotations_Val_mscoco.zip')+' -d '+dir_ann)
        os.system('mv '+osp.join(dir_ann, 'v2_mscoco_train2014_annotations.json')+' '
            +osp.join(dir_ann, 'mscoco_train2014_annotations.json'))
        os.system('mv '+osp.join(dir_ann, 'v2_mscoco_val2014_annotations.json')+' '
            +osp.join(dir_ann, 'mscoco_val2014_annotations.json'))
        os.system('mv '+osp.join(dir_ann, 'v2_OpenEnded_mscoco_train2014_questions.json')+' '
            +osp.join(dir_ann, 'OpenEnded_mscoco_train2014_questions.json'))
        os.system('mv '+osp.join(dir_ann, 'v2_OpenEnded_mscoco_val2014_questions.json')+' '
            +osp.join(dir_ann, 'OpenEnded_mscoco_val2014_questions.json'))
        os.system('mv '+osp.join(dir_ann, 'v2_OpenEnded_mscoco_test2015_questions.json')+' '
            +osp.join(dir_ann, 'OpenEnded_mscoco_test2015_questions.json'))
        os.system('mv '+osp.join(dir_ann, 'v2_OpenEnded_mscoco_test-dev2015_questions.json')+' '
            +osp.join(dir_ann, 'OpenEnded_mscoco_test-dev2015_questions.json'))
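A minimal usage sketch for the dataset above, assuming VQA v2 has been downloaded and region features extracted under the default paths, and that a bootstrap Options() context has already been initialized (the class reads model.network.name from it). The import path is hypothetical and depends on where this file lives in the package:

# usage sketch: build the train split and inspect one sample
from rubi.datasets.vqa2 import VQA2  # hypothetical module path

trainset = VQA2(
    dir_data='data/vqa2',
    split='train',
    dir_rcnn='data/coco/extract_rcnn',
    samplingans=True,
)
item = trainset[0]
print(item['question'].shape)  # LongTensor of word ids
print(item['visual'].shape)    # (nb_regions, 2048) pooled region features
print(item['answer'])          # ground-truth answer string (train/val only)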