utils.py
import xml.etree.ElementTree as ET
import torch
from torchvision import transforms
import cv2
import math

# function to get bounding box from xml file
def extract_bbox(path):
    root = ET.parse(path).getroot()
    size = []
    bbox = {}
    for i in root.findall('size'):
        for j in i:
            size.append(int(j.text))
    for i in root.findall('object/bndbox'):
        for j in i:
            bbox.update({j.tag: int(j.text)})
    return bbox, size
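
# Illustrative usage (the path and values below are hypothetical, not from the repo):
#   bbox, size = extract_bbox('annotations/dog.xml')
#   bbox  # -> {'xmin': 48, 'ymin': 240, 'xmax': 195, 'ymax': 371}
#   size  # -> [width, height, depth] as listed under the <size> tag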

# function to resize a bbox from the old image size to a new image size
def resize_bbox(bbox, new_size=(224, 224), old_size=None):
    new_w, new_h = new_size
    assert old_size is not None, 'old_size should not be None'
    old_w, old_h, ch = old_size
    w_ratio, h_ratio = new_w / old_w, new_h / old_h
    xmin, ymin, xmax, ymax = bbox
    xmin, xmax = xmin * w_ratio, xmax * w_ratio
    ymin, ymax = ymin * h_ratio, ymax * h_ratio
    return xmin, ymin, xmax, ymax
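
# Sketch with made-up numbers: scaling a box from a 500x375x3 image down to 224x224,
# i.e. multiplying x-coordinates by 224/500 and y-coordinates by 224/375:
#   resize_bbox((48, 240, 195, 371), new_size=(224, 224), old_size=(500, 375, 3))
#   # -> approximately (21.5, 143.4, 87.4, 221.6)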

# convert Pascal VOC corner boxes to normalised YOLO centre format
def pascal2yolo(bbox, size):
    # bbox: tensor of shape (batch_size, 4) in (xmin, ymin, xmax, ymax) format
    bbox1 = bbox.clone().detach()
    w, h = size
    bbox1[:, 3] = bbox1[:, 3] - bbox1[:, 1]  # height = ymax - ymin
    bbox1[:, 2] = bbox1[:, 2] - bbox1[:, 0]  # width = xmax - xmin
    bbox1[:, 0] = bbox1[:, 0] + (bbox1[:, 2] / 2)  # cx = xmin + width / 2
    bbox1[:, 1] = bbox1[:, 1] + (bbox1[:, 3] / 2)  # cy = ymin + height / 2
    bbox1[:, [0, 2]] = bbox1[:, [0, 2]] / w  # normalise x and width by image width
    bbox1[:, [1, 3]] = bbox1[:, [1, 3]] / h  # normalise y and height by image height
    return bbox1

# convert normalised YOLO centre boxes back to Pascal VOC corner format
def yolo2pascal(bbox, size):
    # bbox: tensor of shape (batch_size, 4) in normalised (cx, cy, w, h) format
    bbox1 = bbox.clone().detach()
    w, h = size
    bbox1[:, [0, 2]] = bbox1[:, [0, 2]] * w  # scale x and width by image width
    bbox1[:, [1, 3]] = bbox1[:, [1, 3]] * h  # scale y and height by image height
    bbox1[:, 0] = bbox1[:, 0] - (bbox1[:, 2] / 2)  # xmin = cx - width / 2
    bbox1[:, 1] = bbox1[:, 1] - (bbox1[:, 3] / 2)  # ymin = cy - height / 2
    bbox1[:, 3] = bbox1[:, 1] + (bbox1[:, 3])  # ymax = ymin + height
    bbox1[:, 2] = bbox1[:, 0] + (bbox1[:, 2])  # xmax = xmin + width
    return bbox1
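
# Illustrative round trip (a sketch with made-up values, not from the original file):
#   boxes = torch.tensor([[50., 60., 150., 200.]])    # (xmin, ymin, xmax, ymax)
#   yolo = pascal2yolo(boxes, (224, 224))             # -> [[100/224, 130/224, 100/224, 140/224]]
#   yolo2pascal(yolo, (224, 224))                     # -> back to [[50., 60., 150., 200.]]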

# function to assign a bbox to a cell in the final feature map
def assign_cell(bbox, grid_size=(7, 7)):
    # bbox: normalised (cx, cy, w, h, conf); expected to be on the device chosen below
    cx, cy, _, _, _ = bbox
    w, h = grid_size
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    base_h = torch.tensor([1 / h]).to(device)
    base_w = torch.tensor([1 / w]).to(device)
    px = torch.div(cx, base_w, rounding_mode='floor')  # column index of the responsible cell
    py = torch.div(cy, base_h, rounding_mode='floor')  # row index of the responsible cell
    px = int(px.item())
    py = int(py.item())
    if px >= w:  # clamp to the last column/row for boxes centred on the image border
        px = w - 1
    if py >= h:
        py = h - 1
    return (py, px)
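
# Sketch (assuming the bbox tensor is on the device assign_cell selects): a box centred
# at (cx, cy) = (0.52, 0.31) on a 7x7 grid falls in cell (row 2, col 3), because
# floor(0.52 * 7) = 3, floor(0.31 * 7) = 2, and the function returns (py, px):
#   assign_cell(torch.tensor([0.52, 0.31, 0.2, 0.2, 1.0]))  # -> (2, 3) on a CPU-only machine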

def read_image(path):
    image = cv2.imread(path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return image

# compute a new (w, h) that maps the shorter side to new_size while keeping the aspect ratio
def get_wh(old_size, new_size):
    w, h = old_size
    aspect_ratio = w / h
    if w < h:
        w_ = new_size
        h_ = w_ / aspect_ratio
    else:
        h_ = new_size
        w_ = h_ * aspect_ratio
    return int(w_), int(h_)
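
# Sketch with made-up sizes:
#   get_wh((640, 480), 224)  # -> (298, 224), landscape: height becomes 224
#   get_wh((480, 640), 224)  # -> (224, 298), portrait: width becomes 224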

# convert normalised YOLO centre boxes to absolute COCO (x, y, w, h); modifies bbox in place
def yolo2coco(bbox, size):
    w, h = size
    bbox[:, 0] = bbox[:, 0] - (bbox[:, 2] / 2)  # x = cx - width / 2
    bbox[:, 1] = bbox[:, 1] - (bbox[:, 3] / 2)  # y = cy - height / 2
    bbox[:, [0, 2]] = bbox[:, [0, 2]] * w  # scale x and width by image width
    bbox[:, [1, 3]] = bbox[:, [1, 3]] * h  # scale y and height by image height
    return bbox
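
# Sketch: on a 224x224 image, the normalised box [[0.5, 0.5, 0.25, 0.25]] becomes
# [[84.0, 84.0, 56.0, 56.0]] in absolute (x, y, w, h) coordinates:
#   yolo2coco(torch.tensor([[0.5, 0.5, 0.25, 0.25]]), (224, 224))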

# generate prior (anchor) boxes: 4 aspect ratios per cell of a dim x dim grid,
# giving a (4 * dim * dim, 4) tensor of normalised (cx, cy, w, h)
def generate_boxes(dim):
    a = [1/1, 2/1, 1/2, 3/1]  # aspect ratios
    scale = [0.1, 0.15, 0.25, 0.35]  # one scale per aspect ratio
    prior_boxes = []
    for k, ratio in enumerate(a):
        for i in range(dim):
            for j in range(dim):
                cx = (j + 0.5) / dim
                cy = (i + 0.5) / dim
                prior_boxes.append(
                    [cx, cy, scale[k] * math.sqrt(ratio), scale[k] / math.sqrt(ratio)])
    return torch.tensor(prior_boxes)
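
# Sketch: for a 7x7 grid this yields 4 * 7 * 7 = 196 prior boxes.
#   generate_boxes(7).shape  # -> torch.Size([196, 4])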

# This function is from https://github.com/kuangliu/pytorch-ssd.
def iou(box1, box2):
    '''Compute the intersection over union of two sets of boxes, each box is [x1,y1,x2,y2].
    Args:
      box1: (tensor) bounding boxes, sized [N,4].
      box2: (tensor) bounding boxes, sized [M,4].
    Return:
      (tensor) iou, sized [N,M].
    '''
    N = box1.size(0)
    M = box2.size(0)
    lt = torch.max(
        box1[:, :2].unsqueeze(1).expand(N, M, 2),  # [N,2] -> [N,1,2] -> [N,M,2]
        box2[:, :2].unsqueeze(0).expand(N, M, 2),  # [M,2] -> [1,M,2] -> [N,M,2]
    )
    rb = torch.min(
        box1[:, 2:].unsqueeze(1).expand(N, M, 2),  # [N,2] -> [N,1,2] -> [N,M,2]
        box2[:, 2:].unsqueeze(0).expand(N, M, 2),  # [M,2] -> [1,M,2] -> [N,M,2]
    )
    wh = rb - lt  # [N,M,2]
    wh[wh < 0] = 0  # clip at 0
    inter = wh[:, :, 0] * wh[:, :, 1]  # [N,M]
    area1 = (box1[:, 2]-box1[:, 0]) * (box1[:, 3]-box1[:, 1])  # [N,]
    area2 = (box2[:, 2]-box2[:, 0]) * (box2[:, 3]-box2[:, 1])  # [M,]
    area1 = area1.unsqueeze(1).expand_as(inter)  # [N,] -> [N,1] -> [N,M]
    area2 = area2.unsqueeze(0).expand_as(inter)  # [M,] -> [1,M] -> [N,M]
    iou = inter / (area1 + area2 - inter)
    return iou
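
# Minimal smoke test (a sketch added for illustration, not part of the original module):
# exercises the box conversions and the IoU helper with made-up values; no image or
# XML files are required.
if __name__ == '__main__':
    corners = torch.tensor([[50., 60., 150., 200.]])   # (xmin, ymin, xmax, ymax)
    yolo = pascal2yolo(corners, (224, 224))            # normalised (cx, cy, w, h)
    restored = yolo2pascal(yolo, (224, 224))           # back to corner format
    print('round trip ok:', torch.allclose(corners, restored))

    priors = generate_boxes(7)                         # (196, 4) prior boxes
    prior_corners = yolo2pascal(priors, (1, 1))        # corners, still normalised
    gt = torch.tensor([[0.2, 0.2, 0.6, 0.7]])
    print('iou matrix shape:', iou(gt, prior_corners).shape)  # torch.Size([1, 196])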