diff --git a/__pycache__/export.cpython-38.pyc b/__pycache__/export.cpython-38.pyc new file mode 100644 index 000000000..0e6a8dad0 Binary files /dev/null and b/__pycache__/export.cpython-38.pyc differ diff --git a/__pycache__/val_dual.cpython-38.pyc b/__pycache__/val_dual.cpython-38.pyc new file mode 100644 index 000000000..9d8ef23d2 Binary files /dev/null and b/__pycache__/val_dual.cpython-38.pyc differ diff --git a/data/hyps/hyp.scratch-high.yaml b/data/hyps/hyp.scratch-high.yaml index fdb2c3788..ef5bdec5c 100644 --- a/data/hyps/hyp.scratch-high.yaml +++ b/data/hyps/hyp.scratch-high.yaml @@ -26,5 +26,5 @@ perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 flipud: 0.0 # image flip up-down (probability) fliplr: 0.5 # image flip left-right (probability) mosaic: 1.0 # image mosaic (probability) -mixup: 0.15 # image mixup (probability) -copy_paste: 0.3 # segment copy-paste (probability) +mixup: 0.5 # image mixup (probability) +copy_paste: 0.0 # segment copy-paste (probability) diff --git a/data/polyp_2.yaml b/data/polyp_2.yaml new file mode 100644 index 000000000..d16693ef4 --- /dev/null +++ b/data/polyp_2.yaml @@ -0,0 +1,18 @@ +# COCO 2017 dataset http://cocodataset.org + +# download command/URL (optional) +# download: bash ./scripts/get_coco.sh + +# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] +# train: ./coco/train2017.txt # 118287 images +# val: ./coco/val2017.txt # 5000 images +# test: ./coco/test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794 +train: /work/u6859530/polyp_detection/dataset2/images/train +val: /work/u6859530/polyp_detection/dataset2/images/valid +test: /work/u6859530/polyp_detection/dataset/images/valid + +# number of classes +nc: 2 + +# class names +names: [ 'hyperplastic', 'adenoma'] \ No newline at end of file diff --git a/models/.ipynb_checkpoints/yolo-checkpoint.py b/models/.ipynb_checkpoints/yolo-checkpoint.py new file mode 100644 index 000000000..2681c2308 --- /dev/null +++ b/models/.ipynb_checkpoints/yolo-checkpoint.py @@ -0,0 +1,767 @@ +import argparse +import os +import platform +import sys +from copy import deepcopy +from pathlib import Path + +FILE = Path(__file__).resolve() +ROOT = FILE.parents[1] # YOLO root directory +if str(ROOT) not in sys.path: + sys.path.append(str(ROOT)) # add ROOT to PATH +if platform.system() != 'Windows': + ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative + +from models.common import * +from models.experimental import * +from utils.general import LOGGER, check_version, check_yaml, make_divisible, print_args +from utils.plots import feature_visualization +from utils.torch_utils import (fuse_conv_and_bn, initialize_weights, model_info, profile, scale_img, select_device, + time_sync) +from utils.tal.anchor_generator import make_anchors, dist2bbox + +try: + import thop # for FLOPs computation +except ImportError: + thop = None + + +class Detect(nn.Module): + # YOLO Detect head for detection models + dynamic = False # force grid reconstruction + export = False # export mode + shape = None + anchors = torch.empty(0) # init + strides = torch.empty(0) # init + + def __init__(self, nc=80, ch=(), inplace=True): # detection layer + super().__init__() + self.nc = nc # number of classes + self.nl = len(ch) # number of detection layers + self.reg_max = 16 + self.no = nc + self.reg_max * 4 # number of outputs per anchor + self.inplace = inplace # use inplace ops (e.g. slice assignment) + self.stride = torch.zeros(self.nl) # strides computed during build + + c2, c3 = max((ch[0] // 4, self.reg_max * 4, 16)), max((ch[0], min((self.nc * 2, 128)))) # channels + self.cv2 = nn.ModuleList( + nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch) + self.cv3 = nn.ModuleList( + nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch) + self.dfl = DFL(self.reg_max) if self.reg_max > 1 else nn.Identity() + + def forward(self, x): + shape = x[0].shape # BCHW + for i in range(self.nl): + x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1) + if self.training: + return x + elif self.dynamic or self.shape != shape: + self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5)) + self.shape = shape + + box, cls = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2).split((self.reg_max * 4, self.nc), 1) + dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides + y = torch.cat((dbox, cls.sigmoid()), 1) + return y if self.export else (y, x) + + def bias_init(self): + # Initialize Detect() biases, WARNING: requires stride availability + m = self # self.model[-1] # Detect() module + # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1 + # ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum()) # nominal class frequency + for a, b, s in zip(m.cv2, m.cv3, m.stride): # from + a[-1].bias.data[:] = 1.0 # box + b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (5 objects and 80 classes per 640 image) + + +class DDetect(nn.Module): + # YOLO Detect head for detection models + dynamic = False # force grid reconstruction + export = False # export mode + shape = None + anchors = torch.empty(0) # init + strides = torch.empty(0) # init + + def __init__(self, nc=80, ch=(), inplace=True): # detection layer + super().__init__() + self.nc = nc # number of classes + self.nl = len(ch) # number of detection layers + self.reg_max = 16 + self.no = nc + self.reg_max * 4 # number of outputs per anchor + self.inplace = inplace # use inplace ops (e.g. slice assignment) + self.stride = torch.zeros(self.nl) # strides computed during build + + c2, c3 = make_divisible(max((ch[0] // 4, self.reg_max * 4, 16)), 4), max((ch[0], min((self.nc * 2, 128)))) # channels + self.cv2 = nn.ModuleList( + nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3, g=4), nn.Conv2d(c2, 4 * self.reg_max, 1, groups=4)) for x in ch) + self.cv3 = nn.ModuleList( + nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch) + self.dfl = DFL(self.reg_max) if self.reg_max > 1 else nn.Identity() + + def forward(self, x): + shape = x[0].shape # BCHW + for i in range(self.nl): + x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1) + if self.training: + return x + elif self.dynamic or self.shape != shape: + self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5)) + self.shape = shape + + box, cls = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2).split((self.reg_max * 4, self.nc), 1) + dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides + y = torch.cat((dbox, cls.sigmoid()), 1) + return y if self.export else (y, x) + + def bias_init(self): + # Initialize Detect() biases, WARNING: requires stride availability + m = self # self.model[-1] # Detect() module + # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1 + # ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum()) # nominal class frequency + for a, b, s in zip(m.cv2, m.cv3, m.stride): # from + a[-1].bias.data[:] = 1.0 # box + b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (5 objects and 80 classes per 640 image) + + +class DualDetect(nn.Module): + # YOLO Detect head for detection models + dynamic = False # force grid reconstruction + export = False # export mode + shape = None + anchors = torch.empty(0) # init + strides = torch.empty(0) # init + + def __init__(self, nc=80, ch=(), inplace=True): # detection layer + super().__init__() + self.nc = nc # number of classes + self.nl = len(ch) // 2 # number of detection layers + self.reg_max = 16 + self.no = nc + self.reg_max * 4 # number of outputs per anchor + self.inplace = inplace # use inplace ops (e.g. slice assignment) + self.stride = torch.zeros(self.nl) # strides computed during build + + c2, c3 = max((ch[0] // 4, self.reg_max * 4, 16)), max((ch[0], min((self.nc * 2, 128)))) # channels + c4, c5 = max((ch[self.nl] // 4, self.reg_max * 4, 16)), max((ch[self.nl], min((self.nc * 2, 128)))) # channels + self.cv2 = nn.ModuleList( + nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch[:self.nl]) + self.cv3 = nn.ModuleList( + nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch[:self.nl]) + self.cv4 = nn.ModuleList( + nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, 4 * self.reg_max, 1)) for x in ch[self.nl:]) + self.cv5 = nn.ModuleList( + nn.Sequential(Conv(x, c5, 3), Conv(c5, c5, 3), nn.Conv2d(c5, self.nc, 1)) for x in ch[self.nl:]) + self.dfl = DFL(self.reg_max) + self.dfl2 = DFL(self.reg_max) + + def forward(self, x): + shape = x[0].shape # BCHW + d1 = [] + d2 = [] + for i in range(self.nl): + d1.append(torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)) + d2.append(torch.cat((self.cv4[i](x[self.nl+i]), self.cv5[i](x[self.nl+i])), 1)) + if self.training: + return [d1, d2] + elif self.dynamic or self.shape != shape: + self.anchors, self.strides = (d1.transpose(0, 1) for d1 in make_anchors(d1, self.stride, 0.5)) + self.shape = shape + + box, cls = torch.cat([di.view(shape[0], self.no, -1) for di in d1], 2).split((self.reg_max * 4, self.nc), 1) + dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides + box2, cls2 = torch.cat([di.view(shape[0], self.no, -1) for di in d2], 2).split((self.reg_max * 4, self.nc), 1) + dbox2 = dist2bbox(self.dfl2(box2), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides + y = [torch.cat((dbox, cls.sigmoid()), 1), torch.cat((dbox2, cls2.sigmoid()), 1)] + return y if self.export else (y, [d1, d2]) + + def bias_init(self): + # Initialize Detect() biases, WARNING: requires stride availability + m = self # self.model[-1] # Detect() module + # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1 + # ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum()) # nominal class frequency + for a, b, s in zip(m.cv2, m.cv3, m.stride): # from + a[-1].bias.data[:] = 1.0 # box + b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (5 objects and 80 classes per 640 image) + for a, b, s in zip(m.cv4, m.cv5, m.stride): # from + a[-1].bias.data[:] = 1.0 # box + b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (5 objects and 80 classes per 640 image) + + +class DualDDetect(nn.Module): + # YOLO Detect head for detection models + dynamic = False # force grid reconstruction + export = False # export mode + shape = None + anchors = torch.empty(0) # init + strides = torch.empty(0) # init + + def __init__(self, nc=80, ch=(), inplace=True): # detection layer + super().__init__() + self.nc = nc # number of classes + self.nl = len(ch) // 2 # number of detection layers + self.reg_max = 16 + self.no = nc + self.reg_max * 4 # number of outputs per anchor + self.inplace = inplace # use inplace ops (e.g. slice assignment) + self.stride = torch.zeros(self.nl) # strides computed during build + + c2, c3 = make_divisible(max((ch[0] // 4, self.reg_max * 4, 16)), 4), max((ch[0], min((self.nc * 2, 128)))) # channels + c4, c5 = make_divisible(max((ch[self.nl] // 4, self.reg_max * 4, 16)), 4), max((ch[self.nl], min((self.nc * 2, 128)))) # channels + self.cv2 = nn.ModuleList( + nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3, g=4), nn.Conv2d(c2, 4 * self.reg_max, 1, groups=4)) for x in ch[:self.nl]) + self.cv3 = nn.ModuleList( + nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch[:self.nl]) + self.cv4 = nn.ModuleList( + nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3, g=4), nn.Conv2d(c4, 4 * self.reg_max, 1, groups=4)) for x in ch[self.nl:]) + self.cv5 = nn.ModuleList( + nn.Sequential(Conv(x, c5, 3), Conv(c5, c5, 3), nn.Conv2d(c5, self.nc, 1)) for x in ch[self.nl:]) + self.dfl = DFL(self.reg_max) + self.dfl2 = DFL(self.reg_max) + + def forward(self, x): + shape = x[0].shape # BCHW + d1 = [] + d2 = [] + for i in range(self.nl): + d1.append(torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)) + d2.append(torch.cat((self.cv4[i](x[self.nl+i]), self.cv5[i](x[self.nl+i])), 1)) + if self.training: + return [d1, d2] + elif self.dynamic or self.shape != shape: + self.anchors, self.strides = (d1.transpose(0, 1) for d1 in make_anchors(d1, self.stride, 0.5)) + self.shape = shape + + box, cls = torch.cat([di.view(shape[0], self.no, -1) for di in d1], 2).split((self.reg_max * 4, self.nc), 1) + dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides + box2, cls2 = torch.cat([di.view(shape[0], self.no, -1) for di in d2], 2).split((self.reg_max * 4, self.nc), 1) + dbox2 = dist2bbox(self.dfl2(box2), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides + y = [torch.cat((dbox, cls.sigmoid()), 1), torch.cat((dbox2, cls2.sigmoid()), 1)] + return y if self.export else (y, [d1, d2]) + #y = torch.cat((dbox2, cls2.sigmoid()), 1) + #return y if self.export else (y, d2) + #y1 = torch.cat((dbox, cls.sigmoid()), 1) + #y2 = torch.cat((dbox2, cls2.sigmoid()), 1) + #return [y1, y2] if self.export else [(y1, d1), (y2, d2)] + #return [y1, y2] if self.export else [(y1, y2), (d1, d2)] + + def bias_init(self): + # Initialize Detect() biases, WARNING: requires stride availability + m = self # self.model[-1] # Detect() module + # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1 + # ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum()) # nominal class frequency + for a, b, s in zip(m.cv2, m.cv3, m.stride): # from + a[-1].bias.data[:] = 1.0 # box + b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (5 objects and 80 classes per 640 image) + for a, b, s in zip(m.cv4, m.cv5, m.stride): # from + a[-1].bias.data[:] = 1.0 # box + b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (5 objects and 80 classes per 640 image) + + +class TripleDetect(nn.Module): + # YOLO Detect head for detection models + dynamic = False # force grid reconstruction + export = False # export mode + shape = None + anchors = torch.empty(0) # init + strides = torch.empty(0) # init + + def __init__(self, nc=80, ch=(), inplace=True): # detection layer + super().__init__() + self.nc = nc # number of classes + self.nl = len(ch) // 3 # number of detection layers + self.reg_max = 16 + self.no = nc + self.reg_max * 4 # number of outputs per anchor + self.inplace = inplace # use inplace ops (e.g. slice assignment) + self.stride = torch.zeros(self.nl) # strides computed during build + + c2, c3 = max((ch[0] // 4, self.reg_max * 4, 16)), max((ch[0], min((self.nc * 2, 128)))) # channels + c4, c5 = max((ch[self.nl] // 4, self.reg_max * 4, 16)), max((ch[self.nl], min((self.nc * 2, 128)))) # channels + c6, c7 = max((ch[self.nl * 2] // 4, self.reg_max * 4, 16)), max((ch[self.nl * 2], min((self.nc * 2, 128)))) # channels + self.cv2 = nn.ModuleList( + nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch[:self.nl]) + self.cv3 = nn.ModuleList( + nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch[:self.nl]) + self.cv4 = nn.ModuleList( + nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, 4 * self.reg_max, 1)) for x in ch[self.nl:self.nl*2]) + self.cv5 = nn.ModuleList( + nn.Sequential(Conv(x, c5, 3), Conv(c5, c5, 3), nn.Conv2d(c5, self.nc, 1)) for x in ch[self.nl:self.nl*2]) + self.cv6 = nn.ModuleList( + nn.Sequential(Conv(x, c6, 3), Conv(c6, c6, 3), nn.Conv2d(c6, 4 * self.reg_max, 1)) for x in ch[self.nl*2:self.nl*3]) + self.cv7 = nn.ModuleList( + nn.Sequential(Conv(x, c7, 3), Conv(c7, c7, 3), nn.Conv2d(c7, self.nc, 1)) for x in ch[self.nl*2:self.nl*3]) + self.dfl = DFL(self.reg_max) + self.dfl2 = DFL(self.reg_max) + self.dfl3 = DFL(self.reg_max) + + def forward(self, x): + shape = x[0].shape # BCHW + d1 = [] + d2 = [] + d3 = [] + for i in range(self.nl): + d1.append(torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)) + d2.append(torch.cat((self.cv4[i](x[self.nl+i]), self.cv5[i](x[self.nl+i])), 1)) + d3.append(torch.cat((self.cv6[i](x[self.nl*2+i]), self.cv7[i](x[self.nl*2+i])), 1)) + if self.training: + return [d1, d2, d3] + elif self.dynamic or self.shape != shape: + self.anchors, self.strides = (d1.transpose(0, 1) for d1 in make_anchors(d1, self.stride, 0.5)) + self.shape = shape + + box, cls = torch.cat([di.view(shape[0], self.no, -1) for di in d1], 2).split((self.reg_max * 4, self.nc), 1) + dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides + box2, cls2 = torch.cat([di.view(shape[0], self.no, -1) for di in d2], 2).split((self.reg_max * 4, self.nc), 1) + dbox2 = dist2bbox(self.dfl2(box2), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides + box3, cls3 = torch.cat([di.view(shape[0], self.no, -1) for di in d3], 2).split((self.reg_max * 4, self.nc), 1) + dbox3 = dist2bbox(self.dfl3(box3), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides + y = [torch.cat((dbox, cls.sigmoid()), 1), torch.cat((dbox2, cls2.sigmoid()), 1), torch.cat((dbox3, cls3.sigmoid()), 1)] + return y if self.export else (y, [d1, d2, d3]) + + def bias_init(self): + # Initialize Detect() biases, WARNING: requires stride availability + m = self # self.model[-1] # Detect() module + # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1 + # ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum()) # nominal class frequency + for a, b, s in zip(m.cv2, m.cv3, m.stride): # from + a[-1].bias.data[:] = 1.0 # box + b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (5 objects and 80 classes per 640 image) + for a, b, s in zip(m.cv4, m.cv5, m.stride): # from + a[-1].bias.data[:] = 1.0 # box + b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (5 objects and 80 classes per 640 image) + for a, b, s in zip(m.cv6, m.cv7, m.stride): # from + a[-1].bias.data[:] = 1.0 # box + b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (5 objects and 80 classes per 640 image) + + +class TripleDDetect(nn.Module): + # YOLO Detect head for detection models + dynamic = False # force grid reconstruction + export = False # export mode + shape = None + anchors = torch.empty(0) # init + strides = torch.empty(0) # init + + def __init__(self, nc=80, ch=(), inplace=True): # detection layer + super().__init__() + self.nc = nc # number of classes + self.nl = len(ch) // 3 # number of detection layers + self.reg_max = 16 + self.no = nc + self.reg_max * 4 # number of outputs per anchor + self.inplace = inplace # use inplace ops (e.g. slice assignment) + self.stride = torch.zeros(self.nl) # strides computed during build + + c2, c3 = make_divisible(max((ch[0] // 4, self.reg_max * 4, 16)), 4), \ + max((ch[0], min((self.nc * 2, 128)))) # channels + c4, c5 = make_divisible(max((ch[self.nl] // 4, self.reg_max * 4, 16)), 4), \ + max((ch[self.nl], min((self.nc * 2, 128)))) # channels + c6, c7 = make_divisible(max((ch[self.nl * 2] // 4, self.reg_max * 4, 16)), 4), \ + max((ch[self.nl * 2], min((self.nc * 2, 128)))) # channels + self.cv2 = nn.ModuleList( + nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3, g=4), + nn.Conv2d(c2, 4 * self.reg_max, 1, groups=4)) for x in ch[:self.nl]) + self.cv3 = nn.ModuleList( + nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch[:self.nl]) + self.cv4 = nn.ModuleList( + nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3, g=4), + nn.Conv2d(c4, 4 * self.reg_max, 1, groups=4)) for x in ch[self.nl:self.nl*2]) + self.cv5 = nn.ModuleList( + nn.Sequential(Conv(x, c5, 3), Conv(c5, c5, 3), nn.Conv2d(c5, self.nc, 1)) for x in ch[self.nl:self.nl*2]) + self.cv6 = nn.ModuleList( + nn.Sequential(Conv(x, c6, 3), Conv(c6, c6, 3, g=4), + nn.Conv2d(c6, 4 * self.reg_max, 1, groups=4)) for x in ch[self.nl*2:self.nl*3]) + self.cv7 = nn.ModuleList( + nn.Sequential(Conv(x, c7, 3), Conv(c7, c7, 3), nn.Conv2d(c7, self.nc, 1)) for x in ch[self.nl*2:self.nl*3]) + self.dfl = DFL(self.reg_max) + self.dfl2 = DFL(self.reg_max) + self.dfl3 = DFL(self.reg_max) + + def forward(self, x): + shape = x[0].shape # BCHW + d1 = [] + d2 = [] + d3 = [] + for i in range(self.nl): + d1.append(torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)) + d2.append(torch.cat((self.cv4[i](x[self.nl+i]), self.cv5[i](x[self.nl+i])), 1)) + d3.append(torch.cat((self.cv6[i](x[self.nl*2+i]), self.cv7[i](x[self.nl*2+i])), 1)) + if self.training: + return [d1, d2, d3] + elif self.dynamic or self.shape != shape: + self.anchors, self.strides = (d1.transpose(0, 1) for d1 in make_anchors(d1, self.stride, 0.5)) + self.shape = shape + + box, cls = torch.cat([di.view(shape[0], self.no, -1) for di in d1], 2).split((self.reg_max * 4, self.nc), 1) + dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides + box2, cls2 = torch.cat([di.view(shape[0], self.no, -1) for di in d2], 2).split((self.reg_max * 4, self.nc), 1) + dbox2 = dist2bbox(self.dfl2(box2), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides + box3, cls3 = torch.cat([di.view(shape[0], self.no, -1) for di in d3], 2).split((self.reg_max * 4, self.nc), 1) + dbox3 = dist2bbox(self.dfl3(box3), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides + #y = [torch.cat((dbox, cls.sigmoid()), 1), torch.cat((dbox2, cls2.sigmoid()), 1), torch.cat((dbox3, cls3.sigmoid()), 1)] + #return y if self.export else (y, [d1, d2, d3]) + y = torch.cat((dbox3, cls3.sigmoid()), 1) + return y if self.export else (y, d3) + + def bias_init(self): + # Initialize Detect() biases, WARNING: requires stride availability + m = self # self.model[-1] # Detect() module + # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1 + # ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum()) # nominal class frequency + for a, b, s in zip(m.cv2, m.cv3, m.stride): # from + a[-1].bias.data[:] = 1.0 # box + b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (5 objects and 80 classes per 640 image) + for a, b, s in zip(m.cv4, m.cv5, m.stride): # from + a[-1].bias.data[:] = 1.0 # box + b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (5 objects and 80 classes per 640 image) + for a, b, s in zip(m.cv6, m.cv7, m.stride): # from + a[-1].bias.data[:] = 1.0 # box + b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (5 objects and 80 classes per 640 image) + + +class Segment(Detect): + # YOLO Segment head for segmentation models + def __init__(self, nc=80, nm=32, npr=256, ch=(), inplace=True): + super().__init__(nc, ch, inplace) + self.nm = nm # number of masks + self.npr = npr # number of protos + self.proto = Proto(ch[0], self.npr, self.nm) # protos + self.detect = Detect.forward + + c4 = max(ch[0] // 4, self.nm) + self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nm, 1)) for x in ch) + + def forward(self, x): + p = self.proto(x[0]) + bs = p.shape[0] + + mc = torch.cat([self.cv4[i](x[i]).view(bs, self.nm, -1) for i in range(self.nl)], 2) # mask coefficients + x = self.detect(self, x) + if self.training: + return x, mc, p + return (torch.cat([x, mc], 1), p) if self.export else (torch.cat([x[0], mc], 1), (x[1], mc, p)) + + +class Panoptic(Detect): + # YOLO Panoptic head for panoptic segmentation models + def __init__(self, nc=80, sem_nc=93, nm=32, npr=256, ch=(), inplace=True): + super().__init__(nc, ch, inplace) + self.sem_nc = sem_nc + self.nm = nm # number of masks + self.npr = npr # number of protos + self.proto = Proto(ch[0], self.npr, self.nm) # protos + self.uconv = UConv(ch[0], ch[0]//4, self.sem_nc+self.nc) + self.detect = Detect.forward + + c4 = max(ch[0] // 4, self.nm) + self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nm, 1)) for x in ch) + + + def forward(self, x): + p = self.proto(x[0]) + s = self.uconv(x[0]) + bs = p.shape[0] + + mc = torch.cat([self.cv4[i](x[i]).view(bs, self.nm, -1) for i in range(self.nl)], 2) # mask coefficients + x = self.detect(self, x) + if self.training: + return x, mc, p, s + return (torch.cat([x, mc], 1), p, s) if self.export else (torch.cat([x[0], mc], 1), (x[1], mc, p, s)) + + +class BaseModel(nn.Module): + # YOLO base model + def forward(self, x, profile=False, visualize=False): + return self._forward_once(x, profile, visualize) # single-scale inference, train + + def _forward_once(self, x, profile=False, visualize=False): + y, dt = [], [] # outputs + for m in self.model: + if m.f != -1: # if not from previous layer + x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers + if profile: + self._profile_one_layer(m, x, dt) + x = m(x) # run + y.append(x if m.i in self.save else None) # save output + if visualize: + feature_visualization(x, m.type, m.i, save_dir=visualize) + return x + + def _profile_one_layer(self, m, x, dt): + c = m == self.model[-1] # is final layer, copy input as inplace fix + o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPs + t = time_sync() + for _ in range(10): + m(x.copy() if c else x) + dt.append((time_sync() - t) * 100) + if m == self.model[0]: + LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} module") + LOGGER.info(f'{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type}') + if c: + LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s} Total") + + def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers + LOGGER.info('Fusing layers... ') + for m in self.model.modules(): + if isinstance(m, (RepConvN)) and hasattr(m, 'fuse_convs'): + m.fuse_convs() + m.forward = m.forward_fuse # update forward + if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'): + m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv + delattr(m, 'bn') # remove batchnorm + m.forward = m.forward_fuse # update forward + self.info() + return self + + def info(self, verbose=False, img_size=640): # print model information + model_info(self, verbose, img_size) + + def _apply(self, fn): + # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers + self = super()._apply(fn) + m = self.model[-1] # Detect() + if isinstance(m, (Detect, DualDetect, TripleDetect, DDetect, DualDDetect, TripleDDetect, Segment, Panoptic)): + m.stride = fn(m.stride) + m.anchors = fn(m.anchors) + m.strides = fn(m.strides) + # m.grid = list(map(fn, m.grid)) + return self + + +class DetectionModel(BaseModel): + # YOLO detection model + def __init__(self, cfg='yolo.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes + super().__init__() + if isinstance(cfg, dict): + self.yaml = cfg # model dict + else: # is *.yaml + import yaml # for torch hub + self.yaml_file = Path(cfg).name + with open(cfg, encoding='ascii', errors='ignore') as f: + self.yaml = yaml.safe_load(f) # model dict + + # Define model + ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels + if nc and nc != self.yaml['nc']: + LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}") + self.yaml['nc'] = nc # override yaml value + if anchors: + LOGGER.info(f'Overriding model.yaml anchors with anchors={anchors}') + self.yaml['anchors'] = round(anchors) # override yaml value + self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist + self.names = [str(i) for i in range(self.yaml['nc'])] # default names + self.inplace = self.yaml.get('inplace', True) + + # Build strides, anchors + m = self.model[-1] # Detect() + if isinstance(m, (Detect, DDetect, Segment, Panoptic)): + s = 256 # 2x min stride + m.inplace = self.inplace + forward = lambda x: self.forward(x)[0] if isinstance(m, (Segment, Panoptic)) else self.forward(x) + m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))]) # forward + # check_anchor_order(m) + # m.anchors /= m.stride.view(-1, 1, 1) + self.stride = m.stride + m.bias_init() # only run once + if isinstance(m, (DualDetect, TripleDetect, DualDDetect, TripleDDetect)): + s = 256 # 2x min stride + m.inplace = self.inplace + #forward = lambda x: self.forward(x)[0][0] if isinstance(m, (DualSegment, DualPanoptic)) else self.forward(x)[0] + forward = lambda x: self.forward(x)[0] + m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))]) # forward + # check_anchor_order(m) + # m.anchors /= m.stride.view(-1, 1, 1) + self.stride = m.stride + m.bias_init() # only run once + + # Init weights, biases + initialize_weights(self) + self.info() + LOGGER.info('') + + def forward(self, x, augment=False, profile=False, visualize=False): + if augment: + return self._forward_augment(x) # augmented inference, None + return self._forward_once(x, profile, visualize) # single-scale inference, train + + def _forward_augment(self, x): + img_size = x.shape[-2:] # height, width + s = [1, 0.83, 0.67] # scales + f = [None, 3, None] # flips (2-ud, 3-lr) + y = [] # outputs + for si, fi in zip(s, f): + xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max())) + yi = self._forward_once(xi)[0] # forward + # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save + yi = self._descale_pred(yi, fi, si, img_size) + y.append(yi) + y = self._clip_augmented(y) # clip augmented tails + return torch.cat(y, 1), None # augmented inference, train + + def _descale_pred(self, p, flips, scale, img_size): + # de-scale predictions following augmented inference (inverse operation) + if self.inplace: + p[..., :4] /= scale # de-scale + if flips == 2: + p[..., 1] = img_size[0] - p[..., 1] # de-flip ud + elif flips == 3: + p[..., 0] = img_size[1] - p[..., 0] # de-flip lr + else: + x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale # de-scale + if flips == 2: + y = img_size[0] - y # de-flip ud + elif flips == 3: + x = img_size[1] - x # de-flip lr + p = torch.cat((x, y, wh, p[..., 4:]), -1) + return p + + def _clip_augmented(self, y): + # Clip YOLO augmented inference tails + nl = self.model[-1].nl # number of detection layers (P3-P5) + g = sum(4 ** x for x in range(nl)) # grid points + e = 1 # exclude layer count + i = (y[0].shape[1] // g) * sum(4 ** x for x in range(e)) # indices + y[0] = y[0][:, :-i] # large + i = (y[-1].shape[1] // g) * sum(4 ** (nl - 1 - x) for x in range(e)) # indices + y[-1] = y[-1][:, i:] # small + return y + + +Model = DetectionModel # retain YOLO 'Model' class for backwards compatibility + + +class SegmentationModel(DetectionModel): + # YOLO segmentation model + def __init__(self, cfg='yolo-seg.yaml', ch=3, nc=None, anchors=None): + super().__init__(cfg, ch, nc, anchors) + + +class ClassificationModel(BaseModel): + # YOLO classification model + def __init__(self, cfg=None, model=None, nc=1000, cutoff=10): # yaml, model, number of classes, cutoff index + super().__init__() + self._from_detection_model(model, nc, cutoff) if model is not None else self._from_yaml(cfg) + + def _from_detection_model(self, model, nc=1000, cutoff=10): + # Create a YOLO classification model from a YOLO detection model + if isinstance(model, DetectMultiBackend): + model = model.model # unwrap DetectMultiBackend + model.model = model.model[:cutoff] # backbone + m = model.model[-1] # last layer + ch = m.conv.in_channels if hasattr(m, 'conv') else m.cv1.conv.in_channels # ch into module + c = Classify(ch, nc) # Classify() + c.i, c.f, c.type = m.i, m.f, 'models.common.Classify' # index, from, type + model.model[-1] = c # replace + self.model = model.model + self.stride = model.stride + self.save = [] + self.nc = nc + + def _from_yaml(self, cfg): + # Create a YOLO classification model from a *.yaml file + self.model = None + + +def parse_model(d, ch): # model_dict, input_channels(3) + # Parse a YOLO model.yaml dictionary + LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}") + anchors, nc, gd, gw, act = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'], d.get('activation') + if act: + Conv.default_act = eval(act) # redefine default activation, i.e. Conv.default_act = nn.SiLU() + RepConvN.default_act = eval(act) # redefine default activation, i.e. Conv.default_act = nn.SiLU() + LOGGER.info(f"{colorstr('activation:')} {act}") # print + na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors + no = na * (nc + 5) # number of outputs = anchors * (classes + 5) + + layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out + for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args + m = eval(m) if isinstance(m, str) else m # eval strings + for j, a in enumerate(args): + with contextlib.suppress(NameError): + args[j] = eval(a) if isinstance(a, str) else a # eval strings + + n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain + if m in { + Conv, AConv, ConvTranspose, + Bottleneck, SPP, SPPF, DWConv, BottleneckCSP, nn.ConvTranspose2d, DWConvTranspose2d, SPPCSPC, ADown, + RepNCSPELAN4, SPPELAN}: + c1, c2 = ch[f], args[0] + if c2 != no: # if not output + c2 = make_divisible(c2 * gw, 8) + + args = [c1, c2, *args[1:]] + if m in {BottleneckCSP, SPPCSPC}: + args.insert(2, n) # number of repeats + n = 1 + elif m is nn.BatchNorm2d: + args = [ch[f]] + elif m is Concat: + c2 = sum(ch[x] for x in f) + elif m is Shortcut: + c2 = ch[f[0]] + elif m is ReOrg: + c2 = ch[f] * 4 + elif m is CBLinear: + c2 = args[0] + c1 = ch[f] + args = [c1, c2, *args[1:]] + elif m is CBFuse: + c2 = ch[f[-1]] + # TODO: channel, gw, gd + elif m in {Detect, DualDetect, TripleDetect, DDetect, DualDDetect, TripleDDetect, Segment, Panoptic}: + args.append([ch[x] for x in f]) + # if isinstance(args[1], int): # number of anchors + # args[1] = [list(range(args[1] * 2))] * len(f) + if m in {Segment, Panoptic}: + args[2] = make_divisible(args[2] * gw, 8) + elif m is Contract: + c2 = ch[f] * args[0] ** 2 + elif m is Expand: + c2 = ch[f] // args[0] ** 2 + else: + c2 = ch[f] + + m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module + t = str(m)[8:-2].replace('__main__.', '') # module type + np = sum(x.numel() for x in m_.parameters()) # number params + m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params + LOGGER.info(f'{i:>3}{str(f):>18}{n_:>3}{np:10.0f} {t:<40}{str(args):<30}') # print + save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist + layers.append(m_) + if i == 0: + ch = [] + ch.append(c2) + return nn.Sequential(*layers), sorted(save) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--cfg', type=str, default='yolo.yaml', help='model.yaml') + parser.add_argument('--batch-size', type=int, default=1, help='total batch size for all GPUs') + parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') + parser.add_argument('--profile', action='store_true', help='profile model speed') + parser.add_argument('--line-profile', action='store_true', help='profile model speed layer by layer') + parser.add_argument('--test', action='store_true', help='test all yolo*.yaml') + opt = parser.parse_args() + opt.cfg = check_yaml(opt.cfg) # check YAML + print_args(vars(opt)) + device = select_device(opt.device) + + # Create model + im = torch.rand(opt.batch_size, 3, 640, 640).to(device) + model = Model(opt.cfg).to(device) + model.eval() + + # Options + if opt.line_profile: # profile layer by layer + model(im, profile=True) + + elif opt.profile: # profile forward-backward + results = profile(input=im, ops=[model], n=3) + + elif opt.test: # test all models + for cfg in Path(ROOT / 'models').rglob('yolo*.yaml'): + try: + _ = Model(cfg) + except Exception as e: + print(f'Error in {cfg}: {e}') + + else: # report fused model summary + model.fuse() diff --git a/models/__pycache__/__init__.cpython-38.pyc b/models/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 000000000..cc7fdc116 Binary files /dev/null and b/models/__pycache__/__init__.cpython-38.pyc differ diff --git a/models/__pycache__/common.cpython-38.pyc b/models/__pycache__/common.cpython-38.pyc new file mode 100644 index 000000000..51de3c107 Binary files /dev/null and b/models/__pycache__/common.cpython-38.pyc differ diff --git a/models/__pycache__/experimental.cpython-38.pyc b/models/__pycache__/experimental.cpython-38.pyc new file mode 100644 index 000000000..5cb8b1d62 Binary files /dev/null and b/models/__pycache__/experimental.cpython-38.pyc differ diff --git a/models/__pycache__/yolo.cpython-38.pyc b/models/__pycache__/yolo.cpython-38.pyc new file mode 100644 index 000000000..cf21b2541 Binary files /dev/null and b/models/__pycache__/yolo.cpython-38.pyc differ diff --git a/models/common.py b/models/common.py index c7fbbe322..8946ccb29 100644 --- a/models/common.py +++ b/models/common.py @@ -413,7 +413,7 @@ def forward(self, x): warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1)) - + class ASPP(torch.nn.Module): def __init__(self, in_channels, out_channels): @@ -493,8 +493,8 @@ def forward(self, x): import torch.nn.functional as F from torch.nn.modules.utils import _pair - - + + class ReOrg(nn.Module): # yolo def __init__(self): @@ -549,8 +549,8 @@ def __init__(self, dimension=0): def forward(self, x): return x[0]+x[1] - - + + class Silence(nn.Module): def __init__(self): super(Silence, self).__init__() @@ -558,8 +558,8 @@ def forward(self, x): return x -##### GELAN ##### - +# #### GELAN ##### + class SPPELAN(nn.Module): # spp-elan def __init__(self, c1, c2, c3): # ch_in, ch_out, number, shortcut, groups, expansion @@ -575,8 +575,8 @@ def forward(self, x): y = [self.cv1(x)] y.extend(m(y[-1]) for m in [self.cv2, self.cv3, self.cv4]) return self.cv5(torch.cat(y, 1)) - - + + class RepNCSPELAN4(nn.Module): # csp-elan def __init__(self, c1, c2, c3, c4, c5=1): # ch_in, ch_out, number, shortcut, groups, expansion @@ -597,10 +597,10 @@ def forward_split(self, x): y.extend(m(y[-1]) for m in [self.cv2, self.cv3]) return self.cv4(torch.cat(y, 1)) -################# +# ################ -##### YOLOR ##### +# #### YOLOR ##### class ImplicitA(nn.Module): def __init__(self, channel): @@ -623,10 +623,10 @@ def __init__(self, channel): def forward(self, x): return self.implicit * x -################# +# ################ -##### CBNet ##### +# #### CBNet ##### class CBLinear(nn.Module): def __init__(self, c1, c2s, k=1, s=1, p=None, g=1): # ch_in, ch_outs, kernel, stride, padding, groups @@ -649,7 +649,7 @@ def forward(self, xs): out = torch.sum(torch.stack(res + xs[-1:]), dim=0) return out -################# +# ################ class DetectMultiBackend(nn.Module): diff --git a/models/yolo.py b/models/yolo.py index ad3d8fee7..2681c2308 100644 --- a/models/yolo.py +++ b/models/yolo.py @@ -464,7 +464,7 @@ def forward(self, x): if self.training: return x, mc, p, s return (torch.cat([x, mc], 1), p, s) if self.export else (torch.cat([x[0], mc], 1), (x[1], mc, p, s)) - + class BaseModel(nn.Module): # YOLO base model diff --git a/requirements.txt b/requirements.txt index 1f96afcb0..6d8767b7d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,8 +2,6 @@ # Usage: pip install -r requirements.txt # Base ------------------------------------------------------------------------ -gitpython -ipython matplotlib>=3.2.2 numpy>=1.18.5 opencv-python>=4.1.1 diff --git a/run.sh b/run.sh new file mode 100755 index 000000000..377a698a2 --- /dev/null +++ b/run.sh @@ -0,0 +1,7 @@ +# train yolov9 models +python train_dual.py --workers 8 --device 0 --batch 16 --data data/polyp_2.yaml --img 640 --cfg models/detect/yolov9-e.yaml --weights '' --name v9-e_1c --hyp hyp.scratch-high.yaml --min-items 0 --epochs 300 --close-mosaic 0 --exist-ok --single-cls +python val_dual.py --data data/polyp_2.yaml --img 640 --batch 32 --conf 0.25 --iou 0.65 --device 0 --weights 'runs/v9-e_1c/weights/best.pt' --name v9-c_1c --exist-ok --single-cls +python train_dual.py --workers 8 --device 0 --batch 16 --data data/polyp_2.yaml --img 640 --cfg models/detect/yolov9-e.yaml --weights '' --name v9-e_2c --hyp hyp.scratch-high.yaml --min-items 0 --epochs 300 --close-mosaic 0 --exist-ok +python val_dual.py --data data/polyp_2.yaml --img 640 --batch 32 --conf 0.25 --iou 0.65 --device 0 --weights 'runs/v9-e_2c/weights/best.pt' --name v9-e_2c --exist-ok +python train_dual.py --workers 8 --device 0 --batch 16 --data data/polyp_2.yaml --img 640 --cfg models/detect/yolov9-c.yaml --weights '' --name v9-c_1c --hyp hyp.scratch-high.yaml --min-items 0 --epochs 300 --close-mosaic 0 --exist-ok --single-cls +python val_dual.py --data data/polyp_2.yaml --img 640 --batch 32 --conf 0.25 --iou 0.65 --device 0 --weights 'runs/v9-c_1c/weights/best.pt' --name v9-c_1c --exist-ok --single-cls diff --git a/train_dual.py b/train_dual.py index 1d21ac8f5..458a30ca7 100644 --- a/train_dual.py +++ b/train_dual.py @@ -56,7 +56,6 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \ opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze callbacks.run('on_pretrain_routine_start') - # Directories w = save_dir / 'weights' # weights dir (w.parent if evolve else w).mkdir(parents=True, exist_ok=True) # make dir @@ -117,7 +116,6 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio else: model = Model(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create amp = check_amp(model) # check AMP - # Freeze freeze = [f'model.{x}.' for x in (freeze if len(freeze) > 1 else range(freeze[0]))] # layers to freeze for k, v in model.named_parameters(): @@ -285,7 +283,11 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio if RANK in {-1, 0}: pbar = tqdm(pbar, total=nb, bar_format=TQDM_BAR_FORMAT) # progress bar optimizer.zero_grad() + #tt0 = time.time()*1000 for i, (imgs, targets, paths, _) in pbar: # batch ------------------------------------------------------------- + #tt1 = time.time()*1000 + #print("\n loading time: ", tt1-tt0) + #tt0 = tt1 callbacks.run('on_train_batch_start') ni = i + nb * epoch # number integrated batches (since train start) imgs = imgs.to(device, non_blocking=True).float() / 255 # uint8 to float32, 0-255 to 0.0-1.0 @@ -341,6 +343,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio callbacks.run('on_train_batch_end', model, ni, imgs, targets, paths, list(mloss)) if callbacks.stop_training: return + #print("\nend: ", time.time()*1000-t_start, "ms") # end batch ------------------------------------------------------------------------------------------------ # Scheduler @@ -462,7 +465,7 @@ def parse_opt(known=False): parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW', 'LION'], default='SGD', help='optimizer') parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode') parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)') - parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name') + parser.add_argument('--project', default=ROOT / 'runs/', help='save to project/name') parser.add_argument('--name', default='exp', help='save to project/name') parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') parser.add_argument('--quad', action='store_true', help='quad dataloader') diff --git a/utils/.ipynb_checkpoints/augment-checkpoint.py b/utils/.ipynb_checkpoints/augment-checkpoint.py new file mode 100644 index 000000000..1ac08117d --- /dev/null +++ b/utils/.ipynb_checkpoints/augment-checkpoint.py @@ -0,0 +1,970 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +import math +import random +from copy import deepcopy + +import cv2 +import numpy as np +import torch +import torchvision.transforms as T + +from utils.instance import Instances +from utils.general import LOGGER, check_version, colorstr, resample_segments, segment2box, xywhn2xyxy +from utils.metrics import bbox_ioa + +POSE_FLIPLR_INDEX = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] + + +def polygons2masks(imgsz, polygons, color, downsample_ratio=1): + """ + Args: + imgsz (tuple): The image size. + polygons (list[np.ndarray]): each polygon is [N, M], N is number of polygons, M is number of points (M % 2 = 0) + color (int): color + downsample_ratio (int): downsample ratio + """ + masks = [] + for si in range(len(polygons)): + mask = polygon2mask(imgsz, [polygons[si].reshape(-1)], color, downsample_ratio) + masks.append(mask) + return np.array(masks) +def polygons2masks_overlap(imgsz, segments, downsample_ratio=1): + """Return a (640, 640) overlap mask.""" + masks = np.zeros((imgsz[0] // downsample_ratio, imgsz[1] // downsample_ratio), + dtype=np.int32 if len(segments) > 255 else np.uint8) + areas = [] + ms = [] + for si in range(len(segments)): + mask = polygon2mask(imgsz, [segments[si].reshape(-1)], downsample_ratio=downsample_ratio, color=1) + ms.append(mask) + areas.append(mask.sum()) + areas = np.asarray(areas) + index = np.argsort(-areas) + ms = np.array(ms)[index] + for i in range(len(segments)): + mask = ms[i] * (i + 1) + masks = masks + mask + masks = np.clip(masks, a_min=0, a_max=i + 1) + return masks, index +def polygons2masks_multi(imgsz, segments, cls, downsample_ratio=1, nc=3): + """Return a (640, 640) overlap mask.""" + masks = np.zeros((nc, imgsz[0] // downsample_ratio, imgsz[1] // downsample_ratio), + dtype=np.int32 if len(segments) > 255 else np.uint8) + areas = [] + ms = [] + mc = [] + for si in range(len(segments)): + mask = polygon2mask(imgsz, [segments[si].reshape(-1)], downsample_ratio=downsample_ratio, color=1) + ms.append(mask) + mc.append(int(cls[si][0])) + areas.append(mask.sum()) + areas = np.asarray(areas) + index = np.argsort(-areas) + ms = np.array(ms)[index] + for i in range(len(segments)): + cls_id = mc[i] + masks[cls_id] = masks[cls_id] + ms[i] + masks[cls_id] = np.clip(masks[cls_id], a_min=0, a_max=1) + return masks, index + + +class Compose: + + def __init__(self, transforms): + """Initializes the Compose object with a list of transforms.""" + self.transforms = transforms + + def __call__(self, data): + """Applies a series of transformations to input data.""" + for t in self.transforms: + data = t(data) + return data + + def append(self, transform): + """Appends a new transform to the existing list of transforms.""" + self.transforms.append(transform) + + def tolist(self): + """Converts list of transforms to a standard Python list.""" + return self.transforms + + def __repr__(self): + """Return string representation of object.""" + format_string = f'{self.__class__.__name__}(' + for t in self.transforms: + format_string += '\n' + format_string += f' {t}' + format_string += '\n)' + return format_string + + +class BaseMixTransform: + """This implementation is from mmyolo.""" + + def __init__(self, dataset, pre_transform=None, p=0.0) -> None: + self.dataset = dataset + self.pre_transform = pre_transform + self.p = p + + def __call__(self, labels): + """Applies pre-processing transforms and mixup/mosaic transforms to labels data.""" + if random.uniform(0, 1) > self.p: + return labels + + # Get index of one or three other images + indexes = self.get_indexes() + if isinstance(indexes, int): + indexes = [indexes] + + # Get images information will be used for Mosaic or MixUp + mix_labels = [self.dataset.get_image_and_label(i) for i in indexes] + + if self.pre_transform is not None: + for i, data in enumerate(mix_labels): + mix_labels[i] = self.pre_transform(data) + labels['mix_labels'] = mix_labels + + # Mosaic or MixUp + labels = self._mix_transform(labels) + labels.pop('mix_labels', None) + return labels + + def _mix_transform(self, labels): + """Applies MixUp or Mosaic augmentation to the label dictionary.""" + raise NotImplementedError + + def get_indexes(self): + """Gets a list of shuffled indexes for mosaic augmentation.""" + raise NotImplementedError + + +class Mosaic(BaseMixTransform): + """ + Mosaic augmentation. + + This class performs mosaic augmentation by combining multiple (4 or 9) images into a single mosaic image. + The augmentation is applied to a dataset with a given probability. + + Attributes: + dataset: The dataset on which the mosaic augmentation is applied. + imgsz (int, optional): Image size (height and width) after mosaic pipeline of a single image. Default to 640. + p (float, optional): Probability of applying the mosaic augmentation. Must be in the range 0-1. Default to 1.0. + n (int, optional): The grid size, either 4 (for 2x2) or 9 (for 3x3). + """ + + def __init__(self, dataset, imgsz=640, p=1.0, n=4): + """Initializes the object with a dataset, image size, probability, and border.""" + assert 0 <= p <= 1.0, f'The probability should be in range [0, 1], but got {p}.' + assert n in (4, 9), 'grid must be equal to 4 or 9.' + super().__init__(dataset=dataset, p=p) + self.dataset = dataset + self.imgsz = imgsz + self.border = (-imgsz // 2, -imgsz // 2) # width, height + self.n = n + + def get_indexes(self, buffer=True): + """Return a list of random indexes from the dataset.""" + if buffer: # select images from buffer + return random.choices(list(self.dataset.buffer), k=self.n - 1) + else: # select any images + return [random.randint(0, len(self.dataset) - 1) for _ in range(self.n - 1)] + + def _mix_transform(self, labels): + """Apply mixup transformation to the input image and labels.""" + assert labels.get('rect_shape', None) is None, 'rect and mosaic are mutually exclusive.' + assert len(labels.get('mix_labels', [])), 'There are no other images for mosaic augment.' + return self._mosaic4(labels) if self.n == 4 else self._mosaic9(labels) + + def _mosaic4(self, labels): + """Create a 2x2 image mosaic.""" + mosaic_labels = [] + s = self.imgsz + yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.border) # mosaic center x, y + for i in range(4): + labels_patch = labels if i == 0 else labels['mix_labels'][i - 1] + # Load image + img = labels_patch['img'] + h, w = labels_patch.pop('resized_shape') + + # Place img in img4 + if i == 0: # top left + img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles + x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) + x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) + elif i == 1: # top right + x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc + x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h + elif i == 2: # bottom left + x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h) + x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h) + elif i == 3: # bottom right + x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h) + x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) + + img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] + padw = x1a - x1b + padh = y1a - y1b + + labels_patch = self._update_labels(labels_patch, padw, padh) + mosaic_labels.append(labels_patch) + final_labels = self._cat_labels(mosaic_labels) + final_labels['img'] = img4 + return final_labels + + def _mosaic9(self, labels): + """Create a 3x3 image mosaic.""" + mosaic_labels = [] + s = self.imgsz + hp, wp = -1, -1 # height, width previous + for i in range(9): + labels_patch = labels if i == 0 else labels['mix_labels'][i - 1] + # Load image + img = labels_patch['img'] + h, w = labels_patch.pop('resized_shape') + + # Place img in img9 + if i == 0: # center + img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles + h0, w0 = h, w + c = s, s, s + w, s + h # xmin, ymin, xmax, ymax (base) coordinates + elif i == 1: # top + c = s, s - h, s + w, s + elif i == 2: # top right + c = s + wp, s - h, s + wp + w, s + elif i == 3: # right + c = s + w0, s, s + w0 + w, s + h + elif i == 4: # bottom right + c = s + w0, s + hp, s + w0 + w, s + hp + h + elif i == 5: # bottom + c = s + w0 - w, s + h0, s + w0, s + h0 + h + elif i == 6: # bottom left + c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h + elif i == 7: # left + c = s - w, s + h0 - h, s, s + h0 + elif i == 8: # top left + c = s - w, s + h0 - hp - h, s, s + h0 - hp + + padw, padh = c[:2] + x1, y1, x2, y2 = (max(x, 0) for x in c) # allocate coords + + # Image + img9[y1:y2, x1:x2] = img[y1 - padh:, x1 - padw:] # img9[ymin:ymax, xmin:xmax] + hp, wp = h, w # height, width previous for next iteration + + # Labels assuming imgsz*2 mosaic size + labels_patch = self._update_labels(labels_patch, padw + self.border[0], padh + self.border[1]) + mosaic_labels.append(labels_patch) + final_labels = self._cat_labels(mosaic_labels) + + final_labels['img'] = img9[-self.border[0]:self.border[0], -self.border[1]:self.border[1]] + return final_labels + + @staticmethod + def _update_labels(labels, padw, padh): + """Update labels.""" + nh, nw = labels['img'].shape[:2] + labels['instances'].convert_bbox(format='xyxy') + labels['instances'].denormalize(nw, nh) + labels['instances'].add_padding(padw, padh) + return labels + + def _cat_labels(self, mosaic_labels): + """Return labels with mosaic border instances clipped.""" + if len(mosaic_labels) == 0: + return {} + cls = [] + instances = [] + imgsz = self.imgsz * 2 # mosaic imgsz + for labels in mosaic_labels: + cls.append(labels['cls']) + instances.append(labels['instances']) + final_labels = { + 'im_file': mosaic_labels[0]['im_file'], + 'ori_shape': mosaic_labels[0]['ori_shape'], + 'resized_shape': (imgsz, imgsz), + 'cls': np.concatenate(cls, 0), + 'instances': Instances.concatenate(instances, axis=0), + 'mosaic_border': self.border} # final_labels + final_labels['instances'].clip(imgsz, imgsz) + good = final_labels['instances'].remove_zero_area_boxes() + final_labels['cls'] = final_labels['cls'][good] + return final_labels + + +class MixUp(BaseMixTransform): + + def __init__(self, dataset, pre_transform=None, p=0.0) -> None: + super().__init__(dataset=dataset, pre_transform=pre_transform, p=p) + + def get_indexes(self): + """Get a random index from the dataset.""" + return random.randint(0, len(self.dataset) - 1) + + def _mix_transform(self, labels): + """Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf.""" + r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0 + labels2 = labels['mix_labels'][0] + labels['img'] = (labels['img'] * r + labels2['img'] * (1 - r)).astype(np.uint8) + labels['instances'] = Instances.concatenate([labels['instances'], labels2['instances']], axis=0) + labels['cls'] = np.concatenate([labels['cls'], labels2['cls']], 0) + return labels + + +class RandomPerspective: + + def __init__(self, + degrees=0.0, + translate=0.1, + scale=0.5, + shear=0.0, + perspective=0.0, + border=(0, 0), + pre_transform=None): + self.degrees = degrees + self.translate = translate + self.scale = scale + self.shear = shear + self.perspective = perspective + # Mosaic border + self.border = border + self.pre_transform = pre_transform + + def affine_transform(self, img, border): + """Center.""" + C = np.eye(3, dtype=np.float32) + + C[0, 2] = -img.shape[1] / 2 # x translation (pixels) + C[1, 2] = -img.shape[0] / 2 # y translation (pixels) + + # Perspective + P = np.eye(3, dtype=np.float32) + P[2, 0] = random.uniform(-self.perspective, self.perspective) # x perspective (about y) + P[2, 1] = random.uniform(-self.perspective, self.perspective) # y perspective (about x) + + # Rotation and Scale + R = np.eye(3, dtype=np.float32) + a = random.uniform(-self.degrees, self.degrees) + # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations + s = random.uniform(1 - self.scale, 1 + self.scale) + # s = 2 ** random.uniform(-scale, scale) + R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s) + + # Shear + S = np.eye(3, dtype=np.float32) + S[0, 1] = math.tan(random.uniform(-self.shear, self.shear) * math.pi / 180) # x shear (deg) + S[1, 0] = math.tan(random.uniform(-self.shear, self.shear) * math.pi / 180) # y shear (deg) + + # Translation + T = np.eye(3, dtype=np.float32) + T[0, 2] = random.uniform(0.5 - self.translate, 0.5 + self.translate) * self.size[0] # x translation (pixels) + T[1, 2] = random.uniform(0.5 - self.translate, 0.5 + self.translate) * self.size[1] # y translation (pixels) + + # Combined rotation matrix + M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT + # Affine image + if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed + if self.perspective: + img = cv2.warpPerspective(img, M, dsize=self.size, borderValue=(114, 114, 114)) + else: # affine + img = cv2.warpAffine(img, M[:2], dsize=self.size, borderValue=(114, 114, 114)) + return img, M, s + + def apply_bboxes(self, bboxes, M): + """ + Apply affine to bboxes only. + + Args: + bboxes (ndarray): list of bboxes, xyxy format, with shape (num_bboxes, 4). + M (ndarray): affine matrix. + + Returns: + new_bboxes (ndarray): bboxes after affine, [num_bboxes, 4]. + """ + n = len(bboxes) + if n == 0: + return bboxes + + xy = np.ones((n * 4, 3), dtype=bboxes.dtype) + xy[:, :2] = bboxes[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 + xy = xy @ M.T # transform + xy = (xy[:, :2] / xy[:, 2:3] if self.perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine + + # Create new boxes + x = xy[:, [0, 2, 4, 6]] + y = xy[:, [1, 3, 5, 7]] + return np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1)), dtype=bboxes.dtype).reshape(4, n).T + + def apply_segments(self, segments, M): + """ + Apply affine to segments and generate new bboxes from segments. + + Args: + segments (ndarray): list of segments, [num_samples, 500, 2]. + M (ndarray): affine matrix. + + Returns: + new_segments (ndarray): list of segments after affine, [num_samples, 500, 2]. + new_bboxes (ndarray): bboxes after affine, [N, 4]. + """ + n, num = segments.shape[:2] + if n == 0: + return [], segments + + xy = np.ones((n * num, 3), dtype=segments.dtype) + segments = segments.reshape(-1, 2) + xy[:, :2] = segments + xy = xy @ M.T # transform + xy = xy[:, :2] / xy[:, 2:3] + segments = xy.reshape(n, -1, 2) + bboxes = np.stack([segment2box(xy, self.size[0], self.size[1]) for xy in segments], 0) + return bboxes, segments + + def apply_keypoints(self, keypoints, M): + """ + Apply affine to keypoints. + + Args: + keypoints (ndarray): keypoints, [N, 17, 3]. + M (ndarray): affine matrix. + + Return: + new_keypoints (ndarray): keypoints after affine, [N, 17, 3]. + """ + n, nkpt = keypoints.shape[:2] + if n == 0: + return keypoints + xy = np.ones((n * nkpt, 3), dtype=keypoints.dtype) + visible = keypoints[..., 2].reshape(n * nkpt, 1) + xy[:, :2] = keypoints[..., :2].reshape(n * nkpt, 2) + xy = xy @ M.T # transform + xy = xy[:, :2] / xy[:, 2:3] # perspective rescale or affine + out_mask = (xy[:, 0] < 0) | (xy[:, 1] < 0) | (xy[:, 0] > self.size[0]) | (xy[:, 1] > self.size[1]) + visible[out_mask] = 0 + return np.concatenate([xy, visible], axis=-1).reshape(n, nkpt, 3) + + def __call__(self, labels): + """ + Affine images and targets. + + Args: + labels (dict): a dict of `bboxes`, `segments`, `keypoints`. + """ + if self.pre_transform and 'mosaic_border' not in labels: + labels = self.pre_transform(labels) + labels.pop('ratio_pad') # do not need ratio pad + + img = labels['img'] + cls = labels['cls'] + instances = labels.pop('instances') + # Make sure the coord formats are right + instances.convert_bbox(format='xyxy') + instances.denormalize(*img.shape[:2][::-1]) + + border = labels.pop('mosaic_border', self.border) + self.size = img.shape[1] + border[1] * 2, img.shape[0] + border[0] * 2 # w, h + # M is affine matrix + # scale for func:`box_candidates` + img, M, scale = self.affine_transform(img, border) + + bboxes = self.apply_bboxes(instances.bboxes, M) + + segments = instances.segments + keypoints = instances.keypoints + # Update bboxes if there are segments. + if len(segments): + bboxes, segments = self.apply_segments(segments, M) + + if keypoints is not None: + keypoints = self.apply_keypoints(keypoints, M) + new_instances = Instances(bboxes, segments, keypoints, bbox_format='xyxy', normalized=False) + # Clip + new_instances.clip(*self.size) + + # Filter instances + instances.scale(scale_w=scale, scale_h=scale, bbox_only=True) + # Make the bboxes have the same scale with new_bboxes + i = self.box_candidates(box1=instances.bboxes.T, + box2=new_instances.bboxes.T, + area_thr=0.01 if len(segments) else 0.10) + labels['instances'] = new_instances[i] + labels['cls'] = cls[i] + labels['img'] = img + labels['resized_shape'] = img.shape[:2] + return labels + + def box_candidates(self, box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n) + # Compute box candidates: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio + w1, h1 = box1[2] - box1[0], box1[3] - box1[1] + w2, h2 = box2[2] - box2[0], box2[3] - box2[1] + ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps)) # aspect ratio + return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr) # candidates + + +class RandomBlur: + def __init__(self, p) -> None: + self.p = p + + def __call__(self, labels): + img = labels['img'] + if random.random() < self.p: + k = 2*random.randint(1, 10) + 1 + img_blur = cv2.GaussianBlur(img, (5, 5), 0) + labels['img'] = img_blur + return labels + + +class RandomHSV: + + def __init__(self, hgain=0.5, sgain=0.5, vgain=0.5) -> None: + self.hgain = hgain + self.sgain = sgain + self.vgain = vgain + + def __call__(self, labels): + """Applies random horizontal or vertical flip to an image with a given probability.""" + img = labels['img'] + if self.hgain or self.sgain or self.vgain: + r = np.random.uniform(-1, 1, 3) * [self.hgain, self.sgain, self.vgain] + 1 # random gains + hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV)) + dtype = img.dtype # uint8 + + x = np.arange(0, 256, dtype=r.dtype) + lut_hue = ((x * r[0]) % 180).astype(dtype) + lut_sat = np.clip(x * r[1], 0, 255).astype(dtype) + lut_val = np.clip(x * r[2], 0, 255).astype(dtype) + + im_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))) + cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=img) # no return needed + return labels + + +class RandomFlip: + + def __init__(self, p=0.5, direction='horizontal', flip_idx=None) -> None: + assert direction in ['horizontal', 'vertical'], f'Support direction `horizontal` or `vertical`, got {direction}' + assert 0 <= p <= 1.0 + + self.p = p + self.direction = direction + self.flip_idx = flip_idx + + def __call__(self, labels): + """Resize image and padding for detection, instance segmentation, pose.""" + img = labels['img'] + instances = labels.pop('instances') + instances.convert_bbox(format='xywh') + h, w = img.shape[:2] + h = 1 if instances.normalized else h + w = 1 if instances.normalized else w + + # Flip up-down + if self.direction == 'vertical' and random.random() < self.p: + img = np.flipud(img) + instances.flipud(h) + if self.direction == 'horizontal' and random.random() < self.p: + img = np.fliplr(img) + instances.fliplr(w) + # For keypoints + if self.flip_idx is not None and instances.keypoints is not None: + instances.keypoints = np.ascontiguousarray(instances.keypoints[:, self.flip_idx, :]) + labels['img'] = np.ascontiguousarray(img) + labels['instances'] = instances + return labels + + +class RandomPoints: + + def __init__(self, num_region=10, num_point=20) -> None: + self.num_region = num_region ## number of regions + self.num_point = num_point ## number of points + + def __call__(self, labels): + img = labels['img'] + h, w, c = img.shape + + for i in range(self.num_region): + # region size [50*50, 200*200] + xx, ww = np.random.randint(0, w-200), np.random.randint(50, 200) + yy, hh = np.random.randint(0, h-200), np.random.randint(50, 200) + # 20 points in each region + xs = np.random.randint(xx, xx+ww, 20) + ys = np.random.randint(yy, yy+hh, 20) + # draw ovals to region + for j in range(self.num_point): + x, y = xs[j], ys[j] + ax, ay = np.random.randint(5, 8), np.random.randint(3, 10) + angle = np.random.randint(0,180) + cc = np.random.randint(225, 255) + cv2.ellipse(img, (x, y), (ax, ay), + angle, 0, 360, + color = (cc, cc, cc), + thickness = -1) + return labels + + +class LetterBox: + """Resize image and padding for detection, instance segmentation, pose.""" + + def __init__(self, new_shape=(640, 640), auto=False, scaleFill=False, scaleup=True, stride=32): + """Initialize LetterBox object with specific parameters.""" + self.new_shape = new_shape + self.auto = auto + self.scaleFill = scaleFill + self.scaleup = scaleup + self.stride = stride + + def __call__(self, labels=None, image=None): + """Return updated labels and image with added border.""" + if labels is None: + labels = {} + img = labels.get('img') if image is None else image + shape = img.shape[:2] # current shape [height, width] + new_shape = labels.pop('rect_shape', self.new_shape) + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not self.scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if self.auto: # minimum rectangle + dw, dh = np.mod(dw, self.stride), np.mod(dh, self.stride) # wh padding + elif self.scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + if labels.get('ratio_pad'): + labels['ratio_pad'] = (labels['ratio_pad'], (dw, dh)) # for evaluation + + if shape[::-1] != new_unpad: # resize + img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, + value=(114, 114, 114)) # add border + if len(labels): + labels = self._update_labels(labels, ratio, dw, dh) + labels['img'] = img + labels['resized_shape'] = new_shape + return labels + else: + return img + + def _update_labels(self, labels, ratio, padw, padh): + """Update labels.""" + labels['instances'].convert_bbox(format='xyxy') + labels['instances'].denormalize(*labels['img'].shape[:2][::-1]) + labels['instances'].scale(*ratio) + labels['instances'].add_padding(padw, padh) + return labels + + +class CopyPaste: + + def __init__(self, p=0.5) -> None: + self.p = p + + def __call__(self, labels): + """Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy).""" + im = labels['img'] + cls = labels['cls'] + h, w = im.shape[:2] + instances = labels.pop('instances') + instances.convert_bbox(format='xyxy') + instances.denormalize(w, h) + if self.p and len(instances.segments): + n = len(instances) + _, w, _ = im.shape # height, width, channels + im_new = np.zeros(im.shape, np.uint8) + + # Calculate ioa first then select indexes randomly + ins_flip = deepcopy(instances) + ins_flip.fliplr(w) + + ioa = bbox_ioa(ins_flip.bboxes, instances.bboxes) # intersection over area, (N, M) + indexes = np.nonzero((ioa < 0.30).all(1))[0] # (N, ) + n = len(indexes) + for j in random.sample(list(indexes), k=round(self.p * n)): + cls = np.concatenate((cls, cls[[j]]), axis=0) + instances = Instances.concatenate((instances, ins_flip[[j]]), axis=0) + cv2.drawContours(im_new, instances.segments[[j]].astype(np.int32), -1, (1, 1, 1), cv2.FILLED) + + result = cv2.flip(im, 1) # augment segments (flip left-right) + i = cv2.flip(im_new, 1).astype(bool) + im[i] = result[i] # cv2.imwrite('debug.jpg', im) # debug + + labels['img'] = im + labels['cls'] = cls + labels['instances'] = instances + return labels + + +class Albumentations: + # YOLOv8 Albumentations class (optional, only used if package is installed) + def __init__(self, p=1.0): + """Initialize the transform object for YOLO bbox formatted params.""" + self.p = p + self.transform = None + prefix = colorstr('albumentations: ') + try: + import albumentations as A + + check_version(A.__version__, '1.0.3', hard=True) # version requirement + + T = [ + A.Blur(p=0.01), + A.MedianBlur(p=0.01), + A.ToGray(p=0.01), + A.CLAHE(p=0.01), + A.RandomBrightnessContrast(p=0.0), + A.RandomGamma(p=0.0), + A.ImageCompression(quality_lower=75, p=0.0)] # transforms + self.transform = A.Compose(T, bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels'])) + + LOGGER.info(prefix + ', '.join(f'{x}'.replace('always_apply=False, ', '') for x in T if x.p)) + except ImportError: # package not installed, skip + pass + except Exception as e: + LOGGER.info(f'{prefix}{e}') + + def __call__(self, labels): + """Generates object detections and returns a dictionary with detection results.""" + im = labels['img'] + cls = labels['cls'] + if len(cls): + labels['instances'].convert_bbox('xywh') + labels['instances'].normalize(*im.shape[:2][::-1]) + bboxes = labels['instances'].bboxes + # TODO: add supports of segments and keypoints + if self.transform and random.random() < self.p: + new = self.transform(image=im, bboxes=bboxes, class_labels=cls) # transformed + if len(new['class_labels']) > 0: # skip update if no bbox in new im + labels['img'] = new['image'] + labels['cls'] = np.array(new['class_labels']) + bboxes = np.array(new['bboxes']) + labels['instances'].update(bboxes=bboxes) + return labels + + +# TODO: technically this is not an augmentation, maybe we should put this to another files +class Format: + + def __init__(self, + bbox_format='xywh', + normalize=True, + return_mask=False, + return_keypoint=False, + mask_ratio=4, + mask_overlap=True, + batch_idx=True, + nc=3): + self.bbox_format = bbox_format + self.normalize = normalize + self.return_mask = return_mask # set False when training detection only + self.return_keypoint = return_keypoint + self.mask_ratio = mask_ratio + self.mask_overlap = mask_overlap + self.batch_idx = batch_idx # keep the batch indexes + self.nc=nc + def __call__(self, labels): + """Return formatted image, classes, bounding boxes & keypoints to be used by 'collate_fn'.""" + img = labels.pop('img') + h, w = img.shape[:2] + cls = labels.pop('cls') + instances = labels.pop('instances') + instances.convert_bbox(format=self.bbox_format) + instances.denormalize(w, h) + nl = len(instances) + if self.return_mask: + if nl: + masks, instances, cls = self._format_segments(instances, cls, w, h) + masks = torch.from_numpy(masks) + else: + masks = torch.zeros(1 if self.mask_overlap else nl, self.nc, img.shape[0] // self.mask_ratio, + img.shape[1] // self.mask_ratio) + labels['masks'] = masks + if self.normalize: + instances.normalize(w, h) + labels['img'] = self._format_img(img) + labels['cls'] = torch.from_numpy(cls) if nl else torch.zeros(nl) + labels['bboxes'] = torch.from_numpy(instances.bboxes) if nl else torch.zeros((nl, 4)) + if self.return_keypoint: + labels['keypoints'] = torch.from_numpy(instances.keypoints) + # Then we can use collate_fn + if self.batch_idx: + labels['batch_idx'] = torch.zeros(nl) + return labels + + def _format_img(self, img): + """Format the image for YOLOv5 from Numpy array to PyTorch tensor.""" + if len(img.shape) < 3: + img = np.expand_dims(img, -1) + img = np.ascontiguousarray(img.transpose(2, 0, 1)[::-1]) + img = torch.from_numpy(img) + return img + + def _format_segments(self, instances, cls, w, h): + """convert polygon points to bitmap.""" + segments = instances.segments + if self.mask_overlap: + masks, sorted_idx = polygons2masks_multi((h, w), segments, cls, downsample_ratio=self.mask_ratio, nc=self.nc) + masks = masks[None] # (2, 640, 640) -> (1, 2, 640, 640) + instances = instances[sorted_idx] + cls = cls[sorted_idx] + else: + masks = polygons2masks((h, w), segments, color=1, downsample_ratio=self.mask_ratio) + + return masks, instances, cls + + +def v9_transforms(dataset, imgsz, hyp): + """Convert images to a size suitable for YOLOv8 training.""" + pre_transform = Compose([ + Mosaic(dataset, imgsz=imgsz, p=hyp['mosaic']), + CopyPaste(p=hyp['copy_paste']), + RandomPerspective( + degrees=hyp['degrees'], + translate=hyp['translate'], + scale=hyp['scale'], + shear=hyp['shear'], + perspective=hyp['perspective'], + pre_transform=LetterBox(new_shape=(imgsz, imgsz)), + )]) + #flip_idx = dataset.data.get('flip_idx', None) # for keypoints augmentation + #if dataset.use_keypoints: + # kpt_shape = dataset.data.get('kpt_shape', None) + # if flip_idx is None and hyp['fliplr'] > 0.0: + # hyp['fliplr'] = 0.0 + # LOGGER.warning("WARNING ⚠️ No 'flip_idx' array defined in data.yaml, setting augmentation 'fliplr=0.0'") + # elif flip_idx: + # if len(flip_idx) != kpt_shape[0]: + # raise ValueError(f'data.yaml flip_idx={flip_idx} length must be equal to kpt_shape[0]={kpt_shape[0]}') + # elif flip_idx[0] != 0: + # raise ValueError(f'data.yaml flip_idx={flip_idx} must be zero-index (start from 0)') + + return Compose([ + pre_transform, + MixUp(dataset, pre_transform=pre_transform, p=hyp['mixup']), + Albumentations(p=1.0), + RandomHSV(hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v']), + RandomFlip(direction='vertical', p=hyp['flipud']), + RandomFlip(direction='horizontal', p=hyp['fliplr']), + ]) # transforms + + +# Classification augmentations ----------------------------------------------------------------------------------------- +def classify_transforms(size=224, mean=(0.0, 0.0, 0.0), std=(1.0, 1.0, 1.0)): # IMAGENET_MEAN, IMAGENET_STD + # Transforms to apply if albumentations not installed + if not isinstance(size, int): + raise TypeError(f'classify_transforms() size {size} must be integer, not (list, tuple)') + if any(mean) or any(std): + return T.Compose([CenterCrop(size), ToTensor(), T.Normalize(mean, std, inplace=True)]) + else: + return T.Compose([CenterCrop(size), ToTensor()]) + + +def hsv2colorjitter(h, s, v): + """Map HSV (hue, saturation, value) jitter into ColorJitter values (brightness, contrast, saturation, hue)""" + return v, v, s, h + + +def classify_albumentations( + augment=True, + size=224, + scale=(0.08, 1.0), + hflip=0.5, + vflip=0.0, + hsv_h=0.015, # image HSV-Hue augmentation (fraction) + hsv_s=0.7, # image HSV-Saturation augmentation (fraction) + hsv_v=0.4, # image HSV-Value augmentation (fraction) + mean=(0.0, 0.0, 0.0), # IMAGENET_MEAN + std=(1.0, 1.0, 1.0), # IMAGENET_STD + auto_aug=False, +): + # YOLOv8 classification Albumentations (optional, only used if package is installed) + prefix = colorstr('albumentations: ') + try: + import albumentations as A + from albumentations.pytorch import ToTensorV2 + + check_version(A.__version__, '1.0.3', hard=True) # version requirement + if augment: # Resize and crop + T = [A.RandomResizedCrop(height=size, width=size, scale=scale)] + if auto_aug: + # TODO: implement AugMix, AutoAug & RandAug in albumentations + LOGGER.info(f'{prefix}auto augmentations are currently not supported') + else: + if hflip > 0: + T += [A.HorizontalFlip(p=hflip)] + if vflip > 0: + T += [A.VerticalFlip(p=vflip)] + if any((hsv_h, hsv_s, hsv_v)): + T += [A.ColorJitter(*hsv2colorjitter(hsv_h, hsv_s, hsv_v))] # brightness, contrast, saturation, hue + else: # Use fixed crop for eval set (reproducibility) + T = [A.SmallestMaxSize(max_size=size), A.CenterCrop(height=size, width=size)] + T += [A.Normalize(mean=mean, std=std), ToTensorV2()] # Normalize and convert to Tensor + LOGGER.info(prefix + ', '.join(f'{x}'.replace('always_apply=False, ', '') for x in T if x.p)) + return A.Compose(T) + + except ImportError: # package not installed, skip + pass + except Exception as e: + LOGGER.info(f'{prefix}{e}') + + +class ClassifyLetterBox: + # YOLOv8 LetterBox class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()]) + def __init__(self, size=(640, 640), auto=False, stride=32): + """Resizes image and crops it to center with max dimensions 'h' and 'w'.""" + super().__init__() + self.h, self.w = (size, size) if isinstance(size, int) else size + self.auto = auto # pass max size integer, automatically solve for short side using stride + self.stride = stride # used with auto + + def __call__(self, im): # im = np.array HWC + imh, imw = im.shape[:2] + r = min(self.h / imh, self.w / imw) # ratio of new/old + h, w = round(imh * r), round(imw * r) # resized image + hs, ws = (math.ceil(x / self.stride) * self.stride for x in (h, w)) if self.auto else self.h, self.w + top, left = round((hs - h) / 2 - 0.1), round((ws - w) / 2 - 0.1) + im_out = np.full((self.h, self.w, 3), 114, dtype=im.dtype) + im_out[top:top + h, left:left + w] = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR) + return im_out + + +class CenterCrop: + # YOLOv8 CenterCrop class for image preprocessing, i.e. T.Compose([CenterCrop(size), ToTensor()]) + def __init__(self, size=640): + """Converts an image from numpy array to PyTorch tensor.""" + super().__init__() + self.h, self.w = (size, size) if isinstance(size, int) else size + + def __call__(self, im): # im = np.array HWC + imh, imw = im.shape[:2] + m = min(imh, imw) # min dimension + top, left = (imh - m) // 2, (imw - m) // 2 + return cv2.resize(im[top:top + m, left:left + m], (self.w, self.h), interpolation=cv2.INTER_LINEAR) + + +class ToTensor: + # YOLOv8 ToTensor class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()]) + def __init__(self, half=False): + """Initialize YOLOv8 ToTensor object with optional half-precision support.""" + super().__init__() + self.half = half + + def __call__(self, im): # im = np.array HWC in BGR order + im = np.ascontiguousarray(im.transpose((2, 0, 1))[::-1]) # HWC to CHW -> BGR to RGB -> contiguous + im = torch.from_numpy(im) # to torch + im = im.half() if self.half else im.float() # uint8 to fp16/32 + im /= 255.0 # 0-255 to 0.0-1.0 + return im diff --git a/utils/.ipynb_checkpoints/augmentations-checkpoint.py b/utils/.ipynb_checkpoints/augmentations-checkpoint.py new file mode 100644 index 000000000..ad4c07fb6 --- /dev/null +++ b/utils/.ipynb_checkpoints/augmentations-checkpoint.py @@ -0,0 +1,395 @@ +import math +import random + +import cv2 +import numpy as np +import torch +import torchvision.transforms as T +import torchvision.transforms.functional as TF + +from utils.general import LOGGER, check_version, colorstr, resample_segments, segment2box, xywhn2xyxy +from utils.metrics import bbox_ioa + +IMAGENET_MEAN = 0.485, 0.456, 0.406 # RGB mean +IMAGENET_STD = 0.229, 0.224, 0.225 # RGB standard deviation + + +class Albumentations: + # YOLOv5 Albumentations class (optional, only used if package is installed) + def __init__(self, size=640): + self.transform = None + prefix = colorstr('albumentations: ') + try: + import albumentations as A + check_version(A.__version__, '1.0.3', hard=True) # version requirement + + T = [ + A.RandomResizedCrop(height=size, width=size, scale=(0.8, 1.0), ratio=(0.9, 1.11), p=0.0), + A.Blur(p=0.01), + A.MedianBlur(p=0.01), + A.ToGray(p=0.01), + A.CLAHE(p=0.01), + A.RandomBrightnessContrast(p=0.0), + A.RandomGamma(p=0.0), + A.ImageCompression(quality_lower=75, p=0.0)] # transforms + self.transform = A.Compose(T, bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels'])) + + LOGGER.info(prefix + ', '.join(f'{x}'.replace('always_apply=False, ', '') for x in T if x.p)) + except ImportError: # package not installed, skip + pass + except Exception as e: + LOGGER.info(f'{prefix}{e}') + + def __call__(self, im, labels, p=1.0): + if self.transform and random.random() < p: + new = self.transform(image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0]) # transformed + im, labels = new['image'], np.array([[c, *b] for c, b in zip(new['class_labels'], new['bboxes'])]) + return im, labels + + +def normalize(x, mean=IMAGENET_MEAN, std=IMAGENET_STD, inplace=False): + # Denormalize RGB images x per ImageNet stats in BCHW format, i.e. = (x - mean) / std + return TF.normalize(x, mean, std, inplace=inplace) + + +def denormalize(x, mean=IMAGENET_MEAN, std=IMAGENET_STD): + # Denormalize RGB images x per ImageNet stats in BCHW format, i.e. = x * std + mean + for i in range(3): + x[:, i] = x[:, i] * std[i] + mean[i] + return x + + +def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5): + # HSV color-space augmentation + if hgain or sgain or vgain: + r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains + hue, sat, val = cv2.split(cv2.cvtColor(im, cv2.COLOR_BGR2HSV)) + dtype = im.dtype # uint8 + + x = np.arange(0, 256, dtype=r.dtype) + lut_hue = ((x * r[0]) % 180).astype(dtype) + lut_sat = np.clip(x * r[1], 0, 255).astype(dtype) + lut_val = np.clip(x * r[2], 0, 255).astype(dtype) + + im_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))) + cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=im) # no return needed + + +def hist_equalize(im, clahe=True, bgr=False): + # Equalize histogram on BGR image 'im' with im.shape(n,m,3) and range 0-255 + yuv = cv2.cvtColor(im, cv2.COLOR_BGR2YUV if bgr else cv2.COLOR_RGB2YUV) + if clahe: + c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)) + yuv[:, :, 0] = c.apply(yuv[:, :, 0]) + else: + yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0]) # equalize Y channel histogram + return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB) # convert YUV image to RGB + + +def replicate(im, labels): + # Replicate labels + h, w = im.shape[:2] + boxes = labels[:, 1:].astype(int) + x1, y1, x2, y2 = boxes.T + s = ((x2 - x1) + (y2 - y1)) / 2 # side length (pixels) + for i in s.argsort()[:round(s.size * 0.5)]: # smallest indices + x1b, y1b, x2b, y2b = boxes[i] + bh, bw = y2b - y1b, x2b - x1b + yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw)) # offset x, y + x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh] + im[y1a:y2a, x1a:x2a] = im[y1b:y2b, x1b:x2b] # im4[ymin:ymax, xmin:xmax] + labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0) + + return im, labels + + +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): + # Resize and pad image while meeting stride-multiple constraints + shape = im.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return im, ratio, (dw, dh) + + +def random_perspective(im, + targets=(), + segments=(), + degrees=10, + translate=.1, + scale=.1, + shear=10, + perspective=0.0, + border=(0, 0)): + # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(0.1, 0.1), scale=(0.9, 1.1), shear=(-10, 10)) + # targets = [cls, xyxy] + + height = im.shape[0] + border[0] * 2 # shape(h,w,c) + width = im.shape[1] + border[1] * 2 + + # Center + C = np.eye(3) + C[0, 2] = -im.shape[1] / 2 # x translation (pixels) + C[1, 2] = -im.shape[0] / 2 # y translation (pixels) + + # Perspective + P = np.eye(3) + P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y) + P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x) + + # Rotation and Scale + R = np.eye(3) + a = random.uniform(-degrees, degrees) + # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations + s = random.uniform(1 - scale, 1 + scale) + # s = 2 ** random.uniform(-scale, scale) + R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s) + + # Shear + S = np.eye(3) + S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg) + S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg) + + # Translation + T = np.eye(3) + T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width # x translation (pixels) + T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height # y translation (pixels) + + # Combined rotation matrix + M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT + if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed + if perspective: + im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114)) + else: # affine + im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114)) + + # Visualize + # import matplotlib.pyplot as plt + # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel() + # ax[0].imshow(im[:, :, ::-1]) # base + # ax[1].imshow(im2[:, :, ::-1]) # warped + + # Transform label coordinates + n = len(targets) + if n: + use_segments = any(x.any() for x in segments) + new = np.zeros((n, 4)) + if use_segments: # warp segments + segments = resample_segments(segments) # upsample + for i, segment in enumerate(segments): + xy = np.ones((len(segment), 3)) + xy[:, :2] = segment + xy = xy @ M.T # transform + xy = xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2] # perspective rescale or affine + + # clip + new[i] = segment2box(xy, width, height) + + else: # warp boxes + xy = np.ones((n * 4, 3)) + xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 + xy = xy @ M.T # transform + xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine + + # create new boxes + x = xy[:, [0, 2, 4, 6]] + y = xy[:, [1, 3, 5, 7]] + new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T + + # clip + new[:, [0, 2]] = new[:, [0, 2]].clip(0, width) + new[:, [1, 3]] = new[:, [1, 3]].clip(0, height) + + # filter candidates + i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10) + targets = targets[i] + targets[:, 1:5] = new[i] + + return im, targets + + +def copy_paste(im, labels, segments, p=0.5): + # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy) + n = len(segments) + if p and n: + h, w, c = im.shape # height, width, channels + im_new = np.zeros(im.shape, np.uint8) + + # calculate ioa first then select indexes randomly + boxes = np.stack([w - labels[:, 3], labels[:, 2], w - labels[:, 1], labels[:, 4]], axis=-1) # (n, 4) + ioa = bbox_ioa(boxes, labels[:, 1:5]) # intersection over area + indexes = np.nonzero((ioa < 0.30).all(1))[0] # (N, ) + n = len(indexes) + for j in random.sample(list(indexes), k=round(p * n)): + l, box, s = labels[j], boxes[j], segments[j] + labels = np.concatenate((labels, [[l[0], *box]]), 0) + segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1)) + cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (1, 1, 1), cv2.FILLED) + + result = cv2.flip(im, 1) # augment segments (flip left-right) + i = cv2.flip(im_new, 1).astype(bool) + im[i] = result[i] # cv2.imwrite('debug.jpg', im) # debug + + return im, labels, segments + + +def cutout(im, labels, p=0.5): + # Applies image cutout augmentation https://arxiv.org/abs/1708.04552 + if random.random() < p: + h, w = im.shape[:2] + scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16 # image size fraction + for s in scales: + mask_h = random.randint(1, int(h * s)) # create random masks + mask_w = random.randint(1, int(w * s)) + + # box + xmin = max(0, random.randint(0, w) - mask_w // 2) + ymin = max(0, random.randint(0, h) - mask_h // 2) + xmax = min(w, xmin + mask_w) + ymax = min(h, ymin + mask_h) + + # apply random color mask + im[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)] + + # return unobscured labels + if len(labels) and s > 0.03: + box = np.array([[xmin, ymin, xmax, ymax]], dtype=np.float32) + ioa = bbox_ioa(box, xywhn2xyxy(labels[:, 1:5], w, h))[0] # intersection over area + labels = labels[ioa < 0.60] # remove >60% obscured labels + + return labels + + +def mixup(im, labels, im2, labels2): + # Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf + r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0 + im = (im * r + im2 * (1 - r)).astype(np.uint8) + labels = np.concatenate((labels, labels2), 0) + return im, labels + + +def box_candidates(box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n) + # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio + w1, h1 = box1[2] - box1[0], box1[3] - box1[1] + w2, h2 = box2[2] - box2[0], box2[3] - box2[1] + ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps)) # aspect ratio + return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr) # candidates + + +def classify_albumentations( + augment=True, + size=224, + scale=(0.08, 1.0), + ratio=(0.75, 1.0 / 0.75), # 0.75, 1.33 + hflip=0.5, + vflip=0.0, + jitter=0.4, + mean=IMAGENET_MEAN, + std=IMAGENET_STD, + auto_aug=False): + # YOLOv5 classification Albumentations (optional, only used if package is installed) + prefix = colorstr('albumentations: ') + try: + import albumentations as A + from albumentations.pytorch import ToTensorV2 + check_version(A.__version__, '1.0.3', hard=True) # version requirement + if augment: # Resize and crop + T = [A.RandomResizedCrop(height=size, width=size, scale=scale, ratio=ratio)] + if auto_aug: + # TODO: implement AugMix, AutoAug & RandAug in albumentation + LOGGER.info(f'{prefix}auto augmentations are currently not supported') + else: + if hflip > 0: + T += [A.HorizontalFlip(p=hflip)] + if vflip > 0: + T += [A.VerticalFlip(p=vflip)] + if jitter > 0: + color_jitter = (float(jitter),) * 3 # repeat value for brightness, contrast, satuaration, 0 hue + T += [A.ColorJitter(*color_jitter, 0)] + else: # Use fixed crop for eval set (reproducibility) + T = [A.SmallestMaxSize(max_size=size), A.CenterCrop(height=size, width=size)] + T += [A.Normalize(mean=mean, std=std), ToTensorV2()] # Normalize and convert to Tensor + LOGGER.info(prefix + ', '.join(f'{x}'.replace('always_apply=False, ', '') for x in T if x.p)) + return A.Compose(T) + + except ImportError: # package not installed, skip + LOGGER.warning(f'{prefix}⚠️ not found, install with `pip install albumentations` (recommended)') + except Exception as e: + LOGGER.info(f'{prefix}{e}') + + +def classify_transforms(size=224): + # Transforms to apply if albumentations not installed + assert isinstance(size, int), f'ERROR: classify_transforms size {size} must be integer, not (list, tuple)' + # T.Compose([T.ToTensor(), T.Resize(size), T.CenterCrop(size), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)]) + return T.Compose([CenterCrop(size), ToTensor(), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)]) + + +class LetterBox: + # YOLOv5 LetterBox class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()]) + def __init__(self, size=(640, 640), auto=False, stride=32): + super().__init__() + self.h, self.w = (size, size) if isinstance(size, int) else size + self.auto = auto # pass max size integer, automatically solve for short side using stride + self.stride = stride # used with auto + + def __call__(self, im): # im = np.array HWC + imh, imw = im.shape[:2] + r = min(self.h / imh, self.w / imw) # ratio of new/old + h, w = round(imh * r), round(imw * r) # resized image + hs, ws = (math.ceil(x / self.stride) * self.stride for x in (h, w)) if self.auto else self.h, self.w + top, left = round((hs - h) / 2 - 0.1), round((ws - w) / 2 - 0.1) + im_out = np.full((self.h, self.w, 3), 114, dtype=im.dtype) + im_out[top:top + h, left:left + w] = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR) + return im_out + + +class CenterCrop: + # YOLOv5 CenterCrop class for image preprocessing, i.e. T.Compose([CenterCrop(size), ToTensor()]) + def __init__(self, size=640): + super().__init__() + self.h, self.w = (size, size) if isinstance(size, int) else size + + def __call__(self, im): # im = np.array HWC + imh, imw = im.shape[:2] + m = min(imh, imw) # min dimension + top, left = (imh - m) // 2, (imw - m) // 2 + return cv2.resize(im[top:top + m, left:left + m], (self.w, self.h), interpolation=cv2.INTER_LINEAR) + + +class ToTensor: + # YOLOv5 ToTensor class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()]) + def __init__(self, half=False): + super().__init__() + self.half = half + + def __call__(self, im): # im = np.array HWC in BGR order + im = np.ascontiguousarray(im.transpose((2, 0, 1))[::-1]) # HWC to CHW -> BGR to RGB -> contiguous + im = torch.from_numpy(im) # to torch + im = im.half() if self.half else im.float() # uint8 to fp16/32 + im /= 255.0 # 0-255 to 0.0-1.0 + return im diff --git a/utils/.ipynb_checkpoints/dataloaders-checkpoint.py b/utils/.ipynb_checkpoints/dataloaders-checkpoint.py new file mode 100644 index 000000000..02c5978d0 --- /dev/null +++ b/utils/.ipynb_checkpoints/dataloaders-checkpoint.py @@ -0,0 +1,1278 @@ +import contextlib +import glob +import hashlib +import json +import math +import os +import random +import shutil +import time +from itertools import repeat +from multiprocessing.pool import Pool, ThreadPool +from pathlib import Path +from threading import Thread +from urllib.parse import urlparse + +import numpy as np +import psutil +import torch +import torch.nn.functional as F +import torchvision +import yaml +from PIL import ExifTags, Image, ImageOps +from torch.utils.data import DataLoader, Dataset, dataloader, distributed +from tqdm import tqdm + +from utils.augmentations import (Albumentations, augment_hsv, classify_albumentations, classify_transforms, copy_paste, + letterbox, mixup, random_perspective) +from utils.augment import Compose, Format, Instances, LetterBox, v9_transforms +from utils.general import (DATASETS_DIR, LOGGER, NUM_THREADS, TQDM_BAR_FORMAT, check_dataset, check_requirements, + check_yaml, clean_str, cv2, is_colab, is_kaggle, segments2boxes, unzip_file, xyn2xy, + xywh2xyxy, xywhn2xyxy, xyxy2xywhn) +from utils.torch_utils import torch_distributed_zero_first + +# Parameters +HELP_URL = 'See https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data' +IMG_FORMATS = 'bmp', 'dng', 'jpeg', 'jpg', 'mpo', 'png', 'tif', 'tiff', 'webp', 'pfm' # include image suffixes +VID_FORMATS = 'asf', 'avi', 'gif', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'ts', 'wmv' # include video suffixes +LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html +RANK = int(os.getenv('RANK', -1)) +PIN_MEMORY = str(os.getenv('PIN_MEMORY', True)).lower() == 'true' # global pin_memory for dataloaders + +# Get orientation exif tag +for orientation in ExifTags.TAGS.keys(): + if ExifTags.TAGS[orientation] == 'Orientation': + break + + +def get_hash(paths): + # Returns a single hash value of a list of paths (files or dirs) + size = sum(os.path.getsize(p) for p in paths if os.path.exists(p)) # sizes + h = hashlib.md5(str(size).encode()) # hash sizes + h.update(''.join(paths).encode()) # hash paths + return h.hexdigest() # return hash + + +def exif_size(img): + # Returns exif-corrected PIL size + s = img.size # (width, height) + with contextlib.suppress(Exception): + rotation = dict(img._getexif().items())[orientation] + if rotation in [6, 8]: # rotation 270 or 90 + s = (s[1], s[0]) + return s + + +def exif_transpose(image): + """ + Transpose a PIL image accordingly if it has an EXIF Orientation tag. + Inplace version of https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageOps.py exif_transpose() + + :param image: The image to transpose. + :return: An image. + """ + exif = image.getexif() + orientation = exif.get(0x0112, 1) # default 1 + if orientation > 1: + method = { + 2: Image.FLIP_LEFT_RIGHT, + 3: Image.ROTATE_180, + 4: Image.FLIP_TOP_BOTTOM, + 5: Image.TRANSPOSE, + 6: Image.ROTATE_270, + 7: Image.TRANSVERSE, + 8: Image.ROTATE_90}.get(orientation) + if method is not None: + image = image.transpose(method) + del exif[0x0112] + image.info["exif"] = exif.tobytes() + return image + + +def seed_worker(worker_id): + # Set dataloader worker seed https://pytorch.org/docs/stable/notes/randomness.html#dataloader + worker_seed = torch.initial_seed() % 2 ** 32 + np.random.seed(worker_seed) + random.seed(worker_seed) + + +def create_dataloader(path, + imgsz, + batch_size, + stride, + single_cls=False, + hyp=None, + augment=False, + cache=False, + pad=0.0, + rect=False, + rank=-1, + workers=8, + image_weights=False, + close_mosaic=False, + quad=False, + min_items=0, + prefix='', + shuffle=False): + if rect and shuffle: + LOGGER.warning('WARNING ⚠️ --rect is incompatible with DataLoader shuffle, setting shuffle=False') + shuffle = False + with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP + dataset = LoadImagesAndLabels( + path, + imgsz, + batch_size, + augment=augment, # augmentation + hyp=hyp, # hyperparameters + rect=rect, # rectangular batches + cache_images=cache, + single_cls=single_cls, + stride=int(stride), + pad=pad, + image_weights=image_weights, + min_items=min_items, + prefix=prefix) + + batch_size = min(batch_size, len(dataset)) + nd = torch.cuda.device_count() # number of CUDA devices + nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) # number of workers + sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle) + #loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates + loader = DataLoader if image_weights or close_mosaic else InfiniteDataLoader + generator = torch.Generator() + generator.manual_seed(6148914691236517205 + RANK) + return loader(dataset, + batch_size=batch_size, + shuffle=shuffle and sampler is None, + num_workers=nw, + sampler=sampler, + pin_memory=PIN_MEMORY, + collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn, + worker_init_fn=seed_worker, + generator=generator), dataset + + +class InfiniteDataLoader(dataloader.DataLoader): + """ Dataloader that reuses workers + + Uses same syntax as vanilla DataLoader + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + object.__setattr__(self, 'batch_sampler', _RepeatSampler(self.batch_sampler)) + self.iterator = super().__iter__() + + def __len__(self): + return len(self.batch_sampler.sampler) + + def __iter__(self): + for _ in range(len(self)): + yield next(self.iterator) + def reset(self): + """Reset iterator. + This is useful when we want to modify settings of dataset while training. + """ + self.iterator = self._get_iterator() + + +class _RepeatSampler: + """ Sampler that repeats forever + + Args: + sampler (Sampler) + """ + + def __init__(self, sampler): + self.sampler = sampler + + def __iter__(self): + while True: + yield from iter(self.sampler) + + +class LoadScreenshots: + # YOLOv5 screenshot dataloader, i.e. `python detect.py --source "screen 0 100 100 512 256"` + def __init__(self, source, img_size=640, stride=32, auto=True, transforms=None): + # source = [screen_number left top width height] (pixels) + check_requirements('mss') + import mss + + source, *params = source.split() + self.screen, left, top, width, height = 0, None, None, None, None # default to full screen 0 + if len(params) == 1: + self.screen = int(params[0]) + elif len(params) == 4: + left, top, width, height = (int(x) for x in params) + elif len(params) == 5: + self.screen, left, top, width, height = (int(x) for x in params) + self.img_size = img_size + self.stride = stride + self.transforms = transforms + self.auto = auto + self.mode = 'stream' + self.frame = 0 + self.sct = mss.mss() + + # Parse monitor shape + monitor = self.sct.monitors[self.screen] + self.top = monitor["top"] if top is None else (monitor["top"] + top) + self.left = monitor["left"] if left is None else (monitor["left"] + left) + self.width = width or monitor["width"] + self.height = height or monitor["height"] + self.monitor = {"left": self.left, "top": self.top, "width": self.width, "height": self.height} + + def __iter__(self): + return self + + def __next__(self): + # mss screen capture: get raw pixels from the screen as np array + im0 = np.array(self.sct.grab(self.monitor))[:, :, :3] # [:, :, :3] BGRA to BGR + s = f"screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: " + + if self.transforms: + im = self.transforms(im0) # transforms + else: + im = letterbox(im0, self.img_size, stride=self.stride, auto=self.auto)[0] # padded resize + im = im.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + im = np.ascontiguousarray(im) # contiguous + self.frame += 1 + return str(self.screen), im, im0, None, s # screen, img, original img, im0s, s + + +class LoadImages: + # YOLOv5 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4` + def __init__(self, path, img_size=640, stride=32, auto=True, transforms=None, vid_stride=1): + files = [] + for p in sorted(path) if isinstance(path, (list, tuple)) else [path]: + p = str(Path(p).resolve()) + if '*' in p: + files.extend(sorted(glob.glob(p, recursive=True))) # glob + elif os.path.isdir(p): + files.extend(sorted(glob.glob(os.path.join(p, '*.*')))) # dir + elif os.path.isfile(p): + files.append(p) # files + else: + raise FileNotFoundError(f'{p} does not exist') + + images = [x for x in files if x.split('.')[-1].lower() in IMG_FORMATS] + videos = [x for x in files if x.split('.')[-1].lower() in VID_FORMATS] + ni, nv = len(images), len(videos) + + self.img_size = img_size + self.stride = stride + self.files = images + videos + self.nf = ni + nv # number of files + self.video_flag = [False] * ni + [True] * nv + self.mode = 'image' + self.auto = auto + self.transforms = transforms # optional + self.vid_stride = vid_stride # video frame-rate stride + if any(videos): + self._new_video(videos[0]) # new video + else: + self.cap = None + assert self.nf > 0, f'No images or videos found in {p}. ' \ + f'Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}' + + def __iter__(self): + self.count = 0 + return self + + def __next__(self): + if self.count == self.nf: + raise StopIteration + path = self.files[self.count] + + if self.video_flag[self.count]: + # Read video + self.mode = 'video' + for _ in range(self.vid_stride): + self.cap.grab() + ret_val, im0 = self.cap.retrieve() + while not ret_val: + self.count += 1 + self.cap.release() + if self.count == self.nf: # last video + raise StopIteration + path = self.files[self.count] + self._new_video(path) + ret_val, im0 = self.cap.read() + + self.frame += 1 + # im0 = self._cv2_rotate(im0) # for use if cv2 autorotation is False + s = f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: ' + + else: + # Read image + self.count += 1 + im0 = cv2.imread(path) # BGR + assert im0 is not None, f'Image Not Found {path}' + s = f'image {self.count}/{self.nf} {path}: ' + + if self.transforms: + im = self.transforms(im0) # transforms + else: + im = letterbox(im0, self.img_size, stride=self.stride, auto=self.auto)[0] # padded resize + im = im.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + im = np.ascontiguousarray(im) # contiguous + + return path, im, im0, self.cap, s + + def _new_video(self, path): + # Create a new video capture object + self.frame = 0 + self.cap = cv2.VideoCapture(path) + self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride) + self.orientation = int(self.cap.get(cv2.CAP_PROP_ORIENTATION_META)) # rotation degrees + # self.cap.set(cv2.CAP_PROP_ORIENTATION_AUTO, 0) # disable https://github.com/ultralytics/yolov5/issues/8493 + + def _cv2_rotate(self, im): + # Rotate a cv2 video manually + if self.orientation == 0: + return cv2.rotate(im, cv2.ROTATE_90_CLOCKWISE) + elif self.orientation == 180: + return cv2.rotate(im, cv2.ROTATE_90_COUNTERCLOCKWISE) + elif self.orientation == 90: + return cv2.rotate(im, cv2.ROTATE_180) + return im + + def __len__(self): + return self.nf # number of files + + +class LoadStreams: + # YOLOv5 streamloader, i.e. `python detect.py --source 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP streams` + def __init__(self, sources='streams.txt', img_size=640, stride=32, auto=True, transforms=None, vid_stride=1): + torch.backends.cudnn.benchmark = True # faster for fixed-size inference + self.mode = 'stream' + self.img_size = img_size + self.stride = stride + self.vid_stride = vid_stride # video frame-rate stride + sources = Path(sources).read_text().rsplit() if os.path.isfile(sources) else [sources] + n = len(sources) + self.sources = [clean_str(x) for x in sources] # clean source names for later + self.imgs, self.fps, self.frames, self.threads = [None] * n, [0] * n, [0] * n, [None] * n + for i, s in enumerate(sources): # index, source + # Start thread to read frames from video stream + st = f'{i + 1}/{n}: {s}... ' + if urlparse(s).hostname in ('www.youtube.com', 'youtube.com', 'youtu.be'): # if source is YouTube video + # YouTube format i.e. 'https://www.youtube.com/watch?v=Zgi9g1ksQHc' or 'https://youtu.be/Zgi9g1ksQHc' + check_requirements(('pafy', 'youtube_dl==2020.12.2')) + import pafy + s = pafy.new(s).getbest(preftype="mp4").url # YouTube URL + s = eval(s) if s.isnumeric() else s # i.e. s = '0' local webcam + if s == 0: + assert not is_colab(), '--source 0 webcam unsupported on Colab. Rerun command in a local environment.' + assert not is_kaggle(), '--source 0 webcam unsupported on Kaggle. Rerun command in a local environment.' + cap = cv2.VideoCapture(s) + assert cap.isOpened(), f'{st}Failed to open {s}' + w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + fps = cap.get(cv2.CAP_PROP_FPS) # warning: may return 0 or nan + self.frames[i] = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0) or float('inf') # infinite stream fallback + self.fps[i] = max((fps if math.isfinite(fps) else 0) % 100, 0) or 30 # 30 FPS fallback + + _, self.imgs[i] = cap.read() # guarantee first frame + self.threads[i] = Thread(target=self.update, args=([i, cap, s]), daemon=True) + LOGGER.info(f"{st} Success ({self.frames[i]} frames {w}x{h} at {self.fps[i]:.2f} FPS)") + self.threads[i].start() + LOGGER.info('') # newline + + # check for common shapes + s = np.stack([letterbox(x, img_size, stride=stride, auto=auto)[0].shape for x in self.imgs]) + self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal + self.auto = auto and self.rect + self.transforms = transforms # optional + if not self.rect: + LOGGER.warning('WARNING ⚠️ Stream shapes differ. For optimal performance supply similarly-shaped streams.') + + def update(self, i, cap, stream): + # Read stream `i` frames in daemon thread + n, f = 0, self.frames[i] # frame number, frame array + while cap.isOpened() and n < f: + n += 1 + cap.grab() # .read() = .grab() followed by .retrieve() + if n % self.vid_stride == 0: + success, im = cap.retrieve() + if success: + self.imgs[i] = im + else: + LOGGER.warning('WARNING ⚠️ Video stream unresponsive, please check your IP camera connection.') + self.imgs[i] = np.zeros_like(self.imgs[i]) + cap.open(stream) # re-open stream if signal was lost + time.sleep(0.0) # wait time + + def __iter__(self): + self.count = -1 + return self + + def __next__(self): + self.count += 1 + if not all(x.is_alive() for x in self.threads) or cv2.waitKey(1) == ord('q'): # q to quit + cv2.destroyAllWindows() + raise StopIteration + + im0 = self.imgs.copy() + if self.transforms: + im = np.stack([self.transforms(x) for x in im0]) # transforms + else: + im = np.stack([letterbox(x, self.img_size, stride=self.stride, auto=self.auto)[0] for x in im0]) # resize + im = im[..., ::-1].transpose((0, 3, 1, 2)) # BGR to RGB, BHWC to BCHW + im = np.ascontiguousarray(im) # contiguous + + return self.sources, im, im0, None, '' + + def __len__(self): + return len(self.sources) # 1E12 frames = 32 streams at 30 FPS for 30 years + + +def img2label_paths(img_paths): + # Define label paths as a function of image paths + sa, sb = f'{os.sep}images{os.sep}', f'{os.sep}labels{os.sep}' # /images/, /labels/ substrings + return [sb.join(x.rsplit(sa, 1)).rsplit('.', 1)[0] + '.txt' for x in img_paths] + + +class LoadImagesAndLabels(Dataset): + # YOLOv5 train_loader/val_loader, loads images and labels for training and validation + cache_version = 0.6 # dataset labels *.cache version + rand_interp_methods = [cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4] + + def __init__(self, + path, + img_size=640, + batch_size=16, + augment=False, + hyp=None, + rect=False, + image_weights=False, + cache_images=False, + single_cls=False, + stride=32, + pad=0.0, + min_items=0, + prefix=''): + self.img_size = img_size + self.augment = augment + self.hyp = hyp + self.image_weights = image_weights + self.rect = False if image_weights else rect + self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training) + self.mosaic_border = [-img_size // 2, -img_size // 2] + self.stride = stride + self.path = path + self.albumentations = Albumentations(size=img_size) if augment else None + try: + f = [] # image files + for p in path if isinstance(path, list) else [path]: + p = Path(p) # os-agnostic + if p.is_dir(): # dir + f += glob.glob(str(p / '**' / '*.*'), recursive=True) + # f = list(p.rglob('*.*')) # pathlib + elif p.is_file(): # file + with open(p) as t: + t = t.read().strip().splitlines() + parent = str(p.parent) + os.sep + f += [x.replace('./', parent, 1) if x.startswith('./') else x for x in t] # to global path + # f += [p.parent / x.lstrip(os.sep) for x in t] # to global path (pathlib) + else: + raise FileNotFoundError(f'{prefix}{p} does not exist') + self.im_files = sorted(x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS) + # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in IMG_FORMATS]) # pathlib + assert self.im_files, f'{prefix}No images found' + except Exception as e: + raise Exception(f'{prefix}Error loading data from {path}: {e}\n{HELP_URL}') from e + + # Check cache + self.label_files = img2label_paths(self.im_files) # labels + cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache') + try: + cache, exists = np.load(cache_path, allow_pickle=True).item(), True # load dict + assert cache['version'] == self.cache_version # matches current version + assert cache['hash'] == get_hash(self.label_files + self.im_files) # identical hash + except Exception: + cache, exists = self.cache_labels(cache_path, prefix), False # run cache ops + + # Display cache + nf, nm, ne, nc, n = cache.pop('results') # found, missing, empty, corrupt, total + if exists and LOCAL_RANK in {-1, 0}: + d = f"Scanning {cache_path}... {nf} images, {nm + ne} backgrounds, {nc} corrupt" + tqdm(None, desc=prefix + d, total=n, initial=n, bar_format=TQDM_BAR_FORMAT) # display cache results + if cache['msgs']: + LOGGER.info('\n'.join(cache['msgs'])) # display warnings + assert nf > 0 or not augment, f'{prefix}No labels found in {cache_path}, can not start training. {HELP_URL}' + + # Read cache + [cache.pop(k) for k in ('hash', 'version', 'msgs')] # remove items + labels, shapes, self.segments = zip(*cache.values()) + nl = len(np.concatenate(labels, 0)) # number of labels + assert nl > 0 or not augment, f'{prefix}All labels empty in {cache_path}, can not start training. {HELP_URL}' + self.labels = list(labels) + self.shapes = np.array(shapes) + self.im_files = list(cache.keys()) # update + self.label_files = img2label_paths(cache.keys()) # update + + # Filter images + if min_items: + include = np.array([len(x) >= min_items for x in self.labels]).nonzero()[0].astype(int) + LOGGER.info(f'{prefix}{n - len(include)}/{n} images filtered from dataset') + self.im_files = [self.im_files[i] for i in include] + self.label_files = [self.label_files[i] for i in include] + self.labels = [self.labels[i] for i in include] + self.segments = [self.segments[i] for i in include] + self.shapes = self.shapes[include] # wh + + # Create indices + n = len(self.shapes) # number of images + bi = np.floor(np.arange(n) / batch_size).astype(int) # batch index + nb = bi[-1] + 1 # number of batches + self.batch = bi # batch index of image + self.n = n + self.indices = range(n) + + # Update labels + include_class = [] # filter labels to include only these classes (optional) + include_class_array = np.array(include_class).reshape(1, -1) + for i, (label, segment) in enumerate(zip(self.labels, self.segments)): + if include_class: + j = (label[:, 0:1] == include_class_array).any(1) + self.labels[i] = label[j] + if segment: + self.segments[i] = segment[j] + if single_cls: # single-class training, merge all classes into 0 + self.labels[i][:, 0] = 0 + + # Rectangular Training + if self.rect: + # Sort by aspect ratio + s = self.shapes # wh + ar = s[:, 1] / s[:, 0] # aspect ratio + irect = ar.argsort() + self.im_files = [self.im_files[i] for i in irect] + self.label_files = [self.label_files[i] for i in irect] + self.labels = [self.labels[i] for i in irect] + self.segments = [self.segments[i] for i in irect] + self.shapes = s[irect] # wh + ar = ar[irect] + + # Set training image shapes + shapes = [[1, 1]] * nb + for i in range(nb): + ari = ar[bi == i] + mini, maxi = ari.min(), ari.max() + if maxi < 1: + shapes[i] = [maxi, 1] + elif mini > 1: + shapes[i] = [1, 1 / mini] + + self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride + + # Cache images into RAM/disk for faster training + if cache_images == 'ram' and not self.check_cache_ram(prefix=prefix): + cache_images = False + self.ims = [None] * n + self.im_hw0, self.im_hw = [None] * n, [None] * n + self.npy_files = [Path(f).with_suffix('.npy') for f in self.im_files] + + if cache_images: + b, gb = 0, 1 << 30 # bytes of cached images, bytes per gigabytes + fcn = self.cache_images_to_disk if cache_images == 'disk' else self.load_image + results = ThreadPool(NUM_THREADS).imap(fcn, range(n)) + pbar = tqdm(enumerate(results), total=n, bar_format=TQDM_BAR_FORMAT, disable=LOCAL_RANK > 0) + for i, x in pbar: + if cache_images == 'disk': + b += self.npy_files[i].stat().st_size + else: # 'ram' + self.ims[i], self.im_hw0[i], self.im_hw[i] = x # im, hw_orig, hw_resized = load_image(self, i) + b += self.ims[i].nbytes + pbar.desc = f'{prefix}Caching images ({b / gb:.1f}GB {cache_images})' + pbar.close() + + self.transforms = self.build_transforms(hyp=hyp) + self.ni = len(self.labels) # number of images + self.buffer = [] + self.max_buffer_length = min((n, batch_size * 8, 1000)) if self.augment else 0 + + def check_cache_ram(self, safety_margin=0.1, prefix=''): + # Check image caching requirements vs available memory + b, gb = 0, 1 << 30 # bytes of cached images, bytes per gigabytes + n = min(self.n, 30) # extrapolate from 30 random images + for _ in range(n): + im = cv2.imread(random.choice(self.im_files)) # sample image + ratio = self.img_size / max(im.shape[0], im.shape[1]) # max(h, w) # ratio + b += im.nbytes * ratio ** 2 + mem_required = b * self.n / n # GB required to cache dataset into RAM + mem = psutil.virtual_memory() + cache = mem_required * (1 + safety_margin) < mem.available # to cache or not to cache, that is the question + if not cache: + LOGGER.info(f"{prefix}{mem_required / gb:.1f}GB RAM required, " + f"{mem.available / gb:.1f}/{mem.total / gb:.1f}GB available, " + f"{'caching images ✅' if cache else 'not caching images ⚠️'}") + return cache + + def cache_labels(self, path=Path('./labels.cache'), prefix=''): + # Cache dataset labels, check images and read shapes + x = {} # dict + nm, nf, ne, nc, msgs = 0, 0, 0, 0, [] # number missing, found, empty, corrupt, messages + desc = f"{prefix}Scanning {path.parent / path.stem}..." + with Pool(NUM_THREADS) as pool: + pbar = tqdm(pool.imap(verify_image_label, zip(self.im_files, self.label_files, repeat(prefix))), + desc=desc, + total=len(self.im_files), + bar_format=TQDM_BAR_FORMAT) + for im_file, lb, shape, segments, nm_f, nf_f, ne_f, nc_f, msg in pbar: + nm += nm_f + nf += nf_f + ne += ne_f + nc += nc_f + if im_file: + x[im_file] = [lb, shape, segments] + if msg: + msgs.append(msg) + pbar.desc = f"{desc} {nf} images, {nm + ne} backgrounds, {nc} corrupt" + + pbar.close() + if msgs: + LOGGER.info('\n'.join(msgs)) + if nf == 0: + LOGGER.warning(f'{prefix}WARNING ⚠️ No labels found in {path}. {HELP_URL}') + x['hash'] = get_hash(self.label_files + self.im_files) + x['results'] = nf, nm, ne, nc, len(self.im_files) + x['msgs'] = msgs # warnings + x['version'] = self.cache_version # cache version + try: + np.save(path, x) # save cache for next time + path.with_suffix('.cache.npy').rename(path) # remove .npy suffix + LOGGER.info(f'{prefix}New cache created: {path}') + except Exception as e: + LOGGER.warning(f'{prefix}WARNING ⚠️ Cache directory {path.parent} is not writeable: {e}') # not writeable + return x + + def __len__(self): + return len(self.im_files) + + def get_image_and_label(self, index): + img, (h0, w0), (h, w) = self.load_image(index) + bboxes = self.labels[index].copy() + segments = self.segments[index].copy() + return {'img': img, + 'im_file': self.im_files[index], + 'cls': bboxes[:, 0], + 'instances': Instances(bboxes[:,1:], segments, bbox_format='xywh', normalized=True), + 'ori_shape': (h0,w0), + 'resized_shape': (h, w)} + + def build_transforms(self, hyp=None): + """Builds and appends transforms to the list.""" + if self.augment: + hyp['mosaic'] = hyp['mosaic'] if self.augment and not self.rect else 0.0 + hyp['mixup'] = hyp['mixup'] if self.augment and not self.rect else 0.0 + transforms = v9_transforms(self, self.img_size, hyp) + else: + transforms = Compose([LetterBox(new_shape=(self.img_size, self.img_size), scaleup=False)]) + + #shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling + transforms.append( + Format(bbox_format='xywh', + normalize=True, + batch_idx=True)) + return transforms + + + def __getitem__(self, index): + index = self.indices[index] # linear, shuffled, or image_weights + hyp = self.hyp + data = self.get_image_and_label(index) + data['ratio_pad'] = (data['resized_shape'][0] / data['ori_shape'][0], + data['resized_shape'][1] / data['ori_shape'][1]) # for evaluation + if self.rect: + data['rect_shape'] = self.batch_shapes[self.batch[index]] + + data = self.transforms(data) + labels_out = torch.cat([data['batch_idx'].view(-1,1), + data['cls'].view(-1,1), + data['bboxes']], dim=1) + if data.get('ratio_pad') is None: + return data['img'], labels_out, data['im_file'], (data['ori_shape'], None) + return data['img'], labels_out, data['im_file'], (data['ori_shape'], data['ratio_pad']) + + + #def __getitem__(self, index): + # t0 = time.time() + # index = self.indices[index] # linear, shuffled, or image_weights + # + # hyp = self.hyp + # mosaic = self.mosaic and random.random() < hyp['mosaic'] + # if mosaic: + # # Load mosaic + # img, labels = self.load_mosaic(index) + # shapes = None +# + # # MixUp augmentation + # if random.random() < hyp['mixup']: + # img, labels = mixup(img, labels, *self.load_mosaic(random.randint(0, self.n - 1))) +# + # else: + # # Load image + # img, (h0, w0), (h, w) = self.load_image(index) + # + # # Letterbox + # shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape + # img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) + # shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling +# + # labels = self.labels[index].copy() + # if labels.size: # normalized xywh to pixel xyxy format + # labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1]) +# + # if self.augment: + # img, labels = random_perspective(img, + # labels, + # degrees=hyp['degrees'], + # translate=hyp['translate'], + # scale=hyp['scale'], + # shear=hyp['shear'], + # perspective=hyp['perspective']) + # + # #print(img.shape, labels) + # nl = len(labels) # number of labels + # if nl: + # labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1E-3) +# + # if self.augment: + # # Albumentations + # img, labels = self.albumentations(img, labels) + # nl = len(labels) # update after albumentations +# + # # HSV color-space + # augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v']) +# + # # Flip up-down + # if random.random() < hyp['flipud']: + # img = np.flipud(img) + # if nl: + # labels[:, 2] = 1 - labels[:, 2] +# + # # Flip left-right + # if random.random() < hyp['fliplr']: + # img = np.fliplr(img) + # if nl: + # labels[:, 1] = 1 - labels[:, 1] +# + # # Cutouts + # # labels = cutout(img, labels, p=0.5) + # # nl = len(labels) # update after cutout + # + # labels_out = torch.zeros((nl, 6)) + # if nl: + # labels_out[:, 1:] = torch.from_numpy(labels) +# + # # Convert + # img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + # img = np.ascontiguousarray(img) + # # print("loaded: ", (time.time()-t0)*1000) # about 800-900 + # #print(torch.from_numpy(img), labels_out) + # return torch.from_numpy(img), labels_out, self.im_files[index], shapes + + def load_image(self, i): + # Loads 1 image from dataset index 'i', returns (im, original hw, resized hw) + im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i], + if im is None: # not cached in RAM + if fn.exists(): # load npy + im = np.load(fn) + else: # read image + im = cv2.imread(f) # BGR + assert im is not None, f'Image Not Found {f}' + h0, w0 = im.shape[:2] # orig hw + r = self.img_size / max(h0, w0) # ratio + if r != 1: # if sizes are not equal + interp = cv2.INTER_LINEAR if (self.augment or r > 1) else cv2.INTER_AREA + im = cv2.resize(im, (int(w0 * r), int(h0 * r)), interpolation=interp) + if self.augment: + self.ims[i], self.im_hw0[i], self.im_hw[i] = im, (h0, w0), im.shape[:2] # im, hw_original, hw_resized + self.buffer.append(i) + if len(self.buffer) >= self.max_buffer_length: + j = self.buffer.pop(0) + self.ims[j], self.im_hw0[j], self.im_hw[j] = None, None, None + return im, (h0, w0), im.shape[:2] # im, hw_original, hw_resized + return self.ims[i], self.im_hw0[i], self.im_hw[i] # im, hw_original, hw_resized + + def cache_images_to_disk(self, i): + # Saves an image as an *.npy file for faster loading + f = self.npy_files[i] + if not f.exists(): + np.save(f.as_posix(), cv2.imread(self.im_files[i])) + + def load_mosaic(self, index): + # YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic + labels4, segments4 = [], [] + s = self.img_size + yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border) # mosaic center x, y + indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices + random.shuffle(indices) + for i, index in enumerate(indices): + # Load image + img, _, (h, w) = self.load_image(index) + + # place img in img4 + if i == 0: # top left + img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles + x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) + x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) + elif i == 1: # top right + x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc + x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h + elif i == 2: # bottom left + x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h) + x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h) + elif i == 3: # bottom right + x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h) + x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) + + img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] + padw = x1a - x1b + padh = y1a - y1b + + # Labels + labels, segments = self.labels[index].copy(), self.segments[index].copy() + #print(labels) + if labels.size: + labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format + segments = [xyn2xy(x, w, h, padw, padh) for x in segments] + labels4.append(labels) + segments4.extend(segments) + + # Concat/clip labels + labels4 = np.concatenate(labels4, 0) + for x in (labels4[:, 1:], *segments4): + np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective() + # img4, labels4 = replicate(img4, labels4) # replicate + + # Augment + img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp['copy_paste']) + img4, labels4 = random_perspective(img4, + labels4, + segments4, + degrees=self.hyp['degrees'], + translate=self.hyp['translate'], + scale=self.hyp['scale'], + shear=self.hyp['shear'], + perspective=self.hyp['perspective'], + border=self.mosaic_border) # border to remove + + return img4, labels4 + + def load_mosaic9(self, index): + # YOLOv5 9-mosaic loader. Loads 1 image + 8 random images into a 9-image mosaic + labels9, segments9 = [], [] + s = self.img_size + indices = [index] + random.choices(self.indices, k=8) # 8 additional image indices + random.shuffle(indices) + hp, wp = -1, -1 # height, width previous + for i, index in enumerate(indices): + # Load image + img, _, (h, w) = self.load_image(index) + + # place img in img9 + if i == 0: # center + img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles + h0, w0 = h, w + c = s, s, s + w, s + h # xmin, ymin, xmax, ymax (base) coordinates + elif i == 1: # top + c = s, s - h, s + w, s + elif i == 2: # top right + c = s + wp, s - h, s + wp + w, s + elif i == 3: # right + c = s + w0, s, s + w0 + w, s + h + elif i == 4: # bottom right + c = s + w0, s + hp, s + w0 + w, s + hp + h + elif i == 5: # bottom + c = s + w0 - w, s + h0, s + w0, s + h0 + h + elif i == 6: # bottom left + c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h + elif i == 7: # left + c = s - w, s + h0 - h, s, s + h0 + elif i == 8: # top left + c = s - w, s + h0 - hp - h, s, s + h0 - hp + + padx, pady = c[:2] + x1, y1, x2, y2 = (max(x, 0) for x in c) # allocate coords + + # Labels + labels, segments = self.labels[index].copy(), self.segments[index].copy() + if labels.size: + labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padx, pady) # normalized xywh to pixel xyxy format + segments = [xyn2xy(x, w, h, padx, pady) for x in segments] + labels9.append(labels) + segments9.extend(segments) + + # Image + img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:] # img9[ymin:ymax, xmin:xmax] + hp, wp = h, w # height, width previous + + # Offset + yc, xc = (int(random.uniform(0, s)) for _ in self.mosaic_border) # mosaic center x, y + img9 = img9[yc:yc + 2 * s, xc:xc + 2 * s] + + # Concat/clip labels + labels9 = np.concatenate(labels9, 0) + labels9[:, [1, 3]] -= xc + labels9[:, [2, 4]] -= yc + c = np.array([xc, yc]) # centers + segments9 = [x - c for x in segments9] + + for x in (labels9[:, 1:], *segments9): + np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective() + # img9, labels9 = replicate(img9, labels9) # replicate + + # Augment + img9, labels9, segments9 = copy_paste(img9, labels9, segments9, p=self.hyp['copy_paste']) + img9, labels9 = random_perspective(img9, + labels9, + segments9, + degrees=self.hyp['degrees'], + translate=self.hyp['translate'], + scale=self.hyp['scale'], + shear=self.hyp['shear'], + perspective=self.hyp['perspective'], + border=self.mosaic_border) # border to remove + + return img9, labels9 + + @staticmethod + def collate_fn(batch): + im, label, path, shapes = zip(*batch) # transposed + for i, lb in enumerate(label): + lb[:, 0] = i # add target image index for build_targets() + return torch.stack(im, 0), torch.cat(label, 0), path, shapes + + @staticmethod + def collate_fn4(batch): + im, label, path, shapes = zip(*batch) # transposed + n = len(shapes) // 4 + im4, label4, path4, shapes4 = [], [], path[:n], shapes[:n] + + ho = torch.tensor([[0.0, 0, 0, 1, 0, 0]]) + wo = torch.tensor([[0.0, 0, 1, 0, 0, 0]]) + s = torch.tensor([[1, 1, 0.5, 0.5, 0.5, 0.5]]) # scale + for i in range(n): # zidane torch.zeros(16,3,720,1280) # BCHW + i *= 4 + if random.random() < 0.5: + im1 = F.interpolate(im[i].unsqueeze(0).float(), scale_factor=2.0, mode='bilinear', + align_corners=False)[0].type(im[i].type()) + lb = label[i] + else: + im1 = torch.cat((torch.cat((im[i], im[i + 1]), 1), torch.cat((im[i + 2], im[i + 3]), 1)), 2) + lb = torch.cat((label[i], label[i + 1] + ho, label[i + 2] + wo, label[i + 3] + ho + wo), 0) * s + im4.append(im1) + label4.append(lb) + + for i, lb in enumerate(label4): + lb[:, 0] = i # add target image index for build_targets() + + return torch.stack(im4, 0), torch.cat(label4, 0), path4, shapes4 + + +# Ancillary functions -------------------------------------------------------------------------------------------------- +def flatten_recursive(path=DATASETS_DIR / 'coco128'): + # Flatten a recursive directory by bringing all files to top level + new_path = Path(f'{str(path)}_flat') + if os.path.exists(new_path): + shutil.rmtree(new_path) # delete output folder + os.makedirs(new_path) # make new output folder + for file in tqdm(glob.glob(f'{str(Path(path))}/**/*.*', recursive=True)): + shutil.copyfile(file, new_path / Path(file).name) + + +def extract_boxes(path=DATASETS_DIR / 'coco128'): # from utils.dataloaders import *; extract_boxes() + # Convert detection dataset into classification dataset, with one directory per class + path = Path(path) # images dir + shutil.rmtree(path / 'classification') if (path / 'classification').is_dir() else None # remove existing + files = list(path.rglob('*.*')) + n = len(files) # number of files + for im_file in tqdm(files, total=n): + if im_file.suffix[1:] in IMG_FORMATS: + # image + im = cv2.imread(str(im_file))[..., ::-1] # BGR to RGB + h, w = im.shape[:2] + + # labels + lb_file = Path(img2label_paths([str(im_file)])[0]) + if Path(lb_file).exists(): + with open(lb_file) as f: + lb = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32) # labels + + for j, x in enumerate(lb): + c = int(x[0]) # class + f = (path / 'classifier') / f'{c}' / f'{path.stem}_{im_file.stem}_{j}.jpg' # new filename + if not f.parent.is_dir(): + f.parent.mkdir(parents=True) + + b = x[1:] * [w, h, w, h] # box + # b[2:] = b[2:].max() # rectangle to square + b[2:] = b[2:] * 1.2 + 3 # pad + b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(int) + + b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image + b[[1, 3]] = np.clip(b[[1, 3]], 0, h) + assert cv2.imwrite(str(f), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {f}' + + +def autosplit(path=DATASETS_DIR / 'coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False): + """ Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files + Usage: from utils.dataloaders import *; autosplit() + Arguments + path: Path to images directory + weights: Train, val, test weights (list, tuple) + annotated_only: Only use images with an annotated txt file + """ + path = Path(path) # images dir + files = sorted(x for x in path.rglob('*.*') if x.suffix[1:].lower() in IMG_FORMATS) # image files only + n = len(files) # number of files + random.seed(0) # for reproducibility + indices = random.choices([0, 1, 2], weights=weights, k=n) # assign each image to a split + + txt = ['autosplit_train.txt', 'autosplit_val.txt', 'autosplit_test.txt'] # 3 txt files + for x in txt: + if (path.parent / x).exists(): + (path.parent / x).unlink() # remove existing + + print(f'Autosplitting images from {path}' + ', using *.txt labeled images only' * annotated_only) + for i, img in tqdm(zip(indices, files), total=n): + if not annotated_only or Path(img2label_paths([str(img)])[0]).exists(): # check label + with open(path.parent / txt[i], 'a') as f: + f.write(f'./{img.relative_to(path.parent).as_posix()}' + '\n') # add image to txt file + + +def verify_image_label(args): + # Verify one image-label pair + im_file, lb_file, prefix = args + nm, nf, ne, nc, msg, segments = 0, 0, 0, 0, '', [] # number (missing, found, empty, corrupt), message, segments + try: + # verify images + im = Image.open(im_file) + im.verify() # PIL verify + shape = exif_size(im) # image size + assert (shape[0] > 9) & (shape[1] > 9), f'image size {shape} <10 pixels' + assert im.format.lower() in IMG_FORMATS, f'invalid image format {im.format}' + if im.format.lower() in ('jpg', 'jpeg'): + with open(im_file, 'rb') as f: + f.seek(-2, 2) + if f.read() != b'\xff\xd9': # corrupt JPEG + ImageOps.exif_transpose(Image.open(im_file)).save(im_file, 'JPEG', subsampling=0, quality=100) + msg = f'{prefix}WARNING ⚠️ {im_file}: corrupt JPEG restored and saved' + + # verify labels + if os.path.isfile(lb_file): + nf = 1 # label found + with open(lb_file) as f: + lb = [x.split() for x in f.read().strip().splitlines() if len(x)] + if any(len(x) > 6 for x in lb): # is segment + classes = np.array([x[0] for x in lb], dtype=np.float32) + segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in lb] # (cls, xy1...) + lb = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1) # (cls, xywh) + lb = np.array(lb, dtype=np.float32) + nl = len(lb) + if nl: + assert lb.shape[1] == 5, f'labels require 5 columns, {lb.shape[1]} columns detected' + assert (lb >= 0).all(), f'negative label values {lb[lb < 0]}' + assert (lb[:, 1:] <= 1).all(), f'non-normalized or out of bounds coordinates {lb[:, 1:][lb[:, 1:] > 1]}' + _, i = np.unique(lb, axis=0, return_index=True) + if len(i) < nl: # duplicate row check + lb = lb[i] # remove duplicates + if segments: + segments = [segments[x] for x in i] + msg = f'{prefix}WARNING ⚠️ {im_file}: {nl - len(i)} duplicate labels removed' + else: + ne = 1 # label empty + lb = np.zeros((0, 5), dtype=np.float32) + else: + nm = 1 # label missing + lb = np.zeros((0, 5), dtype=np.float32) + return im_file, lb, shape, segments, nm, nf, ne, nc, msg + except Exception as e: + nc = 1 + msg = f'{prefix}WARNING ⚠️ {im_file}: ignoring corrupt image/label: {e}' + return [None, None, None, None, nm, nf, ne, nc, msg] + + +class HUBDatasetStats(): + """ Class for generating HUB dataset JSON and `-hub` dataset directory + + Arguments + path: Path to data.yaml or data.zip (with data.yaml inside data.zip) + autodownload: Attempt to download dataset if not found locally + + Usage + from utils.dataloaders import HUBDatasetStats + stats = HUBDatasetStats('coco128.yaml', autodownload=True) # usage 1 + stats = HUBDatasetStats('path/to/coco128.zip') # usage 2 + stats.get_json(save=False) + stats.process_images() + """ + + def __init__(self, path='coco128.yaml', autodownload=False): + # Initialize class + zipped, data_dir, yaml_path = self._unzip(Path(path)) + try: + with open(check_yaml(yaml_path), errors='ignore') as f: + data = yaml.safe_load(f) # data dict + if zipped: + data['path'] = data_dir + except Exception as e: + raise Exception("error/HUB/dataset_stats/yaml_load") from e + + check_dataset(data, autodownload) # download dataset if missing + self.hub_dir = Path(data['path'] + '-hub') + self.im_dir = self.hub_dir / 'images' + self.im_dir.mkdir(parents=True, exist_ok=True) # makes /images + self.stats = {'nc': data['nc'], 'names': list(data['names'].values())} # statistics dictionary + self.data = data + + @staticmethod + def _find_yaml(dir): + # Return data.yaml file + files = list(dir.glob('*.yaml')) or list(dir.rglob('*.yaml')) # try root level first and then recursive + assert files, f'No *.yaml file found in {dir}' + if len(files) > 1: + files = [f for f in files if f.stem == dir.stem] # prefer *.yaml files that match dir name + assert files, f'Multiple *.yaml files found in {dir}, only 1 *.yaml file allowed' + assert len(files) == 1, f'Multiple *.yaml files found: {files}, only 1 *.yaml file allowed in {dir}' + return files[0] + + def _unzip(self, path): + # Unzip data.zip + if not str(path).endswith('.zip'): # path is data.yaml + return False, None, path + assert Path(path).is_file(), f'Error unzipping {path}, file not found' + unzip_file(path, path=path.parent) + dir = path.with_suffix('') # dataset directory == zip name + assert dir.is_dir(), f'Error unzipping {path}, {dir} not found. path/to/abc.zip MUST unzip to path/to/abc/' + return True, str(dir), self._find_yaml(dir) # zipped, data_dir, yaml_path + + def _hub_ops(self, f, max_dim=1920): + # HUB ops for 1 image 'f': resize and save at reduced quality in /dataset-hub for web/app viewing + f_new = self.im_dir / Path(f).name # dataset-hub image filename + try: # use PIL + im = Image.open(f) + r = max_dim / max(im.height, im.width) # ratio + if r < 1.0: # image too large + im = im.resize((int(im.width * r), int(im.height * r))) + im.save(f_new, 'JPEG', quality=50, optimize=True) # save + except Exception as e: # use OpenCV + LOGGER.info(f'WARNING ⚠️ HUB ops PIL failure {f}: {e}') + im = cv2.imread(f) + im_height, im_width = im.shape[:2] + r = max_dim / max(im_height, im_width) # ratio + if r < 1.0: # image too large + im = cv2.resize(im, (int(im_width * r), int(im_height * r)), interpolation=cv2.INTER_AREA) + cv2.imwrite(str(f_new), im) + + def get_json(self, save=False, verbose=False): + # Return dataset JSON for Ultralytics HUB + def _round(labels): + # Update labels to integer class and 6 decimal place floats + return [[int(c), *(round(x, 4) for x in points)] for c, *points in labels] + + for split in 'train', 'val', 'test': + if self.data.get(split) is None: + self.stats[split] = None # i.e. no test set + continue + dataset = LoadImagesAndLabels(self.data[split]) # load dataset + x = np.array([ + np.bincount(label[:, 0].astype(int), minlength=self.data['nc']) + for label in tqdm(dataset.labels, total=dataset.n, desc='Statistics')]) # shape(128x80) + self.stats[split] = { + 'instance_stats': { + 'total': int(x.sum()), + 'per_class': x.sum(0).tolist()}, + 'image_stats': { + 'total': dataset.n, + 'unlabelled': int(np.all(x == 0, 1).sum()), + 'per_class': (x > 0).sum(0).tolist()}, + 'labels': [{ + str(Path(k).name): _round(v.tolist())} for k, v in zip(dataset.im_files, dataset.labels)]} + + # Save, print and return + if save: + stats_path = self.hub_dir / 'stats.json' + print(f'Saving {stats_path.resolve()}...') + with open(stats_path, 'w') as f: + json.dump(self.stats, f) # save stats.json + if verbose: + print(json.dumps(self.stats, indent=2, sort_keys=False)) + return self.stats + + def process_images(self): + # Compress images for Ultralytics HUB + for split in 'train', 'val', 'test': + if self.data.get(split) is None: + continue + dataset = LoadImagesAndLabels(self.data[split]) # load dataset + desc = f'{split} images' + for _ in tqdm(ThreadPool(NUM_THREADS).imap(self._hub_ops, dataset.im_files), total=dataset.n, desc=desc): + pass + print(f'Done. All images saved to {self.im_dir}') + return self.im_dir + + +# Classification dataloaders ------------------------------------------------------------------------------------------- +class ClassificationDataset(torchvision.datasets.ImageFolder): + """ + YOLOv5 Classification Dataset. + Arguments + root: Dataset path + transform: torchvision transforms, used by default + album_transform: Albumentations transforms, used if installed + """ + + def __init__(self, root, augment, imgsz, cache=False): + super().__init__(root=root) + self.torch_transforms = classify_transforms(imgsz) + self.album_transforms = classify_albumentations(augment, imgsz) if augment else None + self.cache_ram = cache is True or cache == 'ram' + self.cache_disk = cache == 'disk' + self.samples = [list(x) + [Path(x[0]).with_suffix('.npy'), None] for x in self.samples] # file, index, npy, im + + def __getitem__(self, i): + f, j, fn, im = self.samples[i] # filename, index, filename.with_suffix('.npy'), image + if self.cache_ram and im is None: + im = self.samples[i][3] = cv2.imread(f) + elif self.cache_disk: + if not fn.exists(): # load npy + np.save(fn.as_posix(), cv2.imread(f)) + im = np.load(fn) + else: # read image + im = cv2.imread(f) # BGR + if self.album_transforms: + sample = self.album_transforms(image=cv2.cvtColor(im, cv2.COLOR_BGR2RGB))["image"] + else: + sample = self.torch_transforms(im) + return sample, j + + +def create_classification_dataloader(path, + imgsz=224, + batch_size=16, + augment=True, + cache=False, + rank=-1, + workers=8, + shuffle=True): + # Returns Dataloader object to be used with YOLOv5 Classifier + with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP + dataset = ClassificationDataset(root=path, imgsz=imgsz, augment=augment, cache=cache) + batch_size = min(batch_size, len(dataset)) + nd = torch.cuda.device_count() + nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) + sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle) + generator = torch.Generator() + generator.manual_seed(6148914691236517205 + RANK) + return InfiniteDataLoader(dataset, + batch_size=batch_size, + shuffle=shuffle and sampler is None, + num_workers=nw, + sampler=sampler, + pin_memory=PIN_MEMORY, + worker_init_fn=seed_worker, + generator=generator) # or DataLoader(persistent_workers=True) diff --git a/utils/.ipynb_checkpoints/general-checkpoint.py b/utils/.ipynb_checkpoints/general-checkpoint.py new file mode 100644 index 000000000..08d8b14ff --- /dev/null +++ b/utils/.ipynb_checkpoints/general-checkpoint.py @@ -0,0 +1,1193 @@ +import contextlib +import glob +import inspect +import logging +import logging.config +import math +import os +import platform +import random +import re +import signal +import sys +import time +import urllib +from copy import deepcopy +from datetime import datetime +from itertools import repeat +from multiprocessing.pool import ThreadPool +from pathlib import Path +from subprocess import check_output +from tarfile import is_tarfile +from typing import Optional +from zipfile import ZipFile, is_zipfile + +import cv2 +import IPython +import numpy as np +import pandas as pd +import pkg_resources as pkg +import torch +import torchvision +import yaml + +from utils import TryExcept, emojis +from utils.downloads import gsutil_getsize +from utils.metrics import box_iou, fitness + +FILE = Path(__file__).resolve() +ROOT = FILE.parents[1] # YOLO root directory +RANK = int(os.getenv('RANK', -1)) + +# Settings +NUM_THREADS = min(8, max(1, os.cpu_count() - 1)) # number of YOLOv5 multiprocessing threads +DATASETS_DIR = Path(os.getenv('YOLOv5_DATASETS_DIR', ROOT.parent / 'datasets')) # global datasets directory +AUTOINSTALL = str(os.getenv('YOLOv5_AUTOINSTALL', True)).lower() == 'true' # global auto-install mode +VERBOSE = str(os.getenv('YOLOv5_VERBOSE', True)).lower() == 'true' # global verbose mode +TQDM_BAR_FORMAT = '{l_bar}{bar:10}| {n_fmt}/{total_fmt} {elapsed}' # tqdm bar format +FONT = 'Arial.ttf' # https://ultralytics.com/assets/Arial.ttf + +torch.set_printoptions(linewidth=320, precision=5, profile='long') +np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format}) # format short g, %precision=5 +pd.options.display.max_columns = 10 +cv2.setNumThreads(0) # prevent OpenCV from multithreading (incompatible with PyTorch DataLoader) +os.environ['NUMEXPR_MAX_THREADS'] = str(NUM_THREADS) # NumExpr max threads +os.environ['OMP_NUM_THREADS'] = '1' if platform.system() == 'darwin' else str(NUM_THREADS) # OpenMP (PyTorch and SciPy) + + +def is_ascii(s=''): + # Is string composed of all ASCII (no UTF) characters? (note str().isascii() introduced in python 3.7) + s = str(s) # convert list, tuple, None, etc. to str + return len(s.encode().decode('ascii', 'ignore')) == len(s) + + +def is_chinese(s='人工智能'): + # Is string composed of any Chinese characters? + return bool(re.search('[\u4e00-\u9fff]', str(s))) + + +def is_colab(): + # Is environment a Google Colab instance? + return 'google.colab' in sys.modules + + +def is_notebook(): + # Is environment a Jupyter notebook? Verified on Colab, Jupyterlab, Kaggle, Paperspace + ipython_type = str(type(IPython.get_ipython())) + return 'colab' in ipython_type or 'zmqshell' in ipython_type + + +def is_kaggle(): + # Is environment a Kaggle Notebook? + return os.environ.get('PWD') == '/kaggle/working' and os.environ.get('KAGGLE_URL_BASE') == 'https://www.kaggle.com' + + +def is_docker() -> bool: + """Check if the process runs inside a docker container.""" + if Path("/.dockerenv").exists(): + return True + try: # check if docker is in control groups + with open("/proc/self/cgroup") as file: + return any("docker" in line for line in file) + except OSError: + return False + + +def is_writeable(dir, test=False): + # Return True if directory has write permissions, test opening a file with write permissions if test=True + if not test: + return os.access(dir, os.W_OK) # possible issues on Windows + file = Path(dir) / 'tmp.txt' + try: + with open(file, 'w'): # open file with write permissions + pass + file.unlink() # remove file + return True + except OSError: + return False + + +LOGGING_NAME = "yolov5" + + +def set_logging(name=LOGGING_NAME, verbose=True): + # sets up logging for the given name + rank = int(os.getenv('RANK', -1)) # rank in world for Multi-GPU trainings + level = logging.INFO if verbose and rank in {-1, 0} else logging.ERROR + logging.config.dictConfig({ + "version": 1, + "disable_existing_loggers": False, + "formatters": { + name: { + "format": "%(message)s"}}, + "handlers": { + name: { + "class": "logging.StreamHandler", + "formatter": name, + "level": level,}}, + "loggers": { + name: { + "level": level, + "handlers": [name], + "propagate": False,}}}) + + +set_logging(LOGGING_NAME) # run before defining LOGGER +LOGGER = logging.getLogger(LOGGING_NAME) # define globally (used in train.py, val.py, detect.py, etc.) +if platform.system() == 'Windows': + for fn in LOGGER.info, LOGGER.warning: + setattr(LOGGER, fn.__name__, lambda x: fn(emojis(x))) # emoji safe logging + + +def user_config_dir(dir='Ultralytics', env_var='YOLOV5_CONFIG_DIR'): + # Return path of user configuration directory. Prefer environment variable if exists. Make dir if required. + env = os.getenv(env_var) + if env: + path = Path(env) # use environment variable + else: + cfg = {'Windows': 'AppData/Roaming', 'Linux': '.config', 'Darwin': 'Library/Application Support'} # 3 OS dirs + path = Path.home() / cfg.get(platform.system(), '') # OS-specific config dir + path = (path if is_writeable(path) else Path('/tmp')) / dir # GCP and AWS lambda fix, only /tmp is writeable + path.mkdir(exist_ok=True) # make if required + return path + + +CONFIG_DIR = user_config_dir() # Ultralytics settings dir + + +class Profile(contextlib.ContextDecorator): + # YOLO Profile class. Usage: @Profile() decorator or 'with Profile():' context manager + def __init__(self, t=0.0): + self.t = t + self.cuda = torch.cuda.is_available() + + def __enter__(self): + self.start = self.time() + return self + + def __exit__(self, type, value, traceback): + self.dt = self.time() - self.start # delta-time + self.t += self.dt # accumulate dt + + def time(self): + if self.cuda: + torch.cuda.synchronize() + return time.time() + + +class Timeout(contextlib.ContextDecorator): + # YOLO Timeout class. Usage: @Timeout(seconds) decorator or 'with Timeout(seconds):' context manager + def __init__(self, seconds, *, timeout_msg='', suppress_timeout_errors=True): + self.seconds = int(seconds) + self.timeout_message = timeout_msg + self.suppress = bool(suppress_timeout_errors) + + def _timeout_handler(self, signum, frame): + raise TimeoutError(self.timeout_message) + + def __enter__(self): + if platform.system() != 'Windows': # not supported on Windows + signal.signal(signal.SIGALRM, self._timeout_handler) # Set handler for SIGALRM + signal.alarm(self.seconds) # start countdown for SIGALRM to be raised + + def __exit__(self, exc_type, exc_val, exc_tb): + if platform.system() != 'Windows': + signal.alarm(0) # Cancel SIGALRM if it's scheduled + if self.suppress and exc_type is TimeoutError: # Suppress TimeoutError + return True + + +class WorkingDirectory(contextlib.ContextDecorator): + # Usage: @WorkingDirectory(dir) decorator or 'with WorkingDirectory(dir):' context manager + def __init__(self, new_dir): + self.dir = new_dir # new dir + self.cwd = Path.cwd().resolve() # current dir + + def __enter__(self): + os.chdir(self.dir) + + def __exit__(self, exc_type, exc_val, exc_tb): + os.chdir(self.cwd) + + +def methods(instance): + # Get class/instance methods + return [f for f in dir(instance) if callable(getattr(instance, f)) and not f.startswith("__")] + + +def print_args(args: Optional[dict] = None, show_file=True, show_func=False): + # Print function arguments (optional args dict) + x = inspect.currentframe().f_back # previous frame + file, _, func, _, _ = inspect.getframeinfo(x) + if args is None: # get args automatically + args, _, _, frm = inspect.getargvalues(x) + args = {k: v for k, v in frm.items() if k in args} + try: + file = Path(file).resolve().relative_to(ROOT).with_suffix('') + except ValueError: + file = Path(file).stem + s = (f'{file}: ' if show_file else '') + (f'{func}: ' if show_func else '') + LOGGER.info(colorstr(s) + ', '.join(f'{k}={v}' for k, v in args.items())) + + +def init_seeds(seed=0, deterministic=False): + # Initialize random number generator (RNG) seeds https://pytorch.org/docs/stable/notes/randomness.html + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) # for Multi-GPU, exception safe + # torch.backends.cudnn.benchmark = True # AutoBatch problem https://github.com/ultralytics/yolov5/issues/9287 + if deterministic and check_version(torch.__version__, '1.12.0'): # https://github.com/ultralytics/yolov5/pull/8213 + torch.use_deterministic_algorithms(True) + torch.backends.cudnn.deterministic = True + os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8' + os.environ['PYTHONHASHSEED'] = str(seed) + + +def intersect_dicts(da, db, exclude=()): + # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values + return {k: v for k, v in da.items() if k in db and all(x not in k for x in exclude) and v.shape == db[k].shape} + + +def get_default_args(func): + # Get func() default arguments + signature = inspect.signature(func) + return {k: v.default for k, v in signature.parameters.items() if v.default is not inspect.Parameter.empty} + + +def get_latest_run(search_dir='.'): + # Return path to most recent 'last.pt' in /runs (i.e. to --resume from) + last_list = glob.glob(f'{search_dir}/**/last*.pt', recursive=True) + return max(last_list, key=os.path.getctime) if last_list else '' + + +def file_age(path=__file__): + # Return days since last file update + dt = (datetime.now() - datetime.fromtimestamp(Path(path).stat().st_mtime)) # delta + return dt.days # + dt.seconds / 86400 # fractional days + + +def file_date(path=__file__): + # Return human-readable file modification date, i.e. '2021-3-26' + t = datetime.fromtimestamp(Path(path).stat().st_mtime) + return f'{t.year}-{t.month}-{t.day}' + + +def file_size(path): + # Return file/dir size (MB) + mb = 1 << 20 # bytes to MiB (1024 ** 2) + path = Path(path) + if path.is_file(): + return path.stat().st_size / mb + elif path.is_dir(): + return sum(f.stat().st_size for f in path.glob('**/*') if f.is_file()) / mb + else: + return 0.0 + + +def check_online(): + # Check internet connectivity + import socket + + def run_once(): + # Check once + try: + socket.create_connection(("1.1.1.1", 443), 5) # check host accessibility + return True + except OSError: + return False + + return run_once() or run_once() # check twice to increase robustness to intermittent connectivity issues + + +def git_describe(path=ROOT): # path must be a directory + # Return human-readable git description, i.e. v5.0-5-g3e25f1e https://git-scm.com/docs/git-describe + try: + assert (Path(path) / '.git').is_dir() + return check_output(f'git -C {path} describe --tags --long --always', shell=True).decode()[:-1] + except Exception: + return '' + + +@TryExcept() +@WorkingDirectory(ROOT) +def check_git_status(repo='WongKinYiu/yolov9', branch='main'): + # YOLO status check, recommend 'git pull' if code is out of date + url = f'https://github.com/{repo}' + msg = f', for updates see {url}' + s = colorstr('github: ') # string + assert Path('.git').exists(), s + 'skipping check (not a git repository)' + msg + assert check_online(), s + 'skipping check (offline)' + msg + + splits = re.split(pattern=r'\s', string=check_output('git remote -v', shell=True).decode()) + matches = [repo in s for s in splits] + if any(matches): + remote = splits[matches.index(True) - 1] + else: + remote = 'ultralytics' + check_output(f'git remote add {remote} {url}', shell=True) + check_output(f'git fetch {remote}', shell=True, timeout=5) # git fetch + local_branch = check_output('git rev-parse --abbrev-ref HEAD', shell=True).decode().strip() # checked out + n = int(check_output(f'git rev-list {local_branch}..{remote}/{branch} --count', shell=True)) # commits behind + if n > 0: + pull = 'git pull' if remote == 'origin' else f'git pull {remote} {branch}' + s += f"⚠️ YOLO is out of date by {n} commit{'s' * (n > 1)}. Use `{pull}` or `git clone {url}` to update." + else: + s += f'up to date with {url} ✅' + LOGGER.info(s) + + +@WorkingDirectory(ROOT) +def check_git_info(path='.'): + # YOLO git info check, return {remote, branch, commit} + check_requirements('gitpython') + import git + try: + repo = git.Repo(path) + remote = repo.remotes.origin.url.replace('.git', '') # i.e. 'https://github.com/WongKinYiu/yolov9' + commit = repo.head.commit.hexsha # i.e. '3134699c73af83aac2a481435550b968d5792c0d' + try: + branch = repo.active_branch.name # i.e. 'main' + except TypeError: # not on any branch + branch = None # i.e. 'detached HEAD' state + return {'remote': remote, 'branch': branch, 'commit': commit} + except git.exc.InvalidGitRepositoryError: # path is not a git dir + return {'remote': None, 'branch': None, 'commit': None} + + +def check_python(minimum='3.7.0'): + # Check current python version vs. required python version + check_version(platform.python_version(), minimum, name='Python ', hard=True) + + +def check_version(current='0.0.0', minimum='0.0.0', name='version ', pinned=False, hard=False, verbose=False): + # Check version vs. required version + current, minimum = (pkg.parse_version(x) for x in (current, minimum)) + result = (current == minimum) if pinned else (current >= minimum) # bool + s = f'WARNING ⚠️ {name}{minimum} is required by YOLO, but {name}{current} is currently installed' # string + if hard: + assert result, emojis(s) # assert min requirements met + if verbose and not result: + LOGGER.warning(s) + return result + + +@TryExcept() +def check_requirements(requirements=ROOT / 'requirements.txt', exclude=(), install=True, cmds=''): + # Check installed dependencies meet YOLO requirements (pass *.txt file or list of packages or single package str) + prefix = colorstr('red', 'bold', 'requirements:') + check_python() # check python version + if isinstance(requirements, Path): # requirements.txt file + file = requirements.resolve() + assert file.exists(), f"{prefix} {file} not found, check failed." + with file.open() as f: + requirements = [f'{x.name}{x.specifier}' for x in pkg.parse_requirements(f) if x.name not in exclude] + elif isinstance(requirements, str): + requirements = [requirements] + + s = '' + n = 0 + for r in requirements: + try: + pkg.require(r) + except (pkg.VersionConflict, pkg.DistributionNotFound): # exception if requirements not met + s += f'"{r}" ' + n += 1 + + if s and install and AUTOINSTALL: # check environment variable + LOGGER.info(f"{prefix} YOLO requirement{'s' * (n > 1)} {s}not found, attempting AutoUpdate...") + try: + # assert check_online(), "AutoUpdate skipped (offline)" + LOGGER.info(check_output(f'pip install {s} {cmds}', shell=True).decode()) + source = file if 'file' in locals() else requirements + s = f"{prefix} {n} package{'s' * (n > 1)} updated per {source}\n" \ + f"{prefix} ⚠️ {colorstr('bold', 'Restart runtime or rerun command for updates to take effect')}\n" + LOGGER.info(s) + except Exception as e: + LOGGER.warning(f'{prefix} ❌ {e}') + + +def check_img_size(imgsz, s=32, floor=0): + # Verify image size is a multiple of stride s in each dimension + if isinstance(imgsz, int): # integer i.e. img_size=640 + new_size = max(make_divisible(imgsz, int(s)), floor) + else: # list i.e. img_size=[640, 480] + imgsz = list(imgsz) # convert to list if tuple + new_size = [max(make_divisible(x, int(s)), floor) for x in imgsz] + if new_size != imgsz: + LOGGER.warning(f'WARNING ⚠️ --img-size {imgsz} must be multiple of max stride {s}, updating to {new_size}') + return new_size + + +def check_imshow(warn=False): + # Check if environment supports image displays + try: + assert not is_notebook() + assert not is_docker() + cv2.imshow('test', np.zeros((1, 1, 3))) + cv2.waitKey(1) + cv2.destroyAllWindows() + cv2.waitKey(1) + return True + except Exception as e: + if warn: + LOGGER.warning(f'WARNING ⚠️ Environment does not support cv2.imshow() or PIL Image.show()\n{e}') + return False + + +def check_suffix(file='yolo.pt', suffix=('.pt',), msg=''): + # Check file(s) for acceptable suffix + if file and suffix: + if isinstance(suffix, str): + suffix = [suffix] + for f in file if isinstance(file, (list, tuple)) else [file]: + s = Path(f).suffix.lower() # file suffix + if len(s): + assert s in suffix, f"{msg}{f} acceptable suffix is {suffix}" + + +def check_yaml(file, suffix=('.yaml', '.yml')): + # Search/download YAML file (if necessary) and return path, checking suffix + return check_file(file, suffix) + + +def check_file(file, suffix=''): + # Search/download file (if necessary) and return path + check_suffix(file, suffix) # optional + file = str(file) # convert to str() + if os.path.isfile(file) or not file: # exists + return file + elif file.startswith(('http:/', 'https:/')): # download + url = file # warning: Pathlib turns :// -> :/ + file = Path(urllib.parse.unquote(file).split('?')[0]).name # '%2F' to '/', split https://url.com/file.txt?auth + if os.path.isfile(file): + LOGGER.info(f'Found {url} locally at {file}') # file already exists + else: + LOGGER.info(f'Downloading {url} to {file}...') + torch.hub.download_url_to_file(url, file) + assert Path(file).exists() and Path(file).stat().st_size > 0, f'File download failed: {url}' # check + return file + elif file.startswith('clearml://'): # ClearML Dataset ID + assert 'clearml' in sys.modules, "ClearML is not installed, so cannot use ClearML dataset. Try running 'pip install clearml'." + return file + else: # search + files = [] + for d in 'data', 'models', 'utils': # search directories + files.extend(glob.glob(str(ROOT / d / '**' / file), recursive=True)) # find file + assert len(files), f'File not found: {file}' # assert file was found + assert len(files) == 1, f"Multiple files match '{file}', specify exact path: {files}" # assert unique + return files[0] # return file + + +def check_font(font=FONT, progress=False): + # Download font to CONFIG_DIR if necessary + font = Path(font) + file = CONFIG_DIR / font.name + if not font.exists() and not file.exists(): + url = f'https://ultralytics.com/assets/{font.name}' + LOGGER.info(f'Downloading {url} to {file}...') + torch.hub.download_url_to_file(url, str(file), progress=progress) + + +def check_dataset(data, autodownload=True): + # Download, check and/or unzip dataset if not found locally + + # Download (optional) + extract_dir = '' + if isinstance(data, (str, Path)) and (is_zipfile(data) or is_tarfile(data)): + download(data, dir=f'{DATASETS_DIR}/{Path(data).stem}', unzip=True, delete=False, curl=False, threads=1) + data = next((DATASETS_DIR / Path(data).stem).rglob('*.yaml')) + extract_dir, autodownload = data.parent, False + + # Read yaml (optional) + if isinstance(data, (str, Path)): + data = yaml_load(data) # dictionary + + # Checks + for k in 'train', 'val', 'names': + assert k in data, emojis(f"data.yaml '{k}:' field missing ❌") + if isinstance(data['names'], (list, tuple)): # old array format + data['names'] = dict(enumerate(data['names'])) # convert to dict + assert all(isinstance(k, int) for k in data['names'].keys()), 'data.yaml names keys must be integers, i.e. 2: car' + data['nc'] = len(data['names']) + + # Resolve paths + path = Path(extract_dir or data.get('path') or '') # optional 'path' default to '.' + if not path.is_absolute(): + path = (ROOT / path).resolve() + data['path'] = path # download scripts + for k in 'train', 'val', 'test': + if data.get(k): # prepend path + if isinstance(data[k], str): + x = (path / data[k]).resolve() + if not x.exists() and data[k].startswith('../'): + x = (path / data[k][3:]).resolve() + data[k] = str(x) + else: + data[k] = [str((path / x).resolve()) for x in data[k]] + + # Parse yaml + train, val, test, s = (data.get(x) for x in ('train', 'val', 'test', 'download')) + if val: + val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path + if not all(x.exists() for x in val): + LOGGER.info('\nDataset not found ⚠️, missing paths %s' % [str(x) for x in val if not x.exists()]) + if not s or not autodownload: + raise Exception('Dataset not found ❌') + t = time.time() + if s.startswith('http') and s.endswith('.zip'): # URL + f = Path(s).name # filename + LOGGER.info(f'Downloading {s} to {f}...') + torch.hub.download_url_to_file(s, f) + Path(DATASETS_DIR).mkdir(parents=True, exist_ok=True) # create root + unzip_file(f, path=DATASETS_DIR) # unzip + Path(f).unlink() # remove zip + r = None # success + elif s.startswith('bash '): # bash script + LOGGER.info(f'Running {s} ...') + r = os.system(s) + else: # python script + r = exec(s, {'yaml': data}) # return None + dt = f'({round(time.time() - t, 1)}s)' + s = f"success ✅ {dt}, saved to {colorstr('bold', DATASETS_DIR)}" if r in (0, None) else f"failure {dt} ❌" + LOGGER.info(f"Dataset download {s}") + check_font('Arial.ttf' if is_ascii(data['names']) else 'Arial.Unicode.ttf', progress=True) # download fonts + return data # dictionary + + +def check_amp(model): + # Check PyTorch Automatic Mixed Precision (AMP) functionality. Return True on correct operation + from models.common import AutoShape, DetectMultiBackend + + def amp_allclose(model, im): + # All close FP32 vs AMP results + m = AutoShape(model, verbose=False) # model + a = m(im).xywhn[0] # FP32 inference + m.amp = True + b = m(im).xywhn[0] # AMP inference + return a.shape == b.shape and torch.allclose(a, b, atol=0.1) # close to 10% absolute tolerance + + prefix = colorstr('AMP: ') + device = next(model.parameters()).device # get model device + if device.type in ('cpu', 'mps'): + return False # AMP only used on CUDA devices + f = ROOT / 'data' / 'images' / 'bus.jpg' # image to check + im = f if f.exists() else 'https://ultralytics.com/images/bus.jpg' if check_online() else np.ones((640, 640, 3)) + try: + #assert amp_allclose(deepcopy(model), im) or amp_allclose(DetectMultiBackend('yolo.pt', device), im) + LOGGER.info(f'{prefix}checks passed ✅') + return True + except Exception: + help_url = 'https://github.com/ultralytics/yolov5/issues/7908' + LOGGER.warning(f'{prefix}checks failed ❌, disabling Automatic Mixed Precision. See {help_url}') + return False + + +def yaml_load(file='data.yaml'): + # Single-line safe yaml loading + with open(file, errors='ignore') as f: + return yaml.safe_load(f) + + +def yaml_save(file='data.yaml', data={}): + # Single-line safe yaml saving + with open(file, 'w') as f: + yaml.safe_dump({k: str(v) if isinstance(v, Path) else v for k, v in data.items()}, f, sort_keys=False) + + +def unzip_file(file, path=None, exclude=('.DS_Store', '__MACOSX')): + # Unzip a *.zip file to path/, excluding files containing strings in exclude list + if path is None: + path = Path(file).parent # default path + with ZipFile(file) as zipObj: + for f in zipObj.namelist(): # list all archived filenames in the zip + if all(x not in f for x in exclude): + zipObj.extract(f, path=path) + + +def url2file(url): + # Convert URL to filename, i.e. https://url.com/file.txt?auth -> file.txt + url = str(Path(url)).replace(':/', '://') # Pathlib turns :// -> :/ + return Path(urllib.parse.unquote(url)).name.split('?')[0] # '%2F' to '/', split https://url.com/file.txt?auth + + +def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1, retry=3): + # Multithreaded file download and unzip function, used in data.yaml for autodownload + def download_one(url, dir): + # Download 1 file + success = True + if os.path.isfile(url): + f = Path(url) # filename + else: # does not exist + f = dir / Path(url).name + LOGGER.info(f'Downloading {url} to {f}...') + for i in range(retry + 1): + if curl: + s = 'sS' if threads > 1 else '' # silent + r = os.system( + f'curl -# -{s}L "{url}" -o "{f}" --retry 9 -C -') # curl download with retry, continue + success = r == 0 + else: + torch.hub.download_url_to_file(url, f, progress=threads == 1) # torch download + success = f.is_file() + if success: + break + elif i < retry: + LOGGER.warning(f'⚠️ Download failure, retrying {i + 1}/{retry} {url}...') + else: + LOGGER.warning(f'❌ Failed to download {url}...') + + if unzip and success and (f.suffix == '.gz' or is_zipfile(f) or is_tarfile(f)): + LOGGER.info(f'Unzipping {f}...') + if is_zipfile(f): + unzip_file(f, dir) # unzip + elif is_tarfile(f): + os.system(f'tar xf {f} --directory {f.parent}') # unzip + elif f.suffix == '.gz': + os.system(f'tar xfz {f} --directory {f.parent}') # unzip + if delete: + f.unlink() # remove zip + + dir = Path(dir) + dir.mkdir(parents=True, exist_ok=True) # make directory + if threads > 1: + pool = ThreadPool(threads) + pool.imap(lambda x: download_one(*x), zip(url, repeat(dir))) # multithreaded + pool.close() + pool.join() + else: + for u in [url] if isinstance(url, (str, Path)) else url: + download_one(u, dir) + + +def make_divisible(x, divisor): + # Returns nearest x divisible by divisor + if isinstance(divisor, torch.Tensor): + divisor = int(divisor.max()) # to int + return math.ceil(x / divisor) * divisor + + +def clean_str(s): + # Cleans a string by replacing special characters with underscore _ + return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨´><+]", repl="_", string=s) + + +def one_cycle(y1=0.0, y2=1.0, steps=100): + # lambda function for sinusoidal ramp from y1 to y2 https://arxiv.org/pdf/1812.01187.pdf + return lambda x: ((1 - math.cos(x * math.pi / steps)) / 2) * (y2 - y1) + y1 + + +def one_flat_cycle(y1=0.0, y2=1.0, steps=100): + # lambda function for sinusoidal ramp from y1 to y2 https://arxiv.org/pdf/1812.01187.pdf + #return lambda x: ((1 - math.cos(x * math.pi / steps)) / 2) * (y2 - y1) + y1 + return lambda x: ((1 - math.cos((x - (steps // 2)) * math.pi / (steps // 2))) / 2) * (y2 - y1) + y1 if (x > (steps // 2)) else y1 + + +def colorstr(*input): + # Colors a string https://en.wikipedia.org/wiki/ANSI_escape_code, i.e. colorstr('blue', 'hello world') + *args, string = input if len(input) > 1 else ('blue', 'bold', input[0]) # color arguments, string + colors = { + 'black': '\033[30m', # basic colors + 'red': '\033[31m', + 'green': '\033[32m', + 'yellow': '\033[33m', + 'blue': '\033[34m', + 'magenta': '\033[35m', + 'cyan': '\033[36m', + 'white': '\033[37m', + 'bright_black': '\033[90m', # bright colors + 'bright_red': '\033[91m', + 'bright_green': '\033[92m', + 'bright_yellow': '\033[93m', + 'bright_blue': '\033[94m', + 'bright_magenta': '\033[95m', + 'bright_cyan': '\033[96m', + 'bright_white': '\033[97m', + 'end': '\033[0m', # misc + 'bold': '\033[1m', + 'underline': '\033[4m'} + return ''.join(colors[x] for x in args) + f'{string}' + colors['end'] + + +def labels_to_class_weights(labels, nc=80): + # Get class weights (inverse frequency) from training labels + if labels[0] is None: # no labels loaded + return torch.Tensor() + + labels = np.concatenate(labels, 0) # labels.shape = (866643, 5) for COCO + classes = labels[:, 0].astype(int) # labels = [class xywh] + weights = np.bincount(classes, minlength=nc) # occurrences per class + + # Prepend gridpoint count (for uCE training) + # gpi = ((320 / 32 * np.array([1, 2, 4])) ** 2 * 3).sum() # gridpoints per image + # weights = np.hstack([gpi * len(labels) - weights.sum() * 9, weights * 9]) ** 0.5 # prepend gridpoints to start + + weights[weights == 0] = 1 # replace empty bins with 1 + weights = 1 / weights # number of targets per class + weights /= weights.sum() # normalize + return torch.from_numpy(weights).float() + + +def labels_to_image_weights(labels, nc=80, class_weights=np.ones(80)): + # Produces image weights based on class_weights and image contents + # Usage: index = random.choices(range(n), weights=image_weights, k=1) # weighted image sample + class_counts = np.array([np.bincount(x[:, 0].astype(int), minlength=nc) for x in labels]) + return (class_weights.reshape(1, nc) * class_counts).sum(1) + + +def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) + # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/ + # a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n') + # b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n') + # x1 = [list(a[i] == b).index(True) + 1 for i in range(80)] # darknet to coco + # x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)] # coco to darknet + return [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + + +def xywh2ltwh(x): + """ + Convert the bounding box format from [x, y, w, h] to [x1, y1, w, h], where x1, y1 are the top-left coordinates. + + Args: + x (np.ndarray) or (torch.Tensor): The input tensor with the bounding box coordinates in the xywh format + Returns: + y (np.ndarray) or (torch.Tensor): The bounding box coordinates in the xyltwh format + """ + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x + y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y + return y + + +def xyxy2ltwh(x): + """ + Convert nx4 bounding boxes from [x1, y1, x2, y2] to [x1, y1, w, h], where xy1=top-left, xy2=bottom-right + + Args: + x (np.ndarray) or (torch.Tensor): The input tensor with the bounding boxes coordinates in the xyxy format + Returns: + y (np.ndarray) or (torch.Tensor): The bounding box coordinates in the xyltwh format. + """ + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[:, 2] = x[:, 2] - x[:, 0] # width + y[:, 3] = x[:, 3] - x[:, 1] # height + return y + + +def ltwh2xywh(x): + """ + Convert nx4 boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center + + Args: + x (torch.Tensor): the input tensor + """ + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[:, 0] = x[:, 0] + x[:, 2] / 2 # center x + y[:, 1] = x[:, 1] + x[:, 3] / 2 # center y + return y + + +def ltwh2xyxy(x): + """ + It converts the bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right + + Args: + x (np.ndarray) or (torch.Tensor): the input image + + Returns: + y (np.ndarray) or (torch.Tensor): the xyxy coordinates of the bounding boxes. + """ + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[:, 2] = x[:, 2] + x[:, 0] # width + y[:, 3] = x[:, 3] + x[:, 1] # height + return y + + +def xyxy2xywh(x): + # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[..., 0] = (x[..., 0] + x[..., 2]) / 2 # x center + y[..., 1] = (x[..., 1] + x[..., 3]) / 2 # y center + y[..., 2] = x[..., 2] - x[..., 0] # width + y[..., 3] = x[..., 3] - x[..., 1] # height + return y + + +def xywh2xyxy(x): + # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[..., 0] = x[..., 0] - x[..., 2] / 2 # top left x + y[..., 1] = x[..., 1] - x[..., 3] / 2 # top left y + y[..., 2] = x[..., 0] + x[..., 2] / 2 # bottom right x + y[..., 3] = x[..., 1] + x[..., 3] / 2 # bottom right y + return y + + +def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0): + # Convert nx4 boxes from [x, y, w, h] normalized to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[..., 0] = w * (x[..., 0] - x[..., 2] / 2) + padw # top left x + y[..., 1] = h * (x[..., 1] - x[..., 3] / 2) + padh # top left y + y[..., 2] = w * (x[..., 0] + x[..., 2] / 2) + padw # bottom right x + y[..., 3] = h * (x[..., 1] + x[..., 3] / 2) + padh # bottom right y + return y + + +def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0): + # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right + if clip: + clip_boxes(x, (h - eps, w - eps)) # warning: inplace clip + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[..., 0] = ((x[..., 0] + x[..., 2]) / 2) / w # x center + y[..., 1] = ((x[..., 1] + x[..., 3]) / 2) / h # y center + y[..., 2] = (x[..., 2] - x[..., 0]) / w # width + y[..., 3] = (x[..., 3] - x[..., 1]) / h # height + return y + + +def xyn2xy(x, w=640, h=640, padw=0, padh=0): + # Convert normalized segments into pixel segments, shape (n,2) + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[..., 0] = w * x[..., 0] + padw # top left x + y[..., 1] = h * x[..., 1] + padh # top left y + return y + + +def segment2box(segment, width=640, height=640): + # Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy) + x, y = segment.T # segment xy + inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height) + x, y, = x[inside], y[inside] + return np.array([x.min(), y.min(), x.max(), y.max()]) if any(x) else np.zeros((1, 4)) # xyxy + + +def segments2boxes(segments): + # Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh) + boxes = [] + for s in segments: + x, y = s.T # segment xy + boxes.append([x.min(), y.min(), x.max(), y.max()]) # cls, xyxy + return xyxy2xywh(np.array(boxes)) # cls, xywh + + +def resample_segments(segments, n=1000): + # Up-sample an (n,2) segment + for i, s in enumerate(segments): + s = np.concatenate((s, s[0:1, :]), axis=0) + x = np.linspace(0, len(s) - 1, n) + xp = np.arange(len(s)) + segments[i] = np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)]).reshape(2, -1).T # segment xy + return segments + + +def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None): + # Rescale boxes (xyxy) from img1_shape to img0_shape + if ratio_pad is None: # calculate from img0_shape + gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new + pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding + else: + gain = ratio_pad[0][0] + pad = ratio_pad[1] + + boxes[:, [0, 2]] -= pad[0] # x padding + boxes[:, [1, 3]] -= pad[1] # y padding + boxes[:, :4] /= gain + clip_boxes(boxes, img0_shape) + return boxes + + +def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None, normalize=False): + # Rescale coords (xyxy) from img1_shape to img0_shape + if ratio_pad is None: # calculate from img0_shape + gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new + pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding + else: + gain = ratio_pad[0][0] + pad = ratio_pad[1] + + segments[:, 0] -= pad[0] # x padding + segments[:, 1] -= pad[1] # y padding + segments /= gain + clip_segments(segments, img0_shape) + if normalize: + segments[:, 0] /= img0_shape[1] # width + segments[:, 1] /= img0_shape[0] # height + return segments + + +def clip_boxes(boxes, shape): + # Clip boxes (xyxy) to image shape (height, width) + if isinstance(boxes, torch.Tensor): # faster individually + boxes[:, 0].clamp_(0, shape[1]) # x1 + boxes[:, 1].clamp_(0, shape[0]) # y1 + boxes[:, 2].clamp_(0, shape[1]) # x2 + boxes[:, 3].clamp_(0, shape[0]) # y2 + else: # np.array (faster grouped) + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 + + +def clip_segments(segments, shape): + # Clip segments (xy1,xy2,...) to image shape (height, width) + if isinstance(segments, torch.Tensor): # faster individually + segments[:, 0].clamp_(0, shape[1]) # x + segments[:, 1].clamp_(0, shape[0]) # y + else: # np.array (faster grouped) + segments[:, 0] = segments[:, 0].clip(0, shape[1]) # x + segments[:, 1] = segments[:, 1].clip(0, shape[0]) # y + + +def non_max_suppression( + prediction, + conf_thres=0.25, + iou_thres=0.45, + classes=None, + agnostic=False, + multi_label=False, + labels=(), + max_det=300, + nm=0, # number of masks +): + """Non-Maximum Suppression (NMS) on inference results to reject overlapping detections + + Returns: + list of detections, on (n,6) tensor per image [xyxy, conf, cls] + """ + if isinstance(prediction, (list, tuple)): # YOLO model in validation model, output = (inference_out, loss_out) + prediction = prediction[0] # select only inference output + + device = prediction.device + mps = 'mps' in device.type # Apple MPS + if mps: # MPS not fully supported yet, convert tensors to CPU before NMS + prediction = prediction.cpu() + bs = prediction.shape[0] # batch size + nc = prediction.shape[1] - nm - 4 # number of classes + mi = 4 + nc # mask start index + xc = prediction[:, 4:mi].amax(1) > conf_thres # candidates + + # Checks + assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0' + assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0' + + # Settings + # min_wh = 2 # (pixels) minimum box width and height + max_wh = 7680 # (pixels) maximum box width and height + max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() + time_limit = 2.5 + 0.05 * bs # seconds to quit after + redundant = True # require redundant detections + multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) + merge = False # use merge-NMS + + t = time.time() + output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs + for xi, x in enumerate(prediction): # image index, image inference + # Apply constraints + # x[((x[:, 2:4] < min_wh) | (x[:, 2:4] > max_wh)).any(1), 4] = 0 # width-height + x = x.T[xc[xi]] # confidence + + # Cat apriori labels if autolabelling + if labels and len(labels[xi]): + lb = labels[xi] + v = torch.zeros((len(lb), nc + nm + 5), device=x.device) + v[:, :4] = lb[:, 1:5] # box + v[range(len(lb)), lb[:, 0].long() + 4] = 1.0 # cls + x = torch.cat((x, v), 0) + + # If none remain process next image + if not x.shape[0]: + continue + + # Detections matrix nx6 (xyxy, conf, cls) + box, cls, mask = x.split((4, nc, nm), 1) + box = xywh2xyxy(box) # center_x, center_y, width, height) to (x1, y1, x2, y2) + if multi_label: + i, j = (cls > conf_thres).nonzero(as_tuple=False).T + x = torch.cat((box[i], x[i, 4 + j, None], j[:, None].float(), mask[i]), 1) + else: # best class only + conf, j = cls.max(1, keepdim=True) + x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres] + + # Filter by class + if classes is not None: + x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] + + # Apply finite constraint + # if not torch.isfinite(x).all(): + # x = x[torch.isfinite(x).all(1)] + + # Check shape + n = x.shape[0] # number of boxes + if not n: # no boxes + continue + elif n > max_nms: # excess boxes + x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence + else: + x = x[x[:, 4].argsort(descending=True)] # sort by confidence + + # Batched NMS + c = x[:, 5:6] * (0 if agnostic else max_wh) # classes + boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores + i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS + if i.shape[0] > max_det: # limit detections + i = i[:max_det] + if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) + # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) + iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix + weights = iou * scores[None] # box weights + x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes + if redundant: + i = i[iou.sum(1) > 1] # require redundancy + + output[xi] = x[i] + if mps: + output[xi] = output[xi].to(device) + if (time.time() - t) > time_limit: + LOGGER.warning(f'WARNING ⚠️ NMS time limit {time_limit:.3f}s exceeded') + break # time limit exceeded + + return output + + +def strip_optimizer(f='best.pt', s=''): # from utils.general import *; strip_optimizer() + # Strip optimizer from 'f' to finalize training, optionally save as 's' + x = torch.load(f, map_location=torch.device('cpu')) + if x.get('ema'): + x['model'] = x['ema'] # replace model with ema + for k in 'optimizer', 'best_fitness', 'ema', 'updates': # keys + x[k] = None + x['epoch'] = -1 + x['model'].half() # to FP16 + for p in x['model'].parameters(): + p.requires_grad = False + torch.save(x, s or f) + mb = os.path.getsize(s or f) / 1E6 # filesize + LOGGER.info(f"Optimizer stripped from {f},{f' saved as {s},' if s else ''} {mb:.1f}MB") + + +def print_mutation(keys, results, hyp, save_dir, bucket, prefix=colorstr('evolve: ')): + evolve_csv = save_dir / 'evolve.csv' + evolve_yaml = save_dir / 'hyp_evolve.yaml' + keys = tuple(keys) + tuple(hyp.keys()) # [results + hyps] + keys = tuple(x.strip() for x in keys) + vals = results + tuple(hyp.values()) + n = len(keys) + + # Download (optional) + if bucket: + url = f'gs://{bucket}/evolve.csv' + if gsutil_getsize(url) > (evolve_csv.stat().st_size if evolve_csv.exists() else 0): + os.system(f'gsutil cp {url} {save_dir}') # download evolve.csv if larger than local + + # Log to evolve.csv + s = '' if evolve_csv.exists() else (('%20s,' * n % keys).rstrip(',') + '\n') # add header + with open(evolve_csv, 'a') as f: + f.write(s + ('%20.5g,' * n % vals).rstrip(',') + '\n') + + # Save yaml + with open(evolve_yaml, 'w') as f: + data = pd.read_csv(evolve_csv) + data = data.rename(columns=lambda x: x.strip()) # strip keys + i = np.argmax(fitness(data.values[:, :4])) # + generations = len(data) + f.write('# YOLO Hyperparameter Evolution Results\n' + f'# Best generation: {i}\n' + + f'# Last generation: {generations - 1}\n' + '# ' + ', '.join(f'{x.strip():>20s}' for x in keys[:7]) + + '\n' + '# ' + ', '.join(f'{x:>20.5g}' for x in data.values[i, :7]) + '\n\n') + yaml.safe_dump(data.loc[i][7:].to_dict(), f, sort_keys=False) + + # Print to screen + LOGGER.info(prefix + f'{generations} generations finished, current result:\n' + prefix + + ', '.join(f'{x.strip():>20s}' for x in keys) + '\n' + prefix + ', '.join(f'{x:20.5g}' + for x in vals) + '\n\n') + + if bucket: + os.system(f'gsutil cp {evolve_csv} {evolve_yaml} gs://{bucket}') # upload + + +def apply_classifier(x, model, img, im0): + # Apply a second stage classifier to YOLO outputs + # Example model = torchvision.models.__dict__['efficientnet_b0'](pretrained=True).to(device).eval() + im0 = [im0] if isinstance(im0, np.ndarray) else im0 + for i, d in enumerate(x): # per image + if d is not None and len(d): + d = d.clone() + + # Reshape and pad cutouts + b = xyxy2xywh(d[:, :4]) # boxes + b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1) # rectangle to square + b[:, 2:] = b[:, 2:] * 1.3 + 30 # pad + d[:, :4] = xywh2xyxy(b).long() + + # Rescale boxes from img_size to im0 size + scale_boxes(img.shape[2:], d[:, :4], im0[i].shape) + + # Classes + pred_cls1 = d[:, 5].long() + ims = [] + for a in d: + cutout = im0[i][int(a[1]):int(a[3]), int(a[0]):int(a[2])] + im = cv2.resize(cutout, (224, 224)) # BGR + + im = im[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 + im = np.ascontiguousarray(im, dtype=np.float32) # uint8 to float32 + im /= 255 # 0 - 255 to 0.0 - 1.0 + ims.append(im) + + pred_cls2 = model(torch.Tensor(ims).to(d.device)).argmax(1) # classifier prediction + x[i] = x[i][pred_cls1 == pred_cls2] # retain matching class detections + + return x + + +def increment_path(path, exist_ok=False, sep='', mkdir=False): + # Increment file or directory path, i.e. runs/exp --> runs/exp{sep}2, runs/exp{sep}3, ... etc. + path = Path(path) # os-agnostic + if path.exists() and not exist_ok: + path, suffix = (path.with_suffix(''), path.suffix) if path.is_file() else (path, '') + + # Method 1 + for n in range(2, 9999): + p = f'{path}{sep}{n}{suffix}' # increment path + if not os.path.exists(p): # + break + path = Path(p) + + # Method 2 (deprecated) + # dirs = glob.glob(f"{path}{sep}*") # similar paths + # matches = [re.search(rf"{path.stem}{sep}(\d+)", d) for d in dirs] + # i = [int(m.groups()[0]) for m in matches if m] # indices + # n = max(i) + 1 if i else 2 # increment number + # path = Path(f"{path}{sep}{n}{suffix}") # increment path + + if mkdir: + path.mkdir(parents=True, exist_ok=True) # make directory + + return path + + +# OpenCV Chinese-friendly functions ------------------------------------------------------------------------------------ +imshow_ = cv2.imshow # copy to avoid recursion errors + + +def imread(path, flags=cv2.IMREAD_COLOR): + return cv2.imdecode(np.fromfile(path, np.uint8), flags) + + +def imwrite(path, im): + try: + cv2.imencode(Path(path).suffix, im)[1].tofile(path) + return True + except Exception: + return False + + +def imshow(path, im): + imshow_(path.encode('unicode_escape').decode(), im) + + +cv2.imread, cv2.imwrite, cv2.imshow = imread, imwrite, imshow # redefine + +# Variables ------------------------------------------------------------------------------------------------------------ diff --git a/utils/.ipynb_checkpoints/instance-checkpoint.py b/utils/.ipynb_checkpoints/instance-checkpoint.py new file mode 100644 index 000000000..89d41ee67 --- /dev/null +++ b/utils/.ipynb_checkpoints/instance-checkpoint.py @@ -0,0 +1,390 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +from collections import abc +from itertools import repeat +from numbers import Number +from typing import List + +import numpy as np + +from .general import ltwh2xywh, ltwh2xyxy, resample_segments, xywh2ltwh, xywh2xyxy, xyxy2ltwh, xyxy2xywh + + +def _ntuple(n): + """From PyTorch internals.""" + + def parse(x): + """Parse bounding boxes format between XYWH and LTWH.""" + return x if isinstance(x, abc.Iterable) else tuple(repeat(x, n)) + + return parse + + +to_4tuple = _ntuple(4) + +# `xyxy` means left top and right bottom +# `xywh` means center x, center y and width, height(yolo format) +# `ltwh` means left top and width, height(coco format) +_formats = ['xyxy', 'xywh', 'ltwh'] + +__all__ = 'Bboxes', # tuple or list + + +class Bboxes: + """Now only numpy is supported.""" + + def __init__(self, bboxes, format='xyxy') -> None: + assert format in _formats, f'Invalid bounding box format: {format}, format must be one of {_formats}' + bboxes = bboxes[None, :] if bboxes.ndim == 1 else bboxes + assert bboxes.ndim == 2 + assert bboxes.shape[1] == 4 + self.bboxes = bboxes + self.format = format + # self.normalized = normalized + + # def convert(self, format): + # assert format in _formats + # if self.format == format: + # bboxes = self.bboxes + # elif self.format == "xyxy": + # if format == "xywh": + # bboxes = xyxy2xywh(self.bboxes) + # else: + # bboxes = xyxy2ltwh(self.bboxes) + # elif self.format == "xywh": + # if format == "xyxy": + # bboxes = xywh2xyxy(self.bboxes) + # else: + # bboxes = xywh2ltwh(self.bboxes) + # else: + # if format == "xyxy": + # bboxes = ltwh2xyxy(self.bboxes) + # else: + # bboxes = ltwh2xywh(self.bboxes) + # + # return Bboxes(bboxes, format) + + def convert(self, format): + """Converts bounding box format from one type to another.""" + assert format in _formats, f'Invalid bounding box format: {format}, format must be one of {_formats}' + if self.format == format: + return + elif self.format == 'xyxy': + bboxes = xyxy2xywh(self.bboxes) if format == 'xywh' else xyxy2ltwh(self.bboxes) + elif self.format == 'xywh': + bboxes = xywh2xyxy(self.bboxes) if format == 'xyxy' else xywh2ltwh(self.bboxes) + else: + bboxes = ltwh2xyxy(self.bboxes) if format == 'xyxy' else ltwh2xywh(self.bboxes) + self.bboxes = bboxes + self.format = format + + def areas(self): + """Return box areas.""" + self.convert('xyxy') + return (self.bboxes[:, 2] - self.bboxes[:, 0]) * (self.bboxes[:, 3] - self.bboxes[:, 1]) + + # def denormalize(self, w, h): + # if not self.normalized: + # return + # assert (self.bboxes <= 1.0).all() + # self.bboxes[:, 0::2] *= w + # self.bboxes[:, 1::2] *= h + # self.normalized = False + # + # def normalize(self, w, h): + # if self.normalized: + # return + # assert (self.bboxes > 1.0).any() + # self.bboxes[:, 0::2] /= w + # self.bboxes[:, 1::2] /= h + # self.normalized = True + + def mul(self, scale): + """ + Args: + scale (tuple) or (list) or (int): the scale for four coords. + """ + if isinstance(scale, Number): + scale = to_4tuple(scale) + assert isinstance(scale, (tuple, list)) + assert len(scale) == 4 + self.bboxes[:, 0] *= scale[0] + self.bboxes[:, 1] *= scale[1] + self.bboxes[:, 2] *= scale[2] + self.bboxes[:, 3] *= scale[3] + + def add(self, offset): + """ + Args: + offset (tuple) or (list) or (int): the offset for four coords. + """ + if isinstance(offset, Number): + offset = to_4tuple(offset) + assert isinstance(offset, (tuple, list)) + assert len(offset) == 4 + self.bboxes[:, 0] += offset[0] + self.bboxes[:, 1] += offset[1] + self.bboxes[:, 2] += offset[2] + self.bboxes[:, 3] += offset[3] + + def __len__(self): + """Return the number of boxes.""" + return len(self.bboxes) + + @classmethod + def concatenate(cls, boxes_list: List['Bboxes'], axis=0) -> 'Bboxes': + """ + Concatenate a list of Bboxes objects into a single Bboxes object. + + Args: + boxes_list (List[Bboxes]): A list of Bboxes objects to concatenate. + axis (int, optional): The axis along which to concatenate the bounding boxes. + Defaults to 0. + + Returns: + Bboxes: A new Bboxes object containing the concatenated bounding boxes. + + Note: + The input should be a list or tuple of Bboxes objects. + """ + assert isinstance(boxes_list, (list, tuple)) + if not boxes_list: + return cls(np.empty(0)) + assert all(isinstance(box, Bboxes) for box in boxes_list) + + if len(boxes_list) == 1: + return boxes_list[0] + return cls(np.concatenate([b.bboxes for b in boxes_list], axis=axis)) + + def __getitem__(self, index) -> 'Bboxes': + """ + Retrieve a specific bounding box or a set of bounding boxes using indexing. + + Args: + index (int, slice, or np.ndarray): The index, slice, or boolean array to select + the desired bounding boxes. + + Returns: + Bboxes: A new Bboxes object containing the selected bounding boxes. + + Raises: + AssertionError: If the indexed bounding boxes do not form a 2-dimensional matrix. + + Note: + When using boolean indexing, make sure to provide a boolean array with the same + length as the number of bounding boxes. + """ + if isinstance(index, int): + return Bboxes(self.bboxes[index].view(1, -1)) + b = self.bboxes[index] + assert b.ndim == 2, f'Indexing on Bboxes with {index} failed to return a matrix!' + return Bboxes(b) + + +class Instances: + + def __init__(self, bboxes, segments=None, keypoints=None, bbox_format='xywh', normalized=True) -> None: + """ + Args: + bboxes (ndarray): bboxes with shape [N, 4]. + segments (list | ndarray): segments. + keypoints (ndarray): keypoints(x, y, visible) with shape [N, 17, 3]. + """ + if segments is None: + segments = [] + self._bboxes = Bboxes(bboxes=bboxes, format=bbox_format) + self.keypoints = keypoints + self.normalized = normalized + + if len(segments) > 0: + # list[np.array(1000, 2)] * num_samples + segments = resample_segments(segments) + # (N, 1000, 2) + segments = np.stack(segments, axis=0) + else: + segments = np.zeros((0, 1000, 2), dtype=np.float32) + self.segments = segments + + def convert_bbox(self, format): + """Convert bounding box format.""" + self._bboxes.convert(format=format) + + def bbox_areas(self): + """Calculate the area of bounding boxes.""" + self._bboxes.areas() + + def scale(self, scale_w, scale_h, bbox_only=False): + """this might be similar with denormalize func but without normalized sign.""" + self._bboxes.mul(scale=(scale_w, scale_h, scale_w, scale_h)) + if bbox_only: + return + self.segments[..., 0] *= scale_w + self.segments[..., 1] *= scale_h + if self.keypoints is not None: + self.keypoints[..., 0] *= scale_w + self.keypoints[..., 1] *= scale_h + + def denormalize(self, w, h): + """Denormalizes boxes, segments, and keypoints from normalized coordinates.""" + if not self.normalized: + return + self._bboxes.mul(scale=(w, h, w, h)) + self.segments[..., 0] *= w + self.segments[..., 1] *= h + if self.keypoints is not None: + self.keypoints[..., 0] *= w + self.keypoints[..., 1] *= h + self.normalized = False + + def normalize(self, w, h): + """Normalize bounding boxes, segments, and keypoints to image dimensions.""" + if self.normalized: + return + self._bboxes.mul(scale=(1 / w, 1 / h, 1 / w, 1 / h)) + self.segments[..., 0] /= w + self.segments[..., 1] /= h + if self.keypoints is not None: + self.keypoints[..., 0] /= w + self.keypoints[..., 1] /= h + self.normalized = True + + def add_padding(self, padw, padh): + """Handle rect and mosaic situation.""" + assert not self.normalized, 'you should add padding with absolute coordinates.' + self._bboxes.add(offset=(padw, padh, padw, padh)) + self.segments[..., 0] += padw + self.segments[..., 1] += padh + if self.keypoints is not None: + self.keypoints[..., 0] += padw + self.keypoints[..., 1] += padh + + def __getitem__(self, index) -> 'Instances': + """ + Retrieve a specific instance or a set of instances using indexing. + + Args: + index (int, slice, or np.ndarray): The index, slice, or boolean array to select + the desired instances. + + Returns: + Instances: A new Instances object containing the selected bounding boxes, + segments, and keypoints if present. + + Note: + When using boolean indexing, make sure to provide a boolean array with the same + length as the number of instances. + """ + segments = self.segments[index] if len(self.segments) else self.segments + keypoints = self.keypoints[index] if self.keypoints is not None else None + bboxes = self.bboxes[index] + bbox_format = self._bboxes.format + return Instances( + bboxes=bboxes, + segments=segments, + keypoints=keypoints, + bbox_format=bbox_format, + normalized=self.normalized, + ) + + def flipud(self, h): + """Flips the coordinates of bounding boxes, segments, and keypoints vertically.""" + if self._bboxes.format == 'xyxy': + y1 = self.bboxes[:, 1].copy() + y2 = self.bboxes[:, 3].copy() + self.bboxes[:, 1] = h - y2 + self.bboxes[:, 3] = h - y1 + else: + self.bboxes[:, 1] = h - self.bboxes[:, 1] + self.segments[..., 1] = h - self.segments[..., 1] + if self.keypoints is not None: + self.keypoints[..., 1] = h - self.keypoints[..., 1] + + def fliplr(self, w): + """Reverses the order of the bounding boxes and segments horizontally.""" + if self._bboxes.format == 'xyxy': + x1 = self.bboxes[:, 0].copy() + x2 = self.bboxes[:, 2].copy() + self.bboxes[:, 0] = w - x2 + self.bboxes[:, 2] = w - x1 + else: + self.bboxes[:, 0] = w - self.bboxes[:, 0] + self.segments[..., 0] = w - self.segments[..., 0] + if self.keypoints is not None: + self.keypoints[..., 0] = w - self.keypoints[..., 0] + + def clip(self, w, h): + """Clips bounding boxes, segments, and keypoints values to stay within image boundaries.""" + ori_format = self._bboxes.format + self.convert_bbox(format='xyxy') + self.bboxes[:, [0, 2]] = self.bboxes[:, [0, 2]].clip(0, w) + self.bboxes[:, [1, 3]] = self.bboxes[:, [1, 3]].clip(0, h) + if ori_format != 'xyxy': + self.convert_bbox(format=ori_format) + self.segments[..., 0] = self.segments[..., 0].clip(0, w) + self.segments[..., 1] = self.segments[..., 1].clip(0, h) + if self.keypoints is not None: + self.keypoints[..., 0] = self.keypoints[..., 0].clip(0, w) + self.keypoints[..., 1] = self.keypoints[..., 1].clip(0, h) + + def remove_zero_area_boxes(self): + """Remove zero-area boxes, i.e. after clipping some boxes may have zero width or height. This removes them.""" + good = self._bboxes.areas() > 0 + if not all(good): + self._bboxes = Bboxes(self._bboxes.bboxes[good], format=self._bboxes.format) + if len(self.segments): + self.segments = self.segments[good] + if self.keypoints is not None: + self.keypoints = self.keypoints[good] + return good + + def update(self, bboxes, segments=None, keypoints=None): + """Updates instance variables.""" + self._bboxes = Bboxes(bboxes, format=self._bboxes.format) + if segments is not None: + self.segments = segments + if keypoints is not None: + self.keypoints = keypoints + + def __len__(self): + """Return the length of the instance list.""" + return len(self.bboxes) + + @classmethod + def concatenate(cls, instances_list: List['Instances'], axis=0) -> 'Instances': + """ + Concatenates a list of Instances objects into a single Instances object. + + Args: + instances_list (List[Instances]): A list of Instances objects to concatenate. + axis (int, optional): The axis along which the arrays will be concatenated. Defaults to 0. + + Returns: + Instances: A new Instances object containing the concatenated bounding boxes, + segments, and keypoints if present. + + Note: + The `Instances` objects in the list should have the same properties, such as + the format of the bounding boxes, whether keypoints are present, and if the + coordinates are normalized. + """ + assert isinstance(instances_list, (list, tuple)) + if not instances_list: + return cls(np.empty(0)) + assert all(isinstance(instance, Instances) for instance in instances_list) + + if len(instances_list) == 1: + return instances_list[0] + + use_keypoint = instances_list[0].keypoints is not None + bbox_format = instances_list[0]._bboxes.format + normalized = instances_list[0].normalized + + cat_boxes = np.concatenate([ins.bboxes for ins in instances_list], axis=axis) + cat_segments = np.concatenate([b.segments for b in instances_list], axis=axis) + cat_keypoints = np.concatenate([b.keypoints for b in instances_list], axis=axis) if use_keypoint else None + return cls(cat_boxes, cat_segments, cat_keypoints, bbox_format, normalized) + + @property + def bboxes(self): + """Return bounding boxes.""" + return self._bboxes.bboxes diff --git a/utils/.ipynb_checkpoints/metrics-checkpoint.py b/utils/.ipynb_checkpoints/metrics-checkpoint.py new file mode 100644 index 000000000..cc6d81141 --- /dev/null +++ b/utils/.ipynb_checkpoints/metrics-checkpoint.py @@ -0,0 +1,395 @@ +import math +import warnings +from pathlib import Path + +import matplotlib.pyplot as plt +import numpy as np +import torch + +from utils import TryExcept, threaded + + +def fitness(x): + # Model fitness as a weighted combination of metrics + w = [0.5, 0.5, 0.0, 0.0] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] + return (x[:, :4] * w).sum(1) + + +def smooth(y, f=0.05): + # Box filter of fraction f + nf = round(len(y) * f * 2) // 2 + 1 # number of filter elements (must be odd) + p = np.ones(nf // 2) # ones padding + yp = np.concatenate((p * y[0], y, p * y[-1]), 0) # y padded + return np.convolve(yp, np.ones(nf) / nf, mode='valid') # y-smoothed + + +def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=(), eps=1e-16, prefix=""): + """ Compute the average precision, given the recall and precision curves. + Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. + # Arguments + tp: True positives (nparray, nx1 or nx10). + conf: Objectness value from 0-1 (nparray). + pred_cls: Predicted object classes (nparray). + target_cls: True object classes (nparray). + plot: Plot precision-recall curve at mAP@0.5 + save_dir: Plot save directory + # Returns + The average precision as computed in py-faster-rcnn. + """ + + # Sort by objectness + i = np.argsort(-conf) + tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] + + # Find unique classes + unique_classes, nt = np.unique(target_cls, return_counts=True) + nc = unique_classes.shape[0] # number of classes, number of detections + + # Create Precision-Recall curve and compute AP for each class + px, py = np.linspace(0, 1, 1000), [] # for plotting + ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000)) + for ci, c in enumerate(unique_classes): + i = pred_cls == c + n_l = nt[ci] # number of labels + n_p = i.sum() # number of predictions + if n_p == 0 or n_l == 0: + continue + + # Accumulate FPs and TPs + fpc = (1 - tp[i]).cumsum(0) + tpc = tp[i].cumsum(0) + + # Recall + recall = tpc / (n_l + eps) # recall curve + r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases + + # Precision + precision = tpc / (tpc + fpc) # precision curve + p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score + + # AP from recall-precision curve + for j in range(tp.shape[1]): + ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) + if plot and j == 0: + py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 + + # Compute F1 (harmonic mean of precision and recall) + f1 = 2 * p * r / (p + r + eps) + names = [v for k, v in names.items() if k in unique_classes] # list: only classes that have data + names = dict(enumerate(names)) # to dict + if plot: + plot_pr_curve(px, py, ap, Path(save_dir) / f'{prefix}PR_curve.png', names) + plot_mc_curve(px, f1, Path(save_dir) / f'{prefix}F1_curve.png', names, ylabel='F1') + plot_mc_curve(px, p, Path(save_dir) / f'{prefix}P_curve.png', names, ylabel='Precision') + plot_mc_curve(px, r, Path(save_dir) / f'{prefix}R_curve.png', names, ylabel='Recall') + + i = smooth(f1.mean(0), 0.1).argmax() # max F1 index + p, r, f1 = p[:, i], r[:, i], f1[:, i] + tp = (r * nt).round() # true positives + fp = (tp / (p + eps) - tp).round() # false positives + return tp, fp, p, r, f1, ap, unique_classes.astype(int) + + +def compute_ap(recall, precision): + """ Compute the average precision, given the recall and precision curves + # Arguments + recall: The recall curve (list) + precision: The precision curve (list) + # Returns + Average precision, precision curve, recall curve + """ + + # Append sentinel values to beginning and end + mrec = np.concatenate(([0.0], recall, [1.0])) + mpre = np.concatenate(([1.0], precision, [0.0])) + + # Compute the precision envelope + mpre = np.flip(np.maximum.accumulate(np.flip(mpre))) + + # Integrate area under curve + method = 'interp' # methods: 'continuous', 'interp' + if method == 'interp': + x = np.linspace(0, 1, 101) # 101-point interp (COCO) + ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate + else: # 'continuous' + i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes + ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve + + return ap, mpre, mrec + + +class ConfusionMatrix: + # Updated version of https://github.com/kaanakan/object_detection_confusion_matrix + def __init__(self, nc, conf=0.25, iou_thres=0.45): + self.matrix = np.zeros((nc + 1, nc + 1)) + self.nc = nc # number of classes + self.conf = conf + self.iou_thres = iou_thres + + def process_batch(self, detections, labels): + if detections is None: + if labels is None: + self.matrix[self.nc, self.nc] += 1 + return + gt_classes = labels[:,0].int() + for gc in gt_classes: + self.matrix[self.nc, gc] += 1 # class FN + return + + detections = detections[detections[:, 4] > self.conf] + gt_classes = labels[:, 0].int() + + if labels.sum()==0: + for dc in detections: + self.matrix[dc[5].int(), self.nc] += 1 # class FP + return + detection_classes = detections[:, 5].int() + iou = box_iou(labels[:, 1:], detections[:, :4]) + + x = torch.where(iou > self.iou_thres) + if x[0].shape[0]: + matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() + if x[0].shape[0] > 1: + matches = matches[matches[:, 2].argsort()[::-1]] + matches = matches[np.unique(matches[:, 1], return_index=True)[1]] + matches = matches[matches[:, 2].argsort()[::-1]] + matches = matches[np.unique(matches[:, 0], return_index=True)[1]] + else: + matches = np.zeros((0, 3)) + + n = matches.shape[0] > 0 + m0, m1, _ = matches.transpose().astype(np.int16) + for i, gc in enumerate(gt_classes): + j = m0 == i + if n and sum(j) == 1: + self.matrix[detection_classes[m1[j]], gc] += 1 # correct + else: + self.matrix[self.nc, gc] += 1 # class FN + + for i, dc in enumerate(detection_classes): + if not any (m1==i): + self.matrix[dc, self.nc] += 1 # class FP + + def matrix(self): + return self.matrix + + def tp_fp(self): + tp = self.matrix.diagonal() # true positives + fp = self.matrix.sum(1) - tp # false positives + # fn = self.matrix.sum(0) - tp # false negatives (missed detections) + return tp[:-1], fp[:-1] # remove background class + + @TryExcept('WARNING ⚠️ ConfusionMatrix plot failure') + def plot(self, normalize=True, save_dir='', names=()): + import seaborn as sn + + array = self.matrix.astype(int) # / ((self.matrix.sum(0).reshape(1, -1) + 1E-9) if normalize else 1) # normalize columns + #array[array < 0.005] = np.nan # don't annotate (would appear as 0.00) + + fig, ax = plt.subplots(1, 1, figsize=(12, 9), tight_layout=True) + nc, nn = self.nc, len(names) # number of classes, names + sn.set(font_scale=1.0 if nc < 50 else 0.8) # for label size + labels = (0 < nn < 99) and (nn == nc) # apply names to ticklabels + ticklabels = (names + ['background']) if labels else "auto" + with warnings.catch_warnings(): + warnings.simplefilter('ignore') # suppress empty matrix RuntimeWarning: All-NaN slice encountered + sn.heatmap(array, + ax=ax, + annot=nc < 30, + annot_kws={ + "size": 8}, + cmap='Blues', + fmt='.2f', + square=True, + vmin=0.0, + xticklabels=ticklabels, + yticklabels=ticklabels).set_facecolor((1, 1, 1)) + ax.set_ylabel('True') + ax.set_ylabel('Predicted') + ax.set_title('Confusion Matrix') + fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250) + plt.close(fig) + + def print(self): + for i in range(self.nc + 1): + print(' '.join(map(str, self.matrix[i]))) + + +class WIoU_Scale: + ''' monotonous: { + None: origin v1 + True: monotonic FM v2 + False: non-monotonic FM v3 + } + momentum: The momentum of running mean''' + + iou_mean = 1. + monotonous = False + _momentum = 1 - 0.5 ** (1 / 7000) + _is_train = True + + def __init__(self, iou): + self.iou = iou + self._update(self) + + @classmethod + def _update(cls, self): + if cls._is_train: cls.iou_mean = (1 - cls._momentum) * cls.iou_mean + \ + cls._momentum * self.iou.detach().mean().item() + + @classmethod + def _scaled_loss(cls, self, gamma=1.9, delta=3): + if isinstance(self.monotonous, bool): + if self.monotonous: + return (self.iou.detach() / self.iou_mean).sqrt() + else: + beta = self.iou.detach() / self.iou_mean + alpha = delta * torch.pow(gamma, beta - delta) + return beta / alpha + return 1 + + +def bbox_iou(box1, box2, xywh=True, GIoU=False, DIoU=False, CIoU=False, MDPIoU=False, feat_h=640, feat_w=640, eps=1e-7): + # Returns Intersection over Union (IoU) of box1(1,4) to box2(n,4) + + # Get the coordinates of bounding boxes + if xywh: # transform from xywh to xyxy + (x1, y1, w1, h1), (x2, y2, w2, h2) = box1.chunk(4, -1), box2.chunk(4, -1) + w1_, h1_, w2_, h2_ = w1 / 2, h1 / 2, w2 / 2, h2 / 2 + b1_x1, b1_x2, b1_y1, b1_y2 = x1 - w1_, x1 + w1_, y1 - h1_, y1 + h1_ + b2_x1, b2_x2, b2_y1, b2_y2 = x2 - w2_, x2 + w2_, y2 - h2_, y2 + h2_ + else: # x1, y1, x2, y2 = box1 + b1_x1, b1_y1, b1_x2, b1_y2 = box1.chunk(4, -1) + b2_x1, b2_y1, b2_x2, b2_y2 = box2.chunk(4, -1) + w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps + w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps + + # Intersection area + inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \ + (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0) + + # Union Area + union = w1 * h1 + w2 * h2 - inter + eps + + # IoU + iou = inter / union + if CIoU or DIoU or GIoU: + cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width + ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height + if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 + c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared + rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center dist ** 2 + if CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 + v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2) + with torch.no_grad(): + alpha = v / (v - iou + (1 + eps)) + return iou - (rho2 / c2 + v * alpha) # CIoU + return iou - rho2 / c2 # DIoU + c_area = cw * ch + eps # convex area + return iou - (c_area - union) / c_area # GIoU https://arxiv.org/pdf/1902.09630.pdf + elif MDPIoU: + d1 = (b2_x1 - b1_x1) ** 2 + (b2_y1 - b1_y1) ** 2 + d2 = (b2_x2 - b1_x2) ** 2 + (b2_y2 - b1_y2) ** 2 + mpdiou_hw_pow = feat_h ** 2 + feat_w ** 2 + return iou - d1 / mpdiou_hw_pow - d2 / mpdiou_hw_pow # MPDIoU + return iou # IoU + + +def box_iou(box1, box2, eps=1e-7): + # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py + """ + Return intersection-over-union (Jaccard index) of boxes. + Both sets of boxes are expected to be in (x1, y1, x2, y2) format. + Arguments: + box1 (Tensor[N, 4]) + box2 (Tensor[M, 4]) + Returns: + iou (Tensor[N, M]): the NxM matrix containing the pairwise + IoU values for every element in boxes1 and boxes2 + """ + + # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) + (a1, a2), (b1, b2) = box1.unsqueeze(1).chunk(2, 2), box2.unsqueeze(0).chunk(2, 2) + inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2) + + # IoU = inter / (area1 + area2 - inter) + return inter / ((a2 - a1).prod(2) + (b2 - b1).prod(2) - inter + eps) + + +def bbox_ioa(box1, box2, eps=1e-7): + """Returns the intersection over box2 area given box1, box2. Boxes are x1y1x2y2 + box1: np.array of shape(nx4) + box2: np.array of shape(mx4) + returns: np.array of shape(nxm) + """ + + # Get the coordinates of bounding boxes + b1_x1, b1_y1, b1_x2, b1_y2 = box1.T + b2_x1, b2_y1, b2_x2, b2_y2 = box2.T + + # Intersection area + inter_area = (np.minimum(b1_x2[:, None], b2_x2) - np.maximum(b1_x1[:, None], b2_x1)).clip(0) * \ + (np.minimum(b1_y2[:, None], b2_y2) - np.maximum(b1_y1[:, None], b2_y1)).clip(0) + + # box2 area + box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + eps + + # Intersection over box2 area + return inter_area / box2_area + + +def wh_iou(wh1, wh2, eps=1e-7): + # Returns the nxm IoU matrix. wh1 is nx2, wh2 is mx2 + wh1 = wh1[:, None] # [N,1,2] + wh2 = wh2[None] # [1,M,2] + inter = torch.min(wh1, wh2).prod(2) # [N,M] + return inter / (wh1.prod(2) + wh2.prod(2) - inter + eps) # iou = inter / (area1 + area2 - inter) + + +# Plots ---------------------------------------------------------------------------------------------------------------- + + +@threaded +def plot_pr_curve(px, py, ap, save_dir=Path('pr_curve.png'), names=()): + # Precision-recall curve + fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) + py = np.stack(py, axis=1) + + if 0 < len(names) < 21: # display per-class legend if < 21 classes + for i, y in enumerate(py.T): + ax.plot(px, y, linewidth=1, label=f'{names[i]} {ap[i, 0]:.3f}') # plot(recall, precision) + else: + ax.plot(px, py, linewidth=1, color='grey') # plot(recall, precision) + + ax.plot(px, py.mean(1), linewidth=3, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean()) + ax.set_xlabel('Recall') + ax.set_ylabel('Precision') + ax.set_xlim(0, 1) + ax.set_ylim(0, 1) + ax.legend(bbox_to_anchor=(1.04, 1), loc="upper left") + ax.set_title('Precision-Recall Curve') + fig.savefig(save_dir, dpi=250) + plt.close(fig) + + +@threaded +def plot_mc_curve(px, py, save_dir=Path('mc_curve.png'), names=(), xlabel='Confidence', ylabel='Metric'): + # Metric-confidence curve + fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) + + if 0 < len(names) < 21: # display per-class legend if < 21 classes + for i, y in enumerate(py): + ax.plot(px, y, linewidth=1, label=f'{names[i]}') # plot(confidence, metric) + else: + ax.plot(px, py.T, linewidth=1, color='grey') # plot(confidence, metric) + + y = smooth(py.mean(0), 0.05) + ax.plot(px, y, linewidth=3, color='blue', label=f'all classes {y.max():.2f} at {px[y.argmax()]:.3f}') + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + ax.set_xlim(0, 1) + ax.set_ylim(0, 1) + ax.legend(bbox_to_anchor=(1.04, 1), loc="upper left") + ax.set_title(f'{ylabel}-Confidence Curve') + fig.savefig(save_dir, dpi=250) + plt.close(fig) diff --git a/utils/__pycache__/__init__.cpython-38.pyc b/utils/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 000000000..038792d1c Binary files /dev/null and b/utils/__pycache__/__init__.cpython-38.pyc differ diff --git a/utils/__pycache__/augment.cpython-38.pyc b/utils/__pycache__/augment.cpython-38.pyc new file mode 100644 index 000000000..0b1ea8d3b Binary files /dev/null and b/utils/__pycache__/augment.cpython-38.pyc differ diff --git a/utils/__pycache__/augmentations.cpython-38.pyc b/utils/__pycache__/augmentations.cpython-38.pyc new file mode 100644 index 000000000..60b727446 Binary files /dev/null and b/utils/__pycache__/augmentations.cpython-38.pyc differ diff --git a/utils/__pycache__/autoanchor.cpython-38.pyc b/utils/__pycache__/autoanchor.cpython-38.pyc new file mode 100644 index 000000000..477cae779 Binary files /dev/null and b/utils/__pycache__/autoanchor.cpython-38.pyc differ diff --git a/utils/__pycache__/autobatch.cpython-38.pyc b/utils/__pycache__/autobatch.cpython-38.pyc new file mode 100644 index 000000000..7e9cc1cc5 Binary files /dev/null and b/utils/__pycache__/autobatch.cpython-38.pyc differ diff --git a/utils/__pycache__/callbacks.cpython-38.pyc b/utils/__pycache__/callbacks.cpython-38.pyc new file mode 100644 index 000000000..b59d401f7 Binary files /dev/null and b/utils/__pycache__/callbacks.cpython-38.pyc differ diff --git a/utils/__pycache__/dataloaders.cpython-38.pyc b/utils/__pycache__/dataloaders.cpython-38.pyc new file mode 100644 index 000000000..5a1873fde Binary files /dev/null and b/utils/__pycache__/dataloaders.cpython-38.pyc differ diff --git a/utils/__pycache__/downloads.cpython-38.pyc b/utils/__pycache__/downloads.cpython-38.pyc new file mode 100644 index 000000000..75a35ef8f Binary files /dev/null and b/utils/__pycache__/downloads.cpython-38.pyc differ diff --git a/utils/__pycache__/general.cpython-38.pyc b/utils/__pycache__/general.cpython-38.pyc new file mode 100644 index 000000000..af086b353 Binary files /dev/null and b/utils/__pycache__/general.cpython-38.pyc differ diff --git a/utils/__pycache__/instance.cpython-38.pyc b/utils/__pycache__/instance.cpython-38.pyc new file mode 100644 index 000000000..4e36cd58e Binary files /dev/null and b/utils/__pycache__/instance.cpython-38.pyc differ diff --git a/utils/__pycache__/lion.cpython-38.pyc b/utils/__pycache__/lion.cpython-38.pyc new file mode 100644 index 000000000..259be7657 Binary files /dev/null and b/utils/__pycache__/lion.cpython-38.pyc differ diff --git a/utils/__pycache__/loss_tal_dual.cpython-38.pyc b/utils/__pycache__/loss_tal_dual.cpython-38.pyc new file mode 100644 index 000000000..437ad0827 Binary files /dev/null and b/utils/__pycache__/loss_tal_dual.cpython-38.pyc differ diff --git a/utils/__pycache__/metrics.cpython-38.pyc b/utils/__pycache__/metrics.cpython-38.pyc new file mode 100644 index 000000000..c54b4869d Binary files /dev/null and b/utils/__pycache__/metrics.cpython-38.pyc differ diff --git a/utils/__pycache__/plots.cpython-38.pyc b/utils/__pycache__/plots.cpython-38.pyc new file mode 100644 index 000000000..6bed0fda9 Binary files /dev/null and b/utils/__pycache__/plots.cpython-38.pyc differ diff --git a/utils/__pycache__/torch_utils.cpython-38.pyc b/utils/__pycache__/torch_utils.cpython-38.pyc new file mode 100644 index 000000000..d509bebc7 Binary files /dev/null and b/utils/__pycache__/torch_utils.cpython-38.pyc differ diff --git a/utils/augment.py b/utils/augment.py new file mode 100644 index 000000000..1ac08117d --- /dev/null +++ b/utils/augment.py @@ -0,0 +1,970 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +import math +import random +from copy import deepcopy + +import cv2 +import numpy as np +import torch +import torchvision.transforms as T + +from utils.instance import Instances +from utils.general import LOGGER, check_version, colorstr, resample_segments, segment2box, xywhn2xyxy +from utils.metrics import bbox_ioa + +POSE_FLIPLR_INDEX = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] + + +def polygons2masks(imgsz, polygons, color, downsample_ratio=1): + """ + Args: + imgsz (tuple): The image size. + polygons (list[np.ndarray]): each polygon is [N, M], N is number of polygons, M is number of points (M % 2 = 0) + color (int): color + downsample_ratio (int): downsample ratio + """ + masks = [] + for si in range(len(polygons)): + mask = polygon2mask(imgsz, [polygons[si].reshape(-1)], color, downsample_ratio) + masks.append(mask) + return np.array(masks) +def polygons2masks_overlap(imgsz, segments, downsample_ratio=1): + """Return a (640, 640) overlap mask.""" + masks = np.zeros((imgsz[0] // downsample_ratio, imgsz[1] // downsample_ratio), + dtype=np.int32 if len(segments) > 255 else np.uint8) + areas = [] + ms = [] + for si in range(len(segments)): + mask = polygon2mask(imgsz, [segments[si].reshape(-1)], downsample_ratio=downsample_ratio, color=1) + ms.append(mask) + areas.append(mask.sum()) + areas = np.asarray(areas) + index = np.argsort(-areas) + ms = np.array(ms)[index] + for i in range(len(segments)): + mask = ms[i] * (i + 1) + masks = masks + mask + masks = np.clip(masks, a_min=0, a_max=i + 1) + return masks, index +def polygons2masks_multi(imgsz, segments, cls, downsample_ratio=1, nc=3): + """Return a (640, 640) overlap mask.""" + masks = np.zeros((nc, imgsz[0] // downsample_ratio, imgsz[1] // downsample_ratio), + dtype=np.int32 if len(segments) > 255 else np.uint8) + areas = [] + ms = [] + mc = [] + for si in range(len(segments)): + mask = polygon2mask(imgsz, [segments[si].reshape(-1)], downsample_ratio=downsample_ratio, color=1) + ms.append(mask) + mc.append(int(cls[si][0])) + areas.append(mask.sum()) + areas = np.asarray(areas) + index = np.argsort(-areas) + ms = np.array(ms)[index] + for i in range(len(segments)): + cls_id = mc[i] + masks[cls_id] = masks[cls_id] + ms[i] + masks[cls_id] = np.clip(masks[cls_id], a_min=0, a_max=1) + return masks, index + + +class Compose: + + def __init__(self, transforms): + """Initializes the Compose object with a list of transforms.""" + self.transforms = transforms + + def __call__(self, data): + """Applies a series of transformations to input data.""" + for t in self.transforms: + data = t(data) + return data + + def append(self, transform): + """Appends a new transform to the existing list of transforms.""" + self.transforms.append(transform) + + def tolist(self): + """Converts list of transforms to a standard Python list.""" + return self.transforms + + def __repr__(self): + """Return string representation of object.""" + format_string = f'{self.__class__.__name__}(' + for t in self.transforms: + format_string += '\n' + format_string += f' {t}' + format_string += '\n)' + return format_string + + +class BaseMixTransform: + """This implementation is from mmyolo.""" + + def __init__(self, dataset, pre_transform=None, p=0.0) -> None: + self.dataset = dataset + self.pre_transform = pre_transform + self.p = p + + def __call__(self, labels): + """Applies pre-processing transforms and mixup/mosaic transforms to labels data.""" + if random.uniform(0, 1) > self.p: + return labels + + # Get index of one or three other images + indexes = self.get_indexes() + if isinstance(indexes, int): + indexes = [indexes] + + # Get images information will be used for Mosaic or MixUp + mix_labels = [self.dataset.get_image_and_label(i) for i in indexes] + + if self.pre_transform is not None: + for i, data in enumerate(mix_labels): + mix_labels[i] = self.pre_transform(data) + labels['mix_labels'] = mix_labels + + # Mosaic or MixUp + labels = self._mix_transform(labels) + labels.pop('mix_labels', None) + return labels + + def _mix_transform(self, labels): + """Applies MixUp or Mosaic augmentation to the label dictionary.""" + raise NotImplementedError + + def get_indexes(self): + """Gets a list of shuffled indexes for mosaic augmentation.""" + raise NotImplementedError + + +class Mosaic(BaseMixTransform): + """ + Mosaic augmentation. + + This class performs mosaic augmentation by combining multiple (4 or 9) images into a single mosaic image. + The augmentation is applied to a dataset with a given probability. + + Attributes: + dataset: The dataset on which the mosaic augmentation is applied. + imgsz (int, optional): Image size (height and width) after mosaic pipeline of a single image. Default to 640. + p (float, optional): Probability of applying the mosaic augmentation. Must be in the range 0-1. Default to 1.0. + n (int, optional): The grid size, either 4 (for 2x2) or 9 (for 3x3). + """ + + def __init__(self, dataset, imgsz=640, p=1.0, n=4): + """Initializes the object with a dataset, image size, probability, and border.""" + assert 0 <= p <= 1.0, f'The probability should be in range [0, 1], but got {p}.' + assert n in (4, 9), 'grid must be equal to 4 or 9.' + super().__init__(dataset=dataset, p=p) + self.dataset = dataset + self.imgsz = imgsz + self.border = (-imgsz // 2, -imgsz // 2) # width, height + self.n = n + + def get_indexes(self, buffer=True): + """Return a list of random indexes from the dataset.""" + if buffer: # select images from buffer + return random.choices(list(self.dataset.buffer), k=self.n - 1) + else: # select any images + return [random.randint(0, len(self.dataset) - 1) for _ in range(self.n - 1)] + + def _mix_transform(self, labels): + """Apply mixup transformation to the input image and labels.""" + assert labels.get('rect_shape', None) is None, 'rect and mosaic are mutually exclusive.' + assert len(labels.get('mix_labels', [])), 'There are no other images for mosaic augment.' + return self._mosaic4(labels) if self.n == 4 else self._mosaic9(labels) + + def _mosaic4(self, labels): + """Create a 2x2 image mosaic.""" + mosaic_labels = [] + s = self.imgsz + yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.border) # mosaic center x, y + for i in range(4): + labels_patch = labels if i == 0 else labels['mix_labels'][i - 1] + # Load image + img = labels_patch['img'] + h, w = labels_patch.pop('resized_shape') + + # Place img in img4 + if i == 0: # top left + img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles + x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) + x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) + elif i == 1: # top right + x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc + x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h + elif i == 2: # bottom left + x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h) + x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h) + elif i == 3: # bottom right + x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h) + x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) + + img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] + padw = x1a - x1b + padh = y1a - y1b + + labels_patch = self._update_labels(labels_patch, padw, padh) + mosaic_labels.append(labels_patch) + final_labels = self._cat_labels(mosaic_labels) + final_labels['img'] = img4 + return final_labels + + def _mosaic9(self, labels): + """Create a 3x3 image mosaic.""" + mosaic_labels = [] + s = self.imgsz + hp, wp = -1, -1 # height, width previous + for i in range(9): + labels_patch = labels if i == 0 else labels['mix_labels'][i - 1] + # Load image + img = labels_patch['img'] + h, w = labels_patch.pop('resized_shape') + + # Place img in img9 + if i == 0: # center + img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles + h0, w0 = h, w + c = s, s, s + w, s + h # xmin, ymin, xmax, ymax (base) coordinates + elif i == 1: # top + c = s, s - h, s + w, s + elif i == 2: # top right + c = s + wp, s - h, s + wp + w, s + elif i == 3: # right + c = s + w0, s, s + w0 + w, s + h + elif i == 4: # bottom right + c = s + w0, s + hp, s + w0 + w, s + hp + h + elif i == 5: # bottom + c = s + w0 - w, s + h0, s + w0, s + h0 + h + elif i == 6: # bottom left + c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h + elif i == 7: # left + c = s - w, s + h0 - h, s, s + h0 + elif i == 8: # top left + c = s - w, s + h0 - hp - h, s, s + h0 - hp + + padw, padh = c[:2] + x1, y1, x2, y2 = (max(x, 0) for x in c) # allocate coords + + # Image + img9[y1:y2, x1:x2] = img[y1 - padh:, x1 - padw:] # img9[ymin:ymax, xmin:xmax] + hp, wp = h, w # height, width previous for next iteration + + # Labels assuming imgsz*2 mosaic size + labels_patch = self._update_labels(labels_patch, padw + self.border[0], padh + self.border[1]) + mosaic_labels.append(labels_patch) + final_labels = self._cat_labels(mosaic_labels) + + final_labels['img'] = img9[-self.border[0]:self.border[0], -self.border[1]:self.border[1]] + return final_labels + + @staticmethod + def _update_labels(labels, padw, padh): + """Update labels.""" + nh, nw = labels['img'].shape[:2] + labels['instances'].convert_bbox(format='xyxy') + labels['instances'].denormalize(nw, nh) + labels['instances'].add_padding(padw, padh) + return labels + + def _cat_labels(self, mosaic_labels): + """Return labels with mosaic border instances clipped.""" + if len(mosaic_labels) == 0: + return {} + cls = [] + instances = [] + imgsz = self.imgsz * 2 # mosaic imgsz + for labels in mosaic_labels: + cls.append(labels['cls']) + instances.append(labels['instances']) + final_labels = { + 'im_file': mosaic_labels[0]['im_file'], + 'ori_shape': mosaic_labels[0]['ori_shape'], + 'resized_shape': (imgsz, imgsz), + 'cls': np.concatenate(cls, 0), + 'instances': Instances.concatenate(instances, axis=0), + 'mosaic_border': self.border} # final_labels + final_labels['instances'].clip(imgsz, imgsz) + good = final_labels['instances'].remove_zero_area_boxes() + final_labels['cls'] = final_labels['cls'][good] + return final_labels + + +class MixUp(BaseMixTransform): + + def __init__(self, dataset, pre_transform=None, p=0.0) -> None: + super().__init__(dataset=dataset, pre_transform=pre_transform, p=p) + + def get_indexes(self): + """Get a random index from the dataset.""" + return random.randint(0, len(self.dataset) - 1) + + def _mix_transform(self, labels): + """Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf.""" + r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0 + labels2 = labels['mix_labels'][0] + labels['img'] = (labels['img'] * r + labels2['img'] * (1 - r)).astype(np.uint8) + labels['instances'] = Instances.concatenate([labels['instances'], labels2['instances']], axis=0) + labels['cls'] = np.concatenate([labels['cls'], labels2['cls']], 0) + return labels + + +class RandomPerspective: + + def __init__(self, + degrees=0.0, + translate=0.1, + scale=0.5, + shear=0.0, + perspective=0.0, + border=(0, 0), + pre_transform=None): + self.degrees = degrees + self.translate = translate + self.scale = scale + self.shear = shear + self.perspective = perspective + # Mosaic border + self.border = border + self.pre_transform = pre_transform + + def affine_transform(self, img, border): + """Center.""" + C = np.eye(3, dtype=np.float32) + + C[0, 2] = -img.shape[1] / 2 # x translation (pixels) + C[1, 2] = -img.shape[0] / 2 # y translation (pixels) + + # Perspective + P = np.eye(3, dtype=np.float32) + P[2, 0] = random.uniform(-self.perspective, self.perspective) # x perspective (about y) + P[2, 1] = random.uniform(-self.perspective, self.perspective) # y perspective (about x) + + # Rotation and Scale + R = np.eye(3, dtype=np.float32) + a = random.uniform(-self.degrees, self.degrees) + # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations + s = random.uniform(1 - self.scale, 1 + self.scale) + # s = 2 ** random.uniform(-scale, scale) + R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s) + + # Shear + S = np.eye(3, dtype=np.float32) + S[0, 1] = math.tan(random.uniform(-self.shear, self.shear) * math.pi / 180) # x shear (deg) + S[1, 0] = math.tan(random.uniform(-self.shear, self.shear) * math.pi / 180) # y shear (deg) + + # Translation + T = np.eye(3, dtype=np.float32) + T[0, 2] = random.uniform(0.5 - self.translate, 0.5 + self.translate) * self.size[0] # x translation (pixels) + T[1, 2] = random.uniform(0.5 - self.translate, 0.5 + self.translate) * self.size[1] # y translation (pixels) + + # Combined rotation matrix + M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT + # Affine image + if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed + if self.perspective: + img = cv2.warpPerspective(img, M, dsize=self.size, borderValue=(114, 114, 114)) + else: # affine + img = cv2.warpAffine(img, M[:2], dsize=self.size, borderValue=(114, 114, 114)) + return img, M, s + + def apply_bboxes(self, bboxes, M): + """ + Apply affine to bboxes only. + + Args: + bboxes (ndarray): list of bboxes, xyxy format, with shape (num_bboxes, 4). + M (ndarray): affine matrix. + + Returns: + new_bboxes (ndarray): bboxes after affine, [num_bboxes, 4]. + """ + n = len(bboxes) + if n == 0: + return bboxes + + xy = np.ones((n * 4, 3), dtype=bboxes.dtype) + xy[:, :2] = bboxes[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 + xy = xy @ M.T # transform + xy = (xy[:, :2] / xy[:, 2:3] if self.perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine + + # Create new boxes + x = xy[:, [0, 2, 4, 6]] + y = xy[:, [1, 3, 5, 7]] + return np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1)), dtype=bboxes.dtype).reshape(4, n).T + + def apply_segments(self, segments, M): + """ + Apply affine to segments and generate new bboxes from segments. + + Args: + segments (ndarray): list of segments, [num_samples, 500, 2]. + M (ndarray): affine matrix. + + Returns: + new_segments (ndarray): list of segments after affine, [num_samples, 500, 2]. + new_bboxes (ndarray): bboxes after affine, [N, 4]. + """ + n, num = segments.shape[:2] + if n == 0: + return [], segments + + xy = np.ones((n * num, 3), dtype=segments.dtype) + segments = segments.reshape(-1, 2) + xy[:, :2] = segments + xy = xy @ M.T # transform + xy = xy[:, :2] / xy[:, 2:3] + segments = xy.reshape(n, -1, 2) + bboxes = np.stack([segment2box(xy, self.size[0], self.size[1]) for xy in segments], 0) + return bboxes, segments + + def apply_keypoints(self, keypoints, M): + """ + Apply affine to keypoints. + + Args: + keypoints (ndarray): keypoints, [N, 17, 3]. + M (ndarray): affine matrix. + + Return: + new_keypoints (ndarray): keypoints after affine, [N, 17, 3]. + """ + n, nkpt = keypoints.shape[:2] + if n == 0: + return keypoints + xy = np.ones((n * nkpt, 3), dtype=keypoints.dtype) + visible = keypoints[..., 2].reshape(n * nkpt, 1) + xy[:, :2] = keypoints[..., :2].reshape(n * nkpt, 2) + xy = xy @ M.T # transform + xy = xy[:, :2] / xy[:, 2:3] # perspective rescale or affine + out_mask = (xy[:, 0] < 0) | (xy[:, 1] < 0) | (xy[:, 0] > self.size[0]) | (xy[:, 1] > self.size[1]) + visible[out_mask] = 0 + return np.concatenate([xy, visible], axis=-1).reshape(n, nkpt, 3) + + def __call__(self, labels): + """ + Affine images and targets. + + Args: + labels (dict): a dict of `bboxes`, `segments`, `keypoints`. + """ + if self.pre_transform and 'mosaic_border' not in labels: + labels = self.pre_transform(labels) + labels.pop('ratio_pad') # do not need ratio pad + + img = labels['img'] + cls = labels['cls'] + instances = labels.pop('instances') + # Make sure the coord formats are right + instances.convert_bbox(format='xyxy') + instances.denormalize(*img.shape[:2][::-1]) + + border = labels.pop('mosaic_border', self.border) + self.size = img.shape[1] + border[1] * 2, img.shape[0] + border[0] * 2 # w, h + # M is affine matrix + # scale for func:`box_candidates` + img, M, scale = self.affine_transform(img, border) + + bboxes = self.apply_bboxes(instances.bboxes, M) + + segments = instances.segments + keypoints = instances.keypoints + # Update bboxes if there are segments. + if len(segments): + bboxes, segments = self.apply_segments(segments, M) + + if keypoints is not None: + keypoints = self.apply_keypoints(keypoints, M) + new_instances = Instances(bboxes, segments, keypoints, bbox_format='xyxy', normalized=False) + # Clip + new_instances.clip(*self.size) + + # Filter instances + instances.scale(scale_w=scale, scale_h=scale, bbox_only=True) + # Make the bboxes have the same scale with new_bboxes + i = self.box_candidates(box1=instances.bboxes.T, + box2=new_instances.bboxes.T, + area_thr=0.01 if len(segments) else 0.10) + labels['instances'] = new_instances[i] + labels['cls'] = cls[i] + labels['img'] = img + labels['resized_shape'] = img.shape[:2] + return labels + + def box_candidates(self, box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n) + # Compute box candidates: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio + w1, h1 = box1[2] - box1[0], box1[3] - box1[1] + w2, h2 = box2[2] - box2[0], box2[3] - box2[1] + ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps)) # aspect ratio + return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr) # candidates + + +class RandomBlur: + def __init__(self, p) -> None: + self.p = p + + def __call__(self, labels): + img = labels['img'] + if random.random() < self.p: + k = 2*random.randint(1, 10) + 1 + img_blur = cv2.GaussianBlur(img, (5, 5), 0) + labels['img'] = img_blur + return labels + + +class RandomHSV: + + def __init__(self, hgain=0.5, sgain=0.5, vgain=0.5) -> None: + self.hgain = hgain + self.sgain = sgain + self.vgain = vgain + + def __call__(self, labels): + """Applies random horizontal or vertical flip to an image with a given probability.""" + img = labels['img'] + if self.hgain or self.sgain or self.vgain: + r = np.random.uniform(-1, 1, 3) * [self.hgain, self.sgain, self.vgain] + 1 # random gains + hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV)) + dtype = img.dtype # uint8 + + x = np.arange(0, 256, dtype=r.dtype) + lut_hue = ((x * r[0]) % 180).astype(dtype) + lut_sat = np.clip(x * r[1], 0, 255).astype(dtype) + lut_val = np.clip(x * r[2], 0, 255).astype(dtype) + + im_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))) + cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=img) # no return needed + return labels + + +class RandomFlip: + + def __init__(self, p=0.5, direction='horizontal', flip_idx=None) -> None: + assert direction in ['horizontal', 'vertical'], f'Support direction `horizontal` or `vertical`, got {direction}' + assert 0 <= p <= 1.0 + + self.p = p + self.direction = direction + self.flip_idx = flip_idx + + def __call__(self, labels): + """Resize image and padding for detection, instance segmentation, pose.""" + img = labels['img'] + instances = labels.pop('instances') + instances.convert_bbox(format='xywh') + h, w = img.shape[:2] + h = 1 if instances.normalized else h + w = 1 if instances.normalized else w + + # Flip up-down + if self.direction == 'vertical' and random.random() < self.p: + img = np.flipud(img) + instances.flipud(h) + if self.direction == 'horizontal' and random.random() < self.p: + img = np.fliplr(img) + instances.fliplr(w) + # For keypoints + if self.flip_idx is not None and instances.keypoints is not None: + instances.keypoints = np.ascontiguousarray(instances.keypoints[:, self.flip_idx, :]) + labels['img'] = np.ascontiguousarray(img) + labels['instances'] = instances + return labels + + +class RandomPoints: + + def __init__(self, num_region=10, num_point=20) -> None: + self.num_region = num_region ## number of regions + self.num_point = num_point ## number of points + + def __call__(self, labels): + img = labels['img'] + h, w, c = img.shape + + for i in range(self.num_region): + # region size [50*50, 200*200] + xx, ww = np.random.randint(0, w-200), np.random.randint(50, 200) + yy, hh = np.random.randint(0, h-200), np.random.randint(50, 200) + # 20 points in each region + xs = np.random.randint(xx, xx+ww, 20) + ys = np.random.randint(yy, yy+hh, 20) + # draw ovals to region + for j in range(self.num_point): + x, y = xs[j], ys[j] + ax, ay = np.random.randint(5, 8), np.random.randint(3, 10) + angle = np.random.randint(0,180) + cc = np.random.randint(225, 255) + cv2.ellipse(img, (x, y), (ax, ay), + angle, 0, 360, + color = (cc, cc, cc), + thickness = -1) + return labels + + +class LetterBox: + """Resize image and padding for detection, instance segmentation, pose.""" + + def __init__(self, new_shape=(640, 640), auto=False, scaleFill=False, scaleup=True, stride=32): + """Initialize LetterBox object with specific parameters.""" + self.new_shape = new_shape + self.auto = auto + self.scaleFill = scaleFill + self.scaleup = scaleup + self.stride = stride + + def __call__(self, labels=None, image=None): + """Return updated labels and image with added border.""" + if labels is None: + labels = {} + img = labels.get('img') if image is None else image + shape = img.shape[:2] # current shape [height, width] + new_shape = labels.pop('rect_shape', self.new_shape) + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not self.scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if self.auto: # minimum rectangle + dw, dh = np.mod(dw, self.stride), np.mod(dh, self.stride) # wh padding + elif self.scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + if labels.get('ratio_pad'): + labels['ratio_pad'] = (labels['ratio_pad'], (dw, dh)) # for evaluation + + if shape[::-1] != new_unpad: # resize + img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, + value=(114, 114, 114)) # add border + if len(labels): + labels = self._update_labels(labels, ratio, dw, dh) + labels['img'] = img + labels['resized_shape'] = new_shape + return labels + else: + return img + + def _update_labels(self, labels, ratio, padw, padh): + """Update labels.""" + labels['instances'].convert_bbox(format='xyxy') + labels['instances'].denormalize(*labels['img'].shape[:2][::-1]) + labels['instances'].scale(*ratio) + labels['instances'].add_padding(padw, padh) + return labels + + +class CopyPaste: + + def __init__(self, p=0.5) -> None: + self.p = p + + def __call__(self, labels): + """Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy).""" + im = labels['img'] + cls = labels['cls'] + h, w = im.shape[:2] + instances = labels.pop('instances') + instances.convert_bbox(format='xyxy') + instances.denormalize(w, h) + if self.p and len(instances.segments): + n = len(instances) + _, w, _ = im.shape # height, width, channels + im_new = np.zeros(im.shape, np.uint8) + + # Calculate ioa first then select indexes randomly + ins_flip = deepcopy(instances) + ins_flip.fliplr(w) + + ioa = bbox_ioa(ins_flip.bboxes, instances.bboxes) # intersection over area, (N, M) + indexes = np.nonzero((ioa < 0.30).all(1))[0] # (N, ) + n = len(indexes) + for j in random.sample(list(indexes), k=round(self.p * n)): + cls = np.concatenate((cls, cls[[j]]), axis=0) + instances = Instances.concatenate((instances, ins_flip[[j]]), axis=0) + cv2.drawContours(im_new, instances.segments[[j]].astype(np.int32), -1, (1, 1, 1), cv2.FILLED) + + result = cv2.flip(im, 1) # augment segments (flip left-right) + i = cv2.flip(im_new, 1).astype(bool) + im[i] = result[i] # cv2.imwrite('debug.jpg', im) # debug + + labels['img'] = im + labels['cls'] = cls + labels['instances'] = instances + return labels + + +class Albumentations: + # YOLOv8 Albumentations class (optional, only used if package is installed) + def __init__(self, p=1.0): + """Initialize the transform object for YOLO bbox formatted params.""" + self.p = p + self.transform = None + prefix = colorstr('albumentations: ') + try: + import albumentations as A + + check_version(A.__version__, '1.0.3', hard=True) # version requirement + + T = [ + A.Blur(p=0.01), + A.MedianBlur(p=0.01), + A.ToGray(p=0.01), + A.CLAHE(p=0.01), + A.RandomBrightnessContrast(p=0.0), + A.RandomGamma(p=0.0), + A.ImageCompression(quality_lower=75, p=0.0)] # transforms + self.transform = A.Compose(T, bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels'])) + + LOGGER.info(prefix + ', '.join(f'{x}'.replace('always_apply=False, ', '') for x in T if x.p)) + except ImportError: # package not installed, skip + pass + except Exception as e: + LOGGER.info(f'{prefix}{e}') + + def __call__(self, labels): + """Generates object detections and returns a dictionary with detection results.""" + im = labels['img'] + cls = labels['cls'] + if len(cls): + labels['instances'].convert_bbox('xywh') + labels['instances'].normalize(*im.shape[:2][::-1]) + bboxes = labels['instances'].bboxes + # TODO: add supports of segments and keypoints + if self.transform and random.random() < self.p: + new = self.transform(image=im, bboxes=bboxes, class_labels=cls) # transformed + if len(new['class_labels']) > 0: # skip update if no bbox in new im + labels['img'] = new['image'] + labels['cls'] = np.array(new['class_labels']) + bboxes = np.array(new['bboxes']) + labels['instances'].update(bboxes=bboxes) + return labels + + +# TODO: technically this is not an augmentation, maybe we should put this to another files +class Format: + + def __init__(self, + bbox_format='xywh', + normalize=True, + return_mask=False, + return_keypoint=False, + mask_ratio=4, + mask_overlap=True, + batch_idx=True, + nc=3): + self.bbox_format = bbox_format + self.normalize = normalize + self.return_mask = return_mask # set False when training detection only + self.return_keypoint = return_keypoint + self.mask_ratio = mask_ratio + self.mask_overlap = mask_overlap + self.batch_idx = batch_idx # keep the batch indexes + self.nc=nc + def __call__(self, labels): + """Return formatted image, classes, bounding boxes & keypoints to be used by 'collate_fn'.""" + img = labels.pop('img') + h, w = img.shape[:2] + cls = labels.pop('cls') + instances = labels.pop('instances') + instances.convert_bbox(format=self.bbox_format) + instances.denormalize(w, h) + nl = len(instances) + if self.return_mask: + if nl: + masks, instances, cls = self._format_segments(instances, cls, w, h) + masks = torch.from_numpy(masks) + else: + masks = torch.zeros(1 if self.mask_overlap else nl, self.nc, img.shape[0] // self.mask_ratio, + img.shape[1] // self.mask_ratio) + labels['masks'] = masks + if self.normalize: + instances.normalize(w, h) + labels['img'] = self._format_img(img) + labels['cls'] = torch.from_numpy(cls) if nl else torch.zeros(nl) + labels['bboxes'] = torch.from_numpy(instances.bboxes) if nl else torch.zeros((nl, 4)) + if self.return_keypoint: + labels['keypoints'] = torch.from_numpy(instances.keypoints) + # Then we can use collate_fn + if self.batch_idx: + labels['batch_idx'] = torch.zeros(nl) + return labels + + def _format_img(self, img): + """Format the image for YOLOv5 from Numpy array to PyTorch tensor.""" + if len(img.shape) < 3: + img = np.expand_dims(img, -1) + img = np.ascontiguousarray(img.transpose(2, 0, 1)[::-1]) + img = torch.from_numpy(img) + return img + + def _format_segments(self, instances, cls, w, h): + """convert polygon points to bitmap.""" + segments = instances.segments + if self.mask_overlap: + masks, sorted_idx = polygons2masks_multi((h, w), segments, cls, downsample_ratio=self.mask_ratio, nc=self.nc) + masks = masks[None] # (2, 640, 640) -> (1, 2, 640, 640) + instances = instances[sorted_idx] + cls = cls[sorted_idx] + else: + masks = polygons2masks((h, w), segments, color=1, downsample_ratio=self.mask_ratio) + + return masks, instances, cls + + +def v9_transforms(dataset, imgsz, hyp): + """Convert images to a size suitable for YOLOv8 training.""" + pre_transform = Compose([ + Mosaic(dataset, imgsz=imgsz, p=hyp['mosaic']), + CopyPaste(p=hyp['copy_paste']), + RandomPerspective( + degrees=hyp['degrees'], + translate=hyp['translate'], + scale=hyp['scale'], + shear=hyp['shear'], + perspective=hyp['perspective'], + pre_transform=LetterBox(new_shape=(imgsz, imgsz)), + )]) + #flip_idx = dataset.data.get('flip_idx', None) # for keypoints augmentation + #if dataset.use_keypoints: + # kpt_shape = dataset.data.get('kpt_shape', None) + # if flip_idx is None and hyp['fliplr'] > 0.0: + # hyp['fliplr'] = 0.0 + # LOGGER.warning("WARNING ⚠️ No 'flip_idx' array defined in data.yaml, setting augmentation 'fliplr=0.0'") + # elif flip_idx: + # if len(flip_idx) != kpt_shape[0]: + # raise ValueError(f'data.yaml flip_idx={flip_idx} length must be equal to kpt_shape[0]={kpt_shape[0]}') + # elif flip_idx[0] != 0: + # raise ValueError(f'data.yaml flip_idx={flip_idx} must be zero-index (start from 0)') + + return Compose([ + pre_transform, + MixUp(dataset, pre_transform=pre_transform, p=hyp['mixup']), + Albumentations(p=1.0), + RandomHSV(hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v']), + RandomFlip(direction='vertical', p=hyp['flipud']), + RandomFlip(direction='horizontal', p=hyp['fliplr']), + ]) # transforms + + +# Classification augmentations ----------------------------------------------------------------------------------------- +def classify_transforms(size=224, mean=(0.0, 0.0, 0.0), std=(1.0, 1.0, 1.0)): # IMAGENET_MEAN, IMAGENET_STD + # Transforms to apply if albumentations not installed + if not isinstance(size, int): + raise TypeError(f'classify_transforms() size {size} must be integer, not (list, tuple)') + if any(mean) or any(std): + return T.Compose([CenterCrop(size), ToTensor(), T.Normalize(mean, std, inplace=True)]) + else: + return T.Compose([CenterCrop(size), ToTensor()]) + + +def hsv2colorjitter(h, s, v): + """Map HSV (hue, saturation, value) jitter into ColorJitter values (brightness, contrast, saturation, hue)""" + return v, v, s, h + + +def classify_albumentations( + augment=True, + size=224, + scale=(0.08, 1.0), + hflip=0.5, + vflip=0.0, + hsv_h=0.015, # image HSV-Hue augmentation (fraction) + hsv_s=0.7, # image HSV-Saturation augmentation (fraction) + hsv_v=0.4, # image HSV-Value augmentation (fraction) + mean=(0.0, 0.0, 0.0), # IMAGENET_MEAN + std=(1.0, 1.0, 1.0), # IMAGENET_STD + auto_aug=False, +): + # YOLOv8 classification Albumentations (optional, only used if package is installed) + prefix = colorstr('albumentations: ') + try: + import albumentations as A + from albumentations.pytorch import ToTensorV2 + + check_version(A.__version__, '1.0.3', hard=True) # version requirement + if augment: # Resize and crop + T = [A.RandomResizedCrop(height=size, width=size, scale=scale)] + if auto_aug: + # TODO: implement AugMix, AutoAug & RandAug in albumentations + LOGGER.info(f'{prefix}auto augmentations are currently not supported') + else: + if hflip > 0: + T += [A.HorizontalFlip(p=hflip)] + if vflip > 0: + T += [A.VerticalFlip(p=vflip)] + if any((hsv_h, hsv_s, hsv_v)): + T += [A.ColorJitter(*hsv2colorjitter(hsv_h, hsv_s, hsv_v))] # brightness, contrast, saturation, hue + else: # Use fixed crop for eval set (reproducibility) + T = [A.SmallestMaxSize(max_size=size), A.CenterCrop(height=size, width=size)] + T += [A.Normalize(mean=mean, std=std), ToTensorV2()] # Normalize and convert to Tensor + LOGGER.info(prefix + ', '.join(f'{x}'.replace('always_apply=False, ', '') for x in T if x.p)) + return A.Compose(T) + + except ImportError: # package not installed, skip + pass + except Exception as e: + LOGGER.info(f'{prefix}{e}') + + +class ClassifyLetterBox: + # YOLOv8 LetterBox class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()]) + def __init__(self, size=(640, 640), auto=False, stride=32): + """Resizes image and crops it to center with max dimensions 'h' and 'w'.""" + super().__init__() + self.h, self.w = (size, size) if isinstance(size, int) else size + self.auto = auto # pass max size integer, automatically solve for short side using stride + self.stride = stride # used with auto + + def __call__(self, im): # im = np.array HWC + imh, imw = im.shape[:2] + r = min(self.h / imh, self.w / imw) # ratio of new/old + h, w = round(imh * r), round(imw * r) # resized image + hs, ws = (math.ceil(x / self.stride) * self.stride for x in (h, w)) if self.auto else self.h, self.w + top, left = round((hs - h) / 2 - 0.1), round((ws - w) / 2 - 0.1) + im_out = np.full((self.h, self.w, 3), 114, dtype=im.dtype) + im_out[top:top + h, left:left + w] = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR) + return im_out + + +class CenterCrop: + # YOLOv8 CenterCrop class for image preprocessing, i.e. T.Compose([CenterCrop(size), ToTensor()]) + def __init__(self, size=640): + """Converts an image from numpy array to PyTorch tensor.""" + super().__init__() + self.h, self.w = (size, size) if isinstance(size, int) else size + + def __call__(self, im): # im = np.array HWC + imh, imw = im.shape[:2] + m = min(imh, imw) # min dimension + top, left = (imh - m) // 2, (imw - m) // 2 + return cv2.resize(im[top:top + m, left:left + m], (self.w, self.h), interpolation=cv2.INTER_LINEAR) + + +class ToTensor: + # YOLOv8 ToTensor class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()]) + def __init__(self, half=False): + """Initialize YOLOv8 ToTensor object with optional half-precision support.""" + super().__init__() + self.half = half + + def __call__(self, im): # im = np.array HWC in BGR order + im = np.ascontiguousarray(im.transpose((2, 0, 1))[::-1]) # HWC to CHW -> BGR to RGB -> contiguous + im = torch.from_numpy(im) # to torch + im = im.half() if self.half else im.float() # uint8 to fp16/32 + im /= 255.0 # 0-255 to 0.0-1.0 + return im diff --git a/utils/dataloaders.py b/utils/dataloaders.py index 776042999..02c5978d0 100644 --- a/utils/dataloaders.py +++ b/utils/dataloaders.py @@ -25,6 +25,7 @@ from utils.augmentations import (Albumentations, augment_hsv, classify_albumentations, classify_transforms, copy_paste, letterbox, mixup, random_perspective) +from utils.augment import Compose, Format, Instances, LetterBox, v9_transforms from utils.general import (DATASETS_DIR, LOGGER, NUM_THREADS, TQDM_BAR_FORMAT, check_dataset, check_requirements, check_yaml, clean_str, cv2, is_colab, is_kaggle, segments2boxes, unzip_file, xyn2xy, xywh2xyxy, xywhn2xyxy, xyxy2xywhn) @@ -168,6 +169,11 @@ def __len__(self): def __iter__(self): for _ in range(len(self)): yield next(self.iterator) + def reset(self): + """Reset iterator. + This is useful when we want to modify settings of dataset while training. + """ + self.iterator = self._get_iterator() class _RepeatSampler: @@ -456,7 +462,6 @@ def __init__(self, self.stride = stride self.path = path self.albumentations = Albumentations(size=img_size) if augment else None - try: f = [] # image files for p in path if isinstance(path, list) else [path]: @@ -496,7 +501,7 @@ def __init__(self, if cache['msgs']: LOGGER.info('\n'.join(cache['msgs'])) # display warnings assert nf > 0 or not augment, f'{prefix}No labels found in {cache_path}, can not start training. {HELP_URL}' - + # Read cache [cache.pop(k) for k in ('hash', 'version', 'msgs')] # remove items labels, shapes, self.segments = zip(*cache.values()) @@ -561,15 +566,16 @@ def __init__(self, shapes[i] = [1, 1 / mini] self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride - + # Cache images into RAM/disk for faster training if cache_images == 'ram' and not self.check_cache_ram(prefix=prefix): cache_images = False self.ims = [None] * n + self.im_hw0, self.im_hw = [None] * n, [None] * n self.npy_files = [Path(f).with_suffix('.npy') for f in self.im_files] + if cache_images: b, gb = 0, 1 << 30 # bytes of cached images, bytes per gigabytes - self.im_hw0, self.im_hw = [None] * n, [None] * n fcn = self.cache_images_to_disk if cache_images == 'disk' else self.load_image results = ThreadPool(NUM_THREADS).imap(fcn, range(n)) pbar = tqdm(enumerate(results), total=n, bar_format=TQDM_BAR_FORMAT, disable=LOCAL_RANK > 0) @@ -581,7 +587,12 @@ def __init__(self, b += self.ims[i].nbytes pbar.desc = f'{prefix}Caching images ({b / gb:.1f}GB {cache_images})' pbar.close() - + + self.transforms = self.build_transforms(hyp=hyp) + self.ni = len(self.labels) # number of images + self.buffer = [] + self.max_buffer_length = min((n, batch_size * 8, 1000)) if self.augment else 0 + def check_cache_ram(self, safety_margin=0.1, prefix=''): # Check image caching requirements vs available memory b, gb = 0, 1 << 30 # bytes of cached images, bytes per gigabytes @@ -640,85 +651,128 @@ def cache_labels(self, path=Path('./labels.cache'), prefix=''): def __len__(self): return len(self.im_files) - # def __iter__(self): - # self.count = -1 - # print('ran dataset iter') - # #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF) - # return self - + def get_image_and_label(self, index): + img, (h0, w0), (h, w) = self.load_image(index) + bboxes = self.labels[index].copy() + segments = self.segments[index].copy() + return {'img': img, + 'im_file': self.im_files[index], + 'cls': bboxes[:, 0], + 'instances': Instances(bboxes[:,1:], segments, bbox_format='xywh', normalized=True), + 'ori_shape': (h0,w0), + 'resized_shape': (h, w)} + + def build_transforms(self, hyp=None): + """Builds and appends transforms to the list.""" + if self.augment: + hyp['mosaic'] = hyp['mosaic'] if self.augment and not self.rect else 0.0 + hyp['mixup'] = hyp['mixup'] if self.augment and not self.rect else 0.0 + transforms = v9_transforms(self, self.img_size, hyp) + else: + transforms = Compose([LetterBox(new_shape=(self.img_size, self.img_size), scaleup=False)]) + + #shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling + transforms.append( + Format(bbox_format='xywh', + normalize=True, + batch_idx=True)) + return transforms + + def __getitem__(self, index): index = self.indices[index] # linear, shuffled, or image_weights - hyp = self.hyp - mosaic = self.mosaic and random.random() < hyp['mosaic'] - if mosaic: - # Load mosaic - img, labels = self.load_mosaic(index) - shapes = None - - # MixUp augmentation - if random.random() < hyp['mixup']: - img, labels = mixup(img, labels, *self.load_mosaic(random.randint(0, self.n - 1))) - - else: - # Load image - img, (h0, w0), (h, w) = self.load_image(index) - - # Letterbox - shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape - img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) - shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling - - labels = self.labels[index].copy() - if labels.size: # normalized xywh to pixel xyxy format - labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1]) - - if self.augment: - img, labels = random_perspective(img, - labels, - degrees=hyp['degrees'], - translate=hyp['translate'], - scale=hyp['scale'], - shear=hyp['shear'], - perspective=hyp['perspective']) - - nl = len(labels) # number of labels - if nl: - labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1E-3) - - if self.augment: - # Albumentations - img, labels = self.albumentations(img, labels) - nl = len(labels) # update after albumentations - - # HSV color-space - augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v']) - - # Flip up-down - if random.random() < hyp['flipud']: - img = np.flipud(img) - if nl: - labels[:, 2] = 1 - labels[:, 2] - - # Flip left-right - if random.random() < hyp['fliplr']: - img = np.fliplr(img) - if nl: - labels[:, 1] = 1 - labels[:, 1] - - # Cutouts - # labels = cutout(img, labels, p=0.5) - # nl = len(labels) # update after cutout - - labels_out = torch.zeros((nl, 6)) - if nl: - labels_out[:, 1:] = torch.from_numpy(labels) - - # Convert - img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB - img = np.ascontiguousarray(img) - - return torch.from_numpy(img), labels_out, self.im_files[index], shapes + data = self.get_image_and_label(index) + data['ratio_pad'] = (data['resized_shape'][0] / data['ori_shape'][0], + data['resized_shape'][1] / data['ori_shape'][1]) # for evaluation + if self.rect: + data['rect_shape'] = self.batch_shapes[self.batch[index]] + + data = self.transforms(data) + labels_out = torch.cat([data['batch_idx'].view(-1,1), + data['cls'].view(-1,1), + data['bboxes']], dim=1) + if data.get('ratio_pad') is None: + return data['img'], labels_out, data['im_file'], (data['ori_shape'], None) + return data['img'], labels_out, data['im_file'], (data['ori_shape'], data['ratio_pad']) + + + #def __getitem__(self, index): + # t0 = time.time() + # index = self.indices[index] # linear, shuffled, or image_weights + # + # hyp = self.hyp + # mosaic = self.mosaic and random.random() < hyp['mosaic'] + # if mosaic: + # # Load mosaic + # img, labels = self.load_mosaic(index) + # shapes = None +# + # # MixUp augmentation + # if random.random() < hyp['mixup']: + # img, labels = mixup(img, labels, *self.load_mosaic(random.randint(0, self.n - 1))) +# + # else: + # # Load image + # img, (h0, w0), (h, w) = self.load_image(index) + # + # # Letterbox + # shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape + # img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) + # shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling +# + # labels = self.labels[index].copy() + # if labels.size: # normalized xywh to pixel xyxy format + # labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1]) +# + # if self.augment: + # img, labels = random_perspective(img, + # labels, + # degrees=hyp['degrees'], + # translate=hyp['translate'], + # scale=hyp['scale'], + # shear=hyp['shear'], + # perspective=hyp['perspective']) + # + # #print(img.shape, labels) + # nl = len(labels) # number of labels + # if nl: + # labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1E-3) +# + # if self.augment: + # # Albumentations + # img, labels = self.albumentations(img, labels) + # nl = len(labels) # update after albumentations +# + # # HSV color-space + # augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v']) +# + # # Flip up-down + # if random.random() < hyp['flipud']: + # img = np.flipud(img) + # if nl: + # labels[:, 2] = 1 - labels[:, 2] +# + # # Flip left-right + # if random.random() < hyp['fliplr']: + # img = np.fliplr(img) + # if nl: + # labels[:, 1] = 1 - labels[:, 1] +# + # # Cutouts + # # labels = cutout(img, labels, p=0.5) + # # nl = len(labels) # update after cutout + # + # labels_out = torch.zeros((nl, 6)) + # if nl: + # labels_out[:, 1:] = torch.from_numpy(labels) +# + # # Convert + # img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + # img = np.ascontiguousarray(img) + # # print("loaded: ", (time.time()-t0)*1000) # about 800-900 + # #print(torch.from_numpy(img), labels_out) + # return torch.from_numpy(img), labels_out, self.im_files[index], shapes def load_image(self, i): # Loads 1 image from dataset index 'i', returns (im, original hw, resized hw) @@ -734,6 +788,12 @@ def load_image(self, i): if r != 1: # if sizes are not equal interp = cv2.INTER_LINEAR if (self.augment or r > 1) else cv2.INTER_AREA im = cv2.resize(im, (int(w0 * r), int(h0 * r)), interpolation=interp) + if self.augment: + self.ims[i], self.im_hw0[i], self.im_hw[i] = im, (h0, w0), im.shape[:2] # im, hw_original, hw_resized + self.buffer.append(i) + if len(self.buffer) >= self.max_buffer_length: + j = self.buffer.pop(0) + self.ims[j], self.im_hw0[j], self.im_hw[j] = None, None, None return im, (h0, w0), im.shape[:2] # im, hw_original, hw_resized return self.ims[i], self.im_hw0[i], self.im_hw[i] # im, hw_original, hw_resized @@ -775,6 +835,7 @@ def load_mosaic(self, index): # Labels labels, segments = self.labels[index].copy(), self.segments[index].copy() + #print(labels) if labels.size: labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format segments = [xyn2xy(x, w, h, padw, padh) for x in segments] diff --git a/utils/general.py b/utils/general.py index efe78b29a..08d8b14ff 100644 --- a/utils/general.py +++ b/utils/general.py @@ -748,6 +748,65 @@ def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] +def xywh2ltwh(x): + """ + Convert the bounding box format from [x, y, w, h] to [x1, y1, w, h], where x1, y1 are the top-left coordinates. + + Args: + x (np.ndarray) or (torch.Tensor): The input tensor with the bounding box coordinates in the xywh format + Returns: + y (np.ndarray) or (torch.Tensor): The bounding box coordinates in the xyltwh format + """ + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x + y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y + return y + + +def xyxy2ltwh(x): + """ + Convert nx4 bounding boxes from [x1, y1, x2, y2] to [x1, y1, w, h], where xy1=top-left, xy2=bottom-right + + Args: + x (np.ndarray) or (torch.Tensor): The input tensor with the bounding boxes coordinates in the xyxy format + Returns: + y (np.ndarray) or (torch.Tensor): The bounding box coordinates in the xyltwh format. + """ + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[:, 2] = x[:, 2] - x[:, 0] # width + y[:, 3] = x[:, 3] - x[:, 1] # height + return y + + +def ltwh2xywh(x): + """ + Convert nx4 boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center + + Args: + x (torch.Tensor): the input tensor + """ + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[:, 0] = x[:, 0] + x[:, 2] / 2 # center x + y[:, 1] = x[:, 1] + x[:, 3] / 2 # center y + return y + + +def ltwh2xyxy(x): + """ + It converts the bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right + + Args: + x (np.ndarray) or (torch.Tensor): the input image + + Returns: + y (np.ndarray) or (torch.Tensor): the xyxy coordinates of the bounding boxes. + """ + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[:, 2] = x[:, 2] + x[:, 0] # width + y[:, 3] = x[:, 3] + x[:, 1] # height + return y + + def xyxy2xywh(x): # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) @@ -898,7 +957,6 @@ def non_max_suppression( Returns: list of detections, on (n,6) tensor per image [xyxy, conf, cls] """ - if isinstance(prediction, (list, tuple)): # YOLO model in validation model, output = (inference_out, loss_out) prediction = prediction[0] # select only inference output diff --git a/utils/instance.py b/utils/instance.py new file mode 100644 index 000000000..89d41ee67 --- /dev/null +++ b/utils/instance.py @@ -0,0 +1,390 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +from collections import abc +from itertools import repeat +from numbers import Number +from typing import List + +import numpy as np + +from .general import ltwh2xywh, ltwh2xyxy, resample_segments, xywh2ltwh, xywh2xyxy, xyxy2ltwh, xyxy2xywh + + +def _ntuple(n): + """From PyTorch internals.""" + + def parse(x): + """Parse bounding boxes format between XYWH and LTWH.""" + return x if isinstance(x, abc.Iterable) else tuple(repeat(x, n)) + + return parse + + +to_4tuple = _ntuple(4) + +# `xyxy` means left top and right bottom +# `xywh` means center x, center y and width, height(yolo format) +# `ltwh` means left top and width, height(coco format) +_formats = ['xyxy', 'xywh', 'ltwh'] + +__all__ = 'Bboxes', # tuple or list + + +class Bboxes: + """Now only numpy is supported.""" + + def __init__(self, bboxes, format='xyxy') -> None: + assert format in _formats, f'Invalid bounding box format: {format}, format must be one of {_formats}' + bboxes = bboxes[None, :] if bboxes.ndim == 1 else bboxes + assert bboxes.ndim == 2 + assert bboxes.shape[1] == 4 + self.bboxes = bboxes + self.format = format + # self.normalized = normalized + + # def convert(self, format): + # assert format in _formats + # if self.format == format: + # bboxes = self.bboxes + # elif self.format == "xyxy": + # if format == "xywh": + # bboxes = xyxy2xywh(self.bboxes) + # else: + # bboxes = xyxy2ltwh(self.bboxes) + # elif self.format == "xywh": + # if format == "xyxy": + # bboxes = xywh2xyxy(self.bboxes) + # else: + # bboxes = xywh2ltwh(self.bboxes) + # else: + # if format == "xyxy": + # bboxes = ltwh2xyxy(self.bboxes) + # else: + # bboxes = ltwh2xywh(self.bboxes) + # + # return Bboxes(bboxes, format) + + def convert(self, format): + """Converts bounding box format from one type to another.""" + assert format in _formats, f'Invalid bounding box format: {format}, format must be one of {_formats}' + if self.format == format: + return + elif self.format == 'xyxy': + bboxes = xyxy2xywh(self.bboxes) if format == 'xywh' else xyxy2ltwh(self.bboxes) + elif self.format == 'xywh': + bboxes = xywh2xyxy(self.bboxes) if format == 'xyxy' else xywh2ltwh(self.bboxes) + else: + bboxes = ltwh2xyxy(self.bboxes) if format == 'xyxy' else ltwh2xywh(self.bboxes) + self.bboxes = bboxes + self.format = format + + def areas(self): + """Return box areas.""" + self.convert('xyxy') + return (self.bboxes[:, 2] - self.bboxes[:, 0]) * (self.bboxes[:, 3] - self.bboxes[:, 1]) + + # def denormalize(self, w, h): + # if not self.normalized: + # return + # assert (self.bboxes <= 1.0).all() + # self.bboxes[:, 0::2] *= w + # self.bboxes[:, 1::2] *= h + # self.normalized = False + # + # def normalize(self, w, h): + # if self.normalized: + # return + # assert (self.bboxes > 1.0).any() + # self.bboxes[:, 0::2] /= w + # self.bboxes[:, 1::2] /= h + # self.normalized = True + + def mul(self, scale): + """ + Args: + scale (tuple) or (list) or (int): the scale for four coords. + """ + if isinstance(scale, Number): + scale = to_4tuple(scale) + assert isinstance(scale, (tuple, list)) + assert len(scale) == 4 + self.bboxes[:, 0] *= scale[0] + self.bboxes[:, 1] *= scale[1] + self.bboxes[:, 2] *= scale[2] + self.bboxes[:, 3] *= scale[3] + + def add(self, offset): + """ + Args: + offset (tuple) or (list) or (int): the offset for four coords. + """ + if isinstance(offset, Number): + offset = to_4tuple(offset) + assert isinstance(offset, (tuple, list)) + assert len(offset) == 4 + self.bboxes[:, 0] += offset[0] + self.bboxes[:, 1] += offset[1] + self.bboxes[:, 2] += offset[2] + self.bboxes[:, 3] += offset[3] + + def __len__(self): + """Return the number of boxes.""" + return len(self.bboxes) + + @classmethod + def concatenate(cls, boxes_list: List['Bboxes'], axis=0) -> 'Bboxes': + """ + Concatenate a list of Bboxes objects into a single Bboxes object. + + Args: + boxes_list (List[Bboxes]): A list of Bboxes objects to concatenate. + axis (int, optional): The axis along which to concatenate the bounding boxes. + Defaults to 0. + + Returns: + Bboxes: A new Bboxes object containing the concatenated bounding boxes. + + Note: + The input should be a list or tuple of Bboxes objects. + """ + assert isinstance(boxes_list, (list, tuple)) + if not boxes_list: + return cls(np.empty(0)) + assert all(isinstance(box, Bboxes) for box in boxes_list) + + if len(boxes_list) == 1: + return boxes_list[0] + return cls(np.concatenate([b.bboxes for b in boxes_list], axis=axis)) + + def __getitem__(self, index) -> 'Bboxes': + """ + Retrieve a specific bounding box or a set of bounding boxes using indexing. + + Args: + index (int, slice, or np.ndarray): The index, slice, or boolean array to select + the desired bounding boxes. + + Returns: + Bboxes: A new Bboxes object containing the selected bounding boxes. + + Raises: + AssertionError: If the indexed bounding boxes do not form a 2-dimensional matrix. + + Note: + When using boolean indexing, make sure to provide a boolean array with the same + length as the number of bounding boxes. + """ + if isinstance(index, int): + return Bboxes(self.bboxes[index].view(1, -1)) + b = self.bboxes[index] + assert b.ndim == 2, f'Indexing on Bboxes with {index} failed to return a matrix!' + return Bboxes(b) + + +class Instances: + + def __init__(self, bboxes, segments=None, keypoints=None, bbox_format='xywh', normalized=True) -> None: + """ + Args: + bboxes (ndarray): bboxes with shape [N, 4]. + segments (list | ndarray): segments. + keypoints (ndarray): keypoints(x, y, visible) with shape [N, 17, 3]. + """ + if segments is None: + segments = [] + self._bboxes = Bboxes(bboxes=bboxes, format=bbox_format) + self.keypoints = keypoints + self.normalized = normalized + + if len(segments) > 0: + # list[np.array(1000, 2)] * num_samples + segments = resample_segments(segments) + # (N, 1000, 2) + segments = np.stack(segments, axis=0) + else: + segments = np.zeros((0, 1000, 2), dtype=np.float32) + self.segments = segments + + def convert_bbox(self, format): + """Convert bounding box format.""" + self._bboxes.convert(format=format) + + def bbox_areas(self): + """Calculate the area of bounding boxes.""" + self._bboxes.areas() + + def scale(self, scale_w, scale_h, bbox_only=False): + """this might be similar with denormalize func but without normalized sign.""" + self._bboxes.mul(scale=(scale_w, scale_h, scale_w, scale_h)) + if bbox_only: + return + self.segments[..., 0] *= scale_w + self.segments[..., 1] *= scale_h + if self.keypoints is not None: + self.keypoints[..., 0] *= scale_w + self.keypoints[..., 1] *= scale_h + + def denormalize(self, w, h): + """Denormalizes boxes, segments, and keypoints from normalized coordinates.""" + if not self.normalized: + return + self._bboxes.mul(scale=(w, h, w, h)) + self.segments[..., 0] *= w + self.segments[..., 1] *= h + if self.keypoints is not None: + self.keypoints[..., 0] *= w + self.keypoints[..., 1] *= h + self.normalized = False + + def normalize(self, w, h): + """Normalize bounding boxes, segments, and keypoints to image dimensions.""" + if self.normalized: + return + self._bboxes.mul(scale=(1 / w, 1 / h, 1 / w, 1 / h)) + self.segments[..., 0] /= w + self.segments[..., 1] /= h + if self.keypoints is not None: + self.keypoints[..., 0] /= w + self.keypoints[..., 1] /= h + self.normalized = True + + def add_padding(self, padw, padh): + """Handle rect and mosaic situation.""" + assert not self.normalized, 'you should add padding with absolute coordinates.' + self._bboxes.add(offset=(padw, padh, padw, padh)) + self.segments[..., 0] += padw + self.segments[..., 1] += padh + if self.keypoints is not None: + self.keypoints[..., 0] += padw + self.keypoints[..., 1] += padh + + def __getitem__(self, index) -> 'Instances': + """ + Retrieve a specific instance or a set of instances using indexing. + + Args: + index (int, slice, or np.ndarray): The index, slice, or boolean array to select + the desired instances. + + Returns: + Instances: A new Instances object containing the selected bounding boxes, + segments, and keypoints if present. + + Note: + When using boolean indexing, make sure to provide a boolean array with the same + length as the number of instances. + """ + segments = self.segments[index] if len(self.segments) else self.segments + keypoints = self.keypoints[index] if self.keypoints is not None else None + bboxes = self.bboxes[index] + bbox_format = self._bboxes.format + return Instances( + bboxes=bboxes, + segments=segments, + keypoints=keypoints, + bbox_format=bbox_format, + normalized=self.normalized, + ) + + def flipud(self, h): + """Flips the coordinates of bounding boxes, segments, and keypoints vertically.""" + if self._bboxes.format == 'xyxy': + y1 = self.bboxes[:, 1].copy() + y2 = self.bboxes[:, 3].copy() + self.bboxes[:, 1] = h - y2 + self.bboxes[:, 3] = h - y1 + else: + self.bboxes[:, 1] = h - self.bboxes[:, 1] + self.segments[..., 1] = h - self.segments[..., 1] + if self.keypoints is not None: + self.keypoints[..., 1] = h - self.keypoints[..., 1] + + def fliplr(self, w): + """Reverses the order of the bounding boxes and segments horizontally.""" + if self._bboxes.format == 'xyxy': + x1 = self.bboxes[:, 0].copy() + x2 = self.bboxes[:, 2].copy() + self.bboxes[:, 0] = w - x2 + self.bboxes[:, 2] = w - x1 + else: + self.bboxes[:, 0] = w - self.bboxes[:, 0] + self.segments[..., 0] = w - self.segments[..., 0] + if self.keypoints is not None: + self.keypoints[..., 0] = w - self.keypoints[..., 0] + + def clip(self, w, h): + """Clips bounding boxes, segments, and keypoints values to stay within image boundaries.""" + ori_format = self._bboxes.format + self.convert_bbox(format='xyxy') + self.bboxes[:, [0, 2]] = self.bboxes[:, [0, 2]].clip(0, w) + self.bboxes[:, [1, 3]] = self.bboxes[:, [1, 3]].clip(0, h) + if ori_format != 'xyxy': + self.convert_bbox(format=ori_format) + self.segments[..., 0] = self.segments[..., 0].clip(0, w) + self.segments[..., 1] = self.segments[..., 1].clip(0, h) + if self.keypoints is not None: + self.keypoints[..., 0] = self.keypoints[..., 0].clip(0, w) + self.keypoints[..., 1] = self.keypoints[..., 1].clip(0, h) + + def remove_zero_area_boxes(self): + """Remove zero-area boxes, i.e. after clipping some boxes may have zero width or height. This removes them.""" + good = self._bboxes.areas() > 0 + if not all(good): + self._bboxes = Bboxes(self._bboxes.bboxes[good], format=self._bboxes.format) + if len(self.segments): + self.segments = self.segments[good] + if self.keypoints is not None: + self.keypoints = self.keypoints[good] + return good + + def update(self, bboxes, segments=None, keypoints=None): + """Updates instance variables.""" + self._bboxes = Bboxes(bboxes, format=self._bboxes.format) + if segments is not None: + self.segments = segments + if keypoints is not None: + self.keypoints = keypoints + + def __len__(self): + """Return the length of the instance list.""" + return len(self.bboxes) + + @classmethod + def concatenate(cls, instances_list: List['Instances'], axis=0) -> 'Instances': + """ + Concatenates a list of Instances objects into a single Instances object. + + Args: + instances_list (List[Instances]): A list of Instances objects to concatenate. + axis (int, optional): The axis along which the arrays will be concatenated. Defaults to 0. + + Returns: + Instances: A new Instances object containing the concatenated bounding boxes, + segments, and keypoints if present. + + Note: + The `Instances` objects in the list should have the same properties, such as + the format of the bounding boxes, whether keypoints are present, and if the + coordinates are normalized. + """ + assert isinstance(instances_list, (list, tuple)) + if not instances_list: + return cls(np.empty(0)) + assert all(isinstance(instance, Instances) for instance in instances_list) + + if len(instances_list) == 1: + return instances_list[0] + + use_keypoint = instances_list[0].keypoints is not None + bbox_format = instances_list[0]._bboxes.format + normalized = instances_list[0].normalized + + cat_boxes = np.concatenate([ins.bboxes for ins in instances_list], axis=axis) + cat_segments = np.concatenate([b.segments for b in instances_list], axis=axis) + cat_keypoints = np.concatenate([b.keypoints for b in instances_list], axis=axis) if use_keypoint else None + return cls(cat_boxes, cat_segments, cat_keypoints, bbox_format, normalized) + + @property + def bboxes(self): + """Return bounding boxes.""" + return self._bboxes.bboxes diff --git a/utils/loggers/__pycache__/__init__.cpython-38.pyc b/utils/loggers/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 000000000..0dca5260c Binary files /dev/null and b/utils/loggers/__pycache__/__init__.cpython-38.pyc differ diff --git a/utils/loggers/clearml/__pycache__/__init__.cpython-38.pyc b/utils/loggers/clearml/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 000000000..c0f909f84 Binary files /dev/null and b/utils/loggers/clearml/__pycache__/__init__.cpython-38.pyc differ diff --git a/utils/loggers/clearml/__pycache__/clearml_utils.cpython-38.pyc b/utils/loggers/clearml/__pycache__/clearml_utils.cpython-38.pyc new file mode 100644 index 000000000..31bf16791 Binary files /dev/null and b/utils/loggers/clearml/__pycache__/clearml_utils.cpython-38.pyc differ diff --git a/utils/loggers/comet/__pycache__/__init__.cpython-38.pyc b/utils/loggers/comet/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 000000000..ed587361e Binary files /dev/null and b/utils/loggers/comet/__pycache__/__init__.cpython-38.pyc differ diff --git a/utils/loggers/comet/__pycache__/comet_utils.cpython-38.pyc b/utils/loggers/comet/__pycache__/comet_utils.cpython-38.pyc new file mode 100644 index 000000000..4e71d8e38 Binary files /dev/null and b/utils/loggers/comet/__pycache__/comet_utils.cpython-38.pyc differ diff --git a/utils/loggers/wandb/__pycache__/__init__.cpython-38.pyc b/utils/loggers/wandb/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 000000000..e84ce7ce9 Binary files /dev/null and b/utils/loggers/wandb/__pycache__/__init__.cpython-38.pyc differ diff --git a/utils/loggers/wandb/__pycache__/wandb_utils.cpython-38.pyc b/utils/loggers/wandb/__pycache__/wandb_utils.cpython-38.pyc new file mode 100644 index 000000000..b039799b9 Binary files /dev/null and b/utils/loggers/wandb/__pycache__/wandb_utils.cpython-38.pyc differ diff --git a/utils/metrics.py b/utils/metrics.py index 1229f2b10..cc6d81141 100644 --- a/utils/metrics.py +++ b/utils/metrics.py @@ -11,7 +11,7 @@ def fitness(x): # Model fitness as a weighted combination of metrics - w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] + w = [0.5, 0.5, 0.0, 0.0] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] return (x[:, :4] * w).sum(1) @@ -127,23 +127,22 @@ def __init__(self, nc, conf=0.25, iou_thres=0.45): self.iou_thres = iou_thres def process_batch(self, detections, labels): - """ - Return intersection-over-union (Jaccard index) of boxes. - Both sets of boxes are expected to be in (x1, y1, x2, y2) format. - Arguments: - detections (Array[N, 6]), x1, y1, x2, y2, conf, class - labels (Array[M, 5]), class, x1, y1, x2, y2 - Returns: - None, updates confusion matrix accordingly - """ if detections is None: - gt_classes = labels.int() + if labels is None: + self.matrix[self.nc, self.nc] += 1 + return + gt_classes = labels[:,0].int() for gc in gt_classes: - self.matrix[self.nc, gc] += 1 # background FN + self.matrix[self.nc, gc] += 1 # class FN return - + detections = detections[detections[:, 4] > self.conf] gt_classes = labels[:, 0].int() + + if labels.sum()==0: + for dc in detections: + self.matrix[dc[5].int(), self.nc] += 1 # class FP + return detection_classes = detections[:, 5].int() iou = box_iou(labels[:, 1:], detections[:, :4]) @@ -159,18 +158,17 @@ def process_batch(self, detections, labels): matches = np.zeros((0, 3)) n = matches.shape[0] > 0 - m0, m1, _ = matches.transpose().astype(int) + m0, m1, _ = matches.transpose().astype(np.int16) for i, gc in enumerate(gt_classes): j = m0 == i if n and sum(j) == 1: self.matrix[detection_classes[m1[j]], gc] += 1 # correct else: - self.matrix[self.nc, gc] += 1 # true background - - if n: - for i, dc in enumerate(detection_classes): - if not any(m1 == i): - self.matrix[dc, self.nc] += 1 # predicted background + self.matrix[self.nc, gc] += 1 # class FN + + for i, dc in enumerate(detection_classes): + if not any (m1==i): + self.matrix[dc, self.nc] += 1 # class FP def matrix(self): return self.matrix @@ -185,8 +183,8 @@ def tp_fp(self): def plot(self, normalize=True, save_dir='', names=()): import seaborn as sn - array = self.matrix / ((self.matrix.sum(0).reshape(1, -1) + 1E-9) if normalize else 1) # normalize columns - array[array < 0.005] = np.nan # don't annotate (would appear as 0.00) + array = self.matrix.astype(int) # / ((self.matrix.sum(0).reshape(1, -1) + 1E-9) if normalize else 1) # normalize columns + #array[array < 0.005] = np.nan # don't annotate (would appear as 0.00) fig, ax = plt.subplots(1, 1, figsize=(12, 9), tight_layout=True) nc, nn = self.nc, len(names) # number of classes, names @@ -215,7 +213,7 @@ def plot(self, normalize=True, save_dir='', names=()): def print(self): for i in range(self.nc + 1): print(' '.join(map(str, self.matrix[i]))) - + class WIoU_Scale: ''' monotonous: { diff --git a/utils/segment/__pycache__/__init__.cpython-38.pyc b/utils/segment/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 000000000..3c5c7b3e2 Binary files /dev/null and b/utils/segment/__pycache__/__init__.cpython-38.pyc differ diff --git a/utils/segment/__pycache__/general.cpython-38.pyc b/utils/segment/__pycache__/general.cpython-38.pyc new file mode 100644 index 000000000..b35b42915 Binary files /dev/null and b/utils/segment/__pycache__/general.cpython-38.pyc differ diff --git a/utils/tal/__pycache__/__init__.cpython-38.pyc b/utils/tal/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 000000000..e97325ce8 Binary files /dev/null and b/utils/tal/__pycache__/__init__.cpython-38.pyc differ diff --git a/utils/tal/__pycache__/anchor_generator.cpython-38.pyc b/utils/tal/__pycache__/anchor_generator.cpython-38.pyc new file mode 100644 index 000000000..b942edf34 Binary files /dev/null and b/utils/tal/__pycache__/anchor_generator.cpython-38.pyc differ diff --git a/utils/tal/__pycache__/assigner.cpython-38.pyc b/utils/tal/__pycache__/assigner.cpython-38.pyc new file mode 100644 index 000000000..e27c0d806 Binary files /dev/null and b/utils/tal/__pycache__/assigner.cpython-38.pyc differ diff --git a/val_dual.py b/val_dual.py index 4f0af05bf..51203ba1a 100644 --- a/val_dual.py +++ b/val_dual.py @@ -164,7 +164,7 @@ def run( prefix=colorstr(f'{task}: '))[0] seen = 0 - confusion_matrix = ConfusionMatrix(nc=nc) + confusion_matrix = ConfusionMatrix(nc=nc, conf=conf_thres) names = model.names if hasattr(model, 'names') else model.module.names # get class names if isinstance(names, (list, tuple)): # old format names = dict(enumerate(names)) @@ -220,9 +220,12 @@ def run( if npr == 0: if nl: + if plots: + confusion_matrix.process_batch(None, labels) stats.append((correct, *torch.zeros((2, 0), device=device), labels[:, 0])) + else: if plots: - confusion_matrix.process_batch(detections=None, labels=labels[:, 0]) + confusion_matrix.process_batch(detections=None, labels=None) continue # Predictions @@ -239,6 +242,9 @@ def run( correct = process_batch(predn, labelsn, iouv) if plots: confusion_matrix.process_batch(predn, labelsn) + else: + if plots: + confusion_matrix.process_batch(predn, torch.zeros(1,5)) stats.append((correct, pred[:, 4], pred[:, 5], labels[:, 0])) # (correct, conf, pcls, tcls) # Save/log