Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.
Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.
Path: blob/master/utils/datasets.py
Views: 475
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license1"""2Dataloaders and dataset utils3"""45import glob6import hashlib7import json8import os9import random10import shutil11import time12from itertools import repeat13from multiprocessing.pool import Pool, ThreadPool14from pathlib import Path15from threading import Thread16from zipfile import ZipFile1718import cv219import numpy as np20import torch21import torch.nn.functional as F22import yaml23from PIL import ExifTags, Image, ImageOps24from torch.utils.data import DataLoader, Dataset, dataloader, distributed25from tqdm import tqdm2627from utils.augmentations import Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective28from utils.general import (LOGGER, NUM_THREADS, check_dataset, check_requirements, check_yaml, clean_str,29segments2boxes, xyn2xy, xywh2xyxy, xywhn2xyxy, xyxy2xywhn)30from utils.torch_utils import torch_distributed_zero_first31from utils.rboxs_utils import poly_filter, poly2rbox3233# Parameters34HELP_URL = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'35IMG_FORMATS = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng', 'webp', 'mpo'] # acceptable image suffixes36VID_FORMATS = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv'] # acceptable video suffixes37WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) # DPP3839# Get orientation exif tag40for orientation in ExifTags.TAGS.keys():41if ExifTags.TAGS[orientation] == 'Orientation':42break434445def get_hash(paths):46# Returns a single hash value of a list of paths (files or dirs)47size = sum(os.path.getsize(p) for p in paths if os.path.exists(p)) # sizes48h = hashlib.md5(str(size).encode()) # hash sizes49h.update(''.join(paths).encode()) # hash paths50return h.hexdigest() # return hash515253def exif_size(img):54# Returns exif-corrected PIL size55s = img.size # (width, height)56try:57rotation = dict(img._getexif().items())[orientation]58if rotation == 6: # rotation 27059s = (s[1], s[0])60elif rotation == 8: # rotation 9061s = (s[1], s[0])62except:63pass6465return s666768def exif_transpose(image):69"""70Transpose a PIL image accordingly if it has an EXIF Orientation tag.71Inplace version of https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageOps.py exif_transpose()7273:param image: The image to transpose.74:return: An image.75"""76exif = image.getexif()77orientation = exif.get(0x0112, 1) # default 178if orientation > 1:79method = {2: Image.FLIP_LEFT_RIGHT,803: Image.ROTATE_180,814: Image.FLIP_TOP_BOTTOM,825: Image.TRANSPOSE,836: Image.ROTATE_270,847: Image.TRANSVERSE,858: Image.ROTATE_90,86}.get(orientation)87if method is not None:88image = image.transpose(method)89del exif[0x0112]90image.info["exif"] = exif.tobytes()91return image929394def create_dataloader(path, imgsz, batch_size, stride, names, single_cls=False, hyp=None, augment=False, cache=False, pad=0.0,95rect=False, rank=-1, workers=8, image_weights=False, quad=False, prefix='', shuffle=False):96if rect and shuffle:97LOGGER.warning('WARNING: --rect is incompatible with DataLoader shuffle, setting shuffle=False')98shuffle = False99with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP100dataset = LoadImagesAndLabels(path, names, imgsz, batch_size,101augment=augment, # augmentation102hyp=hyp, # hyperparameters103rect=rect, # rectangular batches104cache_images=cache,105single_cls=single_cls,106stride=int(stride),107pad=pad,108image_weights=image_weights,109prefix=prefix)110111batch_size = min(batch_size, len(dataset))112nw = min([os.cpu_count() // WORLD_SIZE, batch_size if batch_size > 1 else 0, workers]) # number of workers113sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)114loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates115return loader(dataset,116batch_size=batch_size,117shuffle=shuffle and sampler is None,118num_workers=nw,119sampler=sampler,120pin_memory=True,121collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn), dataset122123124class InfiniteDataLoader(dataloader.DataLoader):125""" Dataloader that reuses workers126127Uses same syntax as vanilla DataLoader128"""129130def __init__(self, *args, **kwargs):131super().__init__(*args, **kwargs)132object.__setattr__(self, 'batch_sampler', _RepeatSampler(self.batch_sampler))133self.iterator = super().__iter__()134135def __len__(self):136return len(self.batch_sampler.sampler)137138def __iter__(self):139for i in range(len(self)):140yield next(self.iterator)141142143class _RepeatSampler:144""" Sampler that repeats forever145146Args:147sampler (Sampler)148"""149150def __init__(self, sampler):151self.sampler = sampler152153def __iter__(self):154while True:155yield from iter(self.sampler)156157158class LoadImages:159# YOLOv5 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4`160def __init__(self, path, img_size=640, stride=32, auto=True):161p = str(Path(path).resolve()) # os-agnostic absolute path162if '*' in p:163files = sorted(glob.glob(p, recursive=True)) # glob164elif os.path.isdir(p):165files = sorted(glob.glob(os.path.join(p, '*.*'))) # dir166elif os.path.isfile(p):167files = [p] # files168else:169raise Exception(f'ERROR: {p} does not exist')170171images = [x for x in files if x.split('.')[-1].lower() in IMG_FORMATS]172videos = [x for x in files if x.split('.')[-1].lower() in VID_FORMATS]173ni, nv = len(images), len(videos)174175self.img_size = img_size176self.stride = stride177self.files = images + videos178self.nf = ni + nv # number of files179self.video_flag = [False] * ni + [True] * nv180self.mode = 'image'181self.auto = auto182if any(videos):183self.new_video(videos[0]) # new video184else:185self.cap = None186assert self.nf > 0, f'No images or videos found in {p}. ' \187f'Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}'188189def __iter__(self):190self.count = 0191return self192193def __next__(self):194if self.count == self.nf:195raise StopIteration196path = self.files[self.count]197198if self.video_flag[self.count]:199# Read video200self.mode = 'video'201ret_val, img0 = self.cap.read()202while not ret_val:203self.count += 1204self.cap.release()205if self.count == self.nf: # last video206raise StopIteration207else:208path = self.files[self.count]209self.new_video(path)210ret_val, img0 = self.cap.read()211212self.frame += 1213s = f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: '214215else:216# Read image217self.count += 1218img0 = cv2.imread(path) # BGR219assert img0 is not None, f'Image Not Found {path}'220s = f'image {self.count}/{self.nf} {path}: '221222# Padded resize223img = letterbox(img0, self.img_size, stride=self.stride, auto=self.auto)[0]224225# Convert226img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB227img = np.ascontiguousarray(img)228229return path, img, img0, self.cap, s230231def new_video(self, path):232self.frame = 0233self.cap = cv2.VideoCapture(path)234self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))235236def __len__(self):237return self.nf # number of files238239240class LoadWebcam: # for inference241# YOLOv5 local webcam dataloader, i.e. `python detect.py --source 0`242def __init__(self, pipe='0', img_size=640, stride=32):243self.img_size = img_size244self.stride = stride245self.pipe = eval(pipe) if pipe.isnumeric() else pipe246self.cap = cv2.VideoCapture(self.pipe) # video capture object247self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 3) # set buffer size248249def __iter__(self):250self.count = -1251return self252253def __next__(self):254self.count += 1255if cv2.waitKey(1) == ord('q'): # q to quit256self.cap.release()257cv2.destroyAllWindows()258raise StopIteration259260# Read frame261ret_val, img0 = self.cap.read()262img0 = cv2.flip(img0, 1) # flip left-right263264265assert ret_val, f'Camera Error {self.pipe}'266img_path = 'webcam.jpg'267s = f'webcam {self.count}: '268269# Padded resize270img = letterbox(img0, self.img_size, stride=self.stride)[0]271272# Convert273img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB274img = np.ascontiguousarray(img)275276return img_path, img, img0, None, s277278def __len__(self):279return 0280281282class LoadStreams:283# YOLOv5 streamloader, i.e. `python detect.py --source 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP streams`284def __init__(self, sources='streams.txt', img_size=640, stride=32, auto=True):285self.mode = 'stream'286self.img_size = img_size287self.stride = stride288289if os.path.isfile(sources):290with open(sources) as f:291sources = [x.strip() for x in f.read().strip().splitlines() if len(x.strip())]292else:293sources = [sources]294295n = len(sources)296self.imgs, self.fps, self.frames, self.threads = [None] * n, [0] * n, [0] * n, [None] * n297self.sources = [clean_str(x) for x in sources] # clean source names for later298self.auto = auto299for i, s in enumerate(sources): # index, source300# Start thread to read frames from video stream301st = f'{i + 1}/{n}: {s}... '302if 'youtube.com/' in s or 'youtu.be/' in s: # if source is YouTube video303check_requirements(('pafy', 'youtube_dl'))304import pafy305s = pafy.new(s).getbest(preftype="mp4").url # YouTube URL306s = eval(s) if s.isnumeric() else s # i.e. s = '0' local webcam307cap = cv2.VideoCapture(s)308assert cap.isOpened(), f'{st}Failed to open {s}'309w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))310h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))311self.fps[i] = max(cap.get(cv2.CAP_PROP_FPS) % 100, 0) or 30.0 # 30 FPS fallback312self.frames[i] = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0) or float('inf') # infinite stream fallback313314_, self.imgs[i] = cap.read() # guarantee first frame315self.threads[i] = Thread(target=self.update, args=([i, cap, s]), daemon=True)316LOGGER.info(f"{st} Success ({self.frames[i]} frames {w}x{h} at {self.fps[i]:.2f} FPS)")317self.threads[i].start()318LOGGER.info('') # newline319320# check for common shapes321s = np.stack([letterbox(x, self.img_size, stride=self.stride, auto=self.auto)[0].shape for x in self.imgs])322self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal323if not self.rect:324LOGGER.warning('WARNING: Stream shapes differ. For optimal performance supply similarly-shaped streams.')325326def update(self, i, cap, stream):327# Read stream `i` frames in daemon thread328n, f, read = 0, self.frames[i], 1 # frame number, frame array, inference every 'read' frame329while cap.isOpened() and n < f:330n += 1331# _, self.imgs[index] = cap.read()332cap.grab()333if n % read == 0:334success, im = cap.retrieve()335if success:336self.imgs[i] = im337else:338LOGGER.warning('WARNING: Video stream unresponsive, please check your IP camera connection.')339self.imgs[i] = np.zeros_like(self.imgs[i])340cap.open(stream) # re-open stream if signal was lost341time.sleep(1 / self.fps[i]) # wait time342343def __iter__(self):344self.count = -1345return self346347def __next__(self):348self.count += 1349if not all(x.is_alive() for x in self.threads) or cv2.waitKey(1) == ord('q'): # q to quit350cv2.destroyAllWindows()351raise StopIteration352353# Letterbox354img0 = self.imgs.copy()355img = [letterbox(x, self.img_size, stride=self.stride, auto=self.rect and self.auto)[0] for x in img0]356357# Stack358img = np.stack(img, 0)359360# Convert361img = img[..., ::-1].transpose((0, 3, 1, 2)) # BGR to RGB, BHWC to BCHW362img = np.ascontiguousarray(img)363364return self.sources, img, img0, None, ''365366def __len__(self):367return len(self.sources) # 1E12 frames = 32 streams at 30 FPS for 30 years368369370def img2label_paths(img_paths):371# Define label paths as a function of image paths372sa, sb = os.sep + 'images' + os.sep, os.sep + 'labelTxt' + os.sep # /images/, /labels/ substrings373return [sb.join(x.rsplit(sa, 1)).rsplit('.', 1)[0] + '.txt' for x in img_paths]374375376class LoadImagesAndLabels(Dataset):377# YOLOv5 train_loader/val_loader, loads images and labels for training and validation378cache_version = 0.6 # dataset labels *.cache version379380def __init__(self, path, cls_names, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,381cache_images=False, single_cls=False, stride=32, pad=0.0, prefix=''):382"""383Returns:384Dataset.labels (list): n_imgs * array(num_gt_perimg, [cls_id, poly])385Dataset.shapes (array): (n_imgs, [ori_img_width, ori_img_height])386387Dataset.batch_shapes (array): (n_batches, [h_rect, w_rect])388"""389self.img_size = img_size390self.augment = augment391self.hyp = hyp392self.image_weights = image_weights393self.rect = False if image_weights else rect394self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training)395self.mosaic_border = [-img_size // 2, -img_size // 2]396self.stride = stride397self.path = path398self.albumentations = Albumentations() if augment else None399self.cls_names = cls_names400401try:402f = [] # image files403for p in path if isinstance(path, list) else [path]:404p = Path(p) # os-agnostic405if p.is_dir(): # dir406f += glob.glob(str(p / '**' / '*.*'), recursive=True)407# f = list(p.rglob('*.*')) # pathlib408elif p.is_file(): # file409with open(p) as t:410t = t.read().strip().splitlines()411parent = str(p.parent) + os.sep412f += [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path413# f += [p.parent / x.lstrip(os.sep) for x in t] # local to global path (pathlib)414else:415raise Exception(f'{prefix}{p} does not exist')416self.img_files = sorted(x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS)417# self.img_files = sorted([x for x in f if x.suffix[1:].lower() in IMG_FORMATS]) # pathlib418assert self.img_files, f'{prefix}No images found'419except Exception as e:420raise Exception(f'{prefix}Error loading data from {path}: {e}\nSee {HELP_URL}')421422# Check cache423self.label_files = img2label_paths(self.img_files) # labels424cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache')425try:426cache, exists = np.load(cache_path, allow_pickle=True).item(), True # load dict427assert cache['version'] == self.cache_version # same version428assert cache['hash'] == get_hash(self.label_files + self.img_files) # same hash429except:430cache, exists = self.cache_labels(cache_path, prefix), False # cache431432# Display cache433nf, nm, ne, nc, n = cache.pop('results') # found, missing, empty, corrupted, total434if exists:435d = f"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted"436tqdm(None, desc=prefix + d, total=n, initial=n) # display cache results437if cache['msgs']:438LOGGER.info('\n'.join(cache['msgs'])) # display warnings439assert nf > 0 or not augment, f'{prefix}No labels in {cache_path}. Can not train without labels. See {HELP_URL}'440441# Read cache442[cache.pop(k) for k in ('hash', 'version', 'msgs')] # remove items443labels, shapes, self.segments = zip(*cache.values())444self.labels = list(labels) # labels(list[array]): n_imgs * array(num_gt_perimg, [cls_id, poly])445self.shapes = np.array(shapes, dtype=np.float64) # img_ori shape446self.img_files = list(cache.keys()) # update447self.label_files = img2label_paths(cache.keys()) # update448n = len(shapes) # number of images449bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index450nb = bi[-1] + 1 # number of batches451self.batch = bi # batch index of image452self.n = n453self.indices = range(n)454455# Update labels456include_class = [] # filter labels to include only these classes (optional)457include_class_array = np.array(include_class).reshape(1, -1)458for i, (label, segment) in enumerate(zip(self.labels, self.segments)):459if include_class:460j = (label[:, 0:1] == include_class_array).any(1)461self.labels[i] = label[j]462if segment:463self.segments[i] = segment[j]464if single_cls: # single-class training, merge all classes into 0465self.labels[i][:, 0] = 0466if segment:467self.segments[i][:, 0] = 0468469# Rectangular Training470if self.rect:471# Sort by aspect ratio472s = self.shapes # wh473ar = s[:, 1] / s[:, 0] # aspect ratio474irect = ar.argsort()475self.img_files = [self.img_files[i] for i in irect]476self.label_files = [self.label_files[i] for i in irect]477self.labels = [self.labels[i] for i in irect]478self.shapes = s[irect] # wh479ar = ar[irect]480481# Set training image shapes482shapes = [[1, 1]] * nb483for i in range(nb):484ari = ar[bi == i]485mini, maxi = ari.min(), ari.max()486if maxi < 1: # batch图像高宽比均小于1时, shape=[h/w, 1] = [h_ratio, w_ratio]487shapes[i] = [maxi, 1]488elif mini > 1: # batch图像高宽比均大于1时, shape=[1, w/h] = [h_ratio, w_ratio]489shapes[i] = [1, 1 / mini]490491self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride # (nb, [h_rect, w_rect])492493# Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)494self.imgs, self.img_npy = [None] * n, [None] * n495if cache_images:496if cache_images == 'disk':497self.im_cache_dir = Path(Path(self.img_files[0]).parent.as_posix() + '_npy')498self.img_npy = [self.im_cache_dir / Path(f).with_suffix('.npy').name for f in self.img_files]499self.im_cache_dir.mkdir(parents=True, exist_ok=True)500gb = 0 # Gigabytes of cached images501self.img_hw0, self.img_hw = [None] * n, [None] * n502results = ThreadPool(NUM_THREADS).imap(lambda x: load_image_label(*x), zip(repeat(self), range(n)))503pbar = tqdm(enumerate(results), total=n)504for i, x in pbar:505if cache_images == 'disk':506if not self.img_npy[i].exists():507np.save(self.img_npy[i].as_posix(), x[0])508gb += self.img_npy[i].stat().st_size509else:510self.imgs[i], self.img_hw0[i], self.img_hw[i], self.labels[i] = x # im, hw_orig, hw_resized, label_resized = load_image_label(self, i)511gb += self.imgs[i].nbytes512pbar.desc = f'{prefix}Caching images ({gb / 1E9:.1f}GB {cache_images})'513pbar.close()514515def cache_labels(self, path=Path('./labels.cache'), prefix=''):516# Cache dataset labels, check images and read shapes517x = {} # dict518nm, nf, ne, nc, msgs = 0, 0, 0, 0, [] # number missing, found, empty, corrupt, messages519desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels..."520with Pool(NUM_THREADS) as pool:521pbar = tqdm(pool.imap(verify_image_label, zip(self.img_files, self.label_files, repeat(prefix), repeat(self.cls_names))),522desc=desc, total=len(self.img_files))523for im_file, l, shape, segments, nm_f, nf_f, ne_f, nc_f, msg in pbar:524nm += nm_f525nf += nf_f526ne += ne_f527nc += nc_f528if im_file:529x[im_file] = [l, shape, segments]530if msg:531msgs.append(msg)532pbar.desc = f"{desc}{nf} found, {nm} missing, {ne} empty, {nc} corrupted"533534pbar.close()535if msgs:536LOGGER.info('\n'.join(msgs))537if nf == 0:538LOGGER.warning(f'{prefix}WARNING: No labels found in {path}. See {HELP_URL}')539x['hash'] = get_hash(self.label_files + self.img_files)540x['results'] = nf, nm, ne, nc, len(self.img_files)541x['msgs'] = msgs # warnings542x['version'] = self.cache_version # cache version543try:544np.save(path, x) # save cache for next time545path.with_suffix('.cache.npy').rename(path) # remove .npy suffix546LOGGER.info(f'{prefix}New cache created: {path}')547except Exception as e:548LOGGER.warning(f'{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}') # not writeable549return x550551def __len__(self):552return len(self.img_files)553554# def __iter__(self):555# self.count = -1556# print('ran dataset iter')557# #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF)558# return self559560def __getitem__(self, index):561'''562Augment the [clsid poly] labels and trans label format to rbox.563Returns:564img (tensor): (3, height, width), RGB565labels_out (tensor): (n, [None clsid cx cy l s theta gaussian_θ_labels]) θ∈[-pi/2, pi/2)566img_file (str): img_dir567shapes : None or [(h_raw, w_raw), (hw_ratios, wh_paddings)], for COCO mAP rescaling568'''569index = self.indices[index] # linear, shuffled, or image_weights570571hyp = self.hyp572mosaic = self.mosaic and random.random() < hyp['mosaic']573if mosaic:574# Load mosaic575img, labels = load_mosaic(self, index)576shapes = None577578# MixUp augmentation579if random.random() < hyp['mixup']:580img, labels = mixup(img, labels, *load_mosaic(self, random.randint(0, self.n - 1)))581582else:583# Load image and label584img, (h0, w0), (h, w), img_label = load_image_label(self, index)585586# Letterbox587shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape [h_rect, w_rect]588img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) # ratio[w_ratio, h_ratio], pad[w_padding, h_padding]589shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling [(h_raw, w_raw), (hw_ratios, wh_paddings)]590591labels = img_label.copy() # labels (array): (num_gt_perimg, [cls_id, poly])592if labels.size:593# labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1])594labels[:, [1, 3, 5, 7]] = img_label[:, [1, 3, 5, 7]] * ratio[0] + pad[0]595labels[:, [2, 4, 6, 8]] = img_label[:, [2, 4, 6, 8]] * ratio[1] + pad[1]596597if self.augment:598img, labels = random_perspective(img, labels,599degrees=hyp['degrees'],600translate=hyp['translate'],601scale=hyp['scale'],602shear=hyp['shear'],603perspective=hyp['perspective'])604605nl = len(labels) # number of labels606# if nl:607# labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1E-3)608609610if self.augment:611# Albumentations612# img, labels = self.albumentations(img, labels)613# nl = len(labels) # update after albumentations614615# HSV color-space616augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])617618img_h, img_w = img.shape[0], img.shape[1]619# Flip up-down620if random.random() < hyp['flipud']:621img = np.flipud(img)622if nl:623# labels[:, 2] = 1 - labels[:, 2]624labels[:, 2::2] = img_h - labels[:, 2::2] - 1625626# Flip left-right627if random.random() < hyp['fliplr']:628img = np.fliplr(img)629if nl:630# labels[:, 1] = 1 - labels[:, 1]631labels[:, 1::2] = img_w - labels[:, 1::2] - 1632633# Cutouts634# labels = cutout(img, labels, p=0.5)635# nl = len(labels) # update after cutout636if nl:637# *[clsid poly] to *[clsid cx cy l s theta gaussian_θ_labels] θ∈[-pi/2, pi/2) non-normalized638rboxes, csl_labels = poly2rbox(polys=labels[:, 1:],639num_cls_thata=hyp['cls_theta'] if hyp else 180,640radius=hyp['csl_radius'] if hyp else 6.0,641use_pi=True, use_gaussian=True)642labels_obb = np.concatenate((labels[:, :1], rboxes, csl_labels), axis=1)643labels_mask = (rboxes[:, 0] >= 0) & (rboxes[:, 0] < img.shape[1]) \644& (rboxes[:, 1] >= 0) & (rboxes[:, 0] < img.shape[0]) \645& (rboxes[:, 2] > 5) | (rboxes[:, 3] > 5)646labels_obb = labels_obb[labels_mask]647nl = len(labels_obb) # update after filter648649if hyp:650c_num = 7 + hyp['cls_theta'] # [index_of_batch clsid cx cy l s theta gaussian_θ_labels]651else:652c_num = 187653654# labels_out = torch.zeros((nl, 6))655labels_out = torch.zeros((nl, c_num))656if nl:657# labels_out[:, 1:] = torch.from_numpy(labels)658labels_out[:, 1:] = torch.from_numpy(labels_obb)659660# Convert661img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB662img = np.ascontiguousarray(img)663664return torch.from_numpy(img), labels_out, self.img_files[index], shapes665666@staticmethod667def collate_fn(batch):668img, label, path, shapes = zip(*batch) # transposed; (tupe(b*tensor))669for i, l in enumerate(label):670l[:, 0] = i # add target image index for build_targets()671return torch.stack(img, 0), torch.cat(label, 0), path, shapes672673@staticmethod674def collate_fn4(batch):675img, label, path, shapes = zip(*batch) # transposed676n = len(shapes) // 4677img4, label4, path4, shapes4 = [], [], path[:n], shapes[:n]678679ho = torch.tensor([[0.0, 0, 0, 1, 0, 0]])680wo = torch.tensor([[0.0, 0, 1, 0, 0, 0]])681s = torch.tensor([[1, 1, 0.5, 0.5, 0.5, 0.5]]) # scale682for i in range(n): # zidane torch.zeros(16,3,720,1280) # BCHW683i *= 4684if random.random() < 0.5:685im = F.interpolate(img[i].unsqueeze(0).float(), scale_factor=2.0, mode='bilinear', align_corners=False)[6860].type(img[i].type())687l = label[i]688else:689im = torch.cat((torch.cat((img[i], img[i + 1]), 1), torch.cat((img[i + 2], img[i + 3]), 1)), 2)690l = torch.cat((label[i], label[i + 1] + ho, label[i + 2] + wo, label[i + 3] + ho + wo), 0) * s691img4.append(im)692label4.append(l)693694for i, l in enumerate(label4):695l[:, 0] = i # add target image index for build_targets()696697return torch.stack(img4, 0), torch.cat(label4, 0), path4, shapes4698699700# Ancillary functions --------------------------------------------------------------------------------------------------701def load_image_label(self, i):702# loads 1 image from dataset index 'i', returns im, original hw, resized hw703im = self.imgs[i]704label = self.labels[i].copy() # labels (array): (num_gt_perimg, [cls_id, poly])705if im is None: # not cached in ram706npy = self.img_npy[i]707if npy and npy.exists(): # load npy708im = np.load(npy)709else: # read image710path = self.img_files[i]711im = cv2.imread(path) # BGR712assert im is not None, f'Image Not Found {path}'713h0, w0 = im.shape[:2] # orig hw714r = self.img_size / max(h0, w0) # ratio715if r != 1: # if sizes are not equal716im = cv2.resize(im, (int(w0 * r), int(h0 * r)),717interpolation=cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR)718label[:, 1:] *= r719return im, (h0, w0), im.shape[:2], label # im, hw_original, hw_resized, resized_label720else:721return self.imgs[i], self.img_hw0[i], self.img_hw[i], self.labels[i] # im, hw_original, hw_resized, resized_label722723724def load_mosaic(self, index):725# YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic726labels4, segments4 = [], []727s = self.img_size728yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border) # mosaic center x, y729indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices730random.shuffle(indices)731for i, index in enumerate(indices):732# Load image733img, _, (h, w), img_label = load_image_label(self, index)734735# place img in img4736if i == 0: # top left737img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles738x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)739x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)740elif i == 1: # top right741x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc742x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h743elif i == 2: # bottom left744x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)745x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)746elif i == 3: # bottom right747x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)748x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)749750img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]751padw = x1a - x1b752padh = y1a - y1b753754# Labels755labels, segments = img_label.copy(), self.segments[index].copy() # labels (array): (num_gt_perimg, [cls_id, poly])756if labels.size:757# labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format758labels[:, [1, 3, 5, 7]] = img_label[:, [1, 3, 5, 7]] + padw759labels[:, [2, 4, 6, 8]] = img_label[:, [2, 4, 6, 8]] + padh760segments = [xyn2xy(x, w, h, padw, padh) for x in segments]761labels4.append(labels)762segments4.extend(segments)763764# Concat/clip labels765labels4 = np.concatenate(labels4, 0)766# for x in (labels4[:, 1:], *segments4):767for x in (segments4):768np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective()769h_filter = 2 * s770w_filter = 2 * s771labels_mask = poly_filter(polys=labels4[:, 1:].copy(), h=h_filter, w=w_filter)772labels4 = labels4[labels_mask]773# img4, labels4 = replicate(img4, labels4) # replicate774775# Augment776img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp['copy_paste'])777img4, labels4 = random_perspective(img4, labels4, segments4,778degrees=self.hyp['degrees'],779translate=self.hyp['translate'],780scale=self.hyp['scale'],781shear=self.hyp['shear'],782perspective=self.hyp['perspective'],783border=self.mosaic_border) # border to remove784785return img4, labels4786787788def load_mosaic9(self, index):789# YOLOv5 9-mosaic loader. Loads 1 image + 8 random images into a 9-image mosaic790labels9, segments9 = [], []791s = self.img_size792indices = [index] + random.choices(self.indices, k=8) # 8 additional image indices793random.shuffle(indices)794for i, index in enumerate(indices):795# Load image796img, _, (h, w), img_label = load_image_label(self, index)797798# place img in img9799if i == 0: # center800img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles801h0, w0 = h, w802c = s, s, s + w, s + h # xmin, ymin, xmax, ymax (base) coordinates803elif i == 1: # top804c = s, s - h, s + w, s805elif i == 2: # top right806c = s + wp, s - h, s + wp + w, s807elif i == 3: # right808c = s + w0, s, s + w0 + w, s + h809elif i == 4: # bottom right810c = s + w0, s + hp, s + w0 + w, s + hp + h811elif i == 5: # bottom812c = s + w0 - w, s + h0, s + w0, s + h0 + h813elif i == 6: # bottom left814c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h815elif i == 7: # left816c = s - w, s + h0 - h, s, s + h0817elif i == 8: # top left818c = s - w, s + h0 - hp - h, s, s + h0 - hp819820padx, pady = c[:2]821x1, y1, x2, y2 = (max(x, 0) for x in c) # allocate coords822823# Labels824labels, segments = img_label.copy(), self.segments[index].copy() # labels (array): (num_gt_perimg, [cls_id, poly])825if labels.size:826# labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padx, pady) # normalized xywh to pixel xyxy format827segments = [xyn2xy(x, w, h, padx, pady) for x in segments]828labels_ = labels.clone() if isinstance(labels, torch.Tensor) else np.copy(labels)829labels_[:, [1, 3, 5, 7]] = labels[:, [1, 3, 5, 7]] + padx830labels_[:, [2, 4, 6, 8]] = labels[:, [2, 4, 6, 8]] + pady831labels = labels_832833labels9.append(labels)834segments9.extend(segments)835836# Image837img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:] # img9[ymin:ymax, xmin:xmax]838hp, wp = h, w # height, width previous839840# Offset841yc, xc = (int(random.uniform(0, s)) for _ in self.mosaic_border) # mosaic center x, y842img9 = img9[yc:yc + 2 * s, xc:xc + 2 * s]843844# Concat/clip labels845labels9 = np.concatenate(labels9, 0)846# labels9[:, [1, 3]] -= xc847# labels9[:, [2, 4]] -= yc848labels9[:, [1, 3, 5, 7]] -= xc849labels9[:, [2, 4, 6, 8]] -= yc850851c = np.array([xc, yc]) # centers852segments9 = [x - c for x in segments9]853854# for x in (labels9[:, 1:], *segments9):855for x in (segments9):856np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective()857h_filter = 2 * s858w_filter = 2 * s859labels_mask = poly_filter(polys=labels9[:, 1:].copy(), h=h_filter, w=w_filter)860labels9 = labels9[labels_mask]861# img9, labels9 = replicate(img9, labels9) # replicate862863# Augment864img9, labels9 = random_perspective(img9, labels9, segments9,865degrees=self.hyp['degrees'],866translate=self.hyp['translate'],867scale=self.hyp['scale'],868shear=self.hyp['shear'],869perspective=self.hyp['perspective'],870border=self.mosaic_border) # border to remove871872return img9, labels9873874875def create_folder(path='./new'):876# Create folder877if os.path.exists(path):878shutil.rmtree(path) # delete output folder879os.makedirs(path) # make new output folder880881882def flatten_recursive(path='../datasets/coco128'):883# Flatten a recursive directory by bringing all files to top level884new_path = Path(path + '_flat')885create_folder(new_path)886for file in tqdm(glob.glob(str(Path(path)) + '/**/*.*', recursive=True)):887shutil.copyfile(file, new_path / Path(file).name)888889890def extract_boxes(path='../datasets/coco128'): # from utils.datasets import *; extract_boxes()891# Convert detection dataset into classification dataset, with one directory per class892path = Path(path) # images dir893shutil.rmtree(path / 'classifier') if (path / 'classifier').is_dir() else None # remove existing894files = list(path.rglob('*.*'))895n = len(files) # number of files896for im_file in tqdm(files, total=n):897if im_file.suffix[1:] in IMG_FORMATS:898# image899im = cv2.imread(str(im_file))[..., ::-1] # BGR to RGB900h, w = im.shape[:2]901902# labels903lb_file = Path(img2label_paths([str(im_file)])[0])904if Path(lb_file).exists():905with open(lb_file) as f:906lb = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32) # labels907908for j, x in enumerate(lb):909c = int(x[0]) # class910f = (path / 'classifier') / f'{c}' / f'{path.stem}_{im_file.stem}_{j}.jpg' # new filename911if not f.parent.is_dir():912f.parent.mkdir(parents=True)913914b = x[1:] * [w, h, w, h] # box915# b[2:] = b[2:].max() # rectangle to square916b[2:] = b[2:] * 1.2 + 3 # pad917b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int)918919b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image920b[[1, 3]] = np.clip(b[[1, 3]], 0, h)921assert cv2.imwrite(str(f), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {f}'922923924def autosplit(path='../datasets/coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False):925""" Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files926Usage: from utils.datasets import *; autosplit()927Arguments928path: Path to images directory929weights: Train, val, test weights (list, tuple)930annotated_only: Only use images with an annotated txt file931"""932path = Path(path) # images dir933files = sorted(x for x in path.rglob('*.*') if x.suffix[1:].lower() in IMG_FORMATS) # image files only934n = len(files) # number of files935random.seed(0) # for reproducibility936indices = random.choices([0, 1, 2], weights=weights, k=n) # assign each image to a split937938txt = ['autosplit_train.txt', 'autosplit_val.txt', 'autosplit_test.txt'] # 3 txt files939[(path.parent / x).unlink(missing_ok=True) for x in txt] # remove existing940941print(f'Autosplitting images from {path}' + ', using *.txt labeled images only' * annotated_only)942for i, img in tqdm(zip(indices, files), total=n):943if not annotated_only or Path(img2label_paths([str(img)])[0]).exists(): # check label944with open(path.parent / txt[i], 'a') as f:945f.write('./' + img.relative_to(path.parent).as_posix() + '\n') # add image to txt file946947948def verify_image_label(args):949# Verify one image-label pair950im_file, lb_file, prefix, cls_name_list = args951nm, nf, ne, nc, msg, segments = 0, 0, 0, 0, '', [] # number (missing, found, empty, corrupt), message, segments952try:953# verify images954im = Image.open(im_file)955im.verify() # PIL verify956shape = exif_size(im) # image size957assert (shape[0] > 9) & (shape[1] > 9), f'image size {shape} <10 pixels'958assert im.format.lower() in IMG_FORMATS, f'invalid image format {im.format}'959if im.format.lower() in ('jpg', 'jpeg'):960with open(im_file, 'rb') as f:961f.seek(-2, 2)962if f.read() != b'\xff\xd9': # corrupt JPEG963ImageOps.exif_transpose(Image.open(im_file)).save(im_file, 'JPEG', subsampling=0, quality=100)964msg = f'{prefix}WARNING: {im_file}: corrupt JPEG restored and saved'965966# verify labels967if os.path.isfile(lb_file):968nf = 1 # label found969with open(lb_file) as f:970labels = [x.split() for x in f.read().strip().splitlines() if len(x)]971972# Yolov5-obb does not support segment labels yet973# if any([len(x) > 8 for x in l]): # is segment974# classes = np.array([x[0] for x in l], dtype=np.float32)975# segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in l] # (cls, xy1...)976# l = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1) # (cls, xywh)977l_ = []978for label in labels:979if label[-1] == "2": # diffcult980continue981cls_id = cls_name_list.index(label[8])982l_.append(np.concatenate((cls_id, label[:8]), axis=None))983l = np.array(l_, dtype=np.float32)984nl = len(l)985if nl:986assert len(label) == 10, f'Yolov5-OBB labels require 10 columns, which same as DOTA Dataset, {len(label)} columns detected'987assert (l >= 0).all(), f'negative label values {l[l < 0]}, please check your dota format labels'988#assert (l[:, 1:] <= 1).all(), f'non-normalized or out of bounds coordinates {l[:, 1:][l[:, 1:] > 1]}'989_, i = np.unique(l, axis=0, return_index=True)990if len(i) < nl: # duplicate row check991l = l[i] # remove duplicates992if segments:993segments = segments[i]994msg = f'{prefix}WARNING: {im_file}: {nl - len(i)} duplicate labels removed'995else:996ne = 1 # label empty997# l = np.zeros((0, 5), dtype=np.float32)998l = np.zeros((0, 9), dtype=np.float32)999else:1000nm = 1 # label missing1001# l = np.zeros((0, 5), dtype=np.float32)1002l = np.zeros((0, 9), dtype=np.float32)1003return im_file, l, shape, segments, nm, nf, ne, nc, msg1004except Exception as e:1005nc = 11006msg = f'{prefix}WARNING: {im_file}: ignoring corrupt image/label: {e}'1007return [None, None, None, None, nm, nf, ne, nc, msg]100810091010def dataset_stats(path='coco128.yaml', autodownload=False, verbose=False, profile=False, hub=False):1011""" Return dataset statistics dictionary with images and instances counts per split per class1012To run in parent directory: export PYTHONPATH="$PWD/yolov5"1013Usage1: from utils.datasets import *; dataset_stats('coco128.yaml', autodownload=True)1014Usage2: from utils.datasets import *; dataset_stats('../datasets/coco128_with_yaml.zip')1015Arguments1016path: Path to data.yaml or data.zip (with data.yaml inside data.zip)1017autodownload: Attempt to download dataset if not found locally1018verbose: Print stats dictionary1019"""10201021def round_labels(labels):1022# Update labels to integer class and 6 decimal place floats1023return [[int(c), *(round(x, 4) for x in points)] for c, *points in labels]10241025def unzip(path):1026# Unzip data.zip TODO: CONSTRAINT: path/to/abc.zip MUST unzip to 'path/to/abc/'1027if str(path).endswith('.zip'): # path is data.zip1028assert Path(path).is_file(), f'Error unzipping {path}, file not found'1029ZipFile(path).extractall(path=path.parent) # unzip1030dir = path.with_suffix('') # dataset directory == zip name1031return True, str(dir), next(dir.rglob('*.yaml')) # zipped, data_dir, yaml_path1032else: # path is data.yaml1033return False, None, path10341035def hub_ops(f, max_dim=1920):1036# HUB ops for 1 image 'f': resize and save at reduced quality in /dataset-hub for web/app viewing1037f_new = im_dir / Path(f).name # dataset-hub image filename1038try: # use PIL1039im = Image.open(f)1040r = max_dim / max(im.height, im.width) # ratio1041if r < 1.0: # image too large1042im = im.resize((int(im.width * r), int(im.height * r)))1043im.save(f_new, 'JPEG', quality=75, optimize=True) # save1044except Exception as e: # use OpenCV1045print(f'WARNING: HUB ops PIL failure {f}: {e}')1046im = cv2.imread(f)1047im_height, im_width = im.shape[:2]1048r = max_dim / max(im_height, im_width) # ratio1049if r < 1.0: # image too large1050im = cv2.resize(im, (int(im_width * r), int(im_height * r)), interpolation=cv2.INTER_AREA)1051cv2.imwrite(str(f_new), im)10521053zipped, data_dir, yaml_path = unzip(Path(path))1054with open(check_yaml(yaml_path), errors='ignore') as f:1055data = yaml.safe_load(f) # data dict1056if zipped:1057data['path'] = data_dir # TODO: should this be dir.resolve()?1058check_dataset(data, autodownload) # download dataset if missing1059hub_dir = Path(data['path'] + ('-hub' if hub else ''))1060stats = {'nc': data['nc'], 'names': data['names']} # statistics dictionary1061for split in 'train', 'val', 'test':1062if data.get(split) is None:1063stats[split] = None # i.e. no test set1064continue1065x = []1066dataset = LoadImagesAndLabels(data[split]) # load dataset1067for label in tqdm(dataset.labels, total=dataset.n, desc='Statistics'):1068x.append(np.bincount(label[:, 0].astype(int), minlength=data['nc']))1069x = np.array(x) # shape(128x80)1070stats[split] = {'instance_stats': {'total': int(x.sum()), 'per_class': x.sum(0).tolist()},1071'image_stats': {'total': dataset.n, 'unlabelled': int(np.all(x == 0, 1).sum()),1072'per_class': (x > 0).sum(0).tolist()},1073'labels': [{str(Path(k).name): round_labels(v.tolist())} for k, v in1074zip(dataset.img_files, dataset.labels)]}10751076if hub:1077im_dir = hub_dir / 'images'1078im_dir.mkdir(parents=True, exist_ok=True)1079for _ in tqdm(ThreadPool(NUM_THREADS).imap(hub_ops, dataset.img_files), total=dataset.n, desc='HUB Ops'):1080pass10811082# Profile1083stats_path = hub_dir / 'stats.json'1084if profile:1085for _ in range(1):1086file = stats_path.with_suffix('.npy')1087t1 = time.time()1088np.save(file, stats)1089t2 = time.time()1090x = np.load(file, allow_pickle=True)1091print(f'stats.npy times: {time.time() - t2:.3f}s read, {t2 - t1:.3f}s write')10921093file = stats_path.with_suffix('.json')1094t1 = time.time()1095with open(file, 'w') as f:1096json.dump(stats, f) # save stats *.json1097t2 = time.time()1098with open(file) as f:1099x = json.load(f) # load hyps dict1100print(f'stats.json times: {time.time() - t2:.3f}s read, {t2 - t1:.3f}s write')11011102# Save, print and return1103if hub:1104print(f'Saving {stats_path.resolve()}...')1105with open(stats_path, 'w') as f:1106json.dump(stats, f) # save stats.json1107if verbose:1108print(json.dumps(stats, indent=2, sort_keys=False))1109return stats111011111112