# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Common modules
"""

import json
import math
import platform
import warnings
from collections import OrderedDict, namedtuple
from copy import copy
from pathlib import Path

import cv2
import numpy as np
import pandas as pd
import requests
import torch
import torch.nn as nn
from PIL import Image
from torch.cuda import amp

from utils.datasets import exif_transpose, letterbox
from utils.general import (LOGGER, check_requirements, check_suffix, check_version, colorstr, increment_path,
                           make_divisible, non_max_suppression, scale_coords, xywh2xyxy, xyxy2xywh)
from utils.plots import Annotator, colors, save_one_box
from utils.torch_utils import copy_attr, time_sync


def autopad(k, p=None):  # kernel, padding
    # Pad to 'same'
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p


class Conv(nn.Module):
    # Standard convolution
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

    def forward_fuse(self, x):
        return self.act(self.conv(x))


class DWConv(Conv):
    # Depth-wise convolution class
    def __init__(self, c1, c2, k=1, s=1, act=True):  # ch_in, ch_out, kernel, stride
        super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), act=act)


class TransformerLayer(nn.Module):
    # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
    def __init__(self, c, num_heads):
        super().__init__()
        self.q = nn.Linear(c, c, bias=False)
        self.k = nn.Linear(c, c, bias=False)
        self.v = nn.Linear(c, c, bias=False)
        self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
        self.fc1 = nn.Linear(c, c, bias=False)
        self.fc2 = nn.Linear(c, c, bias=False)

    def forward(self, x):
        x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
        x = self.fc2(self.fc1(x)) + x
        return x


class TransformerBlock(nn.Module):
    # Vision Transformer https://arxiv.org/abs/2010.11929
    def __init__(self, c1, c2, num_heads, num_layers):
        super().__init__()
        self.conv = None
        if c1 != c2:
            self.conv = Conv(c1, c2)
        self.linear = nn.Linear(c2, c2)  # learnable position embedding
        self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
        self.c2 = c2

    def forward(self, x):
        if self.conv is not None:
            x = self.conv(x)
        b, _, w, h = x.shape
        p = x.flatten(2).permute(2, 0, 1)
        return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, w, h)


class Bottleneck(nn.Module):
    # Standard bottleneck
    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_, c2, 3, 1, g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))

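
# Added usage sketch (hypothetical helper, not part of the original file): with
# autopad() supplying 'same' padding, a stride-1 Conv changes the channel count
# but never the spatial size, which is what lets the blocks below stack freely.
def _example_conv_same_padding():
    conv = Conv(3, 16, k=3, s=1)  # autopad(3) -> padding 1
    x = torch.zeros(1, 3, 64, 64)
    assert conv(x).shape == (1, 16, 64, 64)  # H x W preserved
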

class BottleneckCSP(nn.Module):
    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
        self.cv4 = Conv(2 * c_, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
        self.act = nn.SiLU()
        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))

    def forward(self, x):
        y1 = self.cv3(self.m(self.cv1(x)))
        y2 = self.cv2(x)
        return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))


class C3(nn.Module):
    # CSP Bottleneck with 3 convolutions
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(2 * c_, c2, 1)  # act=FReLU(c2)
        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
        # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])

    def forward(self, x):
        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))


class C3TR(C3):
    # C3 module with TransformerBlock()
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)
        self.m = TransformerBlock(c_, c_, 4, n)


class C3SPP(C3):
    # C3 module with SPP()
    def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)
        self.m = SPP(c_, c_, k)


class C3Ghost(C3):
    # C3 module with GhostBottleneck()
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)  # hidden channels
        self.m = nn.Sequential(*(GhostBottleneck(c_, c_) for _ in range(n)))

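
# Added illustration (hypothetical helper, not in the original file): the CSP
# variants above all map (b, c1, h, w) -> (b, c2, h, w); they differ only in
# the inner bottleneck stack. A quick shape check:
def _example_csp_shapes():
    x = torch.zeros(2, 16, 40, 40)
    for block in (BottleneckCSP(16, 32), C3(16, 32), C3Ghost(16, 32)):
        assert block(x).shape == (2, 32, 40, 40)
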

class SPP(nn.Module):
    # Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729
    def __init__(self, c1, c2, k=(5, 9, 13)):
        super().__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])

    def forward(self, x):
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
            return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))


class SPPF(nn.Module):
    # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher
    def __init__(self, c1, c2, k=5):  # equivalent to SPP(k=(5, 9, 13))
        super().__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * 4, c2, 1, 1)
        self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)

    def forward(self, x):
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
            y1 = self.m(x)
            y2 = self.m(y1)
            return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1))

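
# Added illustration (hypothetical helper, not in the original file): SPPF(k=5)
# reproduces SPP(k=(5, 9, 13)) because chaining stride-1 5x5 max-pools grows the
# window: two pools act like one 9x9 pool, three like one 13x13 pool.
def _example_sppf_equals_spp_pools():
    x = torch.randn(1, 8, 32, 32)
    mp = lambda k: nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
    assert torch.equal(mp(5)(mp(5)(x)), mp(9)(x))           # 5 then 5 == 9
    assert torch.equal(mp(5)(mp(5)(mp(5)(x))), mp(13)(x))   # 5 then 5 then 5 == 13
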

class Focus(nn.Module):
    # Focus wh information into c-space
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
        # self.contract = Contract(gain=2)

    def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
        return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
        # return self.conv(self.contract(x))


class GhostConv(nn.Module):
    # Ghost Convolution https://github.com/huawei-noah/ghostnet
    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):  # ch_in, ch_out, kernel, stride, groups
        super().__init__()
        c_ = c2 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, k, s, None, g, act)
        self.cv2 = Conv(c_, c_, 5, 1, None, c_, act)

    def forward(self, x):
        y = self.cv1(x)
        return torch.cat([y, self.cv2(y)], 1)


class GhostBottleneck(nn.Module):
    # Ghost Bottleneck https://github.com/huawei-noah/ghostnet
    def __init__(self, c1, c2, k=3, s=1):  # ch_in, ch_out, kernel, stride
        super().__init__()
        c_ = c2 // 2
        self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1),  # pw
                                  DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(),  # dw
                                  GhostConv(c_, c2, 1, 1, act=False))  # pw-linear
        self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False),
                                      Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()

    def forward(self, x):
        return self.conv(x) + self.shortcut(x)


class Contract(nn.Module):
    # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        b, c, h, w = x.size()  # assert (h % s == 0) and (w % s == 0), 'Indivisible gain'
        s = self.gain
        x = x.view(b, c, h // s, s, w // s, s)  # x(1,64,40,2,40,2)
        x = x.permute(0, 3, 5, 1, 2, 4).contiguous()  # x(1,2,2,64,40,40)
        return x.view(b, c * s * s, h // s, w // s)  # x(1,256,40,40)


class Expand(nn.Module):
    # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        b, c, h, w = x.size()  # assert c % s ** 2 == 0, 'Indivisible gain'
        s = self.gain
        x = x.view(b, s, s, c // s ** 2, h, w)  # x(1,2,2,16,80,80)
        x = x.permute(0, 3, 4, 1, 5, 2).contiguous()  # x(1,16,80,2,80,2)
        return x.view(b, c // s ** 2, h * s, w * s)  # x(1,16,160,160)


class Concat(nn.Module):
    # Concatenate a list of tensors along dimension
    def __init__(self, dimension=1):
        super().__init__()
        self.d = dimension

    def forward(self, x):
        return torch.cat(x, self.d)

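
# Added illustration (hypothetical helper, not in the original file): Contract
# trades resolution for channels and Expand reverses it exactly, so the gain=2
# round trip is the identity.
def _example_contract_expand_roundtrip():
    x = torch.randn(1, 64, 80, 80)
    y = Contract(gain=2)(x)  # (1, 256, 40, 40): each 2x2 patch becomes 4 channels
    assert y.shape == (1, 256, 40, 40)
    assert torch.equal(Expand(gain=2)(y), x)  # round trip restores the input
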

class DetectMultiBackend(nn.Module):
    # YOLOv5 MultiBackend class for python inference on various backends
    def __init__(self, weights='yolov5s.pt', device=None, dnn=False):
        # Usage:
        #   PyTorch:          weights = *.pt
        #   TorchScript:                *.torchscript
        #   CoreML:                     *.mlmodel
        #   TensorFlow:                 *_saved_model
        #   TensorFlow:                 *.pb
        #   TensorFlow Lite:            *.tflite
        #   ONNX Runtime:               *.onnx
        #   OpenCV DNN:                 *.onnx with dnn=True
        #   TensorRT:                   *.engine
        from models.experimental import attempt_download, attempt_load  # scoped to avoid circular import

        super().__init__()
        w = str(weights[0] if isinstance(weights, list) else weights)
        suffix = Path(w).suffix.lower()
        suffixes = ['.pt', '.torchscript', '.onnx', '.engine', '.tflite', '.pb', '', '.mlmodel']
        check_suffix(w, suffixes)  # check weights have acceptable suffix
        pt, jit, onnx, engine, tflite, pb, saved_model, coreml = (suffix == x for x in suffixes)  # backend booleans
        stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
        w = attempt_download(w)  # download if not local

        if jit:  # TorchScript
            LOGGER.info(f'Loading {w} for TorchScript inference...')
            extra_files = {'config.txt': ''}  # model metadata
            model = torch.jit.load(w, _extra_files=extra_files)
            if extra_files['config.txt']:
                d = json.loads(extra_files['config.txt'])  # extra_files dict
                stride, names = int(d['stride']), d['names']
        elif pt:  # PyTorch
            model = attempt_load(weights if isinstance(weights, list) else w, map_location=device)
            stride = int(model.stride.max())  # model stride
            names = model.module.names if hasattr(model, 'module') else model.names  # get class names
            self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
        elif coreml:  # CoreML
            LOGGER.info(f'Loading {w} for CoreML inference...')
            import coremltools as ct
            model = ct.models.MLModel(w)
        elif dnn:  # ONNX OpenCV DNN
            LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
            check_requirements(('opencv-python>=4.5.4',))
            net = cv2.dnn.readNetFromONNX(w)
        elif onnx:  # ONNX Runtime
            LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
            cuda = torch.cuda.is_available()
            check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
            import onnxruntime
            providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
            session = onnxruntime.InferenceSession(w, providers=providers)
        elif engine:  # TensorRT
            LOGGER.info(f'Loading {w} for TensorRT inference...')
            import tensorrt as trt  # https://developer.nvidia.com/nvidia-tensorrt-download
            check_version(trt.__version__, '8.0.0', verbose=True)  # version requirement
            Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
            logger = trt.Logger(trt.Logger.INFO)
            with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
                model = runtime.deserialize_cuda_engine(f.read())
            bindings = OrderedDict()
            for index in range(model.num_bindings):
                name = model.get_binding_name(index)
                dtype = trt.nptype(model.get_binding_dtype(index))
                shape = tuple(model.get_binding_shape(index))
                data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(device)
                bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
            binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
            context = model.create_execution_context()
            batch_size = bindings['images'].shape[0]
        else:  # TensorFlow model (TFLite, pb, saved_model)
            if pb:  # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
                LOGGER.info(f'Loading {w} for TensorFlow *.pb inference...')
                import tensorflow as tf

                def wrap_frozen_graph(gd, inputs, outputs):
                    x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped
                    return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs),
                                   tf.nest.map_structure(x.graph.as_graph_element, outputs))

                graph_def = tf.Graph().as_graph_def()
                graph_def.ParseFromString(open(w, 'rb').read())
                frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0")
            elif saved_model:
                LOGGER.info(f'Loading {w} for TensorFlow saved_model inference...')
                import tensorflow as tf
                model = tf.keras.models.load_model(w)
            elif tflite:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
                if 'edgetpu' in w.lower():  # Edge TPU
                    LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...')
                    import tflite_runtime.interpreter as tfli
                    delegate = {'Linux': 'libedgetpu.so.1',  # install https://coral.ai/software/#edgetpu-runtime
                                'Darwin': 'libedgetpu.1.dylib',
                                'Windows': 'edgetpu.dll'}[platform.system()]
                    interpreter = tfli.Interpreter(model_path=w, experimental_delegates=[tfli.load_delegate(delegate)])
                else:  # TFLite
                    LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
                    import tensorflow as tf
                    interpreter = tf.lite.Interpreter(model_path=w)  # load TFLite model
                interpreter.allocate_tensors()  # allocate
                input_details = interpreter.get_input_details()  # inputs
                output_details = interpreter.get_output_details()  # outputs
        self.__dict__.update(locals())  # assign all variables to self

    def forward(self, im, augment=False, visualize=False, val=False):
        # YOLOv5 MultiBackend inference
        b, ch, h, w = im.shape  # batch, channel, height, width
        if self.pt or self.jit:  # PyTorch
            y = self.model(im) if self.jit else self.model(im, augment=augment, visualize=visualize)
            return y if val else y[0]
        elif self.coreml:  # CoreML
            im = im.permute(0, 2, 3, 1).cpu().numpy()  # torch BCHW to numpy BHWC shape(1,320,192,3)
            im = Image.fromarray((im[0] * 255).astype('uint8'))
            # im = im.resize((192, 320), Image.ANTIALIAS)
            y = self.model.predict({'image': im})  # coordinates are xywh normalized
            box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]])  # xyxy pixels
            conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(float)  # np.float removed in NumPy 1.24+
            y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
        elif self.onnx:  # ONNX
            im = im.cpu().numpy()  # torch to numpy
            if self.dnn:  # ONNX OpenCV DNN
                self.net.setInput(im)
                y = self.net.forward()
            else:  # ONNX Runtime
                y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
        elif self.engine:  # TensorRT
            assert im.shape == self.bindings['images'].shape, (im.shape, self.bindings['images'].shape)
            self.binding_addrs['images'] = int(im.data_ptr())
            self.context.execute_v2(list(self.binding_addrs.values()))
            y = self.bindings['output'].data
        else:  # TensorFlow model (TFLite, pb, saved_model)
            im = im.permute(0, 2, 3, 1).cpu().numpy()  # torch BCHW to numpy BHWC shape(1,320,192,3)
            if self.pb:
                y = self.frozen_func(x=self.tf.constant(im)).numpy()
            elif self.saved_model:
                y = self.model(im, training=False).numpy()
            elif self.tflite:
                input, output = self.input_details[0], self.output_details[0]
                int8 = input['dtype'] == np.uint8  # is TFLite quantized uint8 model
                if int8:
                    scale, zero_point = input['quantization']
                    im = (im / scale + zero_point).astype(np.uint8)  # de-scale
                self.interpreter.set_tensor(input['index'], im)
                self.interpreter.invoke()
                y = self.interpreter.get_tensor(output['index'])
                if int8:
                    scale, zero_point = output['quantization']
                    y = (y.astype(np.float32) - zero_point) * scale  # re-scale
            y[..., 0] *= w  # x
            y[..., 1] *= h  # y
            y[..., 2] *= w  # w
            y[..., 3] *= h  # h
        y = torch.tensor(y) if isinstance(y, np.ndarray) else y
        return (y, []) if val else y

    def warmup(self, imgsz=(1, 3, 640, 640), half=False):
        # Warmup model by running inference once
        if self.pt or self.engine or self.onnx:  # warmup types
            if isinstance(self.device, torch.device) and self.device.type != 'cpu':  # only warmup GPU models
                im = torch.zeros(*imgsz).to(self.device).type(torch.half if half else torch.float)  # input image
                self.forward(im)  # warmup

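
# Added usage sketch (hypothetical helper; 'yolov5s.onnx' is an assumed weights
# file, e.g. produced by export.py). The same forward() call serves whichever
# backend the weights suffix selected.
def _example_multibackend():
    model = DetectMultiBackend('yolov5s.onnx', device=torch.device('cpu'))
    model.warmup(imgsz=(1, 3, 640, 640))  # skipped on CPU; warms up CUDA models
    im = torch.zeros(1, 3, 640, 640)  # dummy letterboxed BCHW input, 0-1 range
    return model(im)  # runs the ONNX Runtime branch of forward()
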

class AutoShape(nn.Module):
    # YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
    conf = 0.25  # NMS confidence threshold
    iou = 0.45  # NMS IoU threshold
    agnostic = False  # NMS class-agnostic
    multi_label = False  # NMS multiple labels per box
    classes = None  # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs
    max_det = 1000  # maximum number of detections per image
    amp = False  # Automatic Mixed Precision (AMP) inference

    def __init__(self, model):
        super().__init__()
        LOGGER.info('Adding AutoShape... ')
        copy_attr(self, model, include=('yaml', 'nc', 'hyp', 'names', 'stride', 'abc'), exclude=())  # copy attributes
        self.dmb = isinstance(model, DetectMultiBackend)  # DetectMultiBackend() instance
        self.pt = not self.dmb or model.pt  # PyTorch model
        self.model = model.eval()

    def _apply(self, fn):
        # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
        self = super()._apply(fn)
        if self.pt:
            m = self.model.model.model[-1] if self.dmb else self.model.model[-1]  # Detect()
            m.stride = fn(m.stride)
            m.grid = list(map(fn, m.grid))
            if isinstance(m.anchor_grid, list):
                m.anchor_grid = list(map(fn, m.anchor_grid))
        return self

    @torch.no_grad()
    def forward(self, imgs, size=640, augment=False, profile=False):
        # Inference from various sources. For height=640, width=1280, RGB images example inputs are:
        #   file:      imgs = 'data/images/zidane.jpg'  # str or PosixPath
        #   URI:            = 'https://ultralytics.com/images/zidane.jpg'
        #   OpenCV:         = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
        #   PIL:            = Image.open('image.jpg') or ImageGrab.grab()  # HWC x(640,1280,3)
        #   numpy:          = np.zeros((640,1280,3))  # HWC
        #   torch:          = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
        #   multiple:       = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

        t = [time_sync()]
        p = next(self.model.parameters()) if self.pt else torch.zeros(1)  # for device and type
        autocast = self.amp and (p.device.type != 'cpu')  # Automatic Mixed Precision (AMP) inference
        if isinstance(imgs, torch.Tensor):  # torch
            with amp.autocast(enabled=autocast):
                return self.model(imgs.to(p.device).type_as(p), augment, profile)  # inference

        # Pre-process
        n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs])  # number of images, list of images
        shape0, shape1, files = [], [], []  # image and inference shapes, filenames
        for i, im in enumerate(imgs):
            f = f'image{i}'  # filename
            if isinstance(im, (str, Path)):  # filename or uri
                im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im
                im = np.asarray(exif_transpose(im))
            elif isinstance(im, Image.Image):  # PIL Image
                im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f
            files.append(Path(f).with_suffix('.jpg').name)
            if im.shape[0] < 5:  # image in CHW
                im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
            im = im[..., :3] if im.ndim == 3 else np.tile(im[..., None], 3)  # enforce 3ch input
            s = im.shape[:2]  # HWC
            shape0.append(s)  # image shape
            g = (size / max(s))  # gain
            shape1.append([y * g for y in s])
            imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
        shape1 = [make_divisible(x, self.stride) for x in np.stack(shape1, 0).max(0)]  # inference shape
        x = [letterbox(im, new_shape=shape1 if self.pt else size, auto=False)[0] for im in imgs]  # pad
        x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
        x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
        x = torch.from_numpy(x).to(p.device).type_as(p) / 255  # uint8 to fp16/32
        t.append(time_sync())

        with amp.autocast(enabled=autocast):
            # Inference
            y = self.model(x, augment, profile)  # forward
            t.append(time_sync())

            # Post-process
            y = non_max_suppression(y if self.dmb else y[0], self.conf, iou_thres=self.iou, classes=self.classes,
                                    agnostic=self.agnostic, multi_label=self.multi_label, max_det=self.max_det)  # NMS
            for i in range(n):
                scale_coords(shape1, y[i][:, :4], shape0[i])

            t.append(time_sync())
            return Detections(imgs, y, files, t, self.names, x.shape)

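
# Added usage sketch (hypothetical helper, not part of the original file).
# AutoShape is the wrapper torch.hub returns, so a typical end-to-end call is:
def _example_autoshape():
    model = torch.hub.load('ultralytics/yolov5', 'yolov5s')  # AutoShape-wrapped model
    results = model('https://ultralytics.com/images/zidane.jpg', size=640)  # URI input
    results.print()  # per-class counts and speeds
    return results
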

class Detections:
    # YOLOv5 detections class for inference results
    def __init__(self, imgs, pred, files, times=(0, 0, 0, 0), names=None, shape=None):
        super().__init__()
        d = pred[0].device  # device
        gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in imgs]  # normalizations
        self.imgs = imgs  # list of images as numpy arrays
        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
        self.names = names  # class names
        self.files = files  # image filenames
        self.times = times  # profiling times
        self.xyxy = pred  # xyxy pixels
        self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
        self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
        self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
        self.n = len(self.pred)  # number of images (batch size)
        self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3))  # timestamps (ms)
        self.s = shape  # inference BCHW shape

    def display(self, pprint=False, show=False, save=False, crop=False, render=False, save_dir=Path('')):
        crops = []
        for i, (im, pred) in enumerate(zip(self.imgs, self.pred)):
            s = f'image {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} '  # string
            if pred.shape[0]:
                for c in pred[:, -1].unique():
                    n = (pred[:, -1] == c).sum()  # detections per class
                    s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
                if show or save or render or crop:
                    annotator = Annotator(im, example=str(self.names))
                    for *box, conf, cls in reversed(pred):  # xyxy, confidence, class
                        label = f'{self.names[int(cls)]} {conf:.2f}'
                        if crop:
                            file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None
                            crops.append({'box': box, 'conf': conf, 'cls': cls, 'label': label,
                                          'im': save_one_box(box, im, file=file, save=save)})
                        else:  # all others
                            annotator.box_label(box, label, color=colors(cls))
                    im = annotator.im
            else:
                s += '(no detections)'

            im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im  # from np
            if pprint:
                LOGGER.info(s.rstrip(', '))
            if show:
                im.show(self.files[i])  # show
            if save:
                f = self.files[i]
                im.save(save_dir / f)  # save
                if i == self.n - 1:
                    LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
            if render:
                self.imgs[i] = np.asarray(im)
        if crop:
            if save:
                LOGGER.info(f'Saved results to {save_dir}\n')
            return crops

    def print(self):
        self.display(pprint=True)  # print results
        LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' %
                    self.t)

    def show(self):
        self.display(show=True)  # show results

    def save(self, save_dir='runs/detect/exp'):
        save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True)  # increment save_dir
        self.display(save=True, save_dir=save_dir)  # save results

    def crop(self, save=True, save_dir='runs/detect/exp'):
        save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) if save else None
        return self.display(crop=True, save=save, save_dir=save_dir)  # crop results

    def render(self):
        self.display(render=True)  # render results
        return self.imgs

    def pandas(self):
        # return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
        new = copy(self)  # return copy
        ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name'  # xyxy columns
        cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name'  # xywh columns
        for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
            a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)]  # update
            setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
        return new

    def tolist(self):
        # return a list of Detections objects, i.e. 'for result in results.tolist():'
        r = range(self.n)  # iterable
        x = [Detections([self.imgs[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s) for i in r]
        # for d in x:
        #     for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
        #         setattr(d, k, getattr(d, k)[0])  # pop out of list
        return x

    def __len__(self):
        return self.n

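
# Added usage sketch (hypothetical helper; 'results' is a Detections instance,
# e.g. returned by AutoShape.forward above):
def _example_detections(results):
    df = results.pandas().xyxy[0]  # DataFrame: xmin, ymin, xmax, ymax, confidence, class, name
    boxes = results.xyxy[0]  # same detections as an (n, 6) tensor in pixel xyxy
    crops = results.crop(save=False)  # list of dicts holding per-detection crops
    return df, boxes, crops
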

class Classify(nn.Module):
    # Classification head, i.e. x(b,c1,20,20) to x(b,c2)
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        self.aap = nn.AdaptiveAvgPool2d(1)  # to x(b,c1,1,1)
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g)  # to x(b,c2,1,1)
        self.flat = nn.Flatten()

    def forward(self, x):
        z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1)  # cat if list
        return self.flat(self.conv(z))  # flatten to x(b,c2)
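
# Added smoke test (illustration only, not part of the original file): chains a
# few of the blocks above and checks the final classification shape.
if __name__ == '__main__':
    m = nn.Sequential(Conv(3, 16, 3, 2), C3(16, 16), SPPF(16, 32), Classify(32, 10))
    print(m(torch.zeros(1, 3, 64, 64)).shape)  # expected: torch.Size([1, 10])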