GitHub Repository: hukaixuan19970627/yolov5_obb
Path: blob/master/models/common.py

# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Common modules
"""

import json
import math
import platform
import warnings
from collections import OrderedDict, namedtuple
from copy import copy
from pathlib import Path

import cv2
import numpy as np
import pandas as pd
import requests
import torch
import torch.nn as nn
from PIL import Image
from torch.cuda import amp

from utils.datasets import exif_transpose, letterbox
from utils.general import (LOGGER, check_requirements, check_suffix, check_version, colorstr, increment_path,
                           make_divisible, non_max_suppression, scale_coords, xywh2xyxy, xyxy2xywh)
from utils.plots import Annotator, colors, save_one_box
from utils.torch_utils import copy_attr, time_sync


def autopad(k, p=None):  # kernel, padding
    # Pad to 'same'
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p
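
# Illustrative note (not in the original file): for odd kernels and stride 1, autopad()
# yields 'same' padding, e.g. autopad(3) -> 1 and autopad((3, 5)) -> [1, 2], so a 3x3
# conv keeps the (b, c, h, w) spatial dims unchanged.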


class Conv(nn.Module):
    # Standard convolution
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

    def forward_fuse(self, x):
        return self.act(self.conv(x))
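
# Usage sketch (illustrative, kept as a comment so import-time behaviour is unchanged):
#   m = Conv(3, 32, k=3, s=2)            # Conv2d + BatchNorm2d + SiLU
#   y = m(torch.zeros(1, 3, 640, 640))   # y.shape == (1, 32, 320, 320)
# forward_fuse() is the post-fusion path, used once BN has been folded into the conv weights.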


class DWConv(Conv):
    # Depth-wise convolution class
    def __init__(self, c1, c2, k=1, s=1, act=True):  # ch_in, ch_out, kernel, stride
        super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), act=act)  # groups=gcd(c1, c2): depth-wise when c1 == c2


class TransformerLayer(nn.Module):
    # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
    def __init__(self, c, num_heads):
        super().__init__()
        self.q = nn.Linear(c, c, bias=False)
        self.k = nn.Linear(c, c, bias=False)
        self.v = nn.Linear(c, c, bias=False)
        self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
        self.fc1 = nn.Linear(c, c, bias=False)
        self.fc2 = nn.Linear(c, c, bias=False)

    def forward(self, x):
        x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
        x = self.fc2(self.fc1(x)) + x
        return x


class TransformerBlock(nn.Module):
    # Vision Transformer https://arxiv.org/abs/2010.11929
    def __init__(self, c1, c2, num_heads, num_layers):
        super().__init__()
        self.conv = None
        if c1 != c2:
            self.conv = Conv(c1, c2)
        self.linear = nn.Linear(c2, c2)  # learnable position embedding
        self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
        self.c2 = c2

    def forward(self, x):
        if self.conv is not None:
            x = self.conv(x)
        b, _, h, w = x.shape  # input is BCHW
        p = x.flatten(2).permute(2, 0, 1)  # (b, c2, h*w) -> (h*w, b, c2) token sequence
        return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, h, w)
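
# Shape walkthrough (illustrative): nn.MultiheadAttention defaults to (seq_len, batch, embed)
# inputs, so TransformerBlock flattens the BCHW map into an (h*w, b, c2) token sequence,
# adds a learned position embedding via self.linear, then restores BCHW:
#   m = TransformerBlock(64, 64, num_heads=4, num_layers=1)
#   y = m(torch.zeros(2, 64, 20, 20))  # y.shape == (2, 64, 20, 20)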


class Bottleneck(nn.Module):
    # Standard bottleneck
    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_, c2, 3, 1, g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
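
# Note (illustrative): the residual add is active only when shortcut=True and c1 == c2;
# Bottleneck(64, 64) adds the identity and keeps (b, 64, h, w), while Bottleneck(64, 128)
# degenerates to a plain 1x1 -> 3x3 projection.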


class BottleneckCSP(nn.Module):
    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
        self.cv4 = Conv(2 * c_, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
        self.act = nn.SiLU()
        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))

    def forward(self, x):
        y1 = self.cv3(self.m(self.cv1(x)))
        y2 = self.cv2(x)
        return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))


class C3(nn.Module):
    # CSP Bottleneck with 3 convolutions
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(2 * c_, c2, 1)  # act=FReLU(c2)
        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
        # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])

    def forward(self, x):
        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))
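
# Design note (illustrative): C3 computes the same split-transform-merge as BottleneckCSP
# but with three Conv modules instead of four and no extra BatchNorm/activation on the
# concatenated features; it superseded BottleneckCSP in later YOLOv5 releases.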


class C3TR(C3):
    # C3 module with TransformerBlock()
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)
        self.m = TransformerBlock(c_, c_, 4, n)


class C3SPP(C3):
    # C3 module with SPP()
    def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)
        self.m = SPP(c_, c_, k)


class C3Ghost(C3):
    # C3 module with GhostBottleneck()
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)  # hidden channels
        self.m = nn.Sequential(*(GhostBottleneck(c_, c_) for _ in range(n)))


class SPP(nn.Module):
    # Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729
    def __init__(self, c1, c2, k=(5, 9, 13)):
        super().__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])

    def forward(self, x):
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
            return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))


class SPPF(nn.Module):
    # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher
    def __init__(self, c1, c2, k=5):  # equivalent to SPP(k=(5, 9, 13))
        super().__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * 4, c2, 1, 1)
        self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)

    def forward(self, x):
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
            y1 = self.m(x)
            y2 = self.m(y1)
            return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1))
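
# Equivalence sketch (illustrative): two chained stride-1 5x5 max pools cover a 9x9 window
# and three cover 13x13, so SPPF(k=5) reproduces SPP(k=(5, 9, 13)) outputs exactly (given
# identical cv1/cv2 weights) while sharing a single MaxPool2d and reusing intermediate pools.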


class Focus(nn.Module):
    # Focus wh information into c-space
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
        # self.contract = Contract(gain=2)

    def forward(self, x):  # x(b,c,h,w) -> y(b,4c,h/2,w/2)
        return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
        # return self.conv(self.contract(x))
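
# Equivalence note (illustrative): the strided slicing is a space-to-depth move identical to
# Contract(gain=2) up to channel order (see the commented-out self.contract alternative):
#   m = Focus(3, 64, k=3)
#   y = m(torch.zeros(1, 3, 640, 640))  # y.shape == (1, 64, 320, 320)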


class GhostConv(nn.Module):
    # Ghost Convolution https://github.com/huawei-noah/ghostnet
    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):  # ch_in, ch_out, kernel, stride, groups
        super().__init__()
        c_ = c2 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, k, s, None, g, act)
        self.cv2 = Conv(c_, c_, 5, 1, None, c_, act)

    def forward(self, x):
        y = self.cv1(x)
        return torch.cat([y, self.cv2(y)], 1)


class GhostBottleneck(nn.Module):
    # Ghost Bottleneck https://github.com/huawei-noah/ghostnet
    def __init__(self, c1, c2, k=3, s=1):  # ch_in, ch_out, kernel, stride
        super().__init__()
        c_ = c2 // 2
        self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1),  # pw
                                  DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(),  # dw
                                  GhostConv(c_, c2, 1, 1, act=False))  # pw-linear
        self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False),
                                      Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()

    def forward(self, x):
        return self.conv(x) + self.shortcut(x)
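
# Design note (illustrative): GhostConv computes only c2 // 2 channels with the primary
# conv and generates the other half with a cheap 5x5 depth-wise conv (groups=c_), roughly
# halving the cost of a full Conv; GhostBottleneck stacks two GhostConvs around an optional
# stride-2 depth-wise conv, mirroring the GhostNet paper.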


class Contract(nn.Module):
    # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        b, c, h, w = x.size()  # assert (h % s == 0) and (w % s == 0), 'Indivisible gain'
        s = self.gain
        x = x.view(b, c, h // s, s, w // s, s)  # x(1,64,40,2,40,2)
        x = x.permute(0, 3, 5, 1, 2, 4).contiguous()  # x(1,2,2,64,40,40)
        return x.view(b, c * s * s, h // s, w // s)  # x(1,256,40,40)


class Expand(nn.Module):
    # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        b, c, h, w = x.size()  # assert c % s ** 2 == 0, 'Indivisible gain'
        s = self.gain
        x = x.view(b, s, s, c // s ** 2, h, w)  # x(1,2,2,16,80,80)
        x = x.permute(0, 3, 4, 1, 5, 2).contiguous()  # x(1,16,80,2,80,2)
        return x.view(b, c // s ** 2, h * s, w * s)  # x(1,16,160,160)
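
# Round-trip sketch (illustrative): Expand(gain=s) exactly inverts Contract(gain=s):
#   x = torch.randn(1, 64, 80, 80)
#   torch.equal(Expand(2)(Contract(2)(x)), x)  # True; shapes go 80x80 -> 40x40 -> 80x80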


class Concat(nn.Module):
    # Concatenate a list of tensors along dimension
    def __init__(self, dimension=1):
        super().__init__()
        self.d = dimension

    def forward(self, x):
        return torch.cat(x, self.d)


class DetectMultiBackend(nn.Module):
    # YOLOv5 MultiBackend class for python inference on various backends
    def __init__(self, weights='yolov5s.pt', device=None, dnn=False):
        # Usage:
        #   PyTorch:          weights = *.pt
        #   TorchScript:                *.torchscript
        #   CoreML:                     *.mlmodel
        #   TensorFlow:                 *_saved_model
        #   TensorFlow:                 *.pb
        #   TensorFlow Lite:            *.tflite
        #   ONNX Runtime:               *.onnx
        #   OpenCV DNN:                 *.onnx with dnn=True
        #   TensorRT:                   *.engine
        from models.experimental import attempt_download, attempt_load  # scoped to avoid circular import

        super().__init__()
        w = str(weights[0] if isinstance(weights, list) else weights)
        suffix = Path(w).suffix.lower()
        suffixes = ['.pt', '.torchscript', '.onnx', '.engine', '.tflite', '.pb', '', '.mlmodel']
        check_suffix(w, suffixes)  # check weights have acceptable suffix
        pt, jit, onnx, engine, tflite, pb, saved_model, coreml = (suffix == x for x in suffixes)  # backend booleans
        stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
        w = attempt_download(w)  # download if not local

        if jit:  # TorchScript
            LOGGER.info(f'Loading {w} for TorchScript inference...')
            extra_files = {'config.txt': ''}  # model metadata
            model = torch.jit.load(w, _extra_files=extra_files)
            if extra_files['config.txt']:
                d = json.loads(extra_files['config.txt'])  # extra_files dict
                stride, names = int(d['stride']), d['names']
        elif pt:  # PyTorch
            model = attempt_load(weights if isinstance(weights, list) else w, map_location=device)
            stride = int(model.stride.max())  # model stride
            names = model.module.names if hasattr(model, 'module') else model.names  # get class names
            self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
        elif coreml:  # CoreML
            LOGGER.info(f'Loading {w} for CoreML inference...')
            import coremltools as ct
            model = ct.models.MLModel(w)
        elif dnn:  # ONNX OpenCV DNN
            LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
            check_requirements(('opencv-python>=4.5.4',))
            net = cv2.dnn.readNetFromONNX(w)
        elif onnx:  # ONNX Runtime
            LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
            cuda = torch.cuda.is_available()
            check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
            import onnxruntime
            providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
            session = onnxruntime.InferenceSession(w, providers=providers)
        elif engine:  # TensorRT
            LOGGER.info(f'Loading {w} for TensorRT inference...')
            import tensorrt as trt  # https://developer.nvidia.com/nvidia-tensorrt-download
            check_version(trt.__version__, '8.0.0', verbose=True)  # version requirement
            Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
            logger = trt.Logger(trt.Logger.INFO)
            with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
                model = runtime.deserialize_cuda_engine(f.read())
            bindings = OrderedDict()
            for index in range(model.num_bindings):
                name = model.get_binding_name(index)
                dtype = trt.nptype(model.get_binding_dtype(index))
                shape = tuple(model.get_binding_shape(index))
                data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(device)
                bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
            binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
            context = model.create_execution_context()
            batch_size = bindings['images'].shape[0]
        else:  # TensorFlow model (TFLite, pb, saved_model)
            if pb:  # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
                LOGGER.info(f'Loading {w} for TensorFlow *.pb inference...')
                import tensorflow as tf

                def wrap_frozen_graph(gd, inputs, outputs):
                    x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped
                    return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs),
                                   tf.nest.map_structure(x.graph.as_graph_element, outputs))

                graph_def = tf.Graph().as_graph_def()
                graph_def.ParseFromString(open(w, 'rb').read())
                frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0")
            elif saved_model:
                LOGGER.info(f'Loading {w} for TensorFlow saved_model inference...')
                import tensorflow as tf
                model = tf.keras.models.load_model(w)
            elif tflite:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
                if 'edgetpu' in w.lower():
                    LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...')
                    import tflite_runtime.interpreter as tfli
                    delegate = {'Linux': 'libedgetpu.so.1',  # install https://coral.ai/software/#edgetpu-runtime
                                'Darwin': 'libedgetpu.1.dylib',
                                'Windows': 'edgetpu.dll'}[platform.system()]
                    interpreter = tfli.Interpreter(model_path=w, experimental_delegates=[tfli.load_delegate(delegate)])
                else:
                    LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
                    import tensorflow as tf
                    interpreter = tf.lite.Interpreter(model_path=w)  # load TFLite model
                interpreter.allocate_tensors()  # allocate
                input_details = interpreter.get_input_details()  # inputs
                output_details = interpreter.get_output_details()  # outputs
        self.__dict__.update(locals())  # assign all variables to self

    def forward(self, im, augment=False, visualize=False, val=False):
        # YOLOv5 MultiBackend inference
        b, ch, h, w = im.shape  # batch, channel, height, width
        if self.pt or self.jit:  # PyTorch
            y = self.model(im) if self.jit else self.model(im, augment=augment, visualize=visualize)
            return y if val else y[0]
        elif self.coreml:  # CoreML
            im = im.permute(0, 2, 3, 1).cpu().numpy()  # torch BCHW to numpy BHWC shape(1,320,192,3)
            im = Image.fromarray((im[0] * 255).astype('uint8'))
            # im = im.resize((192, 320), Image.ANTIALIAS)
            y = self.model.predict({'image': im})  # coordinates are xywh normalized
            box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]])  # xyxy pixels
            conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float32)  # np.float is deprecated
            y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
        elif self.onnx:  # ONNX
            im = im.cpu().numpy()  # torch to numpy
            if self.dnn:  # ONNX OpenCV DNN
                self.net.setInput(im)
                y = self.net.forward()
            else:  # ONNX Runtime
                y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
        elif self.engine:  # TensorRT
            assert im.shape == self.bindings['images'].shape, (im.shape, self.bindings['images'].shape)
            self.binding_addrs['images'] = int(im.data_ptr())
            self.context.execute_v2(list(self.binding_addrs.values()))
            y = self.bindings['output'].data
        else:  # TensorFlow model (TFLite, pb, saved_model)
            im = im.permute(0, 2, 3, 1).cpu().numpy()  # torch BCHW to numpy BHWC shape(1,320,192,3)
            if self.pb:
                y = self.frozen_func(x=self.tf.constant(im)).numpy()
            elif self.saved_model:
                y = self.model(im, training=False).numpy()
            elif self.tflite:
                input, output = self.input_details[0], self.output_details[0]
                int8 = input['dtype'] == np.uint8  # is TFLite quantized uint8 model
                if int8:
                    scale, zero_point = input['quantization']
                    im = (im / scale + zero_point).astype(np.uint8)  # de-scale
                self.interpreter.set_tensor(input['index'], im)
                self.interpreter.invoke()
                y = self.interpreter.get_tensor(output['index'])
                if int8:
                    scale, zero_point = output['quantization']
                    y = (y.astype(np.float32) - zero_point) * scale  # re-scale
            y[..., 0] *= w  # x
            y[..., 1] *= h  # y
            y[..., 2] *= w  # w
            y[..., 3] *= h  # h
        y = torch.tensor(y) if isinstance(y, np.ndarray) else y
        return (y, []) if val else y

    def warmup(self, imgsz=(1, 3, 640, 640), half=False):
        # Warmup model by running inference once
        if self.pt or self.engine or self.onnx:  # warmup types
            if isinstance(self.device, torch.device) and self.device.type != 'cpu':  # only warmup GPU models
                im = torch.zeros(*imgsz).to(self.device).type(torch.half if half else torch.float)  # input image
                self.forward(im)  # warmup
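
# Usage sketch (illustrative; the backend is inferred from the weights suffix):
#   device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
#   model = DetectMultiBackend('yolov5s.pt', device=device)
#   model.warmup(imgsz=(1, 3, 640, 640))  # GPU-only warmup; no-op on CPU
#   pred = model(torch.zeros(1, 3, 640, 640, device=device))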


class AutoShape(nn.Module):
    # YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
    conf = 0.25  # NMS confidence threshold
    iou = 0.45  # NMS IoU threshold
    agnostic = False  # NMS class-agnostic
    multi_label = False  # NMS multiple labels per box
    classes = None  # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs
    max_det = 1000  # maximum number of detections per image
    amp = False  # Automatic Mixed Precision (AMP) inference

    def __init__(self, model):
        super().__init__()
        LOGGER.info('Adding AutoShape... ')
        copy_attr(self, model, include=('yaml', 'nc', 'hyp', 'names', 'stride', 'abc'), exclude=())  # copy attributes
        self.dmb = isinstance(model, DetectMultiBackend)  # DetectMultiBackend() instance
        self.pt = not self.dmb or model.pt  # PyTorch model
        self.model = model.eval()

    def _apply(self, fn):
        # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
        self = super()._apply(fn)
        if self.pt:
            m = self.model.model.model[-1] if self.dmb else self.model.model[-1]  # Detect()
            m.stride = fn(m.stride)
            m.grid = list(map(fn, m.grid))
            if isinstance(m.anchor_grid, list):
                m.anchor_grid = list(map(fn, m.anchor_grid))
        return self

    @torch.no_grad()
    def forward(self, imgs, size=640, augment=False, profile=False):
        # Inference from various sources. For height=640, width=1280, RGB images example inputs are:
        #   file:       imgs = 'data/images/zidane.jpg'  # str or PosixPath
        #   URI:             = 'https://ultralytics.com/images/zidane.jpg'
        #   OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
        #   PIL:             = Image.open('image.jpg') or ImageGrab.grab()  # HWC x(640,1280,3)
        #   numpy:           = np.zeros((640,1280,3))  # HWC
        #   torch:           = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
        #   multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

        t = [time_sync()]
        p = next(self.model.parameters()) if self.pt else torch.zeros(1)  # for device and type
        autocast = self.amp and (p.device.type != 'cpu')  # Automatic Mixed Precision (AMP) inference
        if isinstance(imgs, torch.Tensor):  # torch
            with amp.autocast(enabled=autocast):
                return self.model(imgs.to(p.device).type_as(p), augment, profile)  # inference

        # Pre-process
        n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs])  # number of images, list of images
        shape0, shape1, files = [], [], []  # image and inference shapes, filenames
        for i, im in enumerate(imgs):
            f = f'image{i}'  # filename
            if isinstance(im, (str, Path)):  # filename or uri
                im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im
                im = np.asarray(exif_transpose(im))
            elif isinstance(im, Image.Image):  # PIL Image
                im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f
            files.append(Path(f).with_suffix('.jpg').name)
            if im.shape[0] < 5:  # image in CHW
                im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
            im = im[..., :3] if im.ndim == 3 else np.tile(im[..., None], 3)  # enforce 3ch input
            s = im.shape[:2]  # HWC
            shape0.append(s)  # image shape
            g = (size / max(s))  # gain
            shape1.append([y * g for y in s])
            imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
        shape1 = [make_divisible(x, self.stride) for x in np.stack(shape1, 0).max(0)]  # inference shape
        x = [letterbox(im, new_shape=shape1 if self.pt else size, auto=False)[0] for im in imgs]  # pad
        x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
        x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
        x = torch.from_numpy(x).to(p.device).type_as(p) / 255  # uint8 to fp16/32
        t.append(time_sync())

        with amp.autocast(enabled=autocast):
            # Inference
            y = self.model(x, augment, profile)  # forward
            t.append(time_sync())

            # Post-process
            y = non_max_suppression(y if self.dmb else y[0], self.conf, iou_thres=self.iou, classes=self.classes,
                                    agnostic=self.agnostic, multi_label=self.multi_label, max_det=self.max_det)  # NMS
            for i in range(n):
                scale_coords(shape1, y[i][:, :4], shape0[i])

            t.append(time_sync())
            return Detections(imgs, y, files, t, self.names, x.shape)
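
# Usage sketch (illustrative; URL taken from the docstring above): AutoShape handles
# letterboxing, normalization and NMS, returning a Detections object:
#   model = AutoShape(DetectMultiBackend('yolov5s.pt'))
#   results = model('https://ultralytics.com/images/zidane.jpg', size=640)
#   results.print()  # also: results.show(), results.save(), results.pandas().xyxy[0]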


class Detections:
    # YOLOv5 detections class for inference results
    def __init__(self, imgs, pred, files, times=(0, 0, 0, 0), names=None, shape=None):
        super().__init__()
        d = pred[0].device  # device
        gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in imgs]  # normalizations
        self.imgs = imgs  # list of images as numpy arrays
        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
        self.names = names  # class names
        self.files = files  # image filenames
        self.times = times  # profiling times
        self.xyxy = pred  # xyxy pixels
        self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
        self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
        self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
        self.n = len(self.pred)  # number of images (batch size)
        self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3))  # per-image times (ms)
        self.s = shape  # inference BCHW shape

    def display(self, pprint=False, show=False, save=False, crop=False, render=False, save_dir=Path('')):
        crops = []
        for i, (im, pred) in enumerate(zip(self.imgs, self.pred)):
            s = f'image {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} '  # string
            if pred.shape[0]:
                for c in pred[:, -1].unique():
                    n = (pred[:, -1] == c).sum()  # detections per class
                    s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
                if show or save or render or crop:
                    annotator = Annotator(im, example=str(self.names))
                    for *box, conf, cls in reversed(pred):  # xyxy, confidence, class
                        label = f'{self.names[int(cls)]} {conf:.2f}'
                        if crop:
                            file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None
                            crops.append({'box': box, 'conf': conf, 'cls': cls, 'label': label,
                                          'im': save_one_box(box, im, file=file, save=save)})
                        else:  # all others
                            annotator.box_label(box, label, color=colors(cls))
                    im = annotator.im
            else:
                s += '(no detections)'

            im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im  # from np
            if pprint:
                LOGGER.info(s.rstrip(', '))
            if show:
                im.show(self.files[i])  # show
            if save:
                f = self.files[i]
                im.save(save_dir / f)  # save
                if i == self.n - 1:
                    LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
            if render:
                self.imgs[i] = np.asarray(im)
        if crop:
            if save:
                LOGGER.info(f'Saved results to {save_dir}\n')
            return crops

    def print(self):
        self.display(pprint=True)  # print results
        LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' %
                    self.t)

    def show(self):
        self.display(show=True)  # show results

    def save(self, save_dir='runs/detect/exp'):
        save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True)  # increment save_dir
        self.display(save=True, save_dir=save_dir)  # save results

    def crop(self, save=True, save_dir='runs/detect/exp'):
        save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) if save else None
        return self.display(crop=True, save=save, save_dir=save_dir)  # crop results

    def render(self):
        self.display(render=True)  # render results
        return self.imgs

    def pandas(self):
        # return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
        new = copy(self)  # return copy
        ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name'  # xyxy columns
        cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name'  # xywh columns
        for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
            a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)]  # update
            setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
        return new

    def tolist(self):
        # return a list of Detections objects, i.e. 'for result in results.tolist():'
        r = range(self.n)  # iterable
        x = [Detections([self.imgs[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s) for i in r]
        # for d in x:
        #     for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
        #         setattr(d, k, getattr(d, k)[0])  # pop out of list
        return x

    def __len__(self):
        return self.n


class Classify(nn.Module):
    # Classification head, i.e. x(b,c1,20,20) to x(b,c2)
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        self.aap = nn.AdaptiveAvgPool2d(1)  # to x(b,c1,1,1)
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g)  # to x(b,c2,1,1)
        self.flat = nn.Flatten()

    def forward(self, x):
        z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1)  # cat if list
        return self.flat(self.conv(z))  # flatten to x(b,c2)
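
# Shape sketch (illustrative): adaptive average pooling collapses the spatial grid so the
# 1x1 conv acts as a linear classifier:
#   m = Classify(512, 1000)
#   y = m(torch.zeros(2, 512, 20, 20))  # y.shape == (2, 1000)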