"""
Common modules
"""
import json
import math
import platform
import warnings
from collections import OrderedDict, namedtuple
from copy import copy
from pathlib import Path
import cv2
import numpy as np
import pandas as pd
import requests
import torch
import torch.nn as nn
from PIL import Image
from torch.cuda import amp
from utils.datasets import exif_transpose, letterbox
from utils.general import (LOGGER, check_requirements, check_suffix, check_version, colorstr, increment_path,
make_divisible, non_max_suppression, scale_coords, xywh2xyxy, xyxy2xywh)
from utils.plots import Annotator, colors, save_one_box
from utils.torch_utils import copy_attr, time_sync
def autopad(k, p=None):
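    """Return 'same' padding for kernel size k, e.g. autopad(3) -> 1, autopad((3, 5)) -> [1, 2]."""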
if p is None:
p = k // 2 if isinstance(k, int) else [x // 2 for x in k]
return p
class Conv(nn.Module):
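    """Standard convolution block: Conv2d -> BatchNorm2d -> activation (SiLU by default).
    Args: c1 (ch_in), c2 (ch_out), k (kernel), s (stride), p (padding), g (groups), act (activation).
    """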
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
super().__init__()
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
self.bn = nn.BatchNorm2d(c2)
self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())
def forward(self, x):
return self.act(self.bn(self.conv(x)))
def forward_fuse(self, x):
return self.act(self.conv(x))
class DWConv(Conv):
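    """Depth-wise convolution: a Conv with groups set to gcd(c1, c2)."""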
def __init__(self, c1, c2, k=1, s=1, act=True):
super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
class TransformerLayer(nn.Module):
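    """Transformer layer (https://arxiv.org/abs/2010.11929), with LayerNorm layers removed for better performance."""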
def __init__(self, c, num_heads):
super().__init__()
self.q = nn.Linear(c, c, bias=False)
self.k = nn.Linear(c, c, bias=False)
self.v = nn.Linear(c, c, bias=False)
self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
self.fc1 = nn.Linear(c, c, bias=False)
self.fc2 = nn.Linear(c, c, bias=False)
def forward(self, x):
x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
x = self.fc2(self.fc1(x)) + x
return x
class TransformerBlock(nn.Module):
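    """Vision Transformer block (https://arxiv.org/abs/2010.11929): optional Conv to match channels, a learned
    positional projection, then num_layers stacked TransformerLayers.
    """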
def __init__(self, c1, c2, num_heads, num_layers):
super().__init__()
self.conv = None
if c1 != c2:
self.conv = Conv(c1, c2)
self.linear = nn.Linear(c2, c2)
self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
self.c2 = c2
def forward(self, x):
if self.conv is not None:
x = self.conv(x)
b, _, w, h = x.shape
        p = x.flatten(2).permute(2, 0, 1)  # BCHW -> (HW)BC, the sequence-first layout nn.MultiheadAttention expects
return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, w, h)
class Bottleneck(nn.Module):
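    """Standard bottleneck: 1x1 conv then 3x3 conv, with a residual add when shortcut=True and c1 == c2."""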
def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):
super().__init__()
c_ = int(c2 * e)
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c_, c2, 3, 1, g=g)
self.add = shortcut and c1 == c2
def forward(self, x):
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
class BottleneckCSP(nn.Module):
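    """CSP Bottleneck (https://github.com/WongKinYiu/CrossStagePartialNetworks): splits channels into a bottleneck
    branch and an identity branch, then concatenates and fuses them.
    """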
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
super().__init__()
c_ = int(c2 * e)
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
self.cv4 = Conv(2 * c_, c2, 1, 1)
self.bn = nn.BatchNorm2d(2 * c_)
self.act = nn.SiLU()
self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
def forward(self, x):
y1 = self.cv3(self.m(self.cv1(x)))
y2 = self.cv2(x)
return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
class C3(nn.Module):
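    """CSP Bottleneck with 3 convolutions: a simplified BottleneckCSP without the extra BatchNorm/activation."""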
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
super().__init__()
c_ = int(c2 * e)
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c1, c_, 1, 1)
self.cv3 = Conv(2 * c_, c2, 1)
self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
def forward(self, x):
return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))
class C3TR(C3):
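    """C3 module with its bottleneck stack replaced by a TransformerBlock."""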
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
super().__init__(c1, c2, n, shortcut, g, e)
c_ = int(c2 * e)
self.m = TransformerBlock(c_, c_, 4, n)
class C3SPP(C3):
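    """C3 module with its bottleneck stack replaced by an SPP layer."""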
def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
super().__init__(c1, c2, n, shortcut, g, e)
c_ = int(c2 * e)
self.m = SPP(c_, c_, k)
class C3Ghost(C3):
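    """C3 module with GhostBottleneck blocks."""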
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
super().__init__(c1, c2, n, shortcut, g, e)
c_ = int(c2 * e)
self.m = nn.Sequential(*(GhostBottleneck(c_, c_) for _ in range(n)))
class SPP(nn.Module):
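    """Spatial Pyramid Pooling layer (https://arxiv.org/abs/1406.4729), as used in YOLOv3-SPP."""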
def __init__(self, c1, c2, k=(5, 9, 13)):
super().__init__()
c_ = c1 // 2
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
def forward(self, x):
x = self.cv1(x)
with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
class SPPF(nn.Module):
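    """Spatial Pyramid Pooling - Fast (SPPF). Three chained k=5 max-pools reproduce SPP's k=(5, 9, 13) pyramid
    (two stride-1 k=5 pools equal one k=9 pool, three equal one k=13) while sharing intermediate results.
    """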
def __init__(self, c1, c2, k=5):
super().__init__()
c_ = c1 // 2
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c_ * 4, c2, 1, 1)
self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
def forward(self, x):
x = self.cv1(x)
with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
y1 = self.m(x)
y2 = self.m(y1)
return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1))
class Focus(nn.Module):
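    """Focus width-height information into channel space: x(b,c,w,h) -> x(b,4c,w/2,h/2) via strided slicing,
    then a convolution.
    """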
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
super().__init__()
self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
def forward(self, x):
return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
class GhostConv(nn.Module):
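    """Ghost Convolution (https://github.com/huawei-noah/ghostnet): half the output channels from a normal conv,
    the other half from a cheap 5x5 depth-wise conv on that output.
    """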
def __init__(self, c1, c2, k=1, s=1, g=1, act=True):
super().__init__()
c_ = c2 // 2
self.cv1 = Conv(c1, c_, k, s, None, g, act)
self.cv2 = Conv(c_, c_, 5, 1, None, c_, act)
def forward(self, x):
y = self.cv1(x)
return torch.cat([y, self.cv2(y)], 1)
class GhostBottleneck(nn.Module):
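    """Ghost Bottleneck (https://github.com/huawei-noah/ghostnet): GhostConv -> optional stride-2 DWConv ->
    GhostConv, with a matching downsample shortcut when s == 2.
    """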
def __init__(self, c1, c2, k=3, s=1):
super().__init__()
c_ = c2 // 2
self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1),
DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(),
GhostConv(c_, c2, 1, 1, act=False))
self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False),
Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()
def forward(self, x):
return self.conv(x) + self.shortcut(x)
class Contract(nn.Module):
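    """Contract width-height into channels, e.g. x(1,64,80,80) -> x(1,256,40,40) for gain=2."""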
def __init__(self, gain=2):
super().__init__()
self.gain = gain
def forward(self, x):
        b, c, h, w = x.size()  # h and w must be divisible by s
        s = self.gain
        x = x.view(b, c, h // s, s, w // s, s)  # x(1,64,40,2,40,2)
        x = x.permute(0, 3, 5, 1, 2, 4).contiguous()  # x(1,2,2,64,40,40)
        return x.view(b, c * s * s, h // s, w // s)  # x(1,256,40,40)
class Expand(nn.Module):
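    """Expand channels into width-height, e.g. x(1,64,80,80) -> x(1,16,160,160) for gain=2."""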
def __init__(self, gain=2):
super().__init__()
self.gain = gain
def forward(self, x):
        b, c, h, w = x.size()  # c must be divisible by s ** 2
        s = self.gain
        x = x.view(b, s, s, c // s ** 2, h, w)  # x(1,2,2,16,80,80)
        x = x.permute(0, 3, 4, 1, 5, 2).contiguous()  # x(1,16,80,2,80,2)
        return x.view(b, c // s ** 2, h * s, w * s)  # x(1,16,160,160)
class Concat(nn.Module):
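    """Concatenate a list of tensors along the given dimension."""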
def __init__(self, dimension=1):
super().__init__()
self.d = dimension
def forward(self, x):
return torch.cat(x, self.d)
class DetectMultiBackend(nn.Module):
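    """YOLOv5 MultiBackend class for Python inference on various backends, selected by weights suffix:
    PyTorch (.pt), TorchScript (.torchscript), ONNX (.onnx, via ONNX Runtime or OpenCV DNN with dnn=True),
    TensorRT (.engine), CoreML (.mlmodel), TensorFlow (.pb, .tflite, or a saved_model directory).
    """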
def __init__(self, weights='yolov5s.pt', device=None, dnn=False):
from models.experimental import attempt_download, attempt_load
super().__init__()
        w = str(weights[0] if isinstance(weights, list) else weights)
        suffix = Path(w).suffix.lower()
        suffixes = ['.pt', '.torchscript', '.onnx', '.engine', '.tflite', '.pb', '', '.mlmodel']
        check_suffix(w, suffixes)  # check weights have acceptable suffix
        pt, jit, onnx, engine, tflite, pb, saved_model, coreml = (suffix == x for x in suffixes)  # backend booleans
        stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
        w = attempt_download(w)  # download if not local
if jit:
LOGGER.info(f'Loading {w} for TorchScript inference...')
extra_files = {'config.txt': ''}
model = torch.jit.load(w, _extra_files=extra_files)
if extra_files['config.txt']:
d = json.loads(extra_files['config.txt'])
stride, names = int(d['stride']), d['names']
elif pt:
model = attempt_load(weights if isinstance(weights, list) else w, map_location=device)
stride = int(model.stride.max())
names = model.module.names if hasattr(model, 'module') else model.names
            self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
elif coreml:
LOGGER.info(f'Loading {w} for CoreML inference...')
import coremltools as ct
model = ct.models.MLModel(w)
elif dnn:
LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
check_requirements(('opencv-python>=4.5.4',))
net = cv2.dnn.readNetFromONNX(w)
elif onnx:
LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
cuda = torch.cuda.is_available()
check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
import onnxruntime
providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
session = onnxruntime.InferenceSession(w, providers=providers)
elif engine:
LOGGER.info(f'Loading {w} for TensorRT inference...')
import tensorrt as trt
check_version(trt.__version__, '8.0.0', verbose=True)
Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
logger = trt.Logger(trt.Logger.INFO)
with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
model = runtime.deserialize_cuda_engine(f.read())
bindings = OrderedDict()
for index in range(model.num_bindings):
name = model.get_binding_name(index)
dtype = trt.nptype(model.get_binding_dtype(index))
shape = tuple(model.get_binding_shape(index))
data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(device)
bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
context = model.create_execution_context()
batch_size = bindings['images'].shape[0]
else:
if pb:
LOGGER.info(f'Loading {w} for TensorFlow *.pb inference...')
import tensorflow as tf
def wrap_frozen_graph(gd, inputs, outputs):
x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])
return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs),
tf.nest.map_structure(x.graph.as_graph_element, outputs))
                graph_def = tf.Graph().as_graph_def()
                with open(w, 'rb') as f:  # read the frozen graph, closing the file afterwards
                    graph_def.ParseFromString(f.read())
frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0")
elif saved_model:
LOGGER.info(f'Loading {w} for TensorFlow saved_model inference...')
import tensorflow as tf
model = tf.keras.models.load_model(w)
elif tflite:
if 'edgetpu' in w.lower():
LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...')
import tflite_runtime.interpreter as tfli
delegate = {'Linux': 'libedgetpu.so.1',
'Darwin': 'libedgetpu.1.dylib',
'Windows': 'edgetpu.dll'}[platform.system()]
interpreter = tfli.Interpreter(model_path=w, experimental_delegates=[tfli.load_delegate(delegate)])
else:
LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
import tensorflow as tf
interpreter = tf.lite.Interpreter(model_path=w)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
        self.__dict__.update(locals())  # assign all variables to self
    def forward(self, im, augment=False, visualize=False, val=False):
        # YOLOv5 MultiBackend inference
        b, ch, h, w = im.shape  # batch, channel, height, width
if self.pt or self.jit:
y = self.model(im) if self.jit else self.model(im, augment=augment, visualize=visualize)
return y if val else y[0]
elif self.coreml:
im = im.permute(0, 2, 3, 1).cpu().numpy()
im = Image.fromarray((im[0] * 255).astype('uint8'))
y = self.model.predict({'image': im})
            box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]])  # xyxy pixels
            conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float32)  # np.float is removed in modern NumPy
            y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
elif self.onnx:
im = im.cpu().numpy()
if self.dnn:
self.net.setInput(im)
y = self.net.forward()
else:
y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
elif self.engine:
assert im.shape == self.bindings['images'].shape, (im.shape, self.bindings['images'].shape)
self.binding_addrs['images'] = int(im.data_ptr())
self.context.execute_v2(list(self.binding_addrs.values()))
y = self.bindings['output'].data
else:
im = im.permute(0, 2, 3, 1).cpu().numpy()
if self.pb:
y = self.frozen_func(x=self.tf.constant(im)).numpy()
elif self.saved_model:
y = self.model(im, training=False).numpy()
elif self.tflite:
input, output = self.input_details[0], self.output_details[0]
int8 = input['dtype'] == np.uint8
if int8:
scale, zero_point = input['quantization']
im = (im / scale + zero_point).astype(np.uint8)
self.interpreter.set_tensor(input['index'], im)
self.interpreter.invoke()
y = self.interpreter.get_tensor(output['index'])
if int8:
scale, zero_point = output['quantization']
y = (y.astype(np.float32) - zero_point) * scale
            y[..., :4] *= [w, h, w, h]  # denormalize xywh from 0-1 to pixels
y = torch.tensor(y) if isinstance(y, np.ndarray) else y
return (y, []) if val else y
    def warmup(self, imgsz=(1, 3, 640, 640), half=False):
        # Warmup model by running one dummy inference (PyTorch, TensorRT and ONNX backends only)
        if self.pt or self.engine or self.onnx:
if isinstance(self.device, torch.device) and self.device.type != 'cpu':
im = torch.zeros(*imgsz).to(self.device).type(torch.half if half else torch.float)
self.forward(im)
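# Usage sketch (illustrative; assumes weights are available locally or downloadable):
#   model = DetectMultiBackend('yolov5s.pt', device=torch.device('cpu'))
#   model.warmup(imgsz=(1, 3, 640, 640))  # run once to initialize
#   y = model(torch.zeros(1, 3, 640, 640))  # raw predictions, pre-NMS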
class AutoShape(nn.Module):
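    """YOLOv5 input-robust model wrapper accepting cv2/np/PIL/torch inputs. Includes preprocessing, inference
    and NMS. The class attributes below are NMS settings and may be overridden per instance.
    """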
    conf = 0.25  # NMS confidence threshold
    iou = 0.45  # NMS IoU threshold
    agnostic = False  # NMS class-agnostic
    multi_label = False  # NMS multiple labels per box
    classes = None  # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs
    max_det = 1000  # maximum number of detections per image
    amp = False  # Automatic Mixed Precision (AMP) inference
def __init__(self, model):
super().__init__()
LOGGER.info('Adding AutoShape... ')
copy_attr(self, model, include=('yaml', 'nc', 'hyp', 'names', 'stride', 'abc'), exclude=())
self.dmb = isinstance(model, DetectMultiBackend)
self.pt = not self.dmb or model.pt
self.model = model.eval()
    def _apply(self, fn):
        # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
        self = super()._apply(fn)
if self.pt:
m = self.model.model.model[-1] if self.dmb else self.model.model[-1]
m.stride = fn(m.stride)
m.grid = list(map(fn, m.grid))
if isinstance(m.anchor_grid, list):
m.anchor_grid = list(map(fn, m.anchor_grid))
return self
@torch.no_grad()
def forward(self, imgs, size=640, augment=False, profile=False):
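        # Inference from various sources. For height=640, width=1280, RGB images example inputs are:
        #   file:       imgs = 'data/images/zidane.jpg'  # str or PosixPath
        #   URI:             = 'https://ultralytics.com/images/zidane.jpg'
        #   OpenCV:          = cv2.imread('image.jpg')[:, :, ::-1]  # HWC BGR to RGB x(640,1280,3)
        #   PIL:             = Image.open('image.jpg') or ImageGrab.grab()  # HWC x(640,1280,3)
        #   numpy:           = np.zeros((640,1280,3))  # HWC
        #   torch:           = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
        #   multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images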
t = [time_sync()]
        p = next(self.model.parameters()) if self.pt else torch.zeros(1)  # for device and type
        autocast = self.amp and (p.device.type != 'cpu')  # Automatic Mixed Precision (AMP) inference
if isinstance(imgs, torch.Tensor):
with amp.autocast(enabled=autocast):
return self.model(imgs.to(p.device).type_as(p), augment, profile)
        n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs])  # number of images, list of images
        shape0, shape1, files = [], [], []  # image shapes, inference shapes, filenames
for i, im in enumerate(imgs):
f = f'image{i}'
if isinstance(im, (str, Path)):
im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im
im = np.asarray(exif_transpose(im))
elif isinstance(im, Image.Image):
im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f
files.append(Path(f).with_suffix('.jpg').name)
            if im.shape[0] < 5:  # image in CHW
                im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
            im = im[..., :3] if im.ndim == 3 else np.tile(im[..., None], 3)  # enforce 3ch input
            s = im.shape[:2]  # HWC
            shape0.append(s)  # image shape
            g = (size / max(s))  # gain
            shape1.append([y * g for y in s])
            imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
        shape1 = [make_divisible(x, self.stride) for x in np.stack(shape1, 0).max(0)]  # inference shape
        x = [letterbox(im, new_shape=shape1 if self.pt else size, auto=False)[0] for im in imgs]  # pad
        x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
        x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
        x = torch.from_numpy(x).to(p.device).type_as(p) / 255  # uint8 to fp16/32
t.append(time_sync())
with amp.autocast(enabled=autocast):
y = self.model(x, augment, profile)
t.append(time_sync())
y = non_max_suppression(y if self.dmb else y[0], self.conf, iou_thres=self.iou, classes=self.classes,
agnostic=self.agnostic, multi_label=self.multi_label, max_det=self.max_det)
for i in range(n):
scale_coords(shape1, y[i][:, :4], shape0[i])
t.append(time_sync())
return Detections(imgs, y, files, t, self.names, x.shape)
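# Usage sketch (illustrative; AutoShape is normally attached to a loaded model, e.g. via torch.hub):
#   model = AutoShape(DetectMultiBackend('yolov5s.pt'))
#   results = model('https://ultralytics.com/images/zidane.jpg')  # path, URL, PIL, OpenCV, numpy or torch input
#   results.print()  # also .show(), .save(), .crop(), .render(), .pandas(), .tolist()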
class Detections:
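    """YOLOv5 detections class for inference results, with boxes exposed as .xyxy/.xywh (pixels) and
    .xyxyn/.xywhn (normalized 0-1), one tensor per image.
    """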
def __init__(self, imgs, pred, files, times=(0, 0, 0, 0), names=None, shape=None):
super().__init__()
        d = pred[0].device  # device
        gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in imgs]  # normalization gains (w, h, w, h, 1, 1)
self.imgs = imgs
self.pred = pred
self.names = names
self.files = files
self.times = times
        self.xyxy = pred  # xyxy pixels
        self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
        self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
        self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
        self.n = len(self.pred)  # number of images (batch size)
        self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3))  # timestamps (ms)
self.s = shape
def display(self, pprint=False, show=False, save=False, crop=False, render=False, save_dir=Path('')):
crops = []
for i, (im, pred) in enumerate(zip(self.imgs, self.pred)):
s = f'image {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} '
if pred.shape[0]:
for c in pred[:, -1].unique():
n = (pred[:, -1] == c).sum()
s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "
if show or save or render or crop:
annotator = Annotator(im, example=str(self.names))
for *box, conf, cls in reversed(pred):
label = f'{self.names[int(cls)]} {conf:.2f}'
if crop:
file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None
crops.append({'box': box, 'conf': conf, 'cls': cls, 'label': label,
'im': save_one_box(box, im, file=file, save=save)})
else:
annotator.box_label(box, label, color=colors(cls))
im = annotator.im
else:
s += '(no detections)'
im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im
if pprint:
LOGGER.info(s.rstrip(', '))
if show:
im.show(self.files[i])
if save:
f = self.files[i]
im.save(save_dir / f)
if i == self.n - 1:
LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
if render:
self.imgs[i] = np.asarray(im)
if crop:
if save:
LOGGER.info(f'Saved results to {save_dir}\n')
return crops
def print(self):
self.display(pprint=True)
LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' %
self.t)
def show(self):
self.display(show=True)
def save(self, save_dir='runs/detect/exp'):
save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True)
self.display(save=True, save_dir=save_dir)
def crop(self, save=True, save_dir='runs/detect/exp'):
save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) if save else None
return self.display(crop=True, save=save, save_dir=save_dir)
def render(self):
self.display(render=True)
return self.imgs
    def pandas(self):
        # return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
        new = copy(self)  # return copy
ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name'
cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name'
for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)]
setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
return new
    def tolist(self):
        # return a list of Detections objects, i.e. 'for result in results.tolist():'
        r = range(self.n)  # iterable
x = [Detections([self.imgs[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s) for i in r]
return x
def __len__(self):
return self.n
class Classify(nn.Module):
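    """Classification head, i.e. x(b,c1,20,20) -> x(b,c2)."""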
def __init__(self, c1, c2, k=1, s=1, p=None, g=1):
super().__init__()
self.aap = nn.AdaptiveAvgPool2d(1)
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g)
self.flat = nn.Flatten()
def forward(self, x):
z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1)
return self.flat(self.conv(z))