CoCalc -- transform.py

GitHub Repository: jantic/deoldify
Path: blob/master/fastai/vision/transform.py
⁸⁴⁰ views
1
"Image transformations for data augmentation. All transforms are done on the tensor level"
2
from ..torch_core import *
3
from .image import *
4
from .image import _affine_mult
5

6
# Names exported by this module on `from ... import *`.
__all__ = ['brightness', 'contrast', 'crop', 'crop_pad', 'cutout', 'dihedral', 'dihedral_affine', 'flip_affine', 'flip_lr',
           'get_transforms', 'jitter', 'pad', 'perspective_warp', 'rand_pad', 'rand_crop', 'rand_zoom', 'rgb_randomize', 'rotate', 'skew', 'squish',
           'rand_resize_crop', 'symmetric_warp', 'tilt', 'zoom', 'zoom_crop']
9

10
# Translates the padding-mode names accepted by this module into the `mode` strings passed to `F.pad`.
_pad_mode_convert = {'reflection':'reflect', 'zeros':'constant', 'border':'replicate'}
11

12
#NB: Although TfmLighting etc can be used as decorators, that doesn't work in Windows,
13
#    so we do it manually for now.
14

15
def _brightness(x, change:uniform):
16
    "Apply `change` in brightness of image `x`."
17
    return x.add_(scipy.special.logit(change))
18
# Public `brightness` transform: `_brightness` wrapped in `TfmLighting`.
brightness = TfmLighting(_brightness)
19

20
def _contrast(x, scale:log_uniform):
21
    "Apply `scale` to contrast of image `x`."
22
    return x.mul_(scale)
23
# Public `contrast` transform: `_contrast` wrapped in `TfmLighting`.
contrast = TfmLighting(_contrast)
24

25
def _rotate(degrees:uniform):
26
    "Rotate image by `degrees`."
27
    angle = degrees * math.pi / 180
28
    return [[float(cos(angle)), float(-sin(angle)), 0.],
29
            [float(sin(angle)),  float(cos(angle)), 0.],
30
            [0.        ,  0.        , 1.]]
31
# Public `rotate` transform: `_rotate` wrapped in `TfmAffine`.
rotate = TfmAffine(_rotate)
32

33
def _get_zoom_mat(sw:float, sh:float, c:float, r:float)->AffineMatrix:
34
    "`sw`,`sh` scale width,height - `c`,`r` focus col,row."
35
    return [[sw, 0,  c],
36
            [0, sh,  r],
37
            [0,  0, 1.]]
38

39
def _zoom(scale:uniform=1.0, row_pct:uniform=0.5, col_pct:uniform=0.5):
    "Zoom matrix for a magnification of `scale`; `row_pct`,`col_pct` pick the focal point of the zoom."
    inv_scale = 1/scale
    # Offsets are proportional to the fraction of the image that the zoom leaves out.
    margin = 1-inv_scale
    col_offset = margin * (2*col_pct - 1)
    row_offset = margin * (2*row_pct - 1)
    return _get_zoom_mat(inv_scale, inv_scale, col_offset, row_offset)
zoom = TfmAffine(_zoom)
46

47
def _squish(scale:uniform=1.0, row_pct:uniform=0.5, col_pct:uniform=0.5):
    "Squish matrix: `scale`<=1 scales the width by `scale`, otherwise the height by 1/`scale`; `row_pct`,`col_pct` pick the focal point."
    if scale <= 1:
        col_offset = (1-scale) * (2*col_pct - 1)
        return _get_zoom_mat(scale, 1, col_offset, 0.)
    inv_scale = 1/scale
    row_offset = (1-inv_scale) * (2*row_pct - 1)
    return _get_zoom_mat(1, inv_scale, 0., row_offset)
squish = TfmAffine(_squish)
56

57
def _jitter(c, magnitude:uniform):
58
    "Replace pixels by random neighbors at `magnitude`."
59
    c.flow.add_((torch.rand_like(c.flow)-0.5)*magnitude*2)
60
    return c
61
# Public `jitter` transform: `_jitter` wrapped in `TfmCoord`.
jitter = TfmCoord(_jitter)
62

63
def _flip_lr(x):
    "Flip `x` horizontally: negate the first flow coordinate of `ImagePoints`, otherwise reverse the last axis."
    if not isinstance(x, ImagePoints):
        # Reversed numpy views have negative strides, hence the contiguous copy before building the tensor.
        mirrored = np.array(x)[...,::-1]
        return tensor(np.ascontiguousarray(mirrored))
    x.flow.flow[...,0] *= -1
    return x
flip_lr = TfmPixel(_flip_lr)
71

72
def _flip_affine() -> TfmAffine:
73
    "Flip `x` horizontally."
74
    return [[-1, 0, 0.],
75
            [0,  1, 0],
76
            [0,  0, 1.]]
77
# Public `flip_affine` transform: `_flip_affine` wrapped in `TfmAffine`.
flip_affine = TfmAffine(_flip_affine)
78

79
def _dihedral(x, k:partial(uniform_int,0,7)):
80
    "Randomly flip `x` image based on `k`."
81
    flips=[]
82
    if k&1: flips.append(1)
83
    if k&2: flips.append(2)
84
    if flips: x = torch.flip(x,flips)
85
    if k&4: x = x.transpose(1,2)
86
    return x.contiguous()
87
# Public `dihedral` transform: `_dihedral` wrapped in `TfmPixel`.
dihedral = TfmPixel(_dihedral)
88

89
def _dihedral_affine(k:partial(uniform_int,0,7)):
90
    "Randomly flip `x` image based on `k`."
91
    x = -1 if k&1 else 1
92
    y = -1 if k&2 else 1
93
    if k&4: return [[0, x, 0.],
94
                    [y, 0, 0],
95
                    [0, 0, 1.]]
96
    return [[x, 0, 0.],
97
            [0, y, 0],
98
            [0, 0, 1.]]
99
# Public `dihedral_affine` transform: `_dihedral_affine` wrapped in `TfmAffine`.
dihedral_affine = TfmAffine(_dihedral_affine)
100

101
def _pad_coord(x, row_pad:int, col_pad:int, mode='zeros'):
    "Grow the size of `x` by `row_pad`/`col_pad` on each side and rescale its flow to match; `mode` is currently unused."
    #TODO: implement other padding modes than zeros?
    h,w = x.size
    new_h, new_w = h + 2*row_pad, w + 2*col_pad
    shrink = torch.Tensor([w/new_w, h/new_h])
    x.flow = FlowField((new_h, new_w), x.flow.flow * shrink[None])
    return x
107

108
def _pad_default(x, padding:int, mode='reflection'):
    "Pad the last two dims of `x` by `padding` on every side; `mode` is one of 'zeros', 'reflection', 'border'."
    torch_mode = _pad_mode_convert[mode]
    # `F.pad` is applied to a batch of one, so add a leading dim and strip it again afterwards.
    batched = x[None]
    padded = F.pad(batched, (padding,)*4, mode=torch_mode)
    return padded[0]
112

113
def _pad_image_points(x, padding:int, mode='reflection'):
    "Pad `x` by `padding` in both directions through `_pad_coord`."
    row_pad = col_pad = padding
    return _pad_coord(x, row_pad, col_pad, mode)
115

116
def _pad(x, padding:int, mode='reflection'):
    "Pad `x` by `padding`, dispatching on whether `x` is an `ImagePoints`."
    if isinstance(x, ImagePoints):
        return _pad_image_points(x, padding, mode)
    return _pad_default(x, padding, mode)

pad = TfmPixel(_pad, order=-10)
121

122
def _cutout(x, n_holes:uniform_int=1, length:uniform_int=40):
123
    "Cut out `n_holes` number of square holes of size `length` in image at random locations."
124
    h,w = x.shape[1:]
125
    for n in range(n_holes):
126
        h_y = np.random.randint(0, h)
127
        h_x = np.random.randint(0, w)
128
        y1 = int(np.clip(h_y - length / 2, 0, h))
129
        y2 = int(np.clip(h_y + length / 2, 0, h))
130
        x1 = int(np.clip(h_x - length / 2, 0, w))
131
        x2 = int(np.clip(h_x + length / 2, 0, w))
132
        x[:, y1:y2, x1:x2] = 0
133
    return x
134

135
# Public `cutout` transform: `_cutout` wrapped in `TfmPixel` with `order=20`.
cutout = TfmPixel(_cutout, order=20)
136

137
def _rgb_randomize(x, channel:int=None, thresh:float=0.3):
138
    "Randomize one of the channels of the input image"
139
    if channel is None: channel = np.random.randint(0, x.shape[0] - 1)
140
    x[channel] = torch.rand(x.shape[1:]) * np.random.uniform(0, thresh)
141
    return x
142

143
# Public `rgb_randomize` transform: `_rgb_randomize` wrapped in `TfmPixel`.
rgb_randomize = TfmPixel(_rgb_randomize)
144

145
def _minus_epsilon(row_pct:float, col_pct:float, eps:float=1e-7):
146
    if row_pct==1.: row_pct -= 1e-7
147
    if col_pct==1.: col_pct -= 1e-7
148
    return row_pct,col_pct
149

150
def _crop_default(x, size, row_pct:uniform=0.5, col_pct:uniform=0.5):
    "Cut a window of `size` pixels out of `x`; `row_pct`,`col_pct` position the window."
    crop_h,crop_w = tis2hw(size)
    row_pct,col_pct = _minus_epsilon(row_pct,col_pct)
    # Number of valid start positions along each axis, scaled by the requested percentage.
    top = int((x.size(1)-crop_h+1) * row_pct)
    left = int((x.size(2)-crop_w+1) * col_pct)
    window = x[:, top:top+crop_h, left:left+crop_w]
    return window.contiguous()
157

158
def _crop_image_points(x, size, row_pct=0.5, col_pct=0.5):
    "Crop `x` to `size` by rescaling and shifting its flow in place; used by `_crop` when `x` is an `ImagePoints`."
    h,w = x.size
    rows,cols = tis2hw(size)
    row_pct,col_pct = _minus_epsilon(row_pct,col_pct)
    # Rescale by the ratio of the old size to the crop size (width ratio first, then height ratio).
    # NOTE(review): presumably the flow stores (x, y) pairs in normalized coordinates - confirm.
    x.flow.flow.mul_(torch.Tensor([w/cols, h/rows])[None])
    # Same start-position formula as in `_crop_default`.
    row = int((h-rows+1) * row_pct)
    col = int((w-cols+1) * col_pct)
    # Shift so the coordinates are expressed relative to the crop window starting at (`row`, `col`).
    x.flow.flow.add_(-1 + torch.Tensor([w/cols-2*col/cols, h/rows-2*row/rows])[None])
    # The size is updated last, after the flow has been transformed.
    x.size = (rows, cols)
    return x
168

169
def _crop(x, size, row_pct:uniform=0.5, col_pct:uniform=0.5):
    "Crop `x` to `size`, dispatching on whether `x` is an `ImagePoints`."
    if isinstance(x, ImagePoints):
        return _crop_image_points(x, size, row_pct, col_pct)
    return _crop_default(x, size, row_pct, col_pct)

crop = TfmPixel(_crop)
174

175
def _crop_pad_default(x, size, padding_mode='reflection', row_pct:uniform = 0.5, col_pct:uniform = 0.5):
    "Bring `x` to `size`: pad with `padding_mode` where it is too small, then crop around the `row_pct`,`col_pct` focal point."
    torch_mode = _pad_mode_convert[padding_mode]
    target = tis2hw(size)
    # Nothing to do when the spatial dims already match.
    if x.shape[1:] == torch.Size(target): return x
    rows,cols = target
    row_pct,col_pct = _minus_epsilon(row_pct,col_pct)
    too_small = x.size(1)<rows or x.size(2)<cols
    if too_small:
        # Symmetric padding, rounded up, and never negative along an axis that is already large enough.
        pad_rows = max((rows-x.size(1)+1)//2, 0)
        pad_cols = max((cols-x.size(2)+1)//2, 0)
        x = F.pad(x[None], (pad_cols,pad_cols,pad_rows,pad_rows), mode=torch_mode)[0]
    top = int((x.size(1)-rows+1)*row_pct)
    left = int((x.size(2)-cols+1)*col_pct)
    cropped = x[:, top:top+rows, left:left+cols]
    return cropped.contiguous() # without this, get NaN later - don't know why
190

191
def _crop_pad_image_points(x, size, padding_mode='reflection', row_pct = 0.5, col_pct = 0.5):
    "`ImagePoints` version of crop-and-pad: pad through `_pad_coord` if `x` is too small, then `crop`. `padding_mode` is not used here."
    rows,cols = tis2hw(size)
    cur_h, cur_w = x.size[0], x.size[1]
    if cur_h<rows or cur_w<cols:
        pad_rows = max((rows-cur_h+1)//2, 0)
        pad_cols = max((cols-cur_w+1)//2, 0)
        x = _pad_coord(x, pad_rows, pad_cols)
    return crop(x,(rows,cols), row_pct, col_pct)
199

200
def _crop_pad(x, size, padding_mode='reflection', row_pct:uniform = 0.5, col_pct:uniform = 0.5):
    "Crop and pad `x` to `size`, dispatching on whether `x` is an `ImagePoints`."
    if isinstance(x, ImagePoints):
        return _crop_pad_image_points(x, size, padding_mode, row_pct, col_pct)
    return _crop_pad_default(x, size, padding_mode, row_pct, col_pct)

crop_pad = TfmCrop(_crop_pad)
205

206
def _image_maybe_add_crop_pad(img, tfms):
    "Return `tfms` with a default `crop_pad()` put in front unless a transform named 'crop_pad' is already present."
    names = [tfm.__name__ for tfm in tfms]
    if 'crop_pad' in names: return tfms
    return [crop_pad()] + tfms
Image._maybe_add_crop_pad = _image_maybe_add_crop_pad
210

211
# Keyword ranges that let `row_pct` and `col_pct` vary over (0,1); shared by the `rand_*` helpers below.
rand_pos = {'row_pct':(0,1), 'col_pct':(0,1)}
212

213
def rand_pad(padding:int, size:int, mode:str='reflection'):
    "Return a `pad` of fixed `padding` in `mode`, followed by a `crop` of `size` at a random position."
    pad_tfm = pad(padding=padding,mode=mode)
    crop_tfm = crop(size=size, **rand_pos)
    return [pad_tfm, crop_tfm]
217

218
def rand_zoom(scale:uniform=1.0, p:float=1.):
    "`zoom` by `scale` with a random focal point, applied with probability `p`."
    zoom_tfm = zoom(scale=scale, **rand_pos, p=p)
    return zoom_tfm
221

222
def rand_crop(*args, padding_mode='reflection', p:float=1.):
    "`crop_pad` with a random focal point, using `padding_mode` and applied with probability `p`."
    crop_tfm = crop_pad(*args, **rand_pos, padding_mode=padding_mode, p=p)
    return crop_tfm
225

226
def zoom_crop(scale:float, do_rand:bool=False, p:float=1.0):
    "Return a zoom by `scale` followed by a crop; `do_rand` selects the randomized variants of both."
    if do_rand: zoom_fn, crop_fn = rand_zoom, rand_crop
    else:       zoom_fn, crop_fn = zoom, crop_pad
    return [zoom_fn(scale=scale, p=p), crop_fn()]
231

232
def _find_coeffs(orig_pts:Points, targ_pts:Points)->Tensor:
233
    "Find 8 coeff mentioned [here](https://web.archive.org/web/20150222120106/xenia.media.mit.edu/~cwren/interpolator/)."
234
    matrix = []
235
    #The equations we'll need to solve.
236
    for p1, p2 in zip(targ_pts, orig_pts):
237
        matrix.append([p1[0], p1[1], 1, 0, 0, 0, -p2[0]*p1[0], -p2[0]*p1[1]])
238
        matrix.append([0, 0, 0, p1[0], p1[1], 1, -p2[1]*p1[0], -p2[1]*p1[1]])
239

240
    A = FloatTensor(matrix)
241
    B = FloatTensor(orig_pts).view(8, 1)
242
    #The 8 scalars we seek are solution of AX = B
243
    return torch.linalg.solve(A,B)[:,0]
244

245
def _apply_perspective(coords:FlowField, coeffs:Tensor)->FlowField:
    "Transform `coords` with the 8 perspective `coeffs` (as returned by `_find_coeffs`) and return `coords`."
    #Remember the original shape so it can be restored at the end
    size = coords.flow.size()
    #Collapse all the dims except the last one, so the coordinates form an N x 2 matrix
    coords.flow = coords.flow.view(-1,2)
    #Append a 1 to the 8 coeffs and reshape them into a 3 x 3 matrix (the 1 ends up in the bottom-right corner)
    coeffs = torch.cat([coeffs, FloatTensor([1])]).view(3,3)
    #Multiply by the first two columns of the matrix and add its last column: the result is N x 3
    coords.flow = torch.addmm(coeffs[:,2], coords.flow, coeffs[:,:2].t())
    #Divide every row, in place, by its third component
    coords.flow.mul_(1/coords.flow[:,2].unsqueeze(1))
    #Keep the first two components and go back to the original shape
    coords.flow = coords.flow[:,:2].view(size)
    return coords
256

257
# Reference points (+-1, +-1) that the perspective warps below map from, or to when `invert` is set.
_orig_pts = [[-1,-1], [-1,1], [1,-1], [1,1]]
258

259
def _do_perspective_warp(c:FlowField, targ_pts:Points, invert=False):
    "Warp `c` with the perspective transform between `_orig_pts` and `targ_pts`; `invert` swaps the two point sets."
    if invert: first, second = targ_pts, _orig_pts
    else:      first, second = _orig_pts, targ_pts
    coeffs = _find_coeffs(first, second)
    return _apply_perspective(c, coeffs)
263

264
def _perspective_warp(c, magnitude:partial(uniform,size=8)=0, invert=False):
    "Warp `c` by moving each of the 4 points of `_orig_pts` by the matching pair of values in `magnitude`."
    offsets = magnitude.view(4,2)
    targ_pts = []
    for corner, shift in zip(_orig_pts, offsets):
        targ_pts.append([x+m for x,m in zip(corner, shift)])
    return _do_perspective_warp(c, targ_pts, invert)
perspective_warp = TfmCoord(_perspective_warp)
270

271
def _symmetric_warp(c, magnitude:partial(uniform,size=4)=0, invert=False):
    "Warp `c` with 4 magnitudes, each applied with opposite signs to two of the points of the reference square."
    mags = listify(magnitude, 4)
    pt0 = [-1-mags[3], -1-mags[1]]
    pt1 = [-1-mags[2],  1+mags[1]]
    pt2 = [ 1+mags[3], -1-mags[0]]
    pt3 = [ 1+mags[2],  1+mags[0]]
    return _do_perspective_warp(c, [pt0, pt1, pt2, pt3], invert)
symmetric_warp = TfmCoord(_symmetric_warp)
277

278
def _tilt(c, direction:uniform_int, magnitude:uniform=0, invert=False):
    """Tilt `c` field with random `direction` and `magnitude`.

    `direction` (0, 1, 2 or 3) selects which two points of the reference square are moved by `magnitude`
    (with opposite signs); `invert` swaps the roles of the reference and target points.
    Returns the warped `c`. Raises `ValueError` for any other `direction`.
    """
    if direction == 0:   targ_pts = [[-1,-1], [-1,1], [1,-1-magnitude], [1,1+magnitude]]
    elif direction == 1: targ_pts = [[-1,-1-magnitude], [-1,1+magnitude], [1,-1], [1,1]]
    elif direction == 2: targ_pts = [[-1,-1], [-1-magnitude,1], [1,-1], [1+magnitude,1]]
    elif direction == 3: targ_pts = [[-1-magnitude,-1], [-1,1], [1+magnitude,-1], [1,1]]
    # Fail with a clear message instead of an UnboundLocalError on `targ_pts`.
    else: raise ValueError("`direction` must be 0, 1, 2 or 3, got %r" % (direction,))
    # Same coefficient computation as the other perspective warps.
    return _do_perspective_warp(c, targ_pts, invert)
tilt = TfmCoord(_tilt)
288

289
def _skew(c, direction:uniform_int, magnitude:uniform=0, invert=False):
    """Skew `c` field with random `direction` and `magnitude`.

    `direction` (0 to 7) selects the single coordinate of a single point of the reference square that is
    moved by `magnitude`; `invert` swaps the roles of the reference and target points.
    Returns the warped `c`. Raises `ValueError` for any other `direction`.
    """
    if direction == 0:   targ_pts = [[-1-magnitude,-1], [-1,1], [1,-1], [1,1]]
    elif direction == 1: targ_pts = [[-1,-1-magnitude], [-1,1], [1,-1], [1,1]]
    elif direction == 2: targ_pts = [[-1,-1], [-1-magnitude,1], [1,-1], [1,1]]
    elif direction == 3: targ_pts = [[-1,-1], [-1,1+magnitude], [1,-1], [1,1]]
    elif direction == 4: targ_pts = [[-1,-1], [-1,1], [1+magnitude,-1], [1,1]]
    elif direction == 5: targ_pts = [[-1,-1], [-1,1], [1,-1-magnitude], [1,1]]
    elif direction == 6: targ_pts = [[-1,-1], [-1,1], [1,-1], [1+magnitude,1]]
    elif direction == 7: targ_pts = [[-1,-1], [-1,1], [1,-1], [1,1+magnitude]]
    # Fail with a clear message instead of an UnboundLocalError on `targ_pts`.
    else: raise ValueError("`direction` must be an integer from 0 to 7, got %r" % (direction,))
    # Same coefficient computation as the other perspective warps.
    return _do_perspective_warp(c, targ_pts, invert)
skew = TfmCoord(_skew)
303

304
def get_transforms(do_flip:bool=True, flip_vert:bool=False, max_rotate:float=10., max_zoom:float=1.1,
                   max_lighting:float=0.2, max_warp:float=0.2, p_affine:float=0.75,
                   p_lighting:float=0.75, xtra_tfms:Optional[Collection[Transform]]=None)->Collection[Transform]:
    "Build the `(train, valid)` transform lists: random crop, optional flip, warp, rotate, `zoom` and lighting, plus `xtra_tfms`."
    train_tfms = [rand_crop()]
    if do_flip:
        flip_tfm = dihedral_affine() if flip_vert else flip_lr(p=0.5)
        train_tfms.append(flip_tfm)
    if max_warp:
        train_tfms.append(symmetric_warp(magnitude=(-max_warp,max_warp), p=p_affine))
    if max_rotate:
        train_tfms.append(rotate(degrees=(-max_rotate,max_rotate), p=p_affine))
    if max_zoom>1:
        train_tfms.append(rand_zoom(scale=(1.,max_zoom), p=p_affine))
    if max_lighting:
        brightness_range = (0.5*(1-max_lighting), 0.5*(1+max_lighting))
        train_tfms.append(brightness(change=brightness_range, p=p_lighting))
        contrast_range = (1-max_lighting, 1/(1-max_lighting))
        train_tfms.append(contrast(scale=contrast_range, p=p_lighting))
    train_tfms = train_tfms + listify(xtra_tfms)
    # The validation side only gets the default crop/pad.
    return (train_tfms, [crop_pad()])
318

319
def _compute_zs_mat(sz:TensorImageSize, scale:float, squish:float,
                   invert:bool, row_pct:float, col_pct:float)->AffineMatrix:
    "Return the zoom/squish matrix of the first (`scale`, `squish`, `invert`) candidate that passes the bounds test, else a center-crop-like fallback."
    orig_ratio = math.sqrt(sz[1]/sz[0])
    for cand_scale, cand_squish, flip in zip(scale,squish, invert):
        zoom_f, ratio_f = 1/math.sqrt(cand_scale),math.sqrt(cand_squish)
        #Skip candidates that would not be completely inside the picture
        if not (zoom_f * ratio_f <= 1 and zoom_f / ratio_f <= 1): continue
        if flip: w,h = zoom_f/ratio_f, zoom_f*ratio_f
        else:    w,h = zoom_f*ratio_f, zoom_f/ratio_f
        col_c = (1-w) * (2*col_pct - 1)
        row_c = (1-h) * (2*row_pct - 1)
        return _get_zoom_mat(w, h, col_c, row_c)

    #Fallback, hack to emulate a center crop without cropping anything yet.
    if orig_ratio > 1: return _get_zoom_mat(1/orig_ratio**2, 1, 0, 0.)
    return _get_zoom_mat(1, orig_ratio**2, 0, 0.)
334

335
def _zoom_squish(c, scale:uniform=1.0, squish:uniform=1.0, invert:rand_bool=False,
                row_pct:uniform=0.5, col_pct:uniform=0.5):
    "Apply to `c` the zoom/squish matrix computed by `_compute_zs_mat`."
    #This is intended for scale, squish and invert to be of size 10 (or whatever) so that the transform
    #can try a few zoom/squishes before falling back to center crop (like torchvision.RandomResizedCrop)
    mat = _compute_zs_mat(c.size, scale, squish, invert, row_pct, col_pct)
    mat = FloatTensor(mat)
    return _affine_mult(c, mat)
zoom_squish = TfmCoord(_zoom_squish)
342

343
def rand_resize_crop(size:int, max_scale:float=2., ratios:Tuple[float,float]=(0.75,1.33)):
    "Return a random `zoom_squish` (zoom up to `max_scale`, ratio within `ratios`) followed by a `crop` to `size`."
    resize_tfm = zoom_squish(scale=(1.,max_scale,8), squish=(*ratios,8), invert=(0.5,8), row_pct=(0.,1.), col_pct=(0.,1.))
    crop_tfm = crop(size=size)
    return [resize_tfm, crop_tfm]
347

348
Product

Resources

Company