CoCalc -- fid_score.py

GitHub Repository: jantic/deoldify
Path: blob/master/fid/fid_score.py
⁷⁸¹ views
1
#!/usr/bin/env python3
2

3
# Code adapted and modified from https://github.com/mseitzer/pytorch-fid.  Licensing
4
# and description duplicated below.
5

6
"""Calculates the Frechet Inception Distance (FID) to evaluate GANs
7

8
The FID metric calculates the distance between two distributions of images.
9
Typically, we have summary statistics (mean & covariance matrix) of one
10
of these distributions, while the 2nd distribution is given by a GAN.
11

12
When run as a stand-alone program, it compares the distribution of
13
images that are stored as PNG/JPEG at a specified location with a
14
distribution given by summary statistics (in .npz format).
15

16
The FID is calculated by assuming that X_1 and X_2 are the activations of
17
the pool_3 layer of the inception net for generated samples and real world
18
samples respectively.
19

20
See --help to see further details.
21

22
Code adapted from https://github.com/bioinf-jku/TTUR to use PyTorch instead
23
of Tensorflow
24

25
Copyright 2018 Institute of Bioinformatics, JKU Linz
26

27
Licensed under the Apache License, Version 2.0 (the "License");
28
you may not use this file except in compliance with the License.
29
You may obtain a copy of the License at
30

31
   http://www.apache.org/licenses/LICENSE-2.0
32

33
Unless required by applicable law or agreed to in writing, software
34
distributed under the License is distributed on an "AS IS" BASIS,
35
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
36
See the License for the specific language governing permissions and
37
limitations under the License.
38
"""
39
import os
40
import pathlib
41
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
42

43
import numpy as np
44
import torch
45
from scipy import linalg
46
from torch.nn.functional import adaptive_avg_pool2d
47
import cv2
48
import imageio
49

50
try:
    from tqdm import tqdm
except ImportError:
    # tqdm is optional. If it is not available, provide a mock version that
    # hands the iterable back unchanged. Extra positional and keyword options
    # are accepted and ignored, so calls written for the real tqdm keep
    # working without it.
    def tqdm(x, *args, **kwargs):
        """Return ``x`` unchanged; any further options are ignored."""
        return x
56

57

58
from .inception import InceptionV3
59

60
# Command-line interface used when this module is executed as a script.
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)

# Exactly two positional inputs are required.
parser.add_argument(
    'path',
    nargs=2,
    type=str,
    help='Path to the generated images or to .npz statistic files',
)

# Number of images pushed through the model per forward pass.
parser.add_argument(
    '--batch-size',
    default=50,
    type=int,
    help='Batch size to use',
)

# Only feature widths known to InceptionV3.BLOCK_INDEX_BY_DIM are accepted.
parser.add_argument(
    '--dims',
    default=2048,
    type=int,
    choices=list(InceptionV3.BLOCK_INDEX_BY_DIM),
    help='Dimensionality of Inception features to use. By default, uses pool3 features',
)

# The default empty string is treated by the script entry point as "no GPU".
parser.add_argument(
    '-c',
    '--gpu',
    type=str,
    default='',
    help='GPU to use (leave blank for CPU only)',
)
81

82

83
def load_image_resized(fn, sz):
    """Load an image file and resize it to a square float32 RGB array.

    Params:
    -- fn : Path of the image file (converted with ``str`` before reading)
    -- sz : Edge length in pixels of the square output

    Returns:
    -- A float32 numpy array of shape (sz, sz, 3). Pixel values are not
       rescaled here.
    """
    img = np.asarray(imageio.imread(str(fn)))

    # Batches built from these arrays are transposed as (n, h, w, c) and fed to
    # a 3-channel network, so normalise the channel layout before resizing.
    if img.ndim == 2:
        img = img[:, :, np.newaxis]
    n_channels = img.shape[2]
    if n_channels < 3:
        # Grayscale (optionally with alpha): replicate the first channel.
        img = np.repeat(img[:, :, :1], 3, axis=2)
    elif n_channels > 3:
        # Drop the alpha channel; keep a contiguous buffer for OpenCV.
        img = np.ascontiguousarray(img[:, :, :3])

    resized = cv2.resize(img, dsize=(sz, sz), interpolation=cv2.INTER_CUBIC)
    return resized.astype(np.float32)
87

88

89
def get_activations(
    files,
    model,
    batch_size=50,
    dims=2048,
    cuda=False,
    verbose=False,
    eval_size: int = 299,
):
    """Calculates the activations of the pool_3 layer for all images.

    Params:
    -- files       : List of image files paths
    -- model       : Instance of inception model
    -- batch_size  : Batch size of images for the model to process at once.
                     Make sure that the number of samples is a multiple of
                     the batch size, otherwise some samples are ignored. This
                     behavior is retained to match the original FID score
                     implementation.
    -- dims        : Dimensionality of features returned by Inception
    -- cuda        : If set to True, use GPU
    -- verbose     : If set to True, the number of the batch currently being
                     propagated is reported.
    -- eval_size   : Edge length (in pixels) every image is resized to before
                     it is fed to the model.

    Returns:
    -- A numpy array of dimension (num images, dims) that contains the
       activations of the given tensor when feeding inception with the
       query tensor.

    Raises:
    -- ValueError  : If `files` is empty.
    """
    # Fail early with a clear message instead of a ZeroDivisionError further
    # down, which is what an empty file list used to produce.
    if len(files) == 0:
        raise ValueError('No image files given; cannot compute activations')

    model.eval()

    # Clamp the batch size first, so that the "samples ignored" warning below
    # is only printed when samples really are dropped.
    if batch_size > len(files):
        print(
            (
                'Warning: batch size is bigger than the data size. '
                'Setting batch size to data size'
            )
        )
        batch_size = len(files)

    if len(files) % batch_size != 0:
        print(
            (
                'Warning: number of images is not a multiple of the '
                'batch size. Some samples are going to be ignored.'
            )
        )

    n_batches = len(files) // batch_size
    n_used_imgs = n_batches * batch_size

    pred_arr = np.empty((n_used_imgs, dims))

    for i in tqdm(range(n_batches)):
        if verbose:
            print('\rPropagating batch %d/%d' % (i + 1, n_batches), end='', flush=True)
        start = i * batch_size
        end = start + batch_size

        images = np.array(
            [load_image_resized(fn, eval_size) for fn in files[start:end]]
        )

        # Reshape to (n_images, 3, height, width)
        images = images.transpose((0, 3, 1, 2))
        images /= 255

        batch = torch.from_numpy(images).type(torch.FloatTensor)
        if cuda:
            batch = batch.cuda()

        # Inference only: skip autograd bookkeeping to save memory and time.
        with torch.no_grad():
            pred = model(batch)[0]

            # If model output is not scalar, apply global spatial average
            # pooling. This happens if you choose a dimensionality not equal
            # 2048.
            if pred.shape[2] != 1 or pred.shape[3] != 1:
                pred = adaptive_avg_pool2d(pred, output_size=(1, 1))

        pred_arr[start:end] = pred.detach().cpu().numpy().reshape(batch_size, -1)

    if verbose:
        print(' done')

    return pred_arr
173

174

175
def calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
    """Numpy implementation of the Frechet Distance.

    For two multivariate Gaussians X_1 ~ N(mu_1, C_1) and X_2 ~ N(mu_2, C_2)
    the distance is
            d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).

    Stable version by Dougal J. Sutherland.

    Params:
    -- mu1   : Mean vector of the first distribution.
    -- sigma1: Covariance matrix of the first distribution.
    -- mu2   : Mean vector of the second distribution.
    -- sigma2: Covariance matrix of the second distribution.
    -- eps   : Value added to the diagonal of both covariance matrices when
               the matrix square root of their product is not finite.

    Returns:
    --   : The Frechet Distance (d^2 in the formula above).

    Raises:
    -- AssertionError : If the means or the covariances differ in shape.
    -- ValueError     : If the diagonal of the matrix square root has an
                        imaginary part that is not close to zero.
    """
    mean_a = np.atleast_1d(mu1)
    mean_b = np.atleast_1d(mu2)
    cov_a = np.atleast_2d(sigma1)
    cov_b = np.atleast_2d(sigma2)

    assert (
        mean_a.shape == mean_b.shape
    ), 'Training and test mean vectors have different lengths'
    assert (
        cov_a.shape == cov_b.shape
    ), 'Training and test covariances have different dimensions'

    mean_delta = mean_a - mean_b

    # The covariance product can be (nearly) singular, in which case the
    # matrix square root may contain non-finite entries.
    sqrt_prod, _ = linalg.sqrtm(np.dot(cov_a, cov_b), disp=False)
    if not np.all(np.isfinite(sqrt_prod)):
        print(
            (
                'fid calculation produces singular product; '
                'adding %s to diagonal of cov estimates'
            )
            % eps
        )
        jitter = np.eye(cov_a.shape[0]) * eps
        sqrt_prod = linalg.sqrtm(np.dot(cov_a + jitter, cov_b + jitter))

    # Round-off can leave a small imaginary part; discard it when negligible.
    if np.iscomplexobj(sqrt_prod):
        diag_imag = np.diagonal(sqrt_prod).imag
        if not np.allclose(diag_imag, 0, atol=1e-3):
            worst = np.max(np.abs(sqrt_prod.imag))
            raise ValueError('Imaginary component {}'.format(worst))
        sqrt_prod = sqrt_prod.real

    trace_sqrt = np.trace(sqrt_prod)
    return (
        np.dot(mean_delta, mean_delta)
        + np.trace(cov_a)
        + np.trace(cov_b)
        - 2 * trace_sqrt
    )
233

234

235
def calculate_activation_statistics(
    files, model, batch_size=50, dims=2048, cuda=False, verbose=False
):
    """Calculation of the statistics used by the FID.

    Params:
    -- files       : List of image files paths
    -- model       : Instance of inception model
    -- batch_size  : Number of images processed per forward pass. A
                     reasonable batch size depends on the hardware.
    -- dims        : Dimensionality of features returned by Inception
    -- cuda        : If set to True, use GPU
    -- verbose     : Forwarded to get_activations to enable progress output.

    Returns:
    -- mu    : The mean over samples of the activations returned by
               get_activations.
    -- sigma : The covariance matrix of those activations, with each column
               treated as one variable.
    """
    activations = get_activations(files, model, batch_size, dims, cuda, verbose)
    # Rows are samples and columns are features, hence axis=0 / rowvar=False.
    return np.mean(activations, axis=0), np.cov(activations, rowvar=False)
259

260

261
def _compute_statistics_of_path(path, model, batch_size, dims, cuda):
    """Return the (mu, sigma) statistics for a single input path.

    Params:
    -- path        : Either an .npz file holding the arrays 'mu' and 'sigma',
                     or a directory whose *.jpg and *.png files are used.
                     Strings and pathlib.Path objects are both accepted.
    -- model       : Instance of inception model (unused for .npz input)
    -- batch_size  : Batch size forwarded to calculate_activation_statistics
    -- dims        : Dimensionality of features returned by Inception
    -- cuda        : If set to True, use GPU

    Returns:
    -- m : The mean vector.
    -- s : The covariance matrix.
    """
    if str(path).endswith('.npz'):
        # The context manager closes the archive even if a key is missing.
        with np.load(path) as stats:
            m, s = stats['mu'][:], stats['sigma'][:]
    else:
        folder = pathlib.Path(path)
        # glob order depends on the filesystem. Sorting keeps the file list
        # reproducible, which matters because get_activations ignores trailing
        # files when the count is not a multiple of the batch size.
        files = sorted(folder.glob('*.jpg')) + sorted(folder.glob('*.png'))
        m, s = calculate_activation_statistics(files, model, batch_size, dims, cuda)

    return m, s
272

273

274
def calculate_fid_given_paths(paths, batch_size, cuda, dims):
    """Calculates the FID of two paths.

    Params:
    -- paths       : Sequence whose first two entries are the inputs to
                     compare; every entry must exist on disk.
    -- batch_size  : Batch size used when computing activations
    -- cuda        : If set to True, the model is moved to the GPU
    -- dims        : Dimensionality of Inception features; selects the block
                     via InceptionV3.BLOCK_INDEX_BY_DIM

    Returns:
    -- The Frechet distance between the statistics of paths[0] and paths[1].

    Raises:
    -- RuntimeError : If one of the given paths does not exist.
    """
    # Lazily find the first non-existent path, in the order given.
    first_missing = next((p for p in paths if not os.path.exists(p)), None)
    if first_missing is not None:
        raise RuntimeError('Invalid path: %s' % first_missing)

    model = InceptionV3([InceptionV3.BLOCK_INDEX_BY_DIM[dims]])
    if cuda:
        model.cuda()

    mu_a, cov_a = _compute_statistics_of_path(paths[0], model, batch_size, dims, cuda)
    mu_b, cov_b = _compute_statistics_of_path(paths[1], model, batch_size, dims, cuda)
    return calculate_frechet_distance(mu_a, cov_a, mu_b, cov_b)
291

292

293
if __name__ == '__main__':
    # Script entry point: parse the CLI, select the GPU and print the FID.
    args = parser.parse_args()

    # Export the requested GPU id(s) through CUDA_VISIBLE_DEVICES.
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    # An empty --gpu value means "run on the CPU".
    use_cuda = args.gpu != ''

    fid_value = calculate_fid_given_paths(
        args.path,
        args.batch_size,
        use_cuda,
        args.dims,
    )
    print('FID: ', fid_value)
301

302
Product

Resources

Company