Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/basis_universal/encoder/basisu_ssim.cpp
9903 views
1
// basisu_ssim.cpp
2
// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
3
//
4
// Licensed under the Apache License, Version 2.0 (the "License");
5
// you may not use this file except in compliance with the License.
6
// You may obtain a copy of the License at
7
//
8
// http://www.apache.org/licenses/LICENSE-2.0
9
//
10
// Unless required by applicable law or agreed to in writing, software
11
// distributed under the License is distributed on an "AS IS" BASIS,
12
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
// See the License for the specific language governing permissions and
14
// limitations under the License.
15
#include "basisu_ssim.h"
16
17
#ifndef M_PI
18
#define M_PI 3.14159265358979323846
19
#endif
20
21
namespace basisu
22
{
23
float gauss(int x, int y, float sigma_sqr)
24
{
25
float pow = expf(-((x * x + y * y) / (2.0f * sigma_sqr)));
26
float g = (1.0f / (sqrtf((float)(2.0f * M_PI * sigma_sqr)))) * pow;
27
return g;
28
}
29
30
// size_x/y should be odd
31
void compute_gaussian_kernel(float *pDst, int size_x, int size_y, float sigma_sqr, uint32_t flags)
32
{
33
assert(size_x & size_y & 1);
34
35
if (!(size_x | size_y))
36
return;
37
38
int mid_x = size_x / 2;
39
int mid_y = size_y / 2;
40
41
double sum = 0;
42
for (int x = 0; x < size_x; x++)
43
{
44
for (int y = 0; y < size_y; y++)
45
{
46
float g;
47
if ((x > mid_x) && (y < mid_y))
48
g = pDst[(size_x - x - 1) + y * size_x];
49
else if ((x < mid_x) && (y > mid_y))
50
g = pDst[x + (size_y - y - 1) * size_x];
51
else if ((x > mid_x) && (y > mid_y))
52
g = pDst[(size_x - x - 1) + (size_y - y - 1) * size_x];
53
else
54
g = gauss(x - mid_x, y - mid_y, sigma_sqr);
55
56
pDst[x + y * size_x] = g;
57
sum += g;
58
}
59
}
60
61
if (flags & cComputeGaussianFlagNormalizeCenterToOne)
62
{
63
sum = pDst[mid_x + mid_y * size_x];
64
}
65
66
if (flags & (cComputeGaussianFlagNormalizeCenterToOne | cComputeGaussianFlagNormalize))
67
{
68
double one_over_sum = 1.0f / sum;
69
for (int i = 0; i < size_x * size_y; i++)
70
pDst[i] = static_cast<float>(pDst[i] * one_over_sum);
71
72
if (flags & cComputeGaussianFlagNormalizeCenterToOne)
73
pDst[mid_x + mid_y * size_x] = 1.0f;
74
}
75
76
if (flags & cComputeGaussianFlagPrint)
77
{
78
printf("{\n");
79
for (int y = 0; y < size_y; y++)
80
{
81
printf(" ");
82
for (int x = 0; x < size_x; x++)
83
{
84
printf("%f, ", pDst[x + y * size_x]);
85
}
86
printf("\n");
87
}
88
printf("}");
89
}
90
}
91
92
void gaussian_filter(imagef &dst, const imagef &orig_img, uint32_t odd_filter_width, float sigma_sqr, bool wrapping, uint32_t width_divisor, uint32_t height_divisor)
93
{
94
assert(&dst != &orig_img);
95
96
assert(odd_filter_width && (odd_filter_width & 1));
97
odd_filter_width |= 1;
98
99
vector2D<float> kernel(odd_filter_width, odd_filter_width);
100
compute_gaussian_kernel(kernel.get_ptr(), odd_filter_width, odd_filter_width, sigma_sqr, cComputeGaussianFlagNormalize);
101
102
const int dst_width = orig_img.get_width() / width_divisor;
103
const int dst_height = orig_img.get_height() / height_divisor;
104
105
const int H = odd_filter_width / 2;
106
const int L = -H;
107
108
dst.crop(dst_width, dst_height);
109
110
//#pragma omp parallel for
111
for (int oy = 0; oy < dst_height; oy++)
112
{
113
for (int ox = 0; ox < dst_width; ox++)
114
{
115
vec4F c(0.0f);
116
117
for (int yd = L; yd <= H; yd++)
118
{
119
int y = oy * height_divisor + (height_divisor >> 1) + yd;
120
121
for (int xd = L; xd <= H; xd++)
122
{
123
int x = ox * width_divisor + (width_divisor >> 1) + xd;
124
125
const vec4F &p = orig_img.get_clamped_or_wrapped(x, y, wrapping, wrapping);
126
127
float w = kernel(xd + H, yd + H);
128
c[0] += p[0] * w;
129
c[1] += p[1] * w;
130
c[2] += p[2] * w;
131
c[3] += p[3] * w;
132
}
133
}
134
135
dst(ox, oy).set(c[0], c[1], c[2], c[3]);
136
}
137
}
138
}
139
140
void pow_image(const imagef &src, imagef &dst, const vec4F &power)
141
{
142
dst.resize(src);
143
144
//#pragma omp parallel for
145
for (int y = 0; y < (int)dst.get_height(); y++)
146
{
147
for (uint32_t x = 0; x < dst.get_width(); x++)
148
{
149
const vec4F &p = src(x, y);
150
151
if ((power[0] == 2.0f) && (power[1] == 2.0f) && (power[2] == 2.0f) && (power[3] == 2.0f))
152
dst(x, y).set(p[0] * p[0], p[1] * p[1], p[2] * p[2], p[3] * p[3]);
153
else
154
dst(x, y).set(powf(p[0], power[0]), powf(p[1], power[1]), powf(p[2], power[2]), powf(p[3], power[3]));
155
}
156
}
157
}
158
159
void mul_image(const imagef &src, imagef &dst, const vec4F &mul)
160
{
161
dst.resize(src);
162
163
//#pragma omp parallel for
164
for (int y = 0; y < (int)dst.get_height(); y++)
165
{
166
for (uint32_t x = 0; x < dst.get_width(); x++)
167
{
168
const vec4F &p = src(x, y);
169
dst(x, y).set(p[0] * mul[0], p[1] * mul[1], p[2] * mul[2], p[3] * mul[3]);
170
}
171
}
172
}
173
174
void scale_image(const imagef &src, imagef &dst, const vec4F &scale, const vec4F &shift)
175
{
176
dst.resize(src);
177
178
//#pragma omp parallel for
179
for (int y = 0; y < (int)dst.get_height(); y++)
180
{
181
for (uint32_t x = 0; x < dst.get_width(); x++)
182
{
183
const vec4F &p = src(x, y);
184
185
vec4F d;
186
187
for (uint32_t c = 0; c < 4; c++)
188
d[c] = scale[c] * p[c] + shift[c];
189
190
dst(x, y).set(d[0], d[1], d[2], d[3]);
191
}
192
}
193
}
194
195
void add_weighted_image(const imagef &src1, const vec4F &alpha, const imagef &src2, const vec4F &beta, const vec4F &gamma, imagef &dst)
196
{
197
dst.resize(src1);
198
199
//#pragma omp parallel for
200
for (int y = 0; y < (int)dst.get_height(); y++)
201
{
202
for (uint32_t x = 0; x < dst.get_width(); x++)
203
{
204
const vec4F &s1 = src1(x, y);
205
const vec4F &s2 = src2(x, y);
206
207
dst(x, y).set(
208
s1[0] * alpha[0] + s2[0] * beta[0] + gamma[0],
209
s1[1] * alpha[1] + s2[1] * beta[1] + gamma[1],
210
s1[2] * alpha[2] + s2[2] * beta[2] + gamma[2],
211
s1[3] * alpha[3] + s2[3] * beta[3] + gamma[3]);
212
}
213
}
214
}
215
216
void add_image(const imagef &src1, const imagef &src2, imagef &dst)
217
{
218
dst.resize(src1);
219
220
//#pragma omp parallel for
221
for (int y = 0; y < (int)dst.get_height(); y++)
222
{
223
for (uint32_t x = 0; x < dst.get_width(); x++)
224
{
225
const vec4F &s1 = src1(x, y);
226
const vec4F &s2 = src2(x, y);
227
228
dst(x, y).set(s1[0] + s2[0], s1[1] + s2[1], s1[2] + s2[2], s1[3] + s2[3]);
229
}
230
}
231
}
232
233
void adds_image(const imagef &src, const vec4F &value, imagef &dst)
234
{
235
dst.resize(src);
236
237
//#pragma omp parallel for
238
for (int y = 0; y < (int)dst.get_height(); y++)
239
{
240
for (uint32_t x = 0; x < dst.get_width(); x++)
241
{
242
const vec4F &p = src(x, y);
243
244
dst(x, y).set(p[0] + value[0], p[1] + value[1], p[2] + value[2], p[3] + value[3]);
245
}
246
}
247
}
248
249
void mul_image(const imagef &src1, const imagef &src2, imagef &dst, const vec4F &scale)
250
{
251
dst.resize(src1);
252
253
//#pragma omp parallel for
254
for (int y = 0; y < (int)dst.get_height(); y++)
255
{
256
for (uint32_t x = 0; x < dst.get_width(); x++)
257
{
258
const vec4F &s1 = src1(x, y);
259
const vec4F &s2 = src2(x, y);
260
261
vec4F d;
262
263
for (uint32_t c = 0; c < 4; c++)
264
{
265
float v1 = s1[c];
266
float v2 = s2[c];
267
d[c] = v1 * v2 * scale[c];
268
}
269
270
dst(x, y) = d;
271
}
272
}
273
}
274
275
void div_image(const imagef &src1, const imagef &src2, imagef &dst, const vec4F &scale)
276
{
277
dst.resize(src1);
278
279
//#pragma omp parallel for
280
for (int y = 0; y < (int)dst.get_height(); y++)
281
{
282
for (uint32_t x = 0; x < dst.get_width(); x++)
283
{
284
const vec4F &s1 = src1(x, y);
285
const vec4F &s2 = src2(x, y);
286
287
vec4F d;
288
289
for (uint32_t c = 0; c < 4; c++)
290
{
291
float v = s2[c];
292
if (v == 0.0f)
293
d[c] = 0.0f;
294
else
295
d[c] = (s1[c] * scale[c]) / v;
296
}
297
298
dst(x, y) = d;
299
}
300
}
301
}
302
303
vec4F avg_image(const imagef &src)
304
{
305
vec4F avg(0.0f);
306
307
for (uint32_t y = 0; y < src.get_height(); y++)
308
{
309
for (uint32_t x = 0; x < src.get_width(); x++)
310
{
311
const vec4F &s = src(x, y);
312
313
avg += vec4F(s[0], s[1], s[2], s[3]);
314
}
315
}
316
317
avg /= static_cast<float>(src.get_total_pixels());
318
319
return avg;
320
}
321
322
// Reference: https://ece.uwaterloo.ca/~z70wang/research/ssim/index.html
323
vec4F compute_ssim(const imagef &a, const imagef &b)
324
{
325
imagef axb, a_sq, b_sq, mu1, mu2, mu1_sq, mu2_sq, mu1_mu2, s1_sq, s2_sq, s12, smap, t1, t2, t3;
326
327
const float C1 = 6.50250f, C2 = 58.52250f;
328
329
pow_image(a, a_sq, vec4F(2));
330
pow_image(b, b_sq, vec4F(2));
331
mul_image(a, b, axb, vec4F(1.0f));
332
333
gaussian_filter(mu1, a, 11, 1.5f * 1.5f);
334
gaussian_filter(mu2, b, 11, 1.5f * 1.5f);
335
336
pow_image(mu1, mu1_sq, vec4F(2));
337
pow_image(mu2, mu2_sq, vec4F(2));
338
mul_image(mu1, mu2, mu1_mu2, vec4F(1.0f));
339
340
gaussian_filter(s1_sq, a_sq, 11, 1.5f * 1.5f);
341
add_weighted_image(s1_sq, vec4F(1), mu1_sq, vec4F(-1), vec4F(0), s1_sq);
342
343
gaussian_filter(s2_sq, b_sq, 11, 1.5f * 1.5f);
344
add_weighted_image(s2_sq, vec4F(1), mu2_sq, vec4F(-1), vec4F(0), s2_sq);
345
346
gaussian_filter(s12, axb, 11, 1.5f * 1.5f);
347
add_weighted_image(s12, vec4F(1), mu1_mu2, vec4F(-1), vec4F(0), s12);
348
349
scale_image(mu1_mu2, t1, vec4F(2), vec4F(0));
350
adds_image(t1, vec4F(C1), t1);
351
352
scale_image(s12, t2, vec4F(2), vec4F(0));
353
adds_image(t2, vec4F(C2), t2);
354
355
mul_image(t1, t2, t3, vec4F(1));
356
357
add_image(mu1_sq, mu2_sq, t1);
358
adds_image(t1, vec4F(C1), t1);
359
360
add_image(s1_sq, s2_sq, t2);
361
adds_image(t2, vec4F(C2), t2);
362
363
mul_image(t1, t2, t1, vec4F(1));
364
365
div_image(t3, t1, smap, vec4F(1));
366
367
return avg_image(smap);
368
}
369
370
vec4F compute_ssim(const image &a, const image &b, bool luma, bool luma_601)
371
{
372
image ta(a), tb(b);
373
374
if ((ta.get_width() != tb.get_width()) || (ta.get_height() != tb.get_height()))
375
{
376
debug_printf("compute_ssim: Cropping input images to equal dimensions\n");
377
378
const uint32_t w = minimum(a.get_width(), b.get_width());
379
const uint32_t h = minimum(a.get_height(), b.get_height());
380
ta.crop(w, h);
381
tb.crop(w, h);
382
}
383
384
if (!ta.get_width() || !ta.get_height())
385
{
386
assert(0);
387
return vec4F(0);
388
}
389
390
if (luma)
391
{
392
for (uint32_t y = 0; y < ta.get_height(); y++)
393
{
394
for (uint32_t x = 0; x < ta.get_width(); x++)
395
{
396
ta(x, y).set(ta(x, y).get_luma(luma_601), ta(x, y).a);
397
tb(x, y).set(tb(x, y).get_luma(luma_601), tb(x, y).a);
398
}
399
}
400
}
401
402
imagef fta, ftb;
403
404
fta.set(ta);
405
ftb.set(tb);
406
407
return compute_ssim(fta, ftb);
408
}
409
410
} // namespace basisu
411
412