CoCalc -- basisu

GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/basis_universal/encoder/basisu_ssim.cpp
⁹⁹⁰³ views
1
// basisu_ssim.cpp
2
// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
3
//
4
// Licensed under the Apache License, Version 2.0 (the "License");
5
// you may not use this file except in compliance with the License.
6
// You may obtain a copy of the License at
7
//
8
//    http://www.apache.org/licenses/LICENSE-2.0
9
//
10
// Unless required by applicable law or agreed to in writing, software
11
// distributed under the License is distributed on an "AS IS" BASIS,
12
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
// See the License for the specific language governing permissions and
14
// limitations under the License.
15
#include "basisu_ssim.h"
16

17
#ifndef M_PI
18
#define M_PI 3.14159265358979323846
19
#endif
20

21
namespace basisu
22
{
23
	float gauss(int x, int y, float sigma_sqr)
24
	{
25
		float pow = expf(-((x * x + y * y) / (2.0f * sigma_sqr)));
26
		float g = (1.0f / (sqrtf((float)(2.0f * M_PI * sigma_sqr)))) * pow;
27
		return g;
28
	}
29
		
30
	// size_x/y should be odd
31
	void compute_gaussian_kernel(float *pDst, int size_x, int size_y, float sigma_sqr, uint32_t flags)
32
	{
33
		assert(size_x & size_y & 1);
34

35
		if (!(size_x | size_y))
36
			return;
37

38
		int mid_x = size_x / 2;
39
		int mid_y = size_y / 2;
40

41
		double sum = 0;
42
		for (int x = 0; x < size_x; x++)
43
		{
44
			for (int y = 0; y < size_y; y++)
45
			{
46
				float g;
47
				if ((x > mid_x) && (y < mid_y))
48
					g = pDst[(size_x - x - 1) + y * size_x];
49
				else if ((x < mid_x) && (y > mid_y))
50
					g = pDst[x + (size_y - y - 1) * size_x];
51
				else if ((x > mid_x) && (y > mid_y))
52
					g = pDst[(size_x - x - 1) + (size_y - y - 1) * size_x];
53
				else
54
					g = gauss(x - mid_x, y - mid_y, sigma_sqr);
55

56
				pDst[x + y * size_x] = g;
57
				sum += g;
58
			}
59
		}
60

61
		if (flags & cComputeGaussianFlagNormalizeCenterToOne)
62
		{
63
			sum = pDst[mid_x + mid_y * size_x];
64
		}
65

66
		if (flags & (cComputeGaussianFlagNormalizeCenterToOne | cComputeGaussianFlagNormalize))
67
		{
68
			double one_over_sum = 1.0f / sum;
69
			for (int i = 0; i < size_x * size_y; i++)
70
				pDst[i] = static_cast<float>(pDst[i] * one_over_sum);
71

72
			if (flags & cComputeGaussianFlagNormalizeCenterToOne)
73
				pDst[mid_x + mid_y * size_x] = 1.0f;
74
		}
75

76
		if (flags & cComputeGaussianFlagPrint)
77
		{
78
			printf("{\n");
79
			for (int y = 0; y < size_y; y++)
80
			{
81
				printf("  ");
82
				for (int x = 0; x < size_x; x++)
83
				{
84
					printf("%f, ", pDst[x + y * size_x]);
85
				}
86
				printf("\n");
87
			}
88
			printf("}");
89
		}
90
	}
91

92
	void gaussian_filter(imagef &dst, const imagef &orig_img, uint32_t odd_filter_width, float sigma_sqr, bool wrapping, uint32_t width_divisor, uint32_t height_divisor)
93
	{
94
		assert(&dst != &orig_img);
95

96
		assert(odd_filter_width && (odd_filter_width & 1));
97
		odd_filter_width |= 1;
98

99
		vector2D<float> kernel(odd_filter_width, odd_filter_width);
100
		compute_gaussian_kernel(kernel.get_ptr(), odd_filter_width, odd_filter_width, sigma_sqr, cComputeGaussianFlagNormalize);
101

102
		const int dst_width = orig_img.get_width() / width_divisor;
103
		const int dst_height = orig_img.get_height() / height_divisor;
104

105
		const int H = odd_filter_width / 2;
106
		const int L = -H;
107

108
		dst.crop(dst_width, dst_height);
109

110
//#pragma omp parallel for
111
		for (int oy = 0; oy < dst_height; oy++)
112
		{
113
			for (int ox = 0; ox < dst_width; ox++)
114
			{
115
				vec4F c(0.0f);
116

117
				for (int yd = L; yd <= H; yd++)
118
				{
119
					int y = oy * height_divisor + (height_divisor >> 1) + yd;
120

121
					for (int xd = L; xd <= H; xd++)
122
					{
123
						int x = ox * width_divisor + (width_divisor >> 1) + xd;
124

125
						const vec4F &p = orig_img.get_clamped_or_wrapped(x, y, wrapping, wrapping);
126

127
						float w = kernel(xd + H, yd + H);
128
						c[0] += p[0] * w;
129
						c[1] += p[1] * w;
130
						c[2] += p[2] * w;
131
						c[3] += p[3] * w;
132
					}
133
				}
134

135
				dst(ox, oy).set(c[0], c[1], c[2], c[3]);
136
			}
137
		}
138
	}
139

140
	void pow_image(const imagef &src, imagef &dst, const vec4F &power)
141
	{
142
		dst.resize(src);
143

144
//#pragma omp parallel for
145
		for (int y = 0; y < (int)dst.get_height(); y++)
146
		{
147
			for (uint32_t x = 0; x < dst.get_width(); x++)
148
			{
149
				const vec4F &p = src(x, y);
150

151
				if ((power[0] == 2.0f) && (power[1] == 2.0f) && (power[2] == 2.0f) && (power[3] == 2.0f))
152
					dst(x, y).set(p[0] * p[0], p[1] * p[1], p[2] * p[2], p[3] * p[3]);
153
				else
154
					dst(x, y).set(powf(p[0], power[0]), powf(p[1], power[1]), powf(p[2], power[2]), powf(p[3], power[3]));
155
			}
156
		}
157
	}
158

159
	void mul_image(const imagef &src, imagef &dst, const vec4F &mul)
160
	{
161
		dst.resize(src);
162

163
//#pragma omp parallel for
164
		for (int y = 0; y < (int)dst.get_height(); y++)
165
		{
166
			for (uint32_t x = 0; x < dst.get_width(); x++)
167
			{
168
				const vec4F &p = src(x, y);
169
				dst(x, y).set(p[0] * mul[0], p[1] * mul[1], p[2] * mul[2], p[3] * mul[3]);
170
			}
171
		}
172
	}
173

174
	void scale_image(const imagef &src, imagef &dst, const vec4F &scale, const vec4F &shift)
175
	{
176
		dst.resize(src);
177

178
//#pragma omp parallel for
179
		for (int y = 0; y < (int)dst.get_height(); y++)
180
		{
181
			for (uint32_t x = 0; x < dst.get_width(); x++)
182
			{
183
				const vec4F &p = src(x, y);
184

185
				vec4F d;
186

187
				for (uint32_t c = 0; c < 4; c++)
188
					d[c] = scale[c] * p[c] + shift[c];
189

190
				dst(x, y).set(d[0], d[1], d[2], d[3]);
191
			}
192
		}
193
	}
194

195
	void add_weighted_image(const imagef &src1, const vec4F &alpha, const imagef &src2, const vec4F &beta, const vec4F &gamma, imagef &dst)
196
	{
197
		dst.resize(src1);
198

199
//#pragma omp parallel for
200
		for (int y = 0; y < (int)dst.get_height(); y++)
201
		{
202
			for (uint32_t x = 0; x < dst.get_width(); x++)
203
			{
204
				const vec4F &s1 = src1(x, y);
205
				const vec4F &s2 = src2(x, y);
206

207
				dst(x, y).set(
208
					s1[0] * alpha[0] + s2[0] * beta[0] + gamma[0],
209
					s1[1] * alpha[1] + s2[1] * beta[1] + gamma[1],
210
					s1[2] * alpha[2] + s2[2] * beta[2] + gamma[2],
211
					s1[3] * alpha[3] + s2[3] * beta[3] + gamma[3]);
212
			}
213
		}
214
	}
215

216
	void add_image(const imagef &src1, const imagef &src2, imagef &dst)
217
	{
218
		dst.resize(src1);
219

220
//#pragma omp parallel for
221
		for (int y = 0; y < (int)dst.get_height(); y++)
222
		{
223
			for (uint32_t x = 0; x < dst.get_width(); x++)
224
			{
225
				const vec4F &s1 = src1(x, y);
226
				const vec4F &s2 = src2(x, y);
227

228
				dst(x, y).set(s1[0] + s2[0], s1[1] + s2[1], s1[2] + s2[2], s1[3] + s2[3]);
229
			}
230
		}
231
	}
232

233
	void adds_image(const imagef &src, const vec4F &value, imagef &dst)
234
	{
235
		dst.resize(src);
236

237
//#pragma omp parallel for
238
		for (int y = 0; y < (int)dst.get_height(); y++)
239
		{
240
			for (uint32_t x = 0; x < dst.get_width(); x++)
241
			{
242
				const vec4F &p = src(x, y);
243

244
				dst(x, y).set(p[0] + value[0], p[1] + value[1], p[2] + value[2], p[3] + value[3]);
245
			}
246
		}
247
	}
248

249
	void mul_image(const imagef &src1, const imagef &src2, imagef &dst, const vec4F &scale)
250
	{
251
		dst.resize(src1);
252

253
//#pragma omp parallel for
254
		for (int y = 0; y < (int)dst.get_height(); y++)
255
		{
256
			for (uint32_t x = 0; x < dst.get_width(); x++)
257
			{
258
				const vec4F &s1 = src1(x, y);
259
				const vec4F &s2 = src2(x, y);
260

261
				vec4F d;
262

263
				for (uint32_t c = 0; c < 4; c++)
264
				{
265
					float v1 = s1[c];
266
					float v2 = s2[c];
267
					d[c] = v1 * v2 * scale[c];
268
				}
269

270
				dst(x, y) = d;
271
			}
272
		}
273
	}
274

275
	void div_image(const imagef &src1, const imagef &src2, imagef &dst, const vec4F &scale)
276
	{
277
		dst.resize(src1);
278

279
//#pragma omp parallel for
280
		for (int y = 0; y < (int)dst.get_height(); y++)
281
		{
282
			for (uint32_t x = 0; x < dst.get_width(); x++)
283
			{
284
				const vec4F &s1 = src1(x, y);
285
				const vec4F &s2 = src2(x, y);
286

287
				vec4F d;
288

289
				for (uint32_t c = 0; c < 4; c++)
290
				{
291
					float v = s2[c];
292
					if (v == 0.0f)
293
						d[c] = 0.0f;
294
					else
295
						d[c] = (s1[c] * scale[c]) / v;
296
				}
297

298
				dst(x, y) = d;
299
			}
300
		}
301
	}
302

303
	vec4F avg_image(const imagef &src)
304
	{
305
		vec4F avg(0.0f);
306

307
		for (uint32_t y = 0; y < src.get_height(); y++)
308
		{
309
			for (uint32_t x = 0; x < src.get_width(); x++)
310
			{
311
				const vec4F &s = src(x, y);
312

313
				avg += vec4F(s[0], s[1], s[2], s[3]);
314
			}
315
		}
316

317
		avg /= static_cast<float>(src.get_total_pixels());
318

319
		return avg;
320
	}
321
		
322
	// Reference: https://ece.uwaterloo.ca/~z70wang/research/ssim/index.html
323
	vec4F compute_ssim(const imagef &a, const imagef &b)
324
	{
325
		imagef axb, a_sq, b_sq, mu1, mu2, mu1_sq, mu2_sq, mu1_mu2, s1_sq, s2_sq, s12, smap, t1, t2, t3;
326

327
		const float C1 = 6.50250f, C2 = 58.52250f;
328
				
329
		pow_image(a, a_sq, vec4F(2));
330
		pow_image(b, b_sq, vec4F(2));
331
		mul_image(a, b, axb, vec4F(1.0f));
332

333
		gaussian_filter(mu1, a, 11, 1.5f * 1.5f);
334
		gaussian_filter(mu2, b, 11, 1.5f * 1.5f);
335

336
		pow_image(mu1, mu1_sq, vec4F(2));
337
		pow_image(mu2, mu2_sq, vec4F(2));
338
		mul_image(mu1, mu2, mu1_mu2, vec4F(1.0f));
339

340
		gaussian_filter(s1_sq, a_sq, 11, 1.5f * 1.5f);
341
		add_weighted_image(s1_sq, vec4F(1), mu1_sq, vec4F(-1), vec4F(0), s1_sq);
342

343
		gaussian_filter(s2_sq, b_sq, 11, 1.5f * 1.5f);
344
		add_weighted_image(s2_sq, vec4F(1), mu2_sq, vec4F(-1), vec4F(0), s2_sq);
345

346
		gaussian_filter(s12, axb, 11, 1.5f * 1.5f);
347
		add_weighted_image(s12, vec4F(1), mu1_mu2, vec4F(-1), vec4F(0), s12);
348

349
		scale_image(mu1_mu2, t1, vec4F(2), vec4F(0));
350
		adds_image(t1, vec4F(C1), t1);
351

352
		scale_image(s12, t2, vec4F(2), vec4F(0));
353
		adds_image(t2, vec4F(C2), t2);
354

355
		mul_image(t1, t2, t3, vec4F(1));
356

357
		add_image(mu1_sq, mu2_sq, t1);
358
		adds_image(t1, vec4F(C1), t1);
359

360
		add_image(s1_sq, s2_sq, t2);
361
		adds_image(t2, vec4F(C2), t2);
362

363
		mul_image(t1, t2, t1, vec4F(1));
364

365
		div_image(t3, t1, smap, vec4F(1));
366

367
		return avg_image(smap);
368
	}
369

370
	vec4F compute_ssim(const image &a, const image &b, bool luma, bool luma_601)
371
	{
372
		image ta(a), tb(b);
373

374
		if ((ta.get_width() != tb.get_width()) || (ta.get_height() != tb.get_height()))
375
		{
376
			debug_printf("compute_ssim: Cropping input images to equal dimensions\n");
377

378
			const uint32_t w = minimum(a.get_width(), b.get_width());
379
			const uint32_t h = minimum(a.get_height(), b.get_height());
380
			ta.crop(w, h);
381
			tb.crop(w, h);
382
		}
383

384
		if (!ta.get_width() || !ta.get_height())
385
		{
386
			assert(0);
387
			return vec4F(0);
388
		}
389

390
		if (luma)
391
		{
392
			for (uint32_t y = 0; y < ta.get_height(); y++)
393
			{
394
				for (uint32_t x = 0; x < ta.get_width(); x++)
395
				{
396
					ta(x, y).set(ta(x, y).get_luma(luma_601), ta(x, y).a);
397
					tb(x, y).set(tb(x, y).get_luma(luma_601), tb(x, y).a);
398
				}
399
			}
400
		}
401

402
		imagef fta, ftb;
403

404
		fta.set(ta);
405
		ftb.set(tb);
406

407
		return compute_ssim(fta, ftb);
408
	}
409

410
} // namespace basisu
411

412
Product

Resources

Company