Path: blob/master/thirdparty/basis_universal/encoder/basisu_ssim.cpp
9903 views
// basisu_ssim.cpp1// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.2//3// Licensed under the Apache License, Version 2.0 (the "License");4// you may not use this file except in compliance with the License.5// You may obtain a copy of the License at6//7// http://www.apache.org/licenses/LICENSE-2.08//9// Unless required by applicable law or agreed to in writing, software10// distributed under the License is distributed on an "AS IS" BASIS,11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.12// See the License for the specific language governing permissions and13// limitations under the License.14#include "basisu_ssim.h"1516#ifndef M_PI17#define M_PI 3.1415926535897932384618#endif1920namespace basisu21{22float gauss(int x, int y, float sigma_sqr)23{24float pow = expf(-((x * x + y * y) / (2.0f * sigma_sqr)));25float g = (1.0f / (sqrtf((float)(2.0f * M_PI * sigma_sqr)))) * pow;26return g;27}2829// size_x/y should be odd30void compute_gaussian_kernel(float *pDst, int size_x, int size_y, float sigma_sqr, uint32_t flags)31{32assert(size_x & size_y & 1);3334if (!(size_x | size_y))35return;3637int mid_x = size_x / 2;38int mid_y = size_y / 2;3940double sum = 0;41for (int x = 0; x < size_x; x++)42{43for (int y = 0; y < size_y; y++)44{45float g;46if ((x > mid_x) && (y < mid_y))47g = pDst[(size_x - x - 1) + y * size_x];48else if ((x < mid_x) && (y > mid_y))49g = pDst[x + (size_y - y - 1) * size_x];50else if ((x > mid_x) && (y > mid_y))51g = pDst[(size_x - x - 1) + (size_y - y - 1) * size_x];52else53g = gauss(x - mid_x, y - mid_y, sigma_sqr);5455pDst[x + y * size_x] = g;56sum += g;57}58}5960if (flags & cComputeGaussianFlagNormalizeCenterToOne)61{62sum = pDst[mid_x + mid_y * size_x];63}6465if (flags & (cComputeGaussianFlagNormalizeCenterToOne | cComputeGaussianFlagNormalize))66{67double one_over_sum = 1.0f / sum;68for (int i = 0; i < size_x * size_y; i++)69pDst[i] = static_cast<float>(pDst[i] * one_over_sum);7071if (flags & cComputeGaussianFlagNormalizeCenterToOne)72pDst[mid_x + mid_y * size_x] = 1.0f;73}7475if (flags & cComputeGaussianFlagPrint)76{77printf("{\n");78for (int y = 0; y < size_y; y++)79{80printf(" ");81for (int x = 0; x < size_x; x++)82{83printf("%f, ", pDst[x + y * size_x]);84}85printf("\n");86}87printf("}");88}89}9091void gaussian_filter(imagef &dst, const imagef &orig_img, uint32_t odd_filter_width, float sigma_sqr, bool wrapping, uint32_t width_divisor, uint32_t height_divisor)92{93assert(&dst != &orig_img);9495assert(odd_filter_width && (odd_filter_width & 1));96odd_filter_width |= 1;9798vector2D<float> kernel(odd_filter_width, odd_filter_width);99compute_gaussian_kernel(kernel.get_ptr(), odd_filter_width, odd_filter_width, sigma_sqr, cComputeGaussianFlagNormalize);100101const int dst_width = orig_img.get_width() / width_divisor;102const int dst_height = orig_img.get_height() / height_divisor;103104const int H = odd_filter_width / 2;105const int L = -H;106107dst.crop(dst_width, dst_height);108109//#pragma omp parallel for110for (int oy = 0; oy < dst_height; oy++)111{112for (int ox = 0; ox < dst_width; ox++)113{114vec4F c(0.0f);115116for (int yd = L; yd <= H; yd++)117{118int y = oy * height_divisor + (height_divisor >> 1) + yd;119120for (int xd = L; xd <= H; xd++)121{122int x = ox * width_divisor + (width_divisor >> 1) + xd;123124const vec4F &p = orig_img.get_clamped_or_wrapped(x, y, wrapping, wrapping);125126float w = kernel(xd + H, yd + H);127c[0] += p[0] * w;128c[1] += p[1] * w;129c[2] += p[2] * w;130c[3] += p[3] * w;131}132}133134dst(ox, oy).set(c[0], c[1], c[2], c[3]);135}136}137}138139void pow_image(const imagef &src, imagef &dst, const vec4F &power)140{141dst.resize(src);142143//#pragma omp parallel for144for (int y = 0; y < (int)dst.get_height(); y++)145{146for (uint32_t x = 0; x < dst.get_width(); x++)147{148const vec4F &p = src(x, y);149150if ((power[0] == 2.0f) && (power[1] == 2.0f) && (power[2] == 2.0f) && (power[3] == 2.0f))151dst(x, y).set(p[0] * p[0], p[1] * p[1], p[2] * p[2], p[3] * p[3]);152else153dst(x, y).set(powf(p[0], power[0]), powf(p[1], power[1]), powf(p[2], power[2]), powf(p[3], power[3]));154}155}156}157158void mul_image(const imagef &src, imagef &dst, const vec4F &mul)159{160dst.resize(src);161162//#pragma omp parallel for163for (int y = 0; y < (int)dst.get_height(); y++)164{165for (uint32_t x = 0; x < dst.get_width(); x++)166{167const vec4F &p = src(x, y);168dst(x, y).set(p[0] * mul[0], p[1] * mul[1], p[2] * mul[2], p[3] * mul[3]);169}170}171}172173void scale_image(const imagef &src, imagef &dst, const vec4F &scale, const vec4F &shift)174{175dst.resize(src);176177//#pragma omp parallel for178for (int y = 0; y < (int)dst.get_height(); y++)179{180for (uint32_t x = 0; x < dst.get_width(); x++)181{182const vec4F &p = src(x, y);183184vec4F d;185186for (uint32_t c = 0; c < 4; c++)187d[c] = scale[c] * p[c] + shift[c];188189dst(x, y).set(d[0], d[1], d[2], d[3]);190}191}192}193194void add_weighted_image(const imagef &src1, const vec4F &alpha, const imagef &src2, const vec4F &beta, const vec4F &gamma, imagef &dst)195{196dst.resize(src1);197198//#pragma omp parallel for199for (int y = 0; y < (int)dst.get_height(); y++)200{201for (uint32_t x = 0; x < dst.get_width(); x++)202{203const vec4F &s1 = src1(x, y);204const vec4F &s2 = src2(x, y);205206dst(x, y).set(207s1[0] * alpha[0] + s2[0] * beta[0] + gamma[0],208s1[1] * alpha[1] + s2[1] * beta[1] + gamma[1],209s1[2] * alpha[2] + s2[2] * beta[2] + gamma[2],210s1[3] * alpha[3] + s2[3] * beta[3] + gamma[3]);211}212}213}214215void add_image(const imagef &src1, const imagef &src2, imagef &dst)216{217dst.resize(src1);218219//#pragma omp parallel for220for (int y = 0; y < (int)dst.get_height(); y++)221{222for (uint32_t x = 0; x < dst.get_width(); x++)223{224const vec4F &s1 = src1(x, y);225const vec4F &s2 = src2(x, y);226227dst(x, y).set(s1[0] + s2[0], s1[1] + s2[1], s1[2] + s2[2], s1[3] + s2[3]);228}229}230}231232void adds_image(const imagef &src, const vec4F &value, imagef &dst)233{234dst.resize(src);235236//#pragma omp parallel for237for (int y = 0; y < (int)dst.get_height(); y++)238{239for (uint32_t x = 0; x < dst.get_width(); x++)240{241const vec4F &p = src(x, y);242243dst(x, y).set(p[0] + value[0], p[1] + value[1], p[2] + value[2], p[3] + value[3]);244}245}246}247248void mul_image(const imagef &src1, const imagef &src2, imagef &dst, const vec4F &scale)249{250dst.resize(src1);251252//#pragma omp parallel for253for (int y = 0; y < (int)dst.get_height(); y++)254{255for (uint32_t x = 0; x < dst.get_width(); x++)256{257const vec4F &s1 = src1(x, y);258const vec4F &s2 = src2(x, y);259260vec4F d;261262for (uint32_t c = 0; c < 4; c++)263{264float v1 = s1[c];265float v2 = s2[c];266d[c] = v1 * v2 * scale[c];267}268269dst(x, y) = d;270}271}272}273274void div_image(const imagef &src1, const imagef &src2, imagef &dst, const vec4F &scale)275{276dst.resize(src1);277278//#pragma omp parallel for279for (int y = 0; y < (int)dst.get_height(); y++)280{281for (uint32_t x = 0; x < dst.get_width(); x++)282{283const vec4F &s1 = src1(x, y);284const vec4F &s2 = src2(x, y);285286vec4F d;287288for (uint32_t c = 0; c < 4; c++)289{290float v = s2[c];291if (v == 0.0f)292d[c] = 0.0f;293else294d[c] = (s1[c] * scale[c]) / v;295}296297dst(x, y) = d;298}299}300}301302vec4F avg_image(const imagef &src)303{304vec4F avg(0.0f);305306for (uint32_t y = 0; y < src.get_height(); y++)307{308for (uint32_t x = 0; x < src.get_width(); x++)309{310const vec4F &s = src(x, y);311312avg += vec4F(s[0], s[1], s[2], s[3]);313}314}315316avg /= static_cast<float>(src.get_total_pixels());317318return avg;319}320321// Reference: https://ece.uwaterloo.ca/~z70wang/research/ssim/index.html322vec4F compute_ssim(const imagef &a, const imagef &b)323{324imagef axb, a_sq, b_sq, mu1, mu2, mu1_sq, mu2_sq, mu1_mu2, s1_sq, s2_sq, s12, smap, t1, t2, t3;325326const float C1 = 6.50250f, C2 = 58.52250f;327328pow_image(a, a_sq, vec4F(2));329pow_image(b, b_sq, vec4F(2));330mul_image(a, b, axb, vec4F(1.0f));331332gaussian_filter(mu1, a, 11, 1.5f * 1.5f);333gaussian_filter(mu2, b, 11, 1.5f * 1.5f);334335pow_image(mu1, mu1_sq, vec4F(2));336pow_image(mu2, mu2_sq, vec4F(2));337mul_image(mu1, mu2, mu1_mu2, vec4F(1.0f));338339gaussian_filter(s1_sq, a_sq, 11, 1.5f * 1.5f);340add_weighted_image(s1_sq, vec4F(1), mu1_sq, vec4F(-1), vec4F(0), s1_sq);341342gaussian_filter(s2_sq, b_sq, 11, 1.5f * 1.5f);343add_weighted_image(s2_sq, vec4F(1), mu2_sq, vec4F(-1), vec4F(0), s2_sq);344345gaussian_filter(s12, axb, 11, 1.5f * 1.5f);346add_weighted_image(s12, vec4F(1), mu1_mu2, vec4F(-1), vec4F(0), s12);347348scale_image(mu1_mu2, t1, vec4F(2), vec4F(0));349adds_image(t1, vec4F(C1), t1);350351scale_image(s12, t2, vec4F(2), vec4F(0));352adds_image(t2, vec4F(C2), t2);353354mul_image(t1, t2, t3, vec4F(1));355356add_image(mu1_sq, mu2_sq, t1);357adds_image(t1, vec4F(C1), t1);358359add_image(s1_sq, s2_sq, t2);360adds_image(t2, vec4F(C2), t2);361362mul_image(t1, t2, t1, vec4F(1));363364div_image(t3, t1, smap, vec4F(1));365366return avg_image(smap);367}368369vec4F compute_ssim(const image &a, const image &b, bool luma, bool luma_601)370{371image ta(a), tb(b);372373if ((ta.get_width() != tb.get_width()) || (ta.get_height() != tb.get_height()))374{375debug_printf("compute_ssim: Cropping input images to equal dimensions\n");376377const uint32_t w = minimum(a.get_width(), b.get_width());378const uint32_t h = minimum(a.get_height(), b.get_height());379ta.crop(w, h);380tb.crop(w, h);381}382383if (!ta.get_width() || !ta.get_height())384{385assert(0);386return vec4F(0);387}388389if (luma)390{391for (uint32_t y = 0; y < ta.get_height(); y++)392{393for (uint32_t x = 0; x < ta.get_width(); x++)394{395ta(x, y).set(ta(x, y).get_luma(luma_601), ta(x, y).a);396tb(x, y).set(tb(x, y).get_luma(luma_601), tb(x, y).a);397}398}399}400401imagef fta, ftb;402403fta.set(ta);404ftb.set(tb);405406return compute_ssim(fta, ftb);407}408409} // namespace basisu410411412