// Path: blob/master/src/common-tests/gsvector_yuvtorgb_test.cpp
// (page metadata captured with the source: 4211 views)
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <[email protected]>1// SPDX-License-Identifier: CC-BY-NC-ND-4.023#include "common/bitutils.h"4#include "common/gsvector.h"56#include <gtest/gtest.h>78#include <algorithm>9#include <array>1011static void YUVToRGB_Vector(const std::array<s16, 64>& Crblk, const std::array<s16, 64>& Cbblk,12const std::array<s16, 64>& Yblk, u32* output, bool signed_output)13{14const GSVector4i addval = signed_output ? GSVector4i::cxpr(0) : GSVector4i::cxpr(0x80808080);15for (u32 y = 0; y < 8; y++)16{17const GSVector4i Cr = GSVector4i::loadl<false>(&Crblk[(y / 2) * 8]).s16to32();18const GSVector4i Cb = GSVector4i::loadl<false>(&Cbblk[(y / 2) * 8]).s16to32();19const GSVector4i Y = GSVector4i::load<true>(&Yblk[y * 8]);2021// BT.601 YUV->RGB coefficients, rounding formula from Mednafen.22// r = clamp(sext9(Y + (((359 * Cr) + 0x80) >> 8)), -128, 127) + addval;23// g = clamp(sext9(Y + ((((-88 * Cb) & ~0x1F) + ((-183 * Cr) & ~0x07) + 0x80) >> 8)), -128, 127) + addval24// b = clamp(sext9<9, s32>(Y + (((454 * Cb) + 0x80) >> 8)), -128, 127) + addval2526// Need to do the multiply as 32-bit, since 127 * 359 is greater than INT16_MAX.27// upl16(self) = interleave XYZW0000 -> XXYYZZWW.28const GSVector4i Crmul = Cr.mul32l(GSVector4i::cxpr(359)).add16(GSVector4i::cxpr(0x80)).sra32<8>().ps32();29const GSVector4i Cbmul = Cb.mul32l(GSVector4i::cxpr(454)).add16(GSVector4i::cxpr(0x80)).sra32<8>().ps32();30const GSVector4i CrCbmul = (Cb.mul32l(GSVector4i::cxpr(-88)) & GSVector4i::cxpr(~0x1F))31.add32(Cr.mul32l(GSVector4i::cxpr(-183)) & GSVector4i::cxpr(~0x07))32.add32(GSVector4i::cxpr(0x80))33.sra32<8>()34.ps32();35const GSVector4i r = Crmul.upl16(Crmul).add16(Y).sll16<7>().sra16<7>().ps16().add8(addval);36const GSVector4i g = CrCbmul.upl16(CrCbmul).add16(Y).sll16<7>().sra16<7>().ps16().add8(addval);37const GSVector4i b = Cbmul.upl16(Cbmul).add16(Y).sll16<7>().sra16<7>().ps16().add8(addval);38const GSVector4i rg = r.upl8(g);39const GSVector4i b0 = 
b.upl8();40const GSVector4i rgblow = rg.upl16(b0);41const GSVector4i rgbhigh = rg.uph16(b0);4243GSVector4i::store<false>(&output[y * 8 + 0], rgblow);44GSVector4i::store<false>(&output[y * 8 + 4], rgbhigh);45}46}4748static void YUVToRGB_Scalar(const std::array<s16, 64>& Crblk, const std::array<s16, 64>& Cbblk,49const std::array<s16, 64>& Yblk, u32* output, bool signed_output)50{51const s32 addval = signed_output ? 0 : 0x80;52for (u32 y = 0; y < 8; y++)53{54for (u32 x = 0; x < 8; x++)55{56const s32 Cr = Crblk[(x / 2) + (y / 2) * 8];57const s32 Cb = Cbblk[(x / 2) + (y / 2) * 8];58const s32 Y = Yblk[x + y * 8];5960// BT.601 YUV->RGB coefficients, rounding from Mednafen.61const s32 r = std::clamp(SignExtendN<9, s32>(Y + (((359 * Cr) + 0x80) >> 8)), -128, 127) + addval;62const s32 g =63std::clamp(SignExtendN<9, s32>(Y + ((((-88 * Cb) & ~0x1F) + ((-183 * Cr) & ~0x07) + 0x80) >> 8)), -128, 127) +64addval;65const s32 b = std::clamp(SignExtendN<9, s32>(Y + (((454 * Cb) + 0x80) >> 8)), -128, 127) + addval;6667output[y * 8 + x] =68static_cast<u32>(Truncate8(r)) | (static_cast<u32>(Truncate8(g)) << 8) | (static_cast<u32>(Truncate8(b)) << 16);69}70}71}7273TEST(GSVector, YUVToRGB)74{75alignas(VECTOR_ALIGNMENT) std::array<s16, 64> crblk;76alignas(VECTOR_ALIGNMENT) std::array<s16, 64> cbblk;77alignas(VECTOR_ALIGNMENT) std::array<s16, 64> yblk;78for (s16 i = -128; i < 128; i++)79{80for (u32 j = 0; j < 64; j++)81crblk[j] = i;8283for (s16 k = -128; k < 128; k++)84{85for (u32 j = 0; j < 64; j++)86cbblk[j] = k;8788for (s16 l = -128; l < 128; l++)89{90for (u32 j = 0; j < 64; j++)91yblk[j] = l;9293alignas(VECTOR_ALIGNMENT) u32 rows[64];94YUVToRGB_Scalar(crblk, cbblk, yblk, rows, false);9596alignas(VECTOR_ALIGNMENT) u32 rowv[64];97YUVToRGB_Vector(crblk, cbblk, yblk, rowv, false);98ASSERT_EQ(std::memcmp(rows, rowv, sizeof(rows)), 0);99100YUVToRGB_Scalar(crblk, cbblk, yblk, rows, true);101YUVToRGB_Vector(crblk, cbblk, yblk, rowv, true);102ASSERT_EQ(std::memcmp(rows, rowv, sizeof(rows)), 
0);103}104}105}106}107108#if 0109// Performance test110alignas(VECTOR_ALIGNMENT) u32 g_gsvector_yuvtorgb_temp[64];111112TEST(GSVector, YUVToRGB_Scalar)113{114alignas(VECTOR_ALIGNMENT) std::array<s16, 64> crblk;115alignas(VECTOR_ALIGNMENT) std::array<s16, 64> cbblk;116alignas(VECTOR_ALIGNMENT) std::array<s16, 64> yblk;117for (s16 i = -128; i < 128; i++)118{119for (u32 j = 0; j < 64; j++)120crblk[j] = i;121122for (s16 k = -128; k < 128; k++)123{124for (u32 j = 0; j < 64; j++)125cbblk[j] = k;126127for (s16 l = -128; l < 128; l++)128{129for (u32 j = 0; j < 64; j++)130yblk[j] = l;131132YUVToRGB_Scalar(crblk, cbblk, yblk, g_gsvector_yuvtorgb_temp, false);133}134}135}136}137138TEST(GSVector, YUVToRGB_Vector)139{140alignas(VECTOR_ALIGNMENT) std::array<s16, 64> crblk;141alignas(VECTOR_ALIGNMENT) std::array<s16, 64> cbblk;142alignas(VECTOR_ALIGNMENT) std::array<s16, 64> yblk;143for (s16 i = -128; i < 128; i++)144{145for (u32 j = 0; j < 64; j++)146crblk[j] = i;147148for (s16 k = -128; k < 128; k++)149{150for (u32 j = 0; j < 64; j++)151cbblk[j] = k;152153for (s16 l = -128; l < 128; l++)154{155for (u32 j = 0; j < 64; j++)156yblk[j] = l;157158YUVToRGB_Vector(crblk, cbblk, yblk, g_gsvector_yuvtorgb_temp, false);159}160}161}162}163164#endif165166167