CoCalc -- gsvector_yuvtorgb

GitHub Repository: stenzek/duckstation
Path: blob/master/src/common-tests/gsvector_yuvtorgb_test.cpp
7429 views
1
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <[email protected]>
2
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
3

4
#include "common/bitutils.h"
5
#include "common/gsvector.h"
6

7
#include <gtest/gtest.h>
8

9
#include <algorithm>
10
#include <array>
11

12
static void YUVToRGB_Vector(const std::array<s16, 64>& Crblk, const std::array<s16, 64>& Cbblk,
13
                            const std::array<s16, 64>& Yblk, u32* output, bool signed_output)
14
{
15
  const GSVector4i addval = signed_output ? GSVector4i::cxpr(0) : GSVector4i::cxpr(0x80808080);
16
  for (u32 y = 0; y < 8; y++)
17
  {
18
    const GSVector4i Cr = GSVector4i::loadl<false>(&Crblk[(y / 2) * 8]).s16to32();
19
    const GSVector4i Cb = GSVector4i::loadl<false>(&Cbblk[(y / 2) * 8]).s16to32();
20
    const GSVector4i Y = GSVector4i::load<true>(&Yblk[y * 8]);
21

22
    // BT.601 YUV->RGB coefficients, rounding formula from Mednafen.
23
    // r = clamp(sext9(Y + (((359 * Cr) + 0x80) >> 8)), -128, 127) + addval;
24
    // g = clamp(sext9(Y + ((((-88 * Cb) & ~0x1F) + ((-183 * Cr) & ~0x07) + 0x80) >> 8)), -128, 127) + addval
25
    // b = clamp(sext9<9, s32>(Y + (((454 * Cb) + 0x80) >> 8)), -128, 127) + addval
26

27
    // Need to do the multiply as 32-bit, since 127 * 359 is greater than INT16_MAX.
28
    // upl16(self) = interleave XYZW0000 -> XXYYZZWW.
29
    const GSVector4i Crmul = Cr.mul32l(GSVector4i::cxpr(359)).add16(GSVector4i::cxpr(0x80)).sra32<8>().ps32();
30
    const GSVector4i Cbmul = Cb.mul32l(GSVector4i::cxpr(454)).add16(GSVector4i::cxpr(0x80)).sra32<8>().ps32();
31
    const GSVector4i CrCbmul = (Cb.mul32l(GSVector4i::cxpr(-88)) & GSVector4i::cxpr(~0x1F))
32
                                 .add32(Cr.mul32l(GSVector4i::cxpr(-183)) & GSVector4i::cxpr(~0x07))
33
                                 .add32(GSVector4i::cxpr(0x80))
34
                                 .sra32<8>()
35
                                 .ps32();
36
    const GSVector4i r = Crmul.upl16(Crmul).add16(Y).sll16<7>().sra16<7>().ps16().add8(addval);
37
    const GSVector4i g = CrCbmul.upl16(CrCbmul).add16(Y).sll16<7>().sra16<7>().ps16().add8(addval);
38
    const GSVector4i b = Cbmul.upl16(Cbmul).add16(Y).sll16<7>().sra16<7>().ps16().add8(addval);
39
    const GSVector4i rg = r.upl8(g);
40
    const GSVector4i b0 = b.upl8();
41
    const GSVector4i rgblow = rg.upl16(b0);
42
    const GSVector4i rgbhigh = rg.uph16(b0);
43

44
    GSVector4i::store<false>(&output[y * 8 + 0], rgblow);
45
    GSVector4i::store<false>(&output[y * 8 + 4], rgbhigh);
46
  }
47
}
48

49
static void YUVToRGB_Scalar(const std::array<s16, 64>& Crblk, const std::array<s16, 64>& Cbblk,
50
                            const std::array<s16, 64>& Yblk, u32* output, bool signed_output)
51
{
52
  const s32 addval = signed_output ? 0 : 0x80;
53
  for (u32 y = 0; y < 8; y++)
54
  {
55
    for (u32 x = 0; x < 8; x++)
56
    {
57
      const s32 Cr = Crblk[(x / 2) + (y / 2) * 8];
58
      const s32 Cb = Cbblk[(x / 2) + (y / 2) * 8];
59
      const s32 Y = Yblk[x + y * 8];
60

61
      // BT.601 YUV->RGB coefficients, rounding from Mednafen.
62
      const s32 r = std::clamp(SignExtendN<9, s32>(Y + (((359 * Cr) + 0x80) >> 8)), -128, 127) + addval;
63
      const s32 g =
64
        std::clamp(SignExtendN<9, s32>(Y + ((((-88 * Cb) & ~0x1F) + ((-183 * Cr) & ~0x07) + 0x80) >> 8)), -128, 127) +
65
        addval;
66
      const s32 b = std::clamp(SignExtendN<9, s32>(Y + (((454 * Cb) + 0x80) >> 8)), -128, 127) + addval;
67

68
      output[y * 8 + x] =
69
        static_cast<u32>(Truncate8(r)) | (static_cast<u32>(Truncate8(g)) << 8) | (static_cast<u32>(Truncate8(b)) << 16);
70
    }
71
  }
72
}
73

74
// Exhaustively compares YUVToRGB_Vector against YUVToRGB_Scalar for every combination of
// Cr, Cb and Y in [-128, 127] (each block filled with a single value), in both unsigned and
// signed output modes. Stops at the first mismatch and reports the offending inputs.
TEST(GSVector, YUVToRGB)
{
  alignas(VECTOR_ALIGNMENT) std::array<s16, 64> crblk;
  alignas(VECTOR_ALIGNMENT) std::array<s16, 64> cbblk;
  alignas(VECTOR_ALIGNMENT) std::array<s16, 64> yblk;

  // Both buffers are fully overwritten by each conversion, so they can be reused across iterations.
  alignas(VECTOR_ALIGNMENT) u32 rows[64];
  alignas(VECTOR_ALIGNMENT) u32 rowv[64];

  for (s16 i = -128; i < 128; i++)
  {
    crblk.fill(i);

    for (s16 k = -128; k < 128; k++)
    {
      cbblk.fill(k);

      for (s16 l = -128; l < 128; l++)
      {
        yblk.fill(l);

        // Unsigned output first, then signed, matching the original check order.
        for (const bool signed_output : {false, true})
        {
          YUVToRGB_Scalar(crblk, cbblk, yblk, rows, signed_output);
          YUVToRGB_Vector(crblk, cbblk, yblk, rowv, signed_output);

          // std::equal comes from <algorithm>, which this file already includes.
          ASSERT_TRUE(std::equal(std::begin(rows), std::end(rows), std::begin(rowv)))
            << "Mismatch for Cr=" << i << " Cb=" << k << " Y=" << l << " signed_output=" << signed_output;
        }
      }
    }
  }
}
108

109
#if 0
110
// Performance test
111
// Output buffer (64 pixels) written by both performance tests in this disabled section.
// NOTE(review): presumably kept at namespace scope so the conversion's stores remain observable
// and are not optimised away - confirm.
alignas(VECTOR_ALIGNMENT) u32 g_gsvector_yuvtorgb_temp[64];
112

113
// Timing harness: runs the scalar conversion over every (Cr, Cb, Y) combination in [-128, 127],
// unsigned output only. Results are written to the shared temp buffer and never checked.
TEST(GSVector, YUVToRGB_Scalar)
{
  alignas(VECTOR_ALIGNMENT) std::array<s16, 64> crblk;
  alignas(VECTOR_ALIGNMENT) std::array<s16, 64> cbblk;
  alignas(VECTOR_ALIGNMENT) std::array<s16, 64> yblk;

  for (s16 cr = -128; cr < 128; cr++)
  {
    crblk.fill(cr);

    for (s16 cb = -128; cb < 128; cb++)
    {
      cbblk.fill(cb);

      for (s16 luma = -128; luma < 128; luma++)
      {
        yblk.fill(luma);
        YUVToRGB_Scalar(crblk, cbblk, yblk, g_gsvector_yuvtorgb_temp, false);
      }
    }
  }
}
138

139
// Timing harness: runs the vector conversion over every (Cr, Cb, Y) combination in [-128, 127],
// unsigned output only. Results are written to the shared temp buffer and never checked.
TEST(GSVector, YUVToRGB_Vector)
{
  alignas(VECTOR_ALIGNMENT) std::array<s16, 64> crblk;
  alignas(VECTOR_ALIGNMENT) std::array<s16, 64> cbblk;
  alignas(VECTOR_ALIGNMENT) std::array<s16, 64> yblk;

  for (s16 cr = -128; cr < 128; cr++)
  {
    crblk.fill(cr);

    for (s16 cb = -128; cb < 128; cb++)
    {
      cbblk.fill(cb);

      for (s16 luma = -128; luma < 128; luma++)
      {
        yblk.fill(luma);
        YUVToRGB_Vector(crblk, cbblk, yblk, g_gsvector_yuvtorgb_temp, false);
      }
    }
  }
}
164

165
#endif
166

167
Product

Resources

Company