GitHub Repository: stenzek/duckstation
Path: blob/master/src/common/gsvector_nosimd.h
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <[email protected]>
// SPDX-License-Identifier: CC-BY-NC-ND-4.0

// Implementation of GSVector4/GSVector4i when the host does not support any form of SIMD.

#pragma once

#include "common/types.h"

#include <algorithm>
#include <cmath>
#include <cstring>

#define GSVECTOR_HAS_SRLV 1

class GSVector2;
class GSVector2i;
class GSVector4;
class GSVector4i;

#define SSATURATE8(expr) static_cast<s8>(std::clamp<decltype(expr)>(expr, -128, 127))
#define USATURATE8(expr) static_cast<u8>(std::clamp<decltype(expr)>(expr, 0, 255))
#define SSATURATE16(expr) static_cast<s16>(std::clamp<decltype(expr)>(expr, -32768, 32767))
#define USATURATE16(expr) static_cast<u16>(std::clamp<decltype(expr)>(expr, 0, 65535))
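
// Example added for illustration (not part of the original header; assumes C++17 or later,
// where std::clamp is constexpr): the saturation helpers clamp an expression into the target
// lane's range before narrowing, mirroring what packed-saturate instructions would produce.
static_assert(SSATURATE8(300) == 127 && SSATURATE8(-300) == -128, "signed 8-bit saturation");
static_assert(USATURATE16(70000) == 65535 && USATURATE16(-1) == 0, "unsigned 16-bit saturation");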

#define ALL_LANES_8(expr) \
GSVector2i ret; \
for (size_t i = 0; i < 8; i++) \
expr; \
return ret;
#define ALL_LANES_16(expr) \
GSVector2i ret; \
for (size_t i = 0; i < 4; i++) \
expr; \
return ret;
#define ALL_LANES_32(expr) \
GSVector2i ret; \
for (size_t i = 0; i < 2; i++) \
expr; \
return ret;
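
// Illustrative note (added): ALL_LANES_N evaluates the per-lane expression for every lane of a
// temporary result and returns it, so a method such as GSVector2i::add16() below expands to
// roughly:
//   GSVector2i ret;
//   for (size_t i = 0; i < 4; i++)
//     ret.S16[i] = S16[i] + v.S16[i];
//   return ret;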
41
42
class alignas(16) GSVector2i
43
{
44
struct cxpr_init_tag
45
{
46
};
47
static constexpr cxpr_init_tag cxpr_init{};
48
49
constexpr GSVector2i(cxpr_init_tag, s32 x, s32 y) : S32{x, y} {}
50
51
constexpr GSVector2i(cxpr_init_tag, s16 s0, s16 s1, s16 s2, s16 s3) : S16{s0, s1, s2, s3} {}
52
53
constexpr GSVector2i(cxpr_init_tag, s8 b0, s8 b1, s8 b2, s8 b3, s8 b4, s8 b5, s8 b6, s8 b7)
54
: S8{b0, b1, b2, b3, b4, b5, b6, b7}
55
{
56
}
57
58
public:
59
union
60
{
61
struct
62
{
63
s32 x, y;
64
};
65
struct
66
{
67
s32 r, g;
68
};
69
float F32[2];
70
s8 S8[8];
71
s16 S16[4];
72
s32 S32[2];
73
s64 S64[1];
74
u8 U8[8];
75
u16 U16[4];
76
u32 U32[2];
77
u64 U64[1];
78
};
79
80
GSVector2i() = default;
81
82
ALWAYS_INLINE constexpr static GSVector2i cxpr(s32 x, s32 y) { return GSVector2i(cxpr_init, x, y); }
83
84
ALWAYS_INLINE constexpr static GSVector2i cxpr(s32 x) { return GSVector2i(cxpr_init, x, x); }
85
86
ALWAYS_INLINE constexpr static GSVector2i cxpr16(s16 x) { return GSVector2i(cxpr_init, x, x, x, x); }
87
88
ALWAYS_INLINE constexpr static GSVector2i cxpr16(s16 s0, s16 s1, s16 s2, s16 s3)
89
{
90
return GSVector2i(cxpr_init, s0, s1, s2, s3);
91
}
92
93
ALWAYS_INLINE constexpr static GSVector2i cxpr8(s8 b0, s8 b1, s8 b2, s8 b3, s8 b4, s8 b5, s8 b6, s8 b7)
94
{
95
return GSVector2i(cxpr_init, b0, b1, b2, b3, b4, b5, b6, b7);
96
}
97
98
ALWAYS_INLINE GSVector2i(s32 x, s32 y)
99
{
100
this->x = x;
101
this->y = y;
102
}
103
104
ALWAYS_INLINE GSVector2i(s16 s0, s16 s1, s16 s2, s16 s3)
105
{
106
S16[0] = s0;
107
S16[1] = s1;
108
S16[2] = s2;
109
S16[3] = s3;
110
}
111
112
ALWAYS_INLINE constexpr GSVector2i(s8 b0, s8 b1, s8 b2, s8 b3, s8 b4, s8 b5, s8 b6, s8 b7)
113
: S8{b0, b1, b2, b3, b4, b5, b6, b7}
114
{
115
}
116
117
ALWAYS_INLINE GSVector2i(const GSVector2i& v) { std::memcpy(S32, v.S32, sizeof(S32)); }
118
119
ALWAYS_INLINE explicit GSVector2i(s32 i) { *this = i; }
120
121
ALWAYS_INLINE explicit GSVector2i(const GSVector2& v);
122
123
ALWAYS_INLINE static GSVector2i cast(const GSVector2& v);
124
125
ALWAYS_INLINE void operator=(const GSVector2i& v) { std::memcpy(S32, v.S32, sizeof(S32)); }
126
ALWAYS_INLINE void operator=(s32 i)
127
{
128
x = i;
129
y = i;
130
}
131
132
ALWAYS_INLINE GSVector2i sat_s8(const GSVector2i& min, const GSVector2i& max) const
133
{
134
return max_s8(min).min_s8(max);
135
}
136
ALWAYS_INLINE GSVector2i sat_s16(const GSVector2i& min, const GSVector2i& max) const
137
{
138
return max_s16(min).min_s16(max);
139
}
140
ALWAYS_INLINE GSVector2i sat_s32(const GSVector2i& min, const GSVector2i& max) const
141
{
142
return max_s32(min).min_s32(max);
143
}
144
145
ALWAYS_INLINE GSVector2i sat_u8(const GSVector2i& min, const GSVector2i& max) const
146
{
147
return max_u8(min).min_u8(max);
148
}
149
ALWAYS_INLINE GSVector2i sat_u16(const GSVector2i& min, const GSVector2i& max) const
150
{
151
return max_u16(min).min_u16(max);
152
}
153
ALWAYS_INLINE GSVector2i sat_u32(const GSVector2i& min, const GSVector2i& max) const
154
{
155
return max_u32(min).min_u32(max);
156
}
157
158
GSVector2i min_s8(const GSVector2i& v) const { ALL_LANES_8(ret.S8[i] = std::min(S8[i], v.S8[i])); }
159
GSVector2i max_s8(const GSVector2i& v) const { ALL_LANES_8(ret.S8[i] = std::max(S8[i], v.S8[i])); }
160
GSVector2i min_s16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = std::min(S16[i], v.S16[i])); }
161
GSVector2i max_s16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = std::max(S16[i], v.S16[i])); }
162
GSVector2i min_s32(const GSVector2i& v) const { ALL_LANES_32(ret.S32[i] = std::min(S32[i], v.S32[i])); }
163
GSVector2i max_s32(const GSVector2i& v) const { ALL_LANES_32(ret.S32[i] = std::max(S32[i], v.S32[i])); }
164
165
GSVector2i min_u8(const GSVector2i& v) const { ALL_LANES_8(ret.U8[i] = std::min(U8[i], v.U8[i])); }
166
GSVector2i max_u8(const GSVector2i& v) const { ALL_LANES_8(ret.U8[i] = std::max(U8[i], v.U8[i])); }
167
GSVector2i min_u16(const GSVector2i& v) const { ALL_LANES_16(ret.U16[i] = std::min(U16[i], v.U16[i])); }
168
GSVector2i max_u16(const GSVector2i& v) const { ALL_LANES_16(ret.U16[i] = std::max(U16[i], v.U16[i])); }
169
GSVector2i min_u32(const GSVector2i& v) const { ALL_LANES_32(ret.U32[i] = std::min(U32[i], v.U32[i])); }
170
GSVector2i max_u32(const GSVector2i& v) const { ALL_LANES_32(ret.U32[i] = std::max(U32[i], v.U32[i])); }
171
172
s32 addv_s32() const { return (S32[0] + S32[1]); }
173
174
u8 minv_u8() const
175
{
176
return std::min(
177
U8[0],
178
std::min(U8[1], std::min(U8[2], std::min(U8[3], std::min(U8[4], std::min(U8[5], std::min(U8[6], U8[7])))))));
179
}
180
181
u16 maxv_u8() const
182
{
183
return std::max(
184
U8[0],
185
std::max(U8[1], std::max(U8[2], std::max(U8[3], std::max(U8[4], std::max(U8[5], std::max(U8[6], U8[7])))))));
186
}
187
188
u16 minv_u16() const { return std::min(U16[0], std::min(U16[1], std::min(U16[2], U16[3]))); }
189
190
u16 maxv_u16() const { return std::max(U16[0], std::max(U16[1], std::max(U16[2], U16[3]))); }
191
192
s32 minv_s32() const { return std::min(x, y); }
193
194
u32 minv_u32() const { return std::min(U32[0], U32[1]); }
195
196
s32 maxv_s32() const { return std::max(x, y); }
197
198
u32 maxv_u32() const { return std::max(U32[0], U32[1]); }
199
200
ALWAYS_INLINE GSVector2i clamp8() const { return pu16().upl8(); }
201
202
GSVector2i blend8(const GSVector2i& v, const GSVector2i& mask) const
203
{
204
GSVector2i ret;
205
for (size_t i = 0; i < 8; i++)
206
ret.U8[i] = (mask.U8[i] & 0x80) ? v.U8[i] : U8[i];
207
return ret;
208
}
209
210
template<s32 mask>
211
GSVector2i blend16(const GSVector2i& v) const
212
{
213
GSVector2i ret;
214
for (size_t i = 0; i < 4; i++)
215
ret.U16[i] = ((mask & (1 << i)) != 0) ? v.U16[i] : U16[i];
216
return ret;
217
}
218
219
template<s32 mask>
220
GSVector2i blend32(const GSVector2i& v) const
221
{
222
GSVector2i ret;
223
for (size_t i = 0; i < 2; i++)
224
ret.U32[i] = ((mask & (1 << i)) != 0) ? v.U32[i] : U32[i];
225
return ret;
226
}
227
228
GSVector2i blend(const GSVector2i& v, const GSVector2i& mask) const
{
// Bitwise select: take bits from v where mask is set, and from *this elsewhere,
// matching the GSVector4i::blend() implementation below.
GSVector2i ret;
ret.U64[0] = (v.U64[0] & mask.U64[0]) | (U64[0] & ~mask.U64[0]);
return ret;
}
234
235
GSVector2i shuffle8(const GSVector2i& mask) const
{
// Only 8 byte lanes here, so indices are masked to 0x7 to stay in bounds.
ALL_LANES_8(ret.S8[i] = (mask.S8[i] & 0x80) ? 0 : (S8[mask.S8[i] & 0x7]));
}
239
240
GSVector2i ps16() const { ALL_LANES_8(ret.S8[i] = SSATURATE8(S16[(i < 4) ? i : (i - 4)])); }
241
GSVector2i pu16() const { ALL_LANES_8(ret.U8[i] = USATURATE8(U16[(i < 4) ? i : (i - 4)])); }
242
GSVector2i ps32() const { ALL_LANES_16(ret.S16[i] = SSATURATE16(S32[(i < 2) ? i : (i - 2)])); }
243
GSVector2i pu32() const { ALL_LANES_16(ret.U16[i] = USATURATE16(U32[(i < 2) ? i : (i - 2)])); }
244
245
GSVector2i upl8() const { return GSVector2i(S8[0], 0, S8[1], 0, S8[2], 0, S8[3], 0); }
246
247
GSVector2i upl16() const { return GSVector2i(S16[0], 0, S16[1], 0); }
248
249
GSVector2i upl32() const { return GSVector2i(S32[0], 0); }
250
251
GSVector2i s8to16() const { ALL_LANES_16(ret.S16[i] = S8[i]); }
252
GSVector2i s8to32() const { ALL_LANES_32(ret.S32[i] = S8[i]); }
253
GSVector2i u8to16() const { ALL_LANES_16(ret.U16[i] = U8[i]); }
254
GSVector2i u8to32() const { ALL_LANES_32(ret.U32[i] = U8[i]); }
255
GSVector2i u16to32() const { ALL_LANES_32(ret.U32[i] = U16[i]); }
256
GSVector2i s16to32() const { ALL_LANES_32(ret.S32[i] = S16[i]); }
257
258
template<s32 v>
259
GSVector2i srl() const
260
{
261
GSVector2i ret = {};
262
if constexpr (v < 8)
263
{
264
for (s32 i = 0; i < (8 - v); i++)
265
ret.U8[i] = U8[v + i];
266
}
267
return ret;
268
}
269
270
template<s32 v>
271
GSVector2i sll() const
272
{
273
GSVector2i ret = {};
274
if constexpr (v < 8)
275
{
276
for (s32 i = 0; i < (8 - v); i++)
277
ret.U8[v + i] = U8[i];
278
}
279
return ret;
280
}
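
// Illustrative (added): srl<N>() and sll<N>() shift the whole 8-byte register by N bytes rather
// than bits, in the spirit of byte-wise psrldq/pslldq shifts. For example,
//   GSVector2i::cxpr8(1, 2, 3, 4, 5, 6, 7, 8).srl<2>()
// yields the byte sequence {3, 4, 5, 6, 7, 8, 0, 0}.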
281
282
template<s32 v>
283
GSVector2i sll16() const
284
{
285
ALL_LANES_16(ret.U16[i] = U16[i] << v);
286
}
287
288
GSVector2i sll16(s32 v) const { ALL_LANES_16(ret.U16[i] = U16[i] << v); }
289
290
GSVector2i sllv16(const GSVector2i& v) const { ALL_LANES_16(ret.U16[i] = U16[i] << v.U16[i]); }
291
292
template<s32 v>
293
GSVector2i srl16() const
294
{
295
ALL_LANES_16(ret.U16[i] = U16[i] >> v);
296
}
297
298
GSVector2i srl16(s32 v) const { ALL_LANES_16(ret.U16[i] = U16[i] >> v); }
299
300
GSVector2i srlv16(const GSVector2i& v) const { ALL_LANES_16(ret.U16[i] = U16[i] >> v.U16[i]); }
301
302
template<s32 v>
303
GSVector2i sra16() const
304
{
305
ALL_LANES_16(ret.S16[i] = S16[i] >> v);
306
}
307
308
GSVector2i sra16(s32 v) const { ALL_LANES_16(ret.S16[i] = S16[i] >> v); }
309
310
GSVector2i srav16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = S16[i] >> v.S16[i]); }
311
312
template<s32 v>
313
GSVector2i sll32() const
314
{
315
ALL_LANES_32(ret.U32[i] = U32[i] << v);
316
}
317
318
GSVector2i sll32(s32 v) const { ALL_LANES_32(ret.U32[i] = U32[i] << v); }
319
320
GSVector2i sllv32(const GSVector2i& v) const { ALL_LANES_32(ret.U32[i] = U32[i] << v.U32[i]); }
321
322
template<s32 v>
323
GSVector2i srl32() const
324
{
325
ALL_LANES_32(ret.U32[i] = U32[i] >> v);
326
}
327
328
GSVector2i srl32(s32 v) const { ALL_LANES_32(ret.U32[i] = U32[i] >> v); }
329
330
GSVector2i srlv32(const GSVector2i& v) const { ALL_LANES_32(ret.U32[i] = U32[i] >> v.U32[i]); }
331
332
template<s32 v>
333
GSVector2i sra32() const
334
{
335
ALL_LANES_32(ret.S32[i] = S32[i] >> v);
336
}
337
338
GSVector2i sra32(s32 v) const { ALL_LANES_32(ret.S32[i] = S32[i] >> v); }
339
340
GSVector2i srav32(const GSVector2i& v) const { ALL_LANES_32(ret.S32[i] = S32[i] >> v.S32[i]); }
341
342
GSVector2i add8(const GSVector2i& v) const { ALL_LANES_8(ret.S8[i] = S8[i] + v.S8[i]); }
343
344
GSVector2i add16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = S16[i] + v.S16[i]); }
345
346
GSVector2i add32(const GSVector2i& v) const { ALL_LANES_32(ret.S32[i] = S32[i] + v.S32[i]); }
347
348
GSVector2i adds8(const GSVector2i& v) const { ALL_LANES_8(ret.S8[i] = SSATURATE8(S8[i] + v.S8[i])); }
349
350
GSVector2i adds16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = SSATURATE16(S16[i] + v.S16[i])); }
351
352
GSVector2i addus8(const GSVector2i& v) const { ALL_LANES_8(ret.U8[i] = USATURATE8(U8[i] + v.U8[i])); }
353
354
GSVector2i addus16(const GSVector2i& v) const { ALL_LANES_16(ret.U16[i] = USATURATE16(U16[i] + v.U16[i])); }
355
356
GSVector2i sub8(const GSVector2i& v) const { ALL_LANES_8(ret.S8[i] = S8[i] - v.S8[i]); }
357
358
GSVector2i sub16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = S16[i] - v.S16[i]); }
359
360
GSVector2i sub32(const GSVector2i& v) const { ALL_LANES_32(ret.S32[i] = S32[i] - v.S32[i]); }
361
362
GSVector2i subs8(const GSVector2i& v) const { ALL_LANES_8(ret.S8[i] = SSATURATE8(S8[i] - v.S8[i])); }
363
364
GSVector2i subs16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = SSATURATE16(S16[i] - v.S16[i])); }
365
366
GSVector2i subus8(const GSVector2i& v) const { ALL_LANES_8(ret.U8[i] = USATURATE8(U8[i] - v.U8[i])); }
367
368
GSVector2i subus16(const GSVector2i& v) const { ALL_LANES_16(ret.U16[i] = USATURATE16(U16[i] - v.U16[i])); }
369
370
GSVector2i avg8(const GSVector2i& v) const { ALL_LANES_8(ret.U8[i] = (U8[i] + v.U8[i]) >> 1); }
371
372
GSVector2i avg16(const GSVector2i& v) const { ALL_LANES_16(ret.U16[i] = (U16[i] + v.U16[i]) >> 1); }
373
374
GSVector2i mul16l(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = S16[i] * v.S16[i]); }
375
376
GSVector2i mul32l(const GSVector2i& v) const { ALL_LANES_32(ret.S32[i] = S32[i] * v.S32[i]); }
377
378
ALWAYS_INLINE bool eq(const GSVector2i& v) const { return (std::memcmp(S32, v.S32, sizeof(S32))) == 0; }
379
380
GSVector2i eq8(const GSVector2i& v) const { ALL_LANES_8(ret.S8[i] = (S8[i] == v.S8[i]) ? -1 : 0); }
381
GSVector2i eq16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = (S16[i] == v.S16[i]) ? -1 : 0); }
382
GSVector2i eq32(const GSVector2i& v) const { ALL_LANES_32(ret.S32[i] = (S32[i] == v.S32[i]) ? -1 : 0); }
383
384
GSVector2i neq8(const GSVector2i& v) const { ALL_LANES_8(ret.S8[i] = (S8[i] != v.S8[i]) ? -1 : 0); }
385
GSVector2i neq16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = (S16[i] != v.S16[i]) ? -1 : 0); }
386
GSVector2i neq32(const GSVector2i& v) const { ALL_LANES_32(ret.S32[i] = (S32[i] != v.S32[i]) ? -1 : 0); }
387
388
GSVector2i gt8(const GSVector2i& v) const { ALL_LANES_8(ret.S8[i] = (S8[i] > v.S8[i]) ? -1 : 0); }
389
GSVector2i gt16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = (S16[i] > v.S16[i]) ? -1 : 0); }
390
GSVector2i gt32(const GSVector2i& v) const { ALL_LANES_32(ret.S32[i] = (S32[i] > v.S32[i]) ? -1 : 0); }
391
392
GSVector2i ge8(const GSVector2i& v) const { ALL_LANES_8(ret.S8[i] = (S8[i] >= v.S8[i]) ? -1 : 0); }
393
GSVector2i ge16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = (S16[i] >= v.S16[i]) ? -1 : 0); }
394
GSVector2i ge32(const GSVector2i& v) const { ALL_LANES_32(ret.S32[i] = (S32[i] >= v.S32[i]) ? -1 : 0); }
395
396
GSVector2i lt8(const GSVector2i& v) const { ALL_LANES_8(ret.S8[i] = (S8[i] < v.S8[i]) ? -1 : 0); }
397
GSVector2i lt16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = (S16[i] < v.S16[i]) ? -1 : 0); }
398
GSVector2i lt32(const GSVector2i& v) const { ALL_LANES_32(ret.S32[i] = (S32[i] < v.S32[i]) ? -1 : 0); }
399
400
GSVector2i le8(const GSVector2i& v) const { ALL_LANES_8(ret.S8[i] = (S8[i] <= v.S8[i]) ? -1 : 0); }
401
GSVector2i le16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = (S16[i] <= v.S16[i]) ? -1 : 0); }
402
GSVector2i le32(const GSVector2i& v) const { ALL_LANES_32(ret.S32[i] = (S32[i] <= v.S32[i]) ? -1 : 0); }
403
404
ALWAYS_INLINE GSVector2i andnot(const GSVector2i& v) const
405
{
406
GSVector2i ret;
407
ret.U64[0] = (~v.U64[0]) & U64[0];
408
return ret;
409
}
410
411
s32 mask() const
412
{
413
return static_cast<s32>((static_cast<u32>(U8[0] >> 7) << 0) | (static_cast<u32>(U8[1] >> 7) << 1) |
414
(static_cast<u32>(U8[2] >> 7) << 2) | (static_cast<u32>(U8[3] >> 7) << 3) |
415
(static_cast<u32>(U8[4] >> 7) << 4) | (static_cast<u32>(U8[5] >> 7) << 5) |
416
(static_cast<u32>(U8[6] >> 7) << 6) | (static_cast<u32>(U8[7] >> 7) << 7));
417
}
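
// Illustrative (added): mask() gathers the sign bit of each byte into one integer, in the style
// of a movemask instruction; a vector whose first four bytes are {0x80, 0x00, 0xFF, 0x01}
// contributes 0b0101 = 5 in the low four bits of the result.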
418
419
ALWAYS_INLINE bool alltrue() const { return (U64[0] == 0xFFFFFFFFFFFFFFFFULL); }
420
421
ALWAYS_INLINE bool allfalse() const { return (U64[0] == 0); }
422
423
template<s32 i>
424
ALWAYS_INLINE GSVector2i insert8(s32 a) const
425
{
426
GSVector2i ret = *this;
427
ret.S8[i] = static_cast<s8>(a);
428
return ret;
429
}
430
431
template<s32 i>
432
ALWAYS_INLINE s32 extract8() const
433
{
434
return S8[i];
435
}
436
437
template<s32 i>
438
ALWAYS_INLINE GSVector2i insert16(s32 a) const
439
{
440
GSVector2i ret = *this;
441
ret.S16[i] = static_cast<s16>(a);
442
return ret;
443
}
444
445
template<s32 i>
446
ALWAYS_INLINE s32 extract16() const
447
{
448
return S16[i];
449
}
450
451
template<s32 i>
452
ALWAYS_INLINE GSVector2i insert32(s32 a) const
453
{
454
GSVector2i ret = *this;
455
ret.S32[i] = a;
456
return ret;
457
}
458
459
template<s32 i>
460
ALWAYS_INLINE s32 extract32() const
461
{
462
return S32[i];
463
}
464
465
ALWAYS_INLINE static GSVector2i load32(const void* p)
466
{
467
GSVector2i ret;
468
std::memcpy(&ret.x, p, sizeof(s32));
469
ret.y = 0;
470
return ret;
471
}
472
473
ALWAYS_INLINE static GSVector2i set32(s32 v) { return GSVector2i(v, 0); }
474
475
template<bool aligned>
476
ALWAYS_INLINE static GSVector2i load(const void* p)
477
{
478
GSVector2i ret;
479
std::memcpy(ret.S32, p, sizeof(ret.S32));
480
return ret;
481
}
482
483
template<bool aligned>
484
ALWAYS_INLINE static void store(void* p, const GSVector2i& v)
485
{
486
std::memcpy(p, v.S32, sizeof(S32));
487
}
488
489
ALWAYS_INLINE static void store32(void* p, const GSVector2i& v) { std::memcpy(p, &v.x, sizeof(s32)); }
490
491
ALWAYS_INLINE void operator&=(const GSVector2i& v) { U64[0] &= v.U64[0]; }
492
ALWAYS_INLINE void operator|=(const GSVector2i& v) { U64[0] |= v.U64[0]; }
493
ALWAYS_INLINE void operator^=(const GSVector2i& v) { U64[0] ^= v.U64[0]; }
494
495
ALWAYS_INLINE friend GSVector2i operator&(const GSVector2i& v1, const GSVector2i& v2)
496
{
497
GSVector2i ret;
498
ret.U64[0] = v1.U64[0] & v2.U64[0];
499
return ret;
500
}
501
502
ALWAYS_INLINE friend GSVector2i operator|(const GSVector2i& v1, const GSVector2i& v2)
503
{
504
GSVector2i ret;
505
ret.U64[0] = v1.U64[0] | v2.U64[0];
506
return ret;
507
}
508
509
ALWAYS_INLINE friend GSVector2i operator^(const GSVector2i& v1, const GSVector2i& v2)
510
{
511
GSVector2i ret;
512
ret.U64[0] = v1.U64[0] ^ v2.U64[0];
513
return ret;
514
}
515
516
ALWAYS_INLINE friend GSVector2i operator&(const GSVector2i& v, s32 i) { return v & GSVector2i(i); }
517
518
ALWAYS_INLINE friend GSVector2i operator|(const GSVector2i& v, s32 i) { return v | GSVector2i(i); }
519
520
ALWAYS_INLINE friend GSVector2i operator^(const GSVector2i& v, s32 i) { return v ^ GSVector2i(i); }
521
522
ALWAYS_INLINE friend GSVector2i operator~(const GSVector2i& v) { return v ^ v.eq32(v); }
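
// Note (added for clarity): v.eq32(v) always yields all-ones lanes, so the XOR above implements
// bitwise NOT without needing a separate all-ones constant.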
523
524
ALWAYS_INLINE static constexpr GSVector2i zero() { return GSVector2i::cxpr(0, 0); }
525
526
ALWAYS_INLINE GSVector2i xy() const { return *this; }
527
ALWAYS_INLINE GSVector2i xx() const { return GSVector2i(x, x); }
528
ALWAYS_INLINE GSVector2i yx() const { return GSVector2i(y, x); }
529
ALWAYS_INLINE GSVector2i yy() const { return GSVector2i(y, y); }
530
};
531
532
class alignas(16) GSVector2
533
{
534
struct cxpr_init_tag
535
{
536
};
537
static constexpr cxpr_init_tag cxpr_init{};
538
539
constexpr GSVector2(cxpr_init_tag, float x, float y) : F32{x, y} {}
540
541
constexpr GSVector2(cxpr_init_tag, int x, int y) : I32{x, y} {}
542
543
public:
544
union
545
{
546
struct
547
{
548
float x, y;
549
};
550
struct
551
{
552
float r, g;
553
};
554
float F32[2];
555
double F64[2];
556
s8 I8[16];
557
s16 I16[8];
558
s32 I32[4];
559
s64 I64[2];
560
u8 U8[16];
561
u16 U16[8];
562
u32 U32[4];
563
u64 U64[2];
564
};
565
566
GSVector2() = default;
567
568
constexpr static GSVector2 cxpr(float x, float y) { return GSVector2(cxpr_init, x, y); }
569
570
constexpr static GSVector2 cxpr(float x) { return GSVector2(cxpr_init, x, x); }
571
572
constexpr static GSVector2 cxpr(int x, int y) { return GSVector2(cxpr_init, x, y); }
573
574
constexpr static GSVector2 cxpr(int x) { return GSVector2(cxpr_init, x, x); }
575
576
ALWAYS_INLINE GSVector2(float x, float y)
577
{
578
this->x = x;
579
this->y = y;
580
}
581
582
ALWAYS_INLINE GSVector2(int x, int y)
583
{
584
this->x = static_cast<float>(x);
585
this->y = static_cast<float>(y);
586
}
587
588
ALWAYS_INLINE explicit GSVector2(float f) { x = y = f; }
589
590
ALWAYS_INLINE explicit GSVector2(int i) { x = y = static_cast<float>(i); }
591
592
ALWAYS_INLINE explicit GSVector2(const GSVector2i& v);
593
594
ALWAYS_INLINE static GSVector2 cast(const GSVector2i& v);
595
596
ALWAYS_INLINE void operator=(float f) { x = y = f; }
597
598
GSVector2 abs() const { return GSVector2(std::fabs(x), std::fabs(y)); }
599
600
GSVector2 neg() const { return GSVector2(-x, -y); }
601
602
GSVector2 floor() const { return GSVector2(std::floor(x), std::floor(y)); }
603
604
GSVector2 ceil() const { return GSVector2(std::ceil(x), std::ceil(y)); }
605
606
GSVector2 sat(const GSVector2& min, const GSVector2& max) const
607
{
608
return GSVector2(std::clamp(x, min.x, max.x), std::clamp(y, min.y, max.y));
609
}
610
611
GSVector2 sat(const float scale = 255) const { return sat(zero(), GSVector2(scale)); }
612
613
GSVector2 clamp(const float scale = 255) const { return min(GSVector2(scale)); }
614
615
GSVector2 min(const GSVector2& v) const { return GSVector2(std::min(x, v.x), std::min(y, v.y)); }
616
617
GSVector2 max(const GSVector2& v) const { return GSVector2(std::max(x, v.x), std::max(y, v.y)); }
618
619
template<int mask>
620
GSVector2 blend32(const GSVector2& v) const
621
{
622
return GSVector2(v.F32[mask & 1], v.F32[(mask >> 1) & 1]);
623
}
624
625
ALWAYS_INLINE GSVector2 blend32(const GSVector2& v, const GSVector2& mask) const
626
{
627
return GSVector2((mask.U32[0] & 0x80000000u) ? v.x : x, (mask.U32[1] & 0x80000000u) ? v.y : y);
628
}
629
630
ALWAYS_INLINE GSVector2 andnot(const GSVector2& v) const
631
{
632
GSVector2 ret;
633
ret.U32[0] = ((~v.U32[0]) & U32[0]);
634
ret.U32[1] = ((~v.U32[1]) & U32[1]);
635
return ret;
636
}
637
638
ALWAYS_INLINE int mask() const { return (U32[0] >> 31) | ((U32[1] >> 30) & 2); }
639
640
ALWAYS_INLINE bool alltrue() const { return (U64[0] == 0xFFFFFFFFFFFFFFFFULL); }
641
642
ALWAYS_INLINE bool allfalse() const { return (U64[0] == 0); }
643
644
ALWAYS_INLINE GSVector2 replace_nan(const GSVector2& v) const { return v.blend32(*this, *this == *this); }
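
// Note (added for clarity): *this == *this produces all-ones lanes wherever the value is not NaN,
// so the blend above keeps the original lane when it is a number and takes v's lane where the
// original was NaN.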
645
646
template<int src, int dst>
647
ALWAYS_INLINE GSVector2 insert32(const GSVector2& v) const
648
{
649
GSVector2 ret = *this;
650
ret.F32[dst] = v.F32[src];
651
return ret;
652
}
653
654
template<int i>
655
ALWAYS_INLINE int extract32() const
656
{
657
return I32[i];
658
}
659
660
ALWAYS_INLINE float dot(const GSVector2& v) const { return (x * v.x + y * v.y); }
661
662
ALWAYS_INLINE static constexpr GSVector2 zero() { return GSVector2::cxpr(0.0f, 0.0f); }
663
664
ALWAYS_INLINE static constexpr GSVector2 xffffffff()
665
{
666
GSVector2 ret = zero();
667
ret.U64[0] = ~ret.U64[0];
668
return ret;
669
}
670
671
template<bool aligned>
672
ALWAYS_INLINE static GSVector2 load(const void* p)
673
{
674
GSVector2 ret;
675
std::memcpy(ret.F32, p, sizeof(F32));
676
return ret;
677
}
678
679
template<bool aligned>
680
ALWAYS_INLINE static void store(void* p, const GSVector2& v)
681
{
682
std::memcpy(p, &v.F32, sizeof(F32));
683
}
684
685
ALWAYS_INLINE GSVector2 operator-() const { return neg(); }
686
687
void operator+=(const GSVector2& v_)
688
{
689
x = x + v_.x;
690
y = y + v_.y;
691
}
692
void operator-=(const GSVector2& v_)
693
{
694
x = x - v_.x;
695
y = y - v_.y;
696
}
697
void operator*=(const GSVector2& v_)
698
{
699
x = x * v_.x;
700
y = y * v_.y;
701
}
702
void operator/=(const GSVector2& v_)
703
{
704
x = x / v_.x;
705
y = y / v_.y;
706
}
707
708
void operator+=(const float v_)
709
{
710
x = x + v_;
711
y = y + v_;
712
}
713
void operator-=(const float v_)
714
{
715
x = x - v_;
716
y = y - v_;
717
}
718
void operator*=(const float v_)
719
{
720
x = x * v_;
721
y = y * v_;
722
}
723
void operator/=(const float v_)
724
{
725
x = x / v_;
726
y = y / v_;
727
}
728
729
void operator&=(const GSVector2& v_) { U64[0] &= v_.U64[0]; }
730
void operator|=(const GSVector2& v_) { U64[0] |= v_.U64[0]; }
731
void operator^=(const GSVector2& v_) { U64[0] ^= v_.U64[0]; }
732
733
friend GSVector2 operator+(const GSVector2& v1, const GSVector2& v2) { return GSVector2(v1.x + v2.x, v1.y + v2.y); }
734
735
friend GSVector2 operator-(const GSVector2& v1, const GSVector2& v2) { return GSVector2(v1.x - v2.x, v1.y - v2.y); }
736
737
friend GSVector2 operator*(const GSVector2& v1, const GSVector2& v2) { return GSVector2(v1.x * v2.x, v1.y * v2.y); }
738
739
friend GSVector2 operator/(const GSVector2& v1, const GSVector2& v2) { return GSVector2(v1.x / v2.x, v1.y / v2.y); }
740
741
friend GSVector2 operator+(const GSVector2& v, float f) { return GSVector2(v.x + f, v.y + f); }
742
743
friend GSVector2 operator-(const GSVector2& v, float f) { return GSVector2(v.x - f, v.y - f); }
744
745
friend GSVector2 operator*(const GSVector2& v, float f) { return GSVector2(v.x * f, v.y * f); }
746
747
friend GSVector2 operator/(const GSVector2& v, float f) { return GSVector2(v.x / f, v.y / f); }
748
749
friend GSVector2 operator&(const GSVector2& v1, const GSVector2& v2)
750
{
751
GSVector2 ret;
752
ret.U64[0] = v1.U64[0] & v2.U64[0];
753
return ret;
754
}
755
756
ALWAYS_INLINE friend GSVector2 operator|(const GSVector2& v1, const GSVector2& v2)
757
{
758
GSVector2 ret;
759
ret.U64[0] = v1.U64[0] | v2.U64[0];
760
return ret;
761
}
762
763
ALWAYS_INLINE friend GSVector2 operator^(const GSVector2& v1, const GSVector2& v2)
764
{
765
GSVector2 ret;
766
ret.U64[0] = v1.U64[0] ^ v2.U64[0];
767
return ret;
768
}
769
770
ALWAYS_INLINE friend GSVector2 operator==(const GSVector2& v1, const GSVector2& v2)
771
{
772
GSVector2 ret;
773
ret.I32[0] = (v1.x == v2.x) ? -1 : 0;
774
ret.I32[1] = (v1.y == v2.y) ? -1 : 0;
775
return ret;
776
}
777
778
ALWAYS_INLINE friend GSVector2 operator!=(const GSVector2& v1, const GSVector2& v2)
779
{
780
GSVector2 ret;
781
ret.I32[0] = (v1.x != v2.x) ? -1 : 0;
782
ret.I32[1] = (v1.y != v2.y) ? -1 : 0;
783
return ret;
784
}
785
786
ALWAYS_INLINE friend GSVector2 operator>(const GSVector2& v1, const GSVector2& v2)
787
{
788
GSVector2 ret;
789
ret.I32[0] = (v1.x > v2.x) ? -1 : 0;
790
ret.I32[1] = (v1.y > v2.y) ? -1 : 0;
791
return ret;
792
}
793
794
ALWAYS_INLINE friend GSVector2 operator<(const GSVector2& v1, const GSVector2& v2)
795
{
796
GSVector2 ret;
797
ret.I32[0] = (v1.x < v2.x) ? -1 : 0;
798
ret.I32[1] = (v1.y < v2.y) ? -1 : 0;
799
return ret;
800
}
801
802
ALWAYS_INLINE friend GSVector2 operator>=(const GSVector2& v1, const GSVector2& v2)
803
{
804
GSVector2 ret;
805
ret.I32[0] = (v1.x >= v2.x) ? -1 : 0;
806
ret.I32[1] = (v1.y >= v2.y) ? -1 : 0;
807
return ret;
808
}
809
810
ALWAYS_INLINE friend GSVector2 operator<=(const GSVector2& v1, const GSVector2& v2)
811
{
812
GSVector2 ret;
813
ret.I32[0] = (v1.x <= v2.x) ? -1 : 0;
814
ret.I32[1] = (v1.y <= v2.y) ? -1 : 0;
815
return ret;
816
}
817
818
ALWAYS_INLINE GSVector2 xy() const { return *this; }
819
ALWAYS_INLINE GSVector2 xx() const { return GSVector2(x, x); }
820
ALWAYS_INLINE GSVector2 yx() const { return GSVector2(y, x); }
821
ALWAYS_INLINE GSVector2 yy() const { return GSVector2(y, y); }
822
};
823
824
#undef ALL_LANES_8
825
#undef ALL_LANES_16
826
#undef ALL_LANES_32
827
828
#define ALL_LANES_8(expr) \
829
GSVector4i ret; \
830
for (size_t i = 0; i < 16; i++) \
831
expr; \
832
return ret;
833
#define ALL_LANES_16(expr) \
834
GSVector4i ret; \
835
for (size_t i = 0; i < 8; i++) \
836
expr; \
837
return ret;
838
#define ALL_LANES_32(expr) \
839
GSVector4i ret; \
840
for (size_t i = 0; i < 4; i++) \
841
expr; \
842
return ret;
843
#define ALL_LANES_64(expr) \
844
GSVector4i ret; \
845
for (size_t i = 0; i < 2; i++) \
846
expr; \
847
return ret;
848
849
class alignas(16) GSVector4i
850
{
851
struct cxpr_init_tag
852
{
853
};
854
static constexpr cxpr_init_tag cxpr_init{};
855
856
constexpr GSVector4i(cxpr_init_tag, s32 x, s32 y, s32 z, s32 w) : S32{x, y, z, w} {}
857
858
constexpr GSVector4i(cxpr_init_tag, s16 s0, s16 s1, s16 s2, s16 s3, s16 s4, s16 s5, s16 s6, s16 s7)
859
: S16{s0, s1, s2, s3, s4, s5, s6, s7}
860
{
861
}
862
863
constexpr GSVector4i(cxpr_init_tag, s8 b0, s8 b1, s8 b2, s8 b3, s8 b4, s8 b5, s8 b6, s8 b7, s8 b8, s8 b9, s8 b10,
864
s8 b11, s8 b12, s8 b13, s8 b14, s8 b15)
865
: S8{b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15}
866
{
867
}
868
869
public:
870
union
871
{
872
struct
873
{
874
s32 x, y, z, w;
875
};
876
struct
877
{
878
s32 r, g, b, a;
879
};
880
struct
881
{
882
s32 left, top, right, bottom;
883
};
884
float F32[4];
885
s8 S8[16];
886
s16 S16[8];
887
s32 S32[4];
888
s64 S64[2];
889
u8 U8[16];
890
u16 U16[8];
891
u32 U32[4];
892
u64 U64[2];
893
};
894
895
GSVector4i() = default;
896
897
ALWAYS_INLINE constexpr static GSVector4i cxpr(s32 x, s32 y, s32 z, s32 w)
898
{
899
return GSVector4i(cxpr_init, x, y, z, w);
900
}
901
902
ALWAYS_INLINE constexpr static GSVector4i cxpr(s32 x) { return GSVector4i(cxpr_init, x, x, x, x); }
903
904
ALWAYS_INLINE constexpr static GSVector4i cxpr16(s16 x) { return GSVector4i(cxpr_init, x, x, x, x, x, x, x, x); }
905
906
ALWAYS_INLINE constexpr static GSVector4i cxpr16(s16 s0, s16 s1, s16 s2, s16 s3, s16 s4, s16 s5, s16 s6, s16 s7)
907
{
908
return GSVector4i(cxpr_init, s0, s1, s2, s3, s4, s5, s6, s7);
909
}
910
911
ALWAYS_INLINE constexpr static GSVector4i cxpr8(s8 b0, s8 b1, s8 b2, s8 b3, s8 b4, s8 b5, s8 b6, s8 b7, s8 b8, s8 b9,
912
s8 b10, s8 b11, s8 b12, s8 b13, s8 b14, s8 b15)
913
{
914
return GSVector4i(cxpr_init, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15);
915
}
916
917
ALWAYS_INLINE GSVector4i(s32 x, s32 y, s32 z, s32 w)
918
{
919
this->x = x;
920
this->y = y;
921
this->z = z;
922
this->w = w;
923
}
924
925
ALWAYS_INLINE GSVector4i(s16 s0, s16 s1, s16 s2, s16 s3, s16 s4, s16 s5, s16 s6, s16 s7)
926
{
927
S16[0] = s0;
928
S16[1] = s1;
929
S16[2] = s2;
930
S16[3] = s3;
931
S16[4] = s4;
932
S16[5] = s5;
933
S16[6] = s6;
934
S16[7] = s7;
935
}
936
937
ALWAYS_INLINE constexpr GSVector4i(s8 b0, s8 b1, s8 b2, s8 b3, s8 b4, s8 b5, s8 b6, s8 b7, s8 b8, s8 b9, s8 b10,
938
s8 b11, s8 b12, s8 b13, s8 b14, s8 b15)
939
: S8{b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15}
940
{
941
}
942
943
ALWAYS_INLINE GSVector4i(const GSVector4i& v) { std::memcpy(S32, v.S32, sizeof(S32)); }
944
945
ALWAYS_INLINE explicit GSVector4i(const GSVector2& v) : S32{static_cast<s32>(v.x), static_cast<s32>(v.y), 0, 0} {}
946
947
ALWAYS_INLINE explicit GSVector4i(const GSVector2i& v) : S32{v.S32[0], v.S32[1], 0, 0} {}
948
949
ALWAYS_INLINE explicit GSVector4i(s32 i) { *this = i; }
950
951
ALWAYS_INLINE explicit GSVector4i(const GSVector4& v);
952
953
ALWAYS_INLINE static GSVector4i cast(const GSVector4& v);
954
955
ALWAYS_INLINE void operator=(const GSVector4i& v) { std::memcpy(S32, v.S32, sizeof(S32)); }
956
ALWAYS_INLINE void operator=(s32 i)
957
{
958
x = i;
959
y = i;
960
z = i;
961
w = i;
962
}
963
964
// rect: components are interpreted as a rectangle (left, top, right, bottom).

ALWAYS_INLINE s32 width() const { return right - left; }
967
ALWAYS_INLINE s32 height() const { return bottom - top; }
968
969
ALWAYS_INLINE GSVector2i rsize() const { return GSVector2i(width(), height()); }
970
ALWAYS_INLINE bool rempty() const { return (lt32(zwzw()).mask() != 0x00ff); }
971
ALWAYS_INLINE bool rvalid() const { return ((ge32(zwzw()).mask() & 0xff) == 0); }
972
973
GSVector4i runion(const GSVector4i& v) const
974
{
975
return GSVector4i(std::min(x, v.x), std::min(y, v.y), std::max(z, v.z), std::max(w, v.w));
976
}
977
978
ALWAYS_INLINE GSVector4i rintersect(const GSVector4i& v) const { return sat_s32(v); }
979
ALWAYS_INLINE bool rintersects(const GSVector4i& v) const { return rintersect(v).rvalid(); }
980
ALWAYS_INLINE bool rcontains(const GSVector4i& v) const { return rintersect(v).eq(v); }
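
// Usage sketch (added, illustrative only):
//   GSVector4i a = GSVector4i(0, 0, 64, 64);     // left, top, right, bottom
//   GSVector4i b = GSVector4i(32, 32, 128, 128);
//   GSVector4i c = a.rintersect(b);              // (32, 32, 64, 64)
//   bool hit = a.rintersects(b);                 // true, the rectangles overlap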
981
982
ALWAYS_INLINE u32 rgba32() const { return static_cast<u32>(ps32().pu16().extract32<0>()); }
983
984
ALWAYS_INLINE GSVector4i sat_s8(const GSVector4i& min, const GSVector4i& max) const
985
{
986
return max_s8(min).min_s8(max);
987
}
988
ALWAYS_INLINE GSVector4i sat_s8(const GSVector4i& minmax) const
989
{
990
return max_s8(minmax.xyxy()).min_s8(minmax.zwzw());
991
}
992
ALWAYS_INLINE GSVector4i sat_s16(const GSVector4i& min, const GSVector4i& max) const
993
{
994
return max_s16(min).min_s16(max);
995
}
996
ALWAYS_INLINE GSVector4i sat_s16(const GSVector4i& minmax) const
997
{
998
return max_s16(minmax.xyxy()).min_s16(minmax.zwzw());
999
}
1000
ALWAYS_INLINE GSVector4i sat_s32(const GSVector4i& min, const GSVector4i& max) const
1001
{
1002
return max_s32(min).min_s32(max);
1003
}
1004
ALWAYS_INLINE GSVector4i sat_s32(const GSVector4i& minmax) const
1005
{
1006
return max_s32(minmax.xyxy()).min_s32(minmax.zwzw());
1007
}
1008
1009
ALWAYS_INLINE GSVector4i sat_u8(const GSVector4i& min, const GSVector4i& max) const
1010
{
1011
return max_u8(min).min_u8(max);
1012
}
1013
ALWAYS_INLINE GSVector4i sat_u8(const GSVector4i& minmax) const
1014
{
1015
return max_u8(minmax.xyxy()).min_u8(minmax.zwzw());
1016
}
1017
ALWAYS_INLINE GSVector4i sat_u16(const GSVector4i& min, const GSVector4i& max) const
1018
{
1019
return max_u16(min).min_u16(max);
1020
}
1021
ALWAYS_INLINE GSVector4i sat_u16(const GSVector4i& minmax) const
1022
{
1023
return max_u16(minmax.xyxy()).min_u16(minmax.zwzw());
1024
}
1025
ALWAYS_INLINE GSVector4i sat_u32(const GSVector4i& min, const GSVector4i& max) const
1026
{
1027
return max_u32(min).min_u32(max);
1028
}
1029
ALWAYS_INLINE GSVector4i sat_u32(const GSVector4i& minmax) const
1030
{
1031
return max_u32(minmax.xyxy()).min_u32(minmax.zwzw());
1032
}
1033
1034
GSVector4i min_s8(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = std::min(S8[i], v.S8[i])); }
1035
GSVector4i max_s8(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = std::max(S8[i], v.S8[i])); }
1036
GSVector4i min_s16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = std::min(S16[i], v.S16[i])); }
1037
GSVector4i max_s16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = std::max(S16[i], v.S16[i])); }
1038
GSVector4i min_s32(const GSVector4i& v) const { ALL_LANES_32(ret.S32[i] = std::min(S32[i], v.S32[i])); }
1039
GSVector4i max_s32(const GSVector4i& v) const { ALL_LANES_32(ret.S32[i] = std::max(S32[i], v.S32[i])); }
1040
1041
GSVector4i min_u8(const GSVector4i& v) const { ALL_LANES_8(ret.U8[i] = std::min(U8[i], v.U8[i])); }
1042
GSVector4i max_u8(const GSVector4i& v) const { ALL_LANES_8(ret.U8[i] = std::max(U8[i], v.U8[i])); }
1043
GSVector4i min_u16(const GSVector4i& v) const { ALL_LANES_16(ret.U16[i] = std::min(U16[i], v.U16[i])); }
1044
GSVector4i max_u16(const GSVector4i& v) const { ALL_LANES_16(ret.U16[i] = std::max(U16[i], v.U16[i])); }
1045
GSVector4i min_u32(const GSVector4i& v) const { ALL_LANES_32(ret.U32[i] = std::min(U32[i], v.U32[i])); }
1046
GSVector4i max_u32(const GSVector4i& v) const { ALL_LANES_32(ret.U32[i] = std::max(U32[i], v.U32[i])); }
1047
1048
GSVector4i madd_s16(const GSVector4i& v) const
1049
{
1050
ALL_LANES_32(ret.S32[i] = (S16[i * 2] * v.S16[i * 2]) + (S16[i * 2 + 1] * v.S16[i * 2 + 1]));
1051
}
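
// Note (added): madd_s16() mirrors a pmaddwd-style operation: adjacent signed 16-bit products are
// summed in pairs into each 32-bit lane, e.g. S32[0] = S16[0] * v.S16[0] + S16[1] * v.S16[1].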
1052
1053
GSVector4i addp_s32() const { return GSVector4i(x + y, z + w, 0, 0); }
1054
1055
s32 addv_s32() const { return (S32[0] + S32[1] + S32[2] + S32[3]); }
1056
1057
u8 minv_u8() const
{
// Reduce across all 16 byte lanes.
u8 ret = U8[0];
for (size_t i = 1; i < 16; i++)
ret = std::min(ret, U8[i]);
return ret;
}
1080
1081
u16 maxv_u8() const
{
// Reduce across all 16 byte lanes.
u8 ret = U8[0];
for (size_t i = 1; i < 16; i++)
ret = std::max(ret, U8[i]);
return ret;
}
1104
1105
u16 minv_u16() const
1106
{
1107
return std::min(
1108
U16[0],
1109
std::min(U16[1],
1110
std::min(U16[2], std::min(U16[3], std::min(U16[4], std::min(U16[5], std::min(U16[6], U16[7])))))));
1111
}
1112
1113
u16 maxv_u16() const
1114
{
1115
return std::max(
1116
U16[0],
1117
std::max(U16[1],
1118
std::max(U16[2], std::max(U16[3], std::max(U16[4], std::max(U16[5], std::max(U16[6], U16[7])))))));
1119
}
1120
1121
s32 minv_s32() const { return std::min(x, std::min(y, std::min(z, w))); }
1122
1123
u32 minv_u32() const { return std::min(U32[0], std::min(U32[1], std::min(U32[2], U32[3]))); }
1124
1125
s32 maxv_s32() const { return std::max(x, std::max(y, std::max(z, w))); }
1126
1127
u32 maxv_u32() const { return std::max(U32[0], std::max(U32[1], std::max(U32[2], U32[3]))); }
1128
1129
ALWAYS_INLINE GSVector4i clamp8() const { return pu16().upl8(); }
1130
1131
GSVector4i blend8(const GSVector4i& v, const GSVector4i& mask) const
1132
{
1133
GSVector4i ret;
1134
for (size_t i = 0; i < 16; i++)
1135
ret.U8[i] = (mask.U8[i] & 0x80) ? v.U8[i] : U8[i];
1136
return ret;
1137
}
1138
1139
template<s32 mask>
1140
GSVector4i blend16(const GSVector4i& v) const
1141
{
1142
GSVector4i ret;
1143
for (size_t i = 0; i < 8; i++)
1144
ret.U16[i] = ((mask & (1 << i)) != 0) ? v.U16[i] : U16[i];
1145
return ret;
1146
}
1147
1148
template<s32 mask>
1149
GSVector4i blend32(const GSVector4i& v) const
1150
{
1151
GSVector4i ret;
1152
for (size_t i = 0; i < 4; i++)
1153
ret.U32[i] = ((mask & (1 << i)) != 0) ? v.U32[i] : U32[i];
1154
return ret;
1155
}
1156
1157
GSVector4i blend(const GSVector4i& v, const GSVector4i& mask) const
1158
{
1159
GSVector4i ret;
1160
for (size_t i = 0; i < 2; i++)
1161
ret.U64[i] = (v.U64[i] & mask.U64[i]) | (U64[i] & ~mask.U64[i]);
1162
return ret;
1163
}
1164
1165
GSVector4i shuffle8(const GSVector4i& mask) const
1166
{
1167
ALL_LANES_8(ret.S8[i] = (mask.S8[i] & 0x80) ? 0 : (S8[mask.S8[i] & 0xf]));
1168
}
1169
1170
GSVector4i ps16(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = SSATURATE8((i < 8) ? S16[i] : v.S16[i - 8])); }
1171
GSVector4i ps16() const { ALL_LANES_8(ret.S8[i] = SSATURATE8(S16[(i < 8) ? i : (i - 8)])); }
1172
GSVector4i pu16(const GSVector4i& v) const { ALL_LANES_8(ret.U8[i] = USATURATE8((i < 8) ? U16[i] : v.U16[i - 8])); }
1173
GSVector4i pu16() const { ALL_LANES_8(ret.U8[i] = USATURATE8(U16[(i < 8) ? i : (i - 8)])); }
1174
GSVector4i ps32(const GSVector4i& v) const
1175
{
1176
ALL_LANES_16(ret.U16[i] = SSATURATE16((i < 4) ? S32[i] : v.S32[i - 4]));
1177
}
1178
GSVector4i ps32() const { ALL_LANES_16(ret.S16[i] = SSATURATE16(S32[(i < 4) ? i : (i - 4)])); }
1179
GSVector4i pu32(const GSVector4i& v) const
1180
{
1181
ALL_LANES_16(ret.U16[i] = USATURATE16((i < 4) ? U32[i] : v.U32[i - 4]));
1182
}
1183
GSVector4i pu32() const { ALL_LANES_16(ret.U16[i] = USATURATE16(U32[(i < 4) ? i : (i - 4)])); }
1184
1185
GSVector4i upl8(const GSVector4i& v) const
1186
{
1187
return GSVector4i(S8[0], v.S8[0], S8[1], v.S8[1], S8[2], v.S8[2], S8[3], v.S8[3], S8[4], v.S8[4], S8[5], v.S8[5],
1188
S8[6], v.S8[6], S8[7], v.S8[7]);
1189
}
1190
GSVector4i uph8(const GSVector4i& v) const
1191
{
1192
return GSVector4i(S8[8], v.S8[8], S8[9], v.S8[9], S8[10], v.S8[10], S8[11], v.S8[11], S8[12], v.S8[12], S8[13],
1193
v.S8[13], S8[14], v.S8[14], S8[15], v.S8[15]);
1194
}
1195
GSVector4i upl16(const GSVector4i& v) const
1196
{
1197
return GSVector4i(S16[0], v.S16[0], S16[1], v.S16[1], S16[2], v.S16[2], S16[3], v.S16[3]);
1198
}
1199
GSVector4i uph16(const GSVector4i& v) const
1200
{
1201
return GSVector4i(S16[4], v.S16[4], S16[5], v.S16[5], S16[6], v.S16[6], S16[7], v.S16[7]);
1202
}
1203
GSVector4i upl32(const GSVector4i& v) const { return GSVector4i(S32[0], v.S32[0], S32[1], v.S32[1]); }
1204
GSVector4i uph32(const GSVector4i& v) const { return GSVector4i(S32[2], v.S32[2], S32[3], v.S32[3]); }
1205
GSVector4i upl64(const GSVector4i& v) const
1206
{
1207
GSVector4i ret;
1208
ret.S64[0] = S64[0];
1209
ret.S64[1] = v.S64[0];
1210
return ret;
1211
}
1212
GSVector4i uph64(const GSVector4i& v) const
1213
{
1214
GSVector4i ret;
1215
ret.S64[0] = S64[1];
1216
ret.S64[1] = v.S64[1];
1217
return ret;
1218
}
1219
1220
GSVector4i upl8() const
1221
{
1222
return GSVector4i(S8[0], 0, S8[1], 0, S8[2], 0, S8[3], 0, S8[4], 0, S8[5], 0, S8[6], 0, S8[7], 0);
1223
}
1224
GSVector4i uph8() const
1225
{
1226
return GSVector4i(S8[8], 0, S8[9], 0, S8[10], 0, S8[11], 0, S8[12], 0, S8[13], 0, S8[14], 0, S8[15], 0);
1227
}
1228
1229
GSVector4i upl16() const { return GSVector4i(S16[0], 0, S16[1], 0, S16[2], 0, S16[3], 0); }
1230
GSVector4i uph16() const { return GSVector4i(S16[4], 0, S16[5], 0, S16[6], 0, S16[7], 0); }
1231
1232
GSVector4i upl32() const { return GSVector4i(S32[0], 0, S32[1], 0); }
1233
GSVector4i uph32() const { return GSVector4i(S32[2], 0, S32[3], 0); }
1234
GSVector4i upl64() const
1235
{
1236
GSVector4i ret;
1237
ret.S64[0] = S64[0];
1238
ret.S64[1] = 0;
1239
return ret;
1240
}
1241
GSVector4i uph64() const
1242
{
1243
GSVector4i ret;
1244
ret.S64[0] = S64[1];
1245
ret.S64[1] = 0;
1246
return ret;
1247
}
1248
1249
GSVector4i s8to16() const { ALL_LANES_16(ret.S16[i] = S8[i]); }
1250
GSVector4i s8to32() const { ALL_LANES_32(ret.S32[i] = S8[i]); }
1251
GSVector4i s8to64() const { ALL_LANES_64(ret.S64[i] = S8[i]); }
1252
1253
GSVector4i s16to32() const { ALL_LANES_32(ret.S32[i] = S16[i]); }
1254
GSVector4i s16to64() const { ALL_LANES_64(ret.S64[i] = S16[i]); }
1255
GSVector4i s32to64() const { ALL_LANES_64(ret.S64[i] = S32[i]); }
1256
GSVector4i u8to16() const { ALL_LANES_16(ret.U16[i] = U8[i]); }
1257
GSVector4i u8to32() const { ALL_LANES_32(ret.U32[i] = U8[i]); }
1258
GSVector4i u8to64() const { ALL_LANES_64(ret.U64[i] = U8[i]); }
1259
GSVector4i u16to32() const { ALL_LANES_32(ret.U32[i] = U16[i]); }
1260
GSVector4i u16to64() const { ALL_LANES_64(ret.U64[i] = U16[i]); }
1261
GSVector4i u32to64() const { ALL_LANES_64(ret.U64[i] = U32[i]); }
1262
1263
template<s32 v>
1264
GSVector4i srl() const
1265
{
1266
GSVector4i ret = {};
1267
if constexpr (v < 16)
1268
{
1269
for (s32 i = 0; i < (16 - v); i++)
1270
ret.U8[i] = U8[v + i];
1271
}
1272
return ret;
1273
}
1274
1275
template<s32 v>
1276
GSVector4i srl(const GSVector4i& r)
1277
{
1278
// This sucks. Hopefully it's never used.
1279
u8 concat[32];
1280
std::memcpy(concat, U8, sizeof(u8) * 16);
1281
std::memcpy(concat + 16, r.U8, sizeof(u8) * 16);
1282
1283
GSVector4i ret;
1284
std::memcpy(ret.U8, &concat[v], sizeof(u8) * 16);
1285
return ret;
1286
}
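
// Note (added): the variant above extracts 16 bytes starting at byte offset v from the
// concatenation of *this (low half) and r (high half), i.e. a byte-wise funnel shift in the
// spirit of palignr/EXT.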
1287
1288
template<s32 v>
1289
GSVector4i sll() const
1290
{
1291
GSVector4i ret = {};
1292
if constexpr (v < 16)
1293
{
1294
for (s32 i = 0; i < (16 - v); i++)
1295
ret.U8[v + i] = U8[i];
1296
}
1297
return ret;
1298
}
1299
1300
template<s32 v>
1301
GSVector4i sll16() const
1302
{
1303
ALL_LANES_16(ret.U16[i] = U16[i] << v);
1304
}
1305
1306
GSVector4i sll16(s32 v) const { ALL_LANES_16(ret.U16[i] = U16[i] << v); }
1307
1308
GSVector4i sllv16(const GSVector4i& v) const { ALL_LANES_16(ret.U16[i] = U16[i] << v.U16[i]); }
1309
1310
template<s32 v>
1311
GSVector4i srl16() const
1312
{
1313
ALL_LANES_16(ret.U16[i] = U16[i] >> v);
1314
}
1315
1316
GSVector4i srl16(s32 v) const { ALL_LANES_16(ret.U16[i] = U16[i] >> v); }
1317
1318
GSVector4i srlv16(const GSVector4i& v) const { ALL_LANES_16(ret.U16[i] = U16[i] >> v.U16[i]); }
1319
1320
template<s32 v>
1321
GSVector4i sra16() const
1322
{
1323
ALL_LANES_16(ret.S16[i] = S16[i] >> v);
1324
}
1325
1326
GSVector4i sra16(s32 v) const { ALL_LANES_16(ret.S16[i] = S16[i] >> v); }
1327
1328
GSVector4i srav16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = S16[i] >> v.S16[i]); }
1329
1330
template<s32 v>
1331
GSVector4i sll32() const
1332
{
1333
ALL_LANES_32(ret.U32[i] = U32[i] << v);
1334
}
1335
1336
GSVector4i sll32(s32 v) const { ALL_LANES_32(ret.U32[i] = U32[i] << v); }
1337
1338
GSVector4i sllv32(const GSVector4i& v) const { ALL_LANES_32(ret.U32[i] = U32[i] << v.U32[i]); }
1339
1340
template<s32 v>
1341
GSVector4i srl32() const
1342
{
1343
ALL_LANES_32(ret.U32[i] = U32[i] >> v);
1344
}
1345
1346
GSVector4i srl32(s32 v) const { ALL_LANES_32(ret.U32[i] = U32[i] >> v); }
1347
1348
GSVector4i srlv32(const GSVector4i& v) const { ALL_LANES_32(ret.U32[i] = U32[i] >> v.U32[i]); }
1349
1350
template<s32 v>
1351
GSVector4i sra32() const
1352
{
1353
ALL_LANES_32(ret.S32[i] = S32[i] >> v);
1354
}
1355
1356
GSVector4i sra32(s32 v) const { ALL_LANES_32(ret.S32[i] = S32[i] >> v); }
1357
1358
GSVector4i srav32(const GSVector4i& v) const { ALL_LANES_32(ret.S32[i] = S32[i] >> v.S32[i]); }
1359
1360
template<s64 v>
1361
GSVector4i sll64() const
1362
{
1363
ALL_LANES_64(ret.U64[i] = U64[i] << v);
1364
}
1365
1366
GSVector4i sll64(s32 v) const { ALL_LANES_64(ret.U64[i] = U64[i] << v); }
1367
1368
GSVector4i sllv64(const GSVector4i& v) const { ALL_LANES_64(ret.U64[i] = U64[i] << v.U64[i]); }
1369
1370
template<s64 v>
1371
GSVector4i srl64() const
1372
{
1373
ALL_LANES_64(ret.U64[i] = U64[i] >> v);
1374
}
1375
1376
GSVector4i srl64(s32 v) const { ALL_LANES_64(ret.U64[i] = U64[i] >> v); }
1377
1378
GSVector4i srlv64(const GSVector4i& v) const { ALL_LANES_64(ret.U64[i] = U64[i] >> v.U64[i]); }
1379
1380
GSVector4i add8(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = S8[i] + v.S8[i]); }
1381
1382
GSVector4i add16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = S16[i] + v.S16[i]); }
1383
1384
GSVector4i add32(const GSVector4i& v) const { ALL_LANES_32(ret.S32[i] = S32[i] + v.S32[i]); }
1385
1386
GSVector4i adds8(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = SSATURATE8(S8[i] + v.S8[i])); }
1387
1388
GSVector4i adds16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = SSATURATE16(S16[i] + v.S16[i])); }
1389
1390
GSVector4i hadds16(const GSVector4i& v) const
1391
{
1392
return GSVector4i(SSATURATE16(S16[0] + S16[1]), SSATURATE16(S16[2] + S16[3]), SSATURATE16(S16[4] + S16[5]),
1393
SSATURATE16(S16[6] + S16[7]), SSATURATE16(v.S16[0] + v.S16[1]), SSATURATE16(v.S16[2] + v.S16[3]),
1394
SSATURATE16(v.S16[4] + v.S16[5]), SSATURATE16(v.S16[6] + v.S16[7]));
1395
}
1396
1397
GSVector4i addus8(const GSVector4i& v) const { ALL_LANES_8(ret.U8[i] = USATURATE8(U8[i] + v.U8[i])); }
1398
1399
GSVector4i addus16(const GSVector4i& v) const { ALL_LANES_16(ret.U16[i] = USATURATE16(U16[i] + v.U16[i])); }
1400
1401
GSVector4i sub8(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = S8[i] - v.S8[i]); }
1402
1403
GSVector4i sub16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = S16[i] - v.S16[i]); }
1404
1405
GSVector4i sub32(const GSVector4i& v) const { ALL_LANES_32(ret.S32[i] = S32[i] - v.S32[i]); }
1406
1407
GSVector4i subs8(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = SSATURATE8(S8[i] - v.S8[i])); }
1408
1409
GSVector4i subs16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = SSATURATE16(S16[i] - v.S16[i])); }
1410
1411
GSVector4i subus8(const GSVector4i& v) const { ALL_LANES_8(ret.U8[i] = USATURATE8(U8[i] - v.U8[i])); }
1412
1413
GSVector4i subus16(const GSVector4i& v) const { ALL_LANES_16(ret.U16[i] = USATURATE16(U16[i] - v.U16[i])); }
1414
1415
GSVector4i mul16hs(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = (S16[i] * v.S16[i]) >> 16); }
1416
1417
GSVector4i mul16l(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = S16[i] * v.S16[i]); }
1418
1419
GSVector4i mul16hrs(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = ((S16[i] * v.S16[i]) >> 14) + 1); }
1420
1421
GSVector4i mul32l(const GSVector4i& v) const { ALL_LANES_32(ret.S32[i] = S32[i] * v.S32[i]); }
1422
1423
ALWAYS_INLINE bool eq(const GSVector4i& v) const { return (std::memcmp(S32, v.S32, sizeof(S32))) == 0; }
1424
1425
GSVector4i eq8(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = (S8[i] == v.S8[i]) ? -1 : 0); }
1426
GSVector4i eq16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = (S16[i] == v.S16[i]) ? -1 : 0); }
1427
GSVector4i eq32(const GSVector4i& v) const { ALL_LANES_32(ret.S32[i] = (S32[i] == v.S32[i]) ? -1 : 0); }
1428
GSVector4i eq64(const GSVector4i& v) const { ALL_LANES_64(ret.S64[i] = (S64[i] == v.S64[i]) ? -1 : 0); }
1429
1430
GSVector4i neq8(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = (S8[i] != v.S8[i]) ? -1 : 0); }
1431
GSVector4i neq16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = (S16[i] != v.S16[i]) ? -1 : 0); }
1432
GSVector4i neq32(const GSVector4i& v) const { ALL_LANES_32(ret.S32[i] = (S32[i] != v.S32[i]) ? -1 : 0); }
1433
1434
GSVector4i gt8(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = (S8[i] > v.S8[i]) ? -1 : 0); }
1435
GSVector4i gt16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = (S16[i] > v.S16[i]) ? -1 : 0); }
1436
GSVector4i gt32(const GSVector4i& v) const { ALL_LANES_32(ret.S32[i] = (S32[i] > v.S32[i]) ? -1 : 0); }
1437
1438
GSVector4i ge8(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = (S8[i] >= v.S8[i]) ? -1 : 0); }
1439
GSVector4i ge16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = (S16[i] >= v.S16[i]) ? -1 : 0); }
1440
GSVector4i ge32(const GSVector4i& v) const { ALL_LANES_32(ret.S32[i] = (S32[i] >= v.S32[i]) ? -1 : 0); }
1441
1442
GSVector4i lt8(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = (S8[i] < v.S8[i]) ? -1 : 0); }
1443
GSVector4i lt16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = (S16[i] < v.S16[i]) ? -1 : 0); }
1444
GSVector4i lt32(const GSVector4i& v) const { ALL_LANES_32(ret.S32[i] = (S32[i] < v.S32[i]) ? -1 : 0); }
1445
1446
GSVector4i le8(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = (S8[i] <= v.S8[i]) ? -1 : 0); }
1447
GSVector4i le16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = (S16[i] <= v.S16[i]) ? -1 : 0); }
1448
GSVector4i le32(const GSVector4i& v) const { ALL_LANES_32(ret.S32[i] = (S32[i] <= v.S32[i]) ? -1 : 0); }
1449
1450
ALWAYS_INLINE GSVector4i andnot(const GSVector4i& v) const { ALL_LANES_64(ret.U64[i] = (~v.U64[i]) & U64[i]); }
1451
1452
s32 mask() const
1453
{
1454
return static_cast<s32>((static_cast<u32>(U8[0] >> 7) << 0) | (static_cast<u32>(U8[1] >> 7) << 1) |
1455
(static_cast<u32>(U8[2] >> 7) << 2) | (static_cast<u32>(U8[3] >> 7) << 3) |
1456
(static_cast<u32>(U8[4] >> 7) << 4) | (static_cast<u32>(U8[5] >> 7) << 5) |
1457
(static_cast<u32>(U8[6] >> 7) << 6) | (static_cast<u32>(U8[7] >> 7) << 7) |
1458
(static_cast<u32>(U8[8] >> 7) << 8) | (static_cast<u32>(U8[9] >> 7) << 9) |
1459
(static_cast<u32>(U8[10] >> 7) << 10) | (static_cast<u32>(U8[11] >> 7) << 11) |
1460
(static_cast<u32>(U8[12] >> 7) << 12) | (static_cast<u32>(U8[13] >> 7) << 13) |
1461
(static_cast<u32>(U8[14] >> 7) << 14) | (static_cast<u32>(U8[15] >> 7) << 15));
1462
}
1463
1464
ALWAYS_INLINE bool alltrue() const { return ((U64[0] & U64[1]) == 0xFFFFFFFFFFFFFFFFULL); }
1465
1466
ALWAYS_INLINE bool allfalse() const { return ((U64[0] | U64[1]) == 0); }
1467
1468
template<s32 i>
1469
ALWAYS_INLINE GSVector4i insert8(s32 a) const
1470
{
1471
GSVector4i ret = *this;
1472
ret.S8[i] = static_cast<s8>(a);
1473
return ret;
1474
}
1475
1476
template<s32 i>
1477
ALWAYS_INLINE s32 extract8() const
1478
{
1479
return S8[i];
1480
}
1481
1482
template<s32 i>
1483
ALWAYS_INLINE GSVector4i insert16(s32 a) const
1484
{
1485
GSVector4i ret = *this;
1486
ret.S16[i] = static_cast<s16>(a);
1487
return ret;
1488
}
1489
1490
template<s32 i>
1491
ALWAYS_INLINE s32 extract16() const
1492
{
1493
return S16[i];
1494
}
1495
1496
template<s32 i>
1497
ALWAYS_INLINE GSVector4i insert32(s32 a) const
1498
{
1499
GSVector4i ret = *this;
1500
ret.S32[i] = a;
1501
return ret;
1502
}
1503
1504
template<s32 i>
1505
ALWAYS_INLINE s32 extract32() const
1506
{
1507
return S32[i];
1508
}
1509
1510
template<s32 i>
1511
ALWAYS_INLINE GSVector4i insert64(s64 a) const
1512
{
1513
GSVector4i ret = *this;
1514
ret.S64[i] = a;
1515
return ret;
1516
}
1517
1518
template<s32 i>
1519
ALWAYS_INLINE s64 extract64() const
1520
{
1521
return S64[i];
1522
}
1523
1524
ALWAYS_INLINE static GSVector4i loadnt(const void* p)
1525
{
1526
GSVector4i ret;
1527
std::memcpy(&ret, p, sizeof(ret.S32));
1528
return ret;
1529
}
1530
1531
ALWAYS_INLINE static GSVector4i load32(const void* p)
1532
{
1533
GSVector4i ret;
1534
std::memcpy(&ret.x, p, sizeof(s32));
1535
ret.y = 0;
1536
ret.z = 0;
1537
ret.w = 0;
1538
return ret;
1539
}
1540
1541
ALWAYS_INLINE static GSVector4i zext32(s32 v) { return GSVector4i(v, 0, 0, 0); }
1542
1543
template<bool aligned>
1544
ALWAYS_INLINE static GSVector4i loadl(const void* p)
1545
{
1546
GSVector4i ret;
1547
std::memcpy(&ret.U64[0], p, sizeof(ret.U64[0]));
1548
ret.U64[1] = 0;
1549
return ret;
1550
}
1551
1552
ALWAYS_INLINE static GSVector4i loadl(const GSVector2i& v) { return loadl<true>(&v); }
1553
1554
template<bool aligned>
1555
ALWAYS_INLINE static GSVector4i loadh(const void* p)
1556
{
1557
GSVector4i ret;
1558
ret.U64[0] = 0;
1559
std::memcpy(&ret.U64[1], p, sizeof(ret.U64[1]));
1560
return ret;
1561
}
1562
1563
ALWAYS_INLINE static GSVector4i loadh(const GSVector2i& v) { return loadh<true>(&v); }
1564
1565
template<bool aligned>
1566
ALWAYS_INLINE static GSVector4i load(const void* p)
1567
{
1568
GSVector4i ret;
1569
std::memcpy(ret.S32, p, sizeof(ret.S32));
1570
return ret;
1571
}
1572
1573
ALWAYS_INLINE static void storent(void* p, const GSVector4i& v) { std::memcpy(p, v.S32, sizeof(v.S32)); }
1574
1575
template<bool aligned>
1576
ALWAYS_INLINE static void storel(void* p, const GSVector4i& v)
1577
{
1578
std::memcpy(p, &v.S32[0], sizeof(s32) * 2);
1579
}
1580
1581
template<bool aligned>
1582
ALWAYS_INLINE static void storeh(void* p, const GSVector4i& v)
1583
{
1584
std::memcpy(p, &v.S32[2], sizeof(s32) * 2);
1585
}
1586
1587
template<bool aligned>
1588
ALWAYS_INLINE static void store(void* p, const GSVector4i& v)
1589
{
1590
std::memcpy(p, v.S32, sizeof(S32));
1591
}
1592
1593
ALWAYS_INLINE static void store32(void* p, const GSVector4i& v) { std::memcpy(p, &v.x, sizeof(s32)); }
1594
1595
ALWAYS_INLINE static GSVector4i broadcast128(const GSVector4i& v) { return v; }
1596
1597
template<bool aligned>
1598
ALWAYS_INLINE static GSVector4i broadcast128(const void* v)
1599
{
1600
return load<aligned>(v);
1601
}
1602
1603
ALWAYS_INLINE void operator&=(const GSVector4i& v)
1604
{
1605
U64[0] &= v.U64[0];
1606
U64[1] &= v.U64[1];
1607
}
1608
ALWAYS_INLINE void operator|=(const GSVector4i& v)
1609
{
1610
U64[0] |= v.U64[0];
1611
U64[1] |= v.U64[1];
1612
}
1613
ALWAYS_INLINE void operator^=(const GSVector4i& v)
1614
{
1615
U64[0] ^= v.U64[0];
1616
U64[1] ^= v.U64[1];
1617
}
1618
1619
ALWAYS_INLINE friend GSVector4i operator&(const GSVector4i& v1, const GSVector4i& v2)
1620
{
1621
GSVector4i ret;
1622
ret.U64[0] = v1.U64[0] & v2.U64[0];
1623
ret.U64[1] = v1.U64[1] & v2.U64[1];
1624
return ret;
1625
}
1626
1627
ALWAYS_INLINE friend GSVector4i operator|(const GSVector4i& v1, const GSVector4i& v2)
1628
{
1629
GSVector4i ret;
1630
ret.U64[0] = v1.U64[0] | v2.U64[0];
1631
ret.U64[1] = v1.U64[1] | v2.U64[1];
1632
return ret;
1633
}
1634
1635
ALWAYS_INLINE friend GSVector4i operator^(const GSVector4i& v1, const GSVector4i& v2)
1636
{
1637
GSVector4i ret;
1638
ret.U64[0] = v1.U64[0] ^ v2.U64[0];
1639
ret.U64[1] = v1.U64[1] ^ v2.U64[1];
1640
return ret;
1641
}
1642
1643
ALWAYS_INLINE friend GSVector4i operator&(const GSVector4i& v, s32 i) { return v & GSVector4i(i); }
1644
1645
ALWAYS_INLINE friend GSVector4i operator|(const GSVector4i& v, s32 i) { return v | GSVector4i(i); }
1646
1647
ALWAYS_INLINE friend GSVector4i operator^(const GSVector4i& v, s32 i) { return v ^ GSVector4i(i); }
1648
1649
ALWAYS_INLINE friend GSVector4i operator~(const GSVector4i& v) { return v ^ v.eq32(v); }
1650
1651
ALWAYS_INLINE static constexpr GSVector4i zero() { return GSVector4i::cxpr(0, 0, 0, 0); }
1652
1653
ALWAYS_INLINE GSVector4i xyxy(const GSVector4i& v) const { return upl64(v); }
1654
1655
ALWAYS_INLINE static GSVector4i xyxy(const GSVector2i& xy, const GSVector2i& zw)
1656
{
1657
return GSVector4i(xy.x, xy.y, zw.x, zw.y);
1658
}
1659
1660
ALWAYS_INLINE static GSVector4i xyxy(const GSVector2i& xyzw) { return GSVector4i(xyzw.x, xyzw.y, xyzw.x, xyzw.y); }
1661
1662
static GSVector4i rfit(const GSVector4i& fit_rect, const GSVector2i& image_size);
1663
1664
ALWAYS_INLINE GSVector2i xy() const { return GSVector2i(x, y); }
1665
ALWAYS_INLINE GSVector2i zw() const { return GSVector2i(z, w); }
1666
1667
#define VECTOR4i_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \
  ALWAYS_INLINE GSVector4i xs##ys##zs##ws() const { return GSVector4i(S32[xn], S32[yn], S32[zn], S32[wn]); } \
  ALWAYS_INLINE GSVector4i xs##ys##zs##ws##l() const \
  { \
    return GSVector4i(S16[xn], S16[yn], S16[zn], S16[wn], S16[4], S16[5], S16[6], S16[7]); \
  } \
  ALWAYS_INLINE GSVector4i xs##ys##zs##ws##h() const \
  { \
    return GSVector4i(S16[0], S16[1], S16[2], S16[3], S16[4 + xn], S16[4 + yn], S16[4 + zn], S16[4 + wn]); \
  }

#define VECTOR4i_SHUFFLE_3(xs, xn, ys, yn, zs, zn) \
  VECTOR4i_SHUFFLE_4(xs, xn, ys, yn, zs, zn, x, 0); \
  VECTOR4i_SHUFFLE_4(xs, xn, ys, yn, zs, zn, y, 1); \
  VECTOR4i_SHUFFLE_4(xs, xn, ys, yn, zs, zn, z, 2); \
  VECTOR4i_SHUFFLE_4(xs, xn, ys, yn, zs, zn, w, 3);

#define VECTOR4i_SHUFFLE_2(xs, xn, ys, yn) \
  VECTOR4i_SHUFFLE_3(xs, xn, ys, yn, x, 0); \
  VECTOR4i_SHUFFLE_3(xs, xn, ys, yn, y, 1); \
  VECTOR4i_SHUFFLE_3(xs, xn, ys, yn, z, 2); \
  VECTOR4i_SHUFFLE_3(xs, xn, ys, yn, w, 3);

#define VECTOR4i_SHUFFLE_1(xs, xn) \
  VECTOR4i_SHUFFLE_2(xs, xn, x, 0); \
  VECTOR4i_SHUFFLE_2(xs, xn, y, 1); \
  VECTOR4i_SHUFFLE_2(xs, xn, z, 2); \
  VECTOR4i_SHUFFLE_2(xs, xn, w, 3);

  VECTOR4i_SHUFFLE_1(x, 0);
  VECTOR4i_SHUFFLE_1(y, 1);
  VECTOR4i_SHUFFLE_1(z, 2);
  VECTOR4i_SHUFFLE_1(w, 3);

#undef VECTOR4i_SHUFFLE_1
#undef VECTOR4i_SHUFFLE_2
#undef VECTOR4i_SHUFFLE_3
#undef VECTOR4i_SHUFFLE_4
};

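// Scalar (no-SIMD) implementation of a four-lane float vector. The *64 helpers further down view the same
// 16-byte storage as two doubles.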
class alignas(16) GSVector4
{
  struct cxpr_init_tag
  {
  };
  static constexpr cxpr_init_tag cxpr_init{};

  constexpr GSVector4(cxpr_init_tag, float x, float y, float z, float w) : F32{x, y, z, w} {}

  constexpr GSVector4(cxpr_init_tag, int x, int y, int z, int w) : I32{x, y, z, w} {}

  constexpr GSVector4(cxpr_init_tag, u64 x, u64 y) : U64{x, y} {}

  constexpr GSVector4(cxpr_init_tag, double x, double y) : F64{x, y} {}

public:
  union
  {
    struct
    {
      float x, y, z, w;
    };
    struct
    {
      float r, g, b, a;
    };
    struct
    {
      float left, top, right, bottom;
    };
    float F32[4];
    double F64[2];
    s8 I8[16];
    s16 I16[8];
    s32 I32[4];
    s64 I64[2];
    u8 U8[16];
    u16 U16[8];
    u32 U32[4];
    u64 U64[2];
  };

  GSVector4() = default;

  constexpr static GSVector4 cxpr(float x, float y, float z, float w) { return GSVector4(cxpr_init, x, y, z, w); }

  constexpr static GSVector4 cxpr(float x) { return GSVector4(cxpr_init, x, x, x, x); }

  constexpr static GSVector4 cxpr(int x, int y, int z, int w) { return GSVector4(cxpr_init, x, y, z, w); }

  constexpr static GSVector4 cxpr(int x) { return GSVector4(cxpr_init, x, x, x, x); }

  constexpr static GSVector4 cxpr64(u64 x, u64 y) { return GSVector4(cxpr_init, x, y); }

  constexpr static GSVector4 cxpr64(u64 x) { return GSVector4(cxpr_init, x, x); }

  constexpr static GSVector4 cxpr64(double x, double y) { return GSVector4(cxpr_init, x, y); }

  constexpr static GSVector4 cxpr64(double x) { return GSVector4(cxpr_init, x, x); }

  ALWAYS_INLINE GSVector4(float x, float y, float z, float w)
  {
    this->x = x;
    this->y = y;
    this->z = z;
    this->w = w;
  }

  ALWAYS_INLINE GSVector4(float x, float y)
  {
    this->x = x;
    this->y = y;
    this->z = 0.0f;
    this->w = 0.0f;
  }

  ALWAYS_INLINE GSVector4(int x, int y, int z, int w)
  {
    this->x = static_cast<float>(x);
    this->y = static_cast<float>(y);
    this->z = static_cast<float>(z);
    this->w = static_cast<float>(w);
  }

  ALWAYS_INLINE GSVector4(int x, int y)
  {
    this->x = static_cast<float>(x);
    this->y = static_cast<float>(y);
    this->z = 0.0f;
    this->w = 0.0f;
  }

  ALWAYS_INLINE explicit GSVector4(float f) { x = y = z = w = f; }

  ALWAYS_INLINE explicit GSVector4(int i) { x = y = z = w = static_cast<float>(i); }

  ALWAYS_INLINE explicit GSVector4(const GSVector2& v) : x(v.x), y(v.y), z(0.0f), w(0.0f) {}
  ALWAYS_INLINE explicit GSVector4(const GSVector4i& v);

  ALWAYS_INLINE static GSVector4 cast(const GSVector4i& v);

  ALWAYS_INLINE static GSVector4 f64(double x, double y)
  {
    GSVector4 ret;
    ret.F64[0] = x;
    ret.F64[1] = y;
    return ret;
  }

  ALWAYS_INLINE static GSVector4 f64(double x)
  {
    GSVector4 ret;
    ret.F64[0] = ret.F64[1] = x;
    return ret;
  }

  ALWAYS_INLINE void operator=(float f) { x = y = z = w = f; }

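  // Packed-colour helpers: rgba32() packs the lanes into an 8-bit-per-channel RGBA value, the static
  // rgba32(u32) expands such a value into one float per channel in the 0..255 range, and unorm8() further
  // scales it to 0..1.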
  u32 rgba32() const { return GSVector4i(*this).rgba32(); }

  ALWAYS_INLINE static GSVector4 rgba32(u32 rgba)
  {
    return GSVector4(GSVector4i::zext32(static_cast<s32>(rgba)).u8to32());
  }

  ALWAYS_INLINE static GSVector4 unorm8(u32 rgba) { return rgba32(rgba) * GSVector4::cxpr(1.0f / 255.0f); }

  GSVector4 abs() const { return GSVector4(std::fabs(x), std::fabs(y), std::fabs(z), std::fabs(w)); }

  GSVector4 neg() const { return GSVector4(-x, -y, -z, -w); }

  GSVector4 floor() const { return GSVector4(std::floor(x), std::floor(y), std::floor(z), std::floor(w)); }

  GSVector4 ceil() const { return GSVector4(std::ceil(x), std::ceil(y), std::ceil(z), std::ceil(w)); }

  GSVector4 hadd() const { return GSVector4(x + y, z + w, x + y, z + w); }

  GSVector4 hadd(const GSVector4& v) const { return GSVector4(x + y, z + w, v.x + v.y, v.z + v.w); }

  GSVector4 hsub() const { return GSVector4(x - y, z - w, x - y, z - w); }

  GSVector4 hsub(const GSVector4& v) const { return GSVector4(x - y, z - w, v.x - v.y, v.z - v.w); }

  ALWAYS_INLINE float dot(const GSVector4& v) const { return (x * v.x) + (y * v.y) + (z * v.z) + (w * v.w); }

  ALWAYS_INLINE float addv() const { return (x + y + z + w); }
  ALWAYS_INLINE float minv() const { return std::min(x, std::min(y, std::min(z, w))); }
  ALWAYS_INLINE float maxv() const { return std::max(x, std::max(y, std::max(z, w))); }

  GSVector4 sat(const GSVector4& min, const GSVector4& max) const
  {
    return GSVector4(std::clamp(x, min.x, max.x), std::clamp(y, min.y, max.y), std::clamp(z, min.z, max.z),
                     std::clamp(w, min.w, max.w));
  }

  GSVector4 sat(const GSVector4& v) const
  {
    return GSVector4(std::clamp(x, v.x, v.z), std::clamp(y, v.y, v.w), std::clamp(z, v.x, v.z),
                     std::clamp(w, v.y, v.w));
  }

  GSVector4 sat(const float scale = 255) const { return sat(zero(), GSVector4(scale)); }

  GSVector4 clamp(const float scale = 255) const { return min(GSVector4(scale)); }

  GSVector4 min(const GSVector4& v) const
  {
    return GSVector4(std::min(x, v.x), std::min(y, v.y), std::min(z, v.z), std::min(w, v.w));
  }

  GSVector4 max(const GSVector4& v) const
  {
    return GSVector4(std::max(x, v.x), std::max(y, v.y), std::max(z, v.z), std::max(w, v.w));
  }

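  // Both blend32 overloads select lane i from v when the selector for that lane is set (bit i of the
  // immediate mask, or the sign bit of the corresponding mask lane) and keep the lane of *this otherwise.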
  template<int mask>
  GSVector4 blend32(const GSVector4& v) const
  {
    return GSVector4((mask & 1) ? v.x : x, (mask & 2) ? v.y : y, (mask & 4) ? v.z : z, (mask & 8) ? v.w : w);
  }

  ALWAYS_INLINE GSVector4 blend32(const GSVector4& v, const GSVector4& mask) const
  {
    return GSVector4((mask.U32[0] & 0x80000000u) ? v.x : x, (mask.U32[1] & 0x80000000u) ? v.y : y,
                     (mask.U32[2] & 0x80000000u) ? v.z : z, (mask.U32[3] & 0x80000000u) ? v.w : w);
  }

  GSVector4 upl(const GSVector4& v) const { return GSVector4(x, y, v.x, v.y); }

  GSVector4 uph(const GSVector4& v) const { return GSVector4(z, w, v.z, v.w); }

  GSVector4 upld(const GSVector4& v) const
  {
    GSVector4 ret;
    ret.U64[0] = U64[0];
    ret.U64[1] = v.U64[0];
    return ret;
  }

  GSVector4 uphd(const GSVector4& v) const
  {
    GSVector4 ret;
    ret.U64[0] = U64[1];
    ret.U64[1] = v.U64[1];
    return ret;
  }

  ALWAYS_INLINE GSVector4 l2h(const GSVector4& v) const { return GSVector4(x, y, v.x, v.y); }

  ALWAYS_INLINE GSVector4 h2l(const GSVector4& v) const { return GSVector4(v.z, v.w, z, w); }

  ALWAYS_INLINE GSVector4 andnot(const GSVector4& v) const
  {
    GSVector4 ret;
    ret.U32[0] = ((~v.U32[0]) & U32[0]);
    ret.U32[1] = ((~v.U32[1]) & U32[1]);
    ret.U32[2] = ((~v.U32[2]) & U32[2]);
    ret.U32[3] = ((~v.U32[3]) & U32[3]);
    return ret;
  }

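  // mask() packs the sign bit of each lane into bits 0-3 (the scalar analogue of a movemask operation);
  // alltrue()/allfalse() test whether a comparison result is all ones / all zeros across the register.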
  ALWAYS_INLINE int mask() const
  {
    return (U32[0] >> 31) | ((U32[1] >> 30) & 2) | ((U32[2] >> 29) & 4) | ((U32[3] >> 28) & 8);
  }

  ALWAYS_INLINE bool alltrue() const { return ((U64[0] & U64[1]) == 0xFFFFFFFFFFFFFFFFULL); }

  ALWAYS_INLINE bool allfalse() const { return ((U64[0] | U64[1]) == 0); }

  ALWAYS_INLINE GSVector4 replace_nan(const GSVector4& v) const { return v.blend32(*this, *this == *this); }

  template<int src, int dst>
  ALWAYS_INLINE GSVector4 insert32(const GSVector4& v) const
  {
    GSVector4 ret = *this;
    ret.F32[dst] = v.F32[src];
    return ret;
  }

  template<int i>
  ALWAYS_INLINE GSVector4 insert32(float v) const
  {
    GSVector4 ret(*this);
    ret.F32[i] = v;
    return ret;
  }

  template<int i>
  ALWAYS_INLINE float extract32() const
  {
    return F32[i];
  }

  template<int dst>
  ALWAYS_INLINE GSVector4 insert64(double v) const
  {
    GSVector4 ret = *this;
    ret.F64[dst] = v;
    return ret;
  }

  template<int src>
  ALWAYS_INLINE double extract64() const
  {
    return F64[src];
  }

  ALWAYS_INLINE static constexpr GSVector4 zero() { return GSVector4::cxpr(0.0f, 0.0f, 0.0f, 0.0f); }

  ALWAYS_INLINE static constexpr GSVector4 xffffffff()
  {
    GSVector4 ret = zero();
    ret.U64[0] = ~ret.U64[0];
    ret.U64[1] = ~ret.U64[1];
    return ret;
  }

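  // The 'aligned' template parameter on the load/store helpers is unused here (std::memcpy handles any
  // alignment); it is presumably kept so the signatures match the SIMD implementations.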
  template<bool aligned>
  ALWAYS_INLINE static GSVector4 loadl(const void* p)
  {
    GSVector4 ret;
    std::memcpy(&ret.x, p, sizeof(float) * 2);
    ret.z = 0.0f;
    ret.w = 0.0f;
    return ret;
  }

  template<bool aligned>
  ALWAYS_INLINE static GSVector4 load(const void* p)
  {
    GSVector4 ret;
    std::memcpy(&ret.x, p, sizeof(float) * 4);
    return ret;
  }

  ALWAYS_INLINE static void storent(void* p, const GSVector4& v) { std::memcpy(p, &v, sizeof(v)); }

  template<bool aligned>
  ALWAYS_INLINE static void storel(void* p, const GSVector4& v)
  {
    std::memcpy(p, &v.x, sizeof(float) * 2);
  }

  template<bool aligned>
  ALWAYS_INLINE static void storeh(void* p, const GSVector4& v)
  {
    std::memcpy(p, &v.z, sizeof(float) * 2);
  }

  template<bool aligned>
  ALWAYS_INLINE static void store(void* p, const GSVector4& v)
  {
    std::memcpy(p, v.F32, sizeof(F32));
  }

  ALWAYS_INLINE static void store(float* p, const GSVector4& v) { *p = v.x; }

  ALWAYS_INLINE GSVector4 operator-() const { return neg(); }

  void operator+=(const GSVector4& v_)
  {
    x = x + v_.x;
    y = y + v_.y;
    z = z + v_.z;
    w = w + v_.w;
  }
  void operator-=(const GSVector4& v_)
  {
    x = x - v_.x;
    y = y - v_.y;
    z = z - v_.z;
    w = w - v_.w;
  }
  void operator*=(const GSVector4& v_)
  {
    x = x * v_.x;
    y = y * v_.y;
    z = z * v_.z;
    w = w * v_.w;
  }
  void operator/=(const GSVector4& v_)
  {
    x = x / v_.x;
    y = y / v_.y;
    z = z / v_.z;
    w = w / v_.w;
  }

  void operator+=(const float v_)
  {
    x = x + v_;
    y = y + v_;
    z = z + v_;
    w = w + v_;
  }
  void operator-=(const float v_)
  {
    x = x - v_;
    y = y - v_;
    z = z - v_;
    w = w - v_;
  }
  void operator*=(const float v_)
  {
    x = x * v_;
    y = y * v_;
    z = z * v_;
    w = w * v_;
  }
  void operator/=(const float v_)
  {
    x = x / v_;
    y = y / v_;
    z = z / v_;
    w = w / v_;
  }

  void operator&=(const GSVector4& v_)
  {
    U64[0] &= v_.U64[0];
    U64[1] &= v_.U64[1];
  }
  void operator|=(const GSVector4& v_)
  {
    U64[0] |= v_.U64[0];
    U64[1] |= v_.U64[1];
  }
  void operator^=(const GSVector4& v_)
  {
    U64[0] ^= v_.U64[0];
    U64[1] ^= v_.U64[1];
  }

  friend GSVector4 operator+(const GSVector4& v1, const GSVector4& v2)
  {
    return GSVector4(v1.x + v2.x, v1.y + v2.y, v1.z + v2.z, v1.w + v2.w);
  }

  friend GSVector4 operator-(const GSVector4& v1, const GSVector4& v2)
  {
    return GSVector4(v1.x - v2.x, v1.y - v2.y, v1.z - v2.z, v1.w - v2.w);
  }

  friend GSVector4 operator*(const GSVector4& v1, const GSVector4& v2)
  {
    return GSVector4(v1.x * v2.x, v1.y * v2.y, v1.z * v2.z, v1.w * v2.w);
  }

  friend GSVector4 operator/(const GSVector4& v1, const GSVector4& v2)
  {
    return GSVector4(v1.x / v2.x, v1.y / v2.y, v1.z / v2.z, v1.w / v2.w);
  }

  friend GSVector4 operator+(const GSVector4& v, float f) { return GSVector4(v.x + f, v.y + f, v.z + f, v.w + f); }

  friend GSVector4 operator-(const GSVector4& v, float f) { return GSVector4(v.x - f, v.y - f, v.z - f, v.w - f); }

  friend GSVector4 operator*(const GSVector4& v, float f) { return GSVector4(v.x * f, v.y * f, v.z * f, v.w * f); }

  friend GSVector4 operator/(const GSVector4& v, float f) { return GSVector4(v.x / f, v.y / f, v.z / f, v.w / f); }

  friend GSVector4 operator&(const GSVector4& v1, const GSVector4& v2)
  {
    GSVector4 ret;
    ret.U64[0] = v1.U64[0] & v2.U64[0];
    ret.U64[1] = v1.U64[1] & v2.U64[1];
    return ret;
  }

  ALWAYS_INLINE friend GSVector4 operator|(const GSVector4& v1, const GSVector4& v2)
  {
    GSVector4 ret;
    ret.U64[0] = v1.U64[0] | v2.U64[0];
    ret.U64[1] = v1.U64[1] | v2.U64[1];
    return ret;
  }

  ALWAYS_INLINE friend GSVector4 operator^(const GSVector4& v1, const GSVector4& v2)
  {
    GSVector4 ret;
    ret.U64[0] = v1.U64[0] ^ v2.U64[0];
    ret.U64[1] = v1.U64[1] ^ v2.U64[1];
    return ret;
  }

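  // The comparison operators return per-lane masks (all ones when the comparison holds, all zeros
  // otherwise), in the same format consumed by blend32(), mask(), alltrue() and allfalse().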
  ALWAYS_INLINE friend GSVector4 operator==(const GSVector4& v1, const GSVector4& v2)
  {
    GSVector4 ret;
    ret.I32[0] = (v1.x == v2.x) ? -1 : 0;
    ret.I32[1] = (v1.y == v2.y) ? -1 : 0;
    ret.I32[2] = (v1.z == v2.z) ? -1 : 0;
    ret.I32[3] = (v1.w == v2.w) ? -1 : 0;
    return ret;
  }

  ALWAYS_INLINE friend GSVector4 operator!=(const GSVector4& v1, const GSVector4& v2)
  {
    GSVector4 ret;
    ret.I32[0] = (v1.x != v2.x) ? -1 : 0;
    ret.I32[1] = (v1.y != v2.y) ? -1 : 0;
    ret.I32[2] = (v1.z != v2.z) ? -1 : 0;
    ret.I32[3] = (v1.w != v2.w) ? -1 : 0;
    return ret;
  }

  ALWAYS_INLINE friend GSVector4 operator>(const GSVector4& v1, const GSVector4& v2)
  {
    GSVector4 ret;
    ret.I32[0] = (v1.x > v2.x) ? -1 : 0;
    ret.I32[1] = (v1.y > v2.y) ? -1 : 0;
    ret.I32[2] = (v1.z > v2.z) ? -1 : 0;
    ret.I32[3] = (v1.w > v2.w) ? -1 : 0;
    return ret;
  }

  ALWAYS_INLINE friend GSVector4 operator<(const GSVector4& v1, const GSVector4& v2)
  {
    GSVector4 ret;
    ret.I32[0] = (v1.x < v2.x) ? -1 : 0;
    ret.I32[1] = (v1.y < v2.y) ? -1 : 0;
    ret.I32[2] = (v1.z < v2.z) ? -1 : 0;
    ret.I32[3] = (v1.w < v2.w) ? -1 : 0;
    return ret;
  }

  ALWAYS_INLINE friend GSVector4 operator>=(const GSVector4& v1, const GSVector4& v2)
  {
    GSVector4 ret;
    ret.I32[0] = (v1.x >= v2.x) ? -1 : 0;
    ret.I32[1] = (v1.y >= v2.y) ? -1 : 0;
    ret.I32[2] = (v1.z >= v2.z) ? -1 : 0;
    ret.I32[3] = (v1.w >= v2.w) ? -1 : 0;
    return ret;
  }

  ALWAYS_INLINE friend GSVector4 operator<=(const GSVector4& v1, const GSVector4& v2)
  {
    GSVector4 ret;
    ret.I32[0] = (v1.x <= v2.x) ? -1 : 0;
    ret.I32[1] = (v1.y <= v2.y) ? -1 : 0;
    ret.I32[2] = (v1.z <= v2.z) ? -1 : 0;
    ret.I32[3] = (v1.w <= v2.w) ? -1 : 0;
    return ret;
  }

2214
{
2215
GSVector4 ret;
2216
ret.F64[0] = F64[0] * v_.F64[0];
2217
ret.F64[1] = F64[1] * v_.F64[1];
2218
return ret;
2219
}
2220
2221
ALWAYS_INLINE GSVector4 add64(const GSVector4& v_) const
2222
{
2223
GSVector4 ret;
2224
ret.F64[0] = F64[0] + v_.F64[0];
2225
ret.F64[1] = F64[1] + v_.F64[1];
2226
return ret;
2227
}
2228
2229
ALWAYS_INLINE GSVector4 sub64(const GSVector4& v_) const
2230
{
2231
GSVector4 ret;
2232
ret.F64[0] = F64[0] - v_.F64[0];
2233
ret.F64[1] = F64[1] - v_.F64[1];
2234
return ret;
2235
}
2236
2237
ALWAYS_INLINE GSVector4 div64(const GSVector4& v) const
2238
{
2239
return GSVector4::f64(F64[0] / v.F64[0], F64[1] / v.F64[1]);
2240
}
2241
2242
ALWAYS_INLINE GSVector4 gt64(const GSVector4& v) const
2243
{
2244
GSVector4 ret;
2245
ret.U64[0] = (F64[0] > v.F64[0]) ? 0xFFFFFFFFFFFFFFFFULL : 0;
2246
ret.U64[1] = (F64[1] > v.F64[1]) ? 0xFFFFFFFFFFFFFFFFULL : 0;
2247
return ret;
2248
}
2249
2250
ALWAYS_INLINE GSVector4 eq64(const GSVector4& v) const
2251
{
2252
GSVector4 ret;
2253
ret.U64[0] = (F64[0] == v.F64[0]) ? 0xFFFFFFFFFFFFFFFFULL : 0;
2254
ret.U64[1] = (F64[1] == v.F64[1]) ? 0xFFFFFFFFFFFFFFFFULL : 0;
2255
return ret;
2256
}
2257
2258
ALWAYS_INLINE GSVector4 lt64(const GSVector4& v) const
2259
{
2260
GSVector4 ret;
2261
ret.U64[0] = (F64[0] < v.F64[0]) ? 0xFFFFFFFFFFFFFFFFULL : 0;
2262
ret.U64[1] = (F64[1] < v.F64[1]) ? 0xFFFFFFFFFFFFFFFFULL : 0;
2263
return ret;
2264
}
2265
2266
ALWAYS_INLINE GSVector4 ge64(const GSVector4& v) const
2267
{
2268
GSVector4 ret;
2269
ret.U64[0] = (F64[0] >= v.F64[0]) ? 0xFFFFFFFFFFFFFFFFULL : 0;
2270
ret.U64[1] = (F64[1] >= v.F64[1]) ? 0xFFFFFFFFFFFFFFFFULL : 0;
2271
return ret;
2272
}
2273
2274
ALWAYS_INLINE GSVector4 le64(const GSVector4& v) const
2275
{
2276
GSVector4 ret;
2277
ret.U64[0] = (F64[0] <= v.F64[0]) ? 0xFFFFFFFFFFFFFFFFULL : 0;
2278
ret.U64[1] = (F64[1] <= v.F64[1]) ? 0xFFFFFFFFFFFFFFFFULL : 0;
2279
return ret;
2280
}
2281
2282
ALWAYS_INLINE GSVector4 min64(const GSVector4& v) const
2283
{
2284
return GSVector4::f64(std::min(F64[0], v.F64[0]), std::min(F64[1], v.F64[1]));
2285
}
2286
2287
ALWAYS_INLINE GSVector4 max64(const GSVector4& v) const
2288
{
2289
return GSVector4::f64(std::max(F64[0], v.F64[0]), std::max(F64[1], v.F64[1]));
2290
}
2291
2292
ALWAYS_INLINE GSVector4 abs64() const { return *this & GSVector4::cxpr64(static_cast<u64>(0x7FFFFFFFFFFFFFFFULL)); }
2293
2294
ALWAYS_INLINE GSVector4 neg64() const { return *this ^ GSVector4::cxpr64(static_cast<u64>(0x8000000000000000ULL)); }
2295
2296
ALWAYS_INLINE GSVector4 sqrt64() const { return GSVector4::f64(std::sqrt(F64[0]), std::sqrt(F64[1])); }
2297
2298
ALWAYS_INLINE GSVector4 sqr64() const { return GSVector4::f64(F64[0] * F64[0], F64[1] * F64[1]); }
2299
2300
ALWAYS_INLINE GSVector4 floor64() const { return GSVector4::f64(std::floor(F64[0]), std::floor(F64[1])); }
2301
2302
ALWAYS_INLINE static GSVector4 f32to64(const GSVector4& v_)
2303
{
2304
GSVector4 ret;
2305
ret.F64[0] = v_.x;
2306
ret.F64[1] = v_.y;
2307
return ret;
2308
}
2309
2310
ALWAYS_INLINE static GSVector4 f32to64(const void* p)
2311
{
2312
float f[2];
2313
std::memcpy(f, p, sizeof(f));
2314
GSVector4 ret;
2315
ret.F64[0] = f[0];
2316
ret.F64[1] = f[1];
2317
return ret;
2318
}
2319
2320
ALWAYS_INLINE GSVector4i f64toi32() const
2321
{
2322
return GSVector4i(static_cast<s32>(F64[0]), static_cast<s32>(F64[1]), 0, 0);
2323
}
2324
2325
ALWAYS_INLINE GSVector2 xy() const { return GSVector2(x, y); }
2326
2327
ALWAYS_INLINE GSVector2 zw() const { return GSVector2(z, w); }
2328
2329
ALWAYS_INLINE static GSVector4 xyxy(const GSVector2& l, const GSVector2& h) { return GSVector4(l.x, l.y, h.x, h.y); }
2330
2331
ALWAYS_INLINE static GSVector4 xyxy(const GSVector2& l) { return GSVector4(l.x, l.y, l.x, l.y); }
2332
2333
#define VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \
2334
ALWAYS_INLINE GSVector4 xs##ys##zs##ws() const { return GSVector4(F32[xn], F32[yn], F32[zn], F32[wn]); }
2335
2336
#define VECTOR4_SHUFFLE_3(xs, xn, ys, yn, zs, zn) \
2337
VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, x, 0); \
2338
VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, y, 1); \
2339
VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, z, 2); \
2340
VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, w, 3);
2341
2342
#define VECTOR4_SHUFFLE_2(xs, xn, ys, yn) \
2343
VECTOR4_SHUFFLE_3(xs, xn, ys, yn, x, 0); \
2344
VECTOR4_SHUFFLE_3(xs, xn, ys, yn, y, 1); \
2345
VECTOR4_SHUFFLE_3(xs, xn, ys, yn, z, 2); \
2346
VECTOR4_SHUFFLE_3(xs, xn, ys, yn, w, 3);
2347
2348
#define VECTOR4_SHUFFLE_1(xs, xn) \
2349
VECTOR4_SHUFFLE_2(xs, xn, x, 0); \
2350
VECTOR4_SHUFFLE_2(xs, xn, y, 1); \
2351
VECTOR4_SHUFFLE_2(xs, xn, z, 2); \
2352
VECTOR4_SHUFFLE_2(xs, xn, w, 3);
2353
2354
VECTOR4_SHUFFLE_1(x, 0);
2355
VECTOR4_SHUFFLE_1(y, 1);
2356
VECTOR4_SHUFFLE_1(z, 2);
2357
VECTOR4_SHUFFLE_1(w, 3);
2358
2359
#undef VECTOR4_SHUFFLE_1
2360
#undef VECTOR4_SHUFFLE_2
2361
#undef VECTOR4_SHUFFLE_3
2362
#undef VECTOR4_SHUFFLE_4
2363
2364
ALWAYS_INLINE GSVector4 broadcast32() const { return GSVector4(x, x, x, x); }
2365
2366
ALWAYS_INLINE static GSVector4 broadcast32(const GSVector4& v) { return GSVector4(v.x, v.x, v.x, v.x); }
2367
2368
ALWAYS_INLINE static GSVector4 broadcast32(const void* f)
2369
{
2370
float ff;
2371
std::memcpy(&ff, f, sizeof(ff));
2372
return GSVector4(ff, ff, ff, ff);
2373
}
2374
2375
ALWAYS_INLINE static GSVector4 broadcast64(const void* d)
2376
{
2377
GSVector4 ret;
2378
std::memcpy(&ret.F64[0], d, sizeof(ret.F64[0]));
2379
ret.F64[1] = ret.F64[0];
2380
return ret;
2381
}
2382
};
2383
2384
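// Out-of-line conversions: the converting constructors below perform numeric (value) conversion between
// the float and integer vector types, whereas the cast() functions reinterpret the raw bit pattern via
// memcpy.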
ALWAYS_INLINE GSVector2i::GSVector2i(const GSVector2& v)
{
  x = static_cast<s32>(v.x);
  y = static_cast<s32>(v.y);
}

ALWAYS_INLINE GSVector2::GSVector2(const GSVector2i& v)
{
  x = static_cast<float>(v.x);
  y = static_cast<float>(v.y);
}

ALWAYS_INLINE GSVector2i GSVector2i::cast(const GSVector2& v)
{
  GSVector2i ret;
  std::memcpy(&ret, &v, sizeof(ret));
  return ret;
}

ALWAYS_INLINE GSVector2 GSVector2::cast(const GSVector2i& v)
{
  GSVector2 ret;
  std::memcpy(&ret, &v, sizeof(ret));
  return ret;
}

ALWAYS_INLINE GSVector4i::GSVector4i(const GSVector4& v)
{
  x = static_cast<s32>(v.x);
  y = static_cast<s32>(v.y);
  z = static_cast<s32>(v.z);
  w = static_cast<s32>(v.w);
}

ALWAYS_INLINE GSVector4::GSVector4(const GSVector4i& v)
{
  x = static_cast<float>(v.x);
  y = static_cast<float>(v.y);
  z = static_cast<float>(v.z);
  w = static_cast<float>(v.w);
}

ALWAYS_INLINE GSVector4i GSVector4i::cast(const GSVector4& v)
{
  GSVector4i ret;
  std::memcpy(&ret, &v, sizeof(ret));
  return ret;
}

ALWAYS_INLINE GSVector4 GSVector4::cast(const GSVector4i& v)
{
  GSVector4 ret;
  std::memcpy(&ret, &v, sizeof(ret));
  return ret;
}

#undef SSATURATE8
#undef USATURATE8
#undef SSATURATE16
#undef USATURATE16
#undef ALL_LANES_8
#undef ALL_LANES_16
#undef ALL_LANES_32
#undef ALL_LANES_64