Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
stenzek
GitHub Repository: stenzek/duckstation
Path: blob/master/dep/vixl/src/aarch64/logic-aarch64.cc
4261 views
1
// Copyright 2015, VIXL authors
2
// All rights reserved.
3
//
4
// Redistribution and use in source and binary forms, with or without
5
// modification, are permitted provided that the following conditions are met:
6
//
7
// * Redistributions of source code must retain the above copyright notice,
8
// this list of conditions and the following disclaimer.
9
// * Redistributions in binary form must reproduce the above copyright notice,
10
// this list of conditions and the following disclaimer in the documentation
11
// and/or other materials provided with the distribution.
12
// * Neither the name of ARM Limited nor the names of its contributors may be
13
// used to endorse or promote products derived from this software without
14
// specific prior written permission.
15
//
16
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
28
29
#include <cmath>
30
31
#include "simulator-aarch64.h"
32
33
namespace vixl {
34
namespace aarch64 {
35
36
using vixl::internal::SimFloat16;
37
38
// Type query: reports whether the template argument T is `double`.
// The primary template is the fallback and answers `false` for every type
// that has no explicit specialisation.
template <typename T>
bool IsFloat64() { return false; }

// `double` is the only type for which the query answers `true`.
template <>
bool IsFloat64<double>() { return true; }
46
47
// Type query: reports whether the template argument T is `float`.
// The primary template is the fallback and answers `false` for every type
// that has no explicit specialisation.
template <typename T>
bool IsFloat32() { return false; }

// `float` is the only type for which the query answers `true`.
template <>
bool IsFloat32<float>() { return true; }
55
56
template <typename T>
57
bool IsFloat16() {
58
return false;
59
}
60
template <>
61
bool IsFloat16<Float16>() {
62
return true;
63
}
64
template <>
65
bool IsFloat16<SimFloat16>() {
66
return true;
67
}
68
69
template <>
70
double Simulator::FPDefaultNaN<double>() {
71
return kFP64DefaultNaN;
72
}
73
74
75
template <>
76
float Simulator::FPDefaultNaN<float>() {
77
return kFP32DefaultNaN;
78
}
79
80
81
template <>
82
SimFloat16 Simulator::FPDefaultNaN<SimFloat16>() {
83
return SimFloat16(kFP16DefaultNaN);
84
}
85
86
87
double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
88
if (src >= 0) {
89
return UFixedToDouble(src, fbits, round);
90
} else if (src == INT64_MIN) {
91
return -UFixedToDouble(src, fbits, round);
92
} else {
93
return -UFixedToDouble(-src, fbits, round);
94
}
95
}
96
97
98
double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
99
// An input of 0 is a special case because the result is effectively
100
// subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
101
if (src == 0) {
102
return 0.0;
103
}
104
105
// Calculate the exponent. The highest significant bit will have the value
106
// 2^exponent.
107
const int highest_significant_bit = 63 - CountLeadingZeros(src);
108
const int64_t exponent = highest_significant_bit - fbits;
109
110
return FPRoundToDouble(0, exponent, src, round);
111
}
112
113
114
float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
115
if (src >= 0) {
116
return UFixedToFloat(src, fbits, round);
117
} else if (src == INT64_MIN) {
118
return -UFixedToFloat(src, fbits, round);
119
} else {
120
return -UFixedToFloat(-src, fbits, round);
121
}
122
}
123
124
125
float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
126
// An input of 0 is a special case because the result is effectively
127
// subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
128
if (src == 0) {
129
return 0.0f;
130
}
131
132
// Calculate the exponent. The highest significant bit will have the value
133
// 2^exponent.
134
const int highest_significant_bit = 63 - CountLeadingZeros(src);
135
const int32_t exponent = highest_significant_bit - fbits;
136
137
return FPRoundToFloat(0, exponent, src, round);
138
}
139
140
141
// Converts the signed fixed-point value `src` (with `fbits` fractional bits)
// to a SimFloat16, using rounding mode `round`.
//
// The conversion is delegated to UFixedToFloat16 on the magnitude of `src`;
// the result is negated when `src` is negative.
SimFloat16 Simulator::FixedToFloat16(int64_t src, int fbits, FPRounding round) {
  if (src >= 0) {
    return UFixedToFloat16(src, fbits, round);
  }

  // INT64_MIN cannot be negated as a signed value, but its bit pattern read
  // as an unsigned integer already is its magnitude, so it is passed through
  // unchanged. Every other negative input is negated first.
  const uint64_t magnitude = (src == INT64_MIN) ? static_cast<uint64_t>(src)
                                                : static_cast<uint64_t>(-src);
  return -UFixedToFloat16(magnitude, fbits, round);
}
150
151
152
// Converts the unsigned fixed-point value `src` (with `fbits` fractional
// bits) to a SimFloat16, using rounding mode `round`.
SimFloat16 Simulator::UFixedToFloat16(uint64_t src,
                                      int fbits,
                                      FPRounding round) {
  if (src != 0) {
    // The most significant set bit of `src` carries the value 2^exponent, so
    // the exponent is that bit's index reduced by the number of fractional
    // bits.
    const int msb_index = 63 - CountLeadingZeros(src);
    const int16_t exponent = msb_index - fbits;
    return FPRoundToFloat16(0, exponent, src, round);
  }

  // Zero is handled separately because the result is effectively subnormal:
  // the exponent is encoded as 0 and there is no implicit 1 bit.
  return 0.0f;
}
168
169
170
uint64_t Simulator::GenerateRandomTag(uint16_t exclude) {
171
uint64_t rtag = nrand48(rand_state_) >> 28;
172
VIXL_ASSERT(IsUint4(rtag));
173
174
if (exclude == 0) {
175
exclude = nrand48(rand_state_) >> 27;
176
}
177
178
// TODO: implement this to better match the specification, which calls for a
179
// true random mode, and a pseudo-random mode with state (EL1.TAG) modified by
180
// PRNG.
181
return ChooseNonExcludedTag(rtag, 0, exclude);
182
}
183
184
185
// Loads every lane of `dst` from consecutive memory starting at `addr`; each
// lane advances the address by one lane size.
//
// Returns false as soon as a lane load fails (later lanes are not attempted),
// true once all lanes have been loaded.
bool Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const bool loaded = LoadLane(dst, vform, lane, addr);
    if (!loaded) return false;
    addr += LaneSizeInBytesFromFormat(vform);
  }
  return true;
}
195
196
197
bool Simulator::ld1(VectorFormat vform,
198
LogicVRegister dst,
199
int index,
200
uint64_t addr) {
201
dst.ClearForWrite(vform);
202
return LoadLane(dst, vform, index, addr);
203
}
204
205
206
bool Simulator::ld1r(VectorFormat vform,
207
VectorFormat unpack_vform,
208
LogicVRegister dst,
209
uint64_t addr,
210
bool is_signed) {
211
unsigned unpack_size = LaneSizeInBytesFromFormat(unpack_vform);
212
dst.ClearForWrite(vform);
213
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
214
if (is_signed) {
215
if (!LoadIntToLane(dst, vform, unpack_size, i, addr)) {
216
return false;
217
}
218
} else {
219
if (!LoadUintToLane(dst, vform, unpack_size, i, addr)) {
220
return false;
221
}
222
}
223
}
224
return true;
225
}
226
227
228
// Load-and-replicate without unpacking: forwards to the unpacking overload
// with the memory format equal to the register format.
bool Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
  const VectorFormat unpack_vform = vform;
  return ld1r(vform, unpack_vform, dst, addr);
}
231
232
233
bool Simulator::ld2(VectorFormat vform,
234
LogicVRegister dst1,
235
LogicVRegister dst2,
236
uint64_t addr1) {
237
dst1.ClearForWrite(vform);
238
dst2.ClearForWrite(vform);
239
int esize = LaneSizeInBytesFromFormat(vform);
240
uint64_t addr2 = addr1 + esize;
241
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
242
if (!LoadLane(dst1, vform, i, addr1) || !LoadLane(dst2, vform, i, addr2)) {
243
return false;
244
}
245
addr1 += 2 * esize;
246
addr2 += 2 * esize;
247
}
248
return true;
249
}
250
251
252
bool Simulator::ld2(VectorFormat vform,
253
LogicVRegister dst1,
254
LogicVRegister dst2,
255
int index,
256
uint64_t addr1) {
257
dst1.ClearForWrite(vform);
258
dst2.ClearForWrite(vform);
259
uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
260
return (LoadLane(dst1, vform, index, addr1) &&
261
LoadLane(dst2, vform, index, addr2));
262
}
263
264
265
bool Simulator::ld2r(VectorFormat vform,
266
LogicVRegister dst1,
267
LogicVRegister dst2,
268
uint64_t addr) {
269
dst1.ClearForWrite(vform);
270
dst2.ClearForWrite(vform);
271
uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
272
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
273
if (!LoadLane(dst1, vform, i, addr) || !LoadLane(dst2, vform, i, addr2)) {
274
return false;
275
}
276
}
277
return true;
278
}
279
280
281
bool Simulator::ld3(VectorFormat vform,
282
LogicVRegister dst1,
283
LogicVRegister dst2,
284
LogicVRegister dst3,
285
uint64_t addr1) {
286
dst1.ClearForWrite(vform);
287
dst2.ClearForWrite(vform);
288
dst3.ClearForWrite(vform);
289
int esize = LaneSizeInBytesFromFormat(vform);
290
uint64_t addr2 = addr1 + esize;
291
uint64_t addr3 = addr2 + esize;
292
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
293
if (!LoadLane(dst1, vform, i, addr1) || !LoadLane(dst2, vform, i, addr2) ||
294
!LoadLane(dst3, vform, i, addr3)) {
295
return false;
296
}
297
addr1 += 3 * esize;
298
addr2 += 3 * esize;
299
addr3 += 3 * esize;
300
}
301
return true;
302
}
303
304
305
bool Simulator::ld3(VectorFormat vform,
306
LogicVRegister dst1,
307
LogicVRegister dst2,
308
LogicVRegister dst3,
309
int index,
310
uint64_t addr1) {
311
dst1.ClearForWrite(vform);
312
dst2.ClearForWrite(vform);
313
dst3.ClearForWrite(vform);
314
uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
315
uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
316
return (LoadLane(dst1, vform, index, addr1) &&
317
LoadLane(dst2, vform, index, addr2) &&
318
LoadLane(dst3, vform, index, addr3));
319
}
320
321
322
bool Simulator::ld3r(VectorFormat vform,
323
LogicVRegister dst1,
324
LogicVRegister dst2,
325
LogicVRegister dst3,
326
uint64_t addr) {
327
dst1.ClearForWrite(vform);
328
dst2.ClearForWrite(vform);
329
dst3.ClearForWrite(vform);
330
uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
331
uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
332
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
333
if (!LoadLane(dst1, vform, i, addr) || !LoadLane(dst2, vform, i, addr2) ||
334
!LoadLane(dst3, vform, i, addr3)) {
335
return false;
336
}
337
}
338
return true;
339
}
340
341
342
bool Simulator::ld4(VectorFormat vform,
343
LogicVRegister dst1,
344
LogicVRegister dst2,
345
LogicVRegister dst3,
346
LogicVRegister dst4,
347
uint64_t addr1) {
348
dst1.ClearForWrite(vform);
349
dst2.ClearForWrite(vform);
350
dst3.ClearForWrite(vform);
351
dst4.ClearForWrite(vform);
352
int esize = LaneSizeInBytesFromFormat(vform);
353
uint64_t addr2 = addr1 + esize;
354
uint64_t addr3 = addr2 + esize;
355
uint64_t addr4 = addr3 + esize;
356
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
357
if (!LoadLane(dst1, vform, i, addr1) || !LoadLane(dst2, vform, i, addr2) ||
358
!LoadLane(dst3, vform, i, addr3) || !LoadLane(dst4, vform, i, addr4)) {
359
return false;
360
}
361
addr1 += 4 * esize;
362
addr2 += 4 * esize;
363
addr3 += 4 * esize;
364
addr4 += 4 * esize;
365
}
366
return true;
367
}
368
369
370
bool Simulator::ld4(VectorFormat vform,
371
LogicVRegister dst1,
372
LogicVRegister dst2,
373
LogicVRegister dst3,
374
LogicVRegister dst4,
375
int index,
376
uint64_t addr1) {
377
dst1.ClearForWrite(vform);
378
dst2.ClearForWrite(vform);
379
dst3.ClearForWrite(vform);
380
dst4.ClearForWrite(vform);
381
uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
382
uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
383
uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
384
return (LoadLane(dst1, vform, index, addr1) &&
385
LoadLane(dst2, vform, index, addr2) &&
386
LoadLane(dst3, vform, index, addr3) &&
387
LoadLane(dst4, vform, index, addr4));
388
}
389
390
391
bool Simulator::ld4r(VectorFormat vform,
392
LogicVRegister dst1,
393
LogicVRegister dst2,
394
LogicVRegister dst3,
395
LogicVRegister dst4,
396
uint64_t addr) {
397
dst1.ClearForWrite(vform);
398
dst2.ClearForWrite(vform);
399
dst3.ClearForWrite(vform);
400
dst4.ClearForWrite(vform);
401
uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
402
uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
403
uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
404
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
405
if (!LoadLane(dst1, vform, i, addr) || !LoadLane(dst2, vform, i, addr2) ||
406
!LoadLane(dst3, vform, i, addr3) || !LoadLane(dst4, vform, i, addr4)) {
407
return false;
408
}
409
}
410
return true;
411
}
412
413
414
// Stores every lane of `src` to consecutive memory starting at `addr`; each
// lane advances the address by one lane size.
//
// Returns false as soon as a lane store fails (later lanes are not
// attempted), true once all lanes have been stored.
bool Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const bool stored = StoreLane(src, vform, lane, addr);
    if (!stored) {
      return false;
    }
    addr += LaneSizeInBytesFromFormat(vform);
  }
  return true;
}
421
422
423
bool Simulator::st1(VectorFormat vform,
424
LogicVRegister src,
425
int index,
426
uint64_t addr) {
427
return StoreLane(src, vform, index, addr);
428
}
429
430
431
bool Simulator::st2(VectorFormat vform,
432
LogicVRegister src,
433
LogicVRegister src2,
434
uint64_t addr) {
435
int esize = LaneSizeInBytesFromFormat(vform);
436
uint64_t addr2 = addr + esize;
437
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
438
if (!StoreLane(src, vform, i, addr) || !StoreLane(src2, vform, i, addr2)) {
439
return false;
440
}
441
addr += 2 * esize;
442
addr2 += 2 * esize;
443
}
444
return true;
445
}
446
447
448
bool Simulator::st2(VectorFormat vform,
449
LogicVRegister src,
450
LogicVRegister src2,
451
int index,
452
uint64_t addr) {
453
int esize = LaneSizeInBytesFromFormat(vform);
454
return (StoreLane(src, vform, index, addr) &&
455
StoreLane(src2, vform, index, addr + 1 * esize));
456
}
457
458
459
bool Simulator::st3(VectorFormat vform,
460
LogicVRegister src,
461
LogicVRegister src2,
462
LogicVRegister src3,
463
uint64_t addr) {
464
int esize = LaneSizeInBytesFromFormat(vform);
465
uint64_t addr2 = addr + esize;
466
uint64_t addr3 = addr2 + esize;
467
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
468
if (!StoreLane(src, vform, i, addr) || !StoreLane(src2, vform, i, addr2) ||
469
!StoreLane(src3, vform, i, addr3)) {
470
return false;
471
}
472
addr += 3 * esize;
473
addr2 += 3 * esize;
474
addr3 += 3 * esize;
475
}
476
return true;
477
}
478
479
480
bool Simulator::st3(VectorFormat vform,
481
LogicVRegister src,
482
LogicVRegister src2,
483
LogicVRegister src3,
484
int index,
485
uint64_t addr) {
486
int esize = LaneSizeInBytesFromFormat(vform);
487
return (StoreLane(src, vform, index, addr) &&
488
StoreLane(src2, vform, index, addr + 1 * esize) &&
489
StoreLane(src3, vform, index, addr + 2 * esize));
490
}
491
492
493
bool Simulator::st4(VectorFormat vform,
494
LogicVRegister src,
495
LogicVRegister src2,
496
LogicVRegister src3,
497
LogicVRegister src4,
498
uint64_t addr) {
499
int esize = LaneSizeInBytesFromFormat(vform);
500
uint64_t addr2 = addr + esize;
501
uint64_t addr3 = addr2 + esize;
502
uint64_t addr4 = addr3 + esize;
503
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
504
if (!StoreLane(src, vform, i, addr) || !StoreLane(src2, vform, i, addr2) ||
505
!StoreLane(src3, vform, i, addr3) ||
506
!StoreLane(src4, vform, i, addr4)) {
507
return false;
508
}
509
addr += 4 * esize;
510
addr2 += 4 * esize;
511
addr3 += 4 * esize;
512
addr4 += 4 * esize;
513
}
514
return true;
515
}
516
517
518
bool Simulator::st4(VectorFormat vform,
519
LogicVRegister src,
520
LogicVRegister src2,
521
LogicVRegister src3,
522
LogicVRegister src4,
523
int index,
524
uint64_t addr) {
525
int esize = LaneSizeInBytesFromFormat(vform);
526
return (StoreLane(src, vform, index, addr) &&
527
StoreLane(src2, vform, index, addr + 1 * esize) &&
528
StoreLane(src3, vform, index, addr + 2 * esize) &&
529
StoreLane(src4, vform, index, addr + 3 * esize));
530
}
531
532
533
// Lane-wise comparison of `src1` against `src2` under condition `cond`.
//
// Each lane of `dst` is set to MaxUintFromFormat(vform) when the condition
// holds for that lane and to 0 otherwise. `eq`, `hi` and `hs` compare the
// lanes as unsigned values; `ge`, `gt`, `lt` and `le` compare them as signed
// values. Any other condition is flagged with VIXL_UNREACHABLE.
LogicVRegister Simulator::cmp(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2,
                              Condition cond) {
  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const int64_t lhs_signed = src1.Int(vform, lane);
    const int64_t rhs_signed = src2.Int(vform, lane);
    const uint64_t lhs_unsigned = src1.Uint(vform, lane);
    const uint64_t rhs_unsigned = src2.Uint(vform, lane);

    bool holds = false;
    if (cond == eq) {
      holds = (lhs_unsigned == rhs_unsigned);
    } else if (cond == ge) {
      holds = (lhs_signed >= rhs_signed);
    } else if (cond == gt) {
      holds = (lhs_signed > rhs_signed);
    } else if (cond == hi) {
      holds = (lhs_unsigned > rhs_unsigned);
    } else if (cond == hs) {
      holds = (lhs_unsigned >= rhs_unsigned);
    } else if (cond == lt) {
      holds = (lhs_signed < rhs_signed);
    } else if (cond == le) {
      holds = (lhs_signed <= rhs_signed);
    } else {
      VIXL_UNREACHABLE();
    }

    dst.SetUint(vform, lane, holds ? MaxUintFromFormat(vform) : 0);
  }
  return dst;
}
575
576
577
// Lane-wise comparison of `src1` against the immediate `imm`.
//
// The immediate is first expanded into a scratch register with
// dup_immediate, then the register/register overload performs the compare.
LogicVRegister Simulator::cmp(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              int imm,
                              Condition cond) {
  SimVRegister scratch;
  return cmp(vform, dst, src1, dup_immediate(vform, scratch, imm), cond);
}
586
587
588
// Lane-wise bit test: each lane of `dst` is set to MaxUintFromFormat(vform)
// when the corresponding lanes of `src1` and `src2` have at least one set bit
// in common, and to 0 otherwise.
LogicVRegister Simulator::cmptst(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t common_bits = src1.Uint(vform, lane) & src2.Uint(vform, lane);
    const bool any_common = (common_bits != 0);
    dst.SetUint(vform, lane, any_common ? MaxUintFromFormat(vform) : 0);
  }
  return dst;
}
600
601
602
// Lane-wise addition of `src1` and `src2` into `dst`, recording saturation
// information on `dst` for each lane.
//
// The operands are read left-justified in 64 bits so that a carry out of the
// lane shows up as 64-bit wrap-around and the lane's sign bit is bit 63. The
// sum is shifted back down to lane width before being written.
LogicVRegister Simulator::add(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  const int lane_size = LaneSizeInBitsFromFormat(vform);
  dst.ClearForWrite(vform);

  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t lhs = src1.UintLeftJustified(vform, lane);
    const uint64_t rhs = src2.UintLeftJustified(vform, lane);
    const uint64_t sum = lhs + rhs;

    // Unsigned overflow: the wrapped sum is smaller than an operand.
    if (sum < lhs) {
      dst.SetUnsignedSat(lane, true);
    }

    // Signed overflow: both operands share a sign and the sum's sign differs
    // from it. The flag passed on is whether the operands were non-negative.
    const bool lhs_negative = (lhs >> 63) != 0;
    const bool rhs_negative = (rhs >> 63) != 0;
    const bool sum_negative = (sum >> 63) != 0;
    if ((lhs_negative == rhs_negative) && (sum_negative != lhs_negative)) {
      dst.SetSignedSat(lane, !lhs_negative);
    }

    dst.SetInt(vform, lane, sum >> (64 - lane_size));
  }
  return dst;
}
631
632
// Adds the unsigned scalar `value` to every lane of `src1`, recording
// saturation information on `dst` for each lane.
//
// `value` must fit in one lane (asserted). Both operands are handled
// left-justified in 64 bits; the sum is shifted back down to lane width
// before being written.
LogicVRegister Simulator::add_uint(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   uint64_t value) {
  const int lane_size = LaneSizeInBitsFromFormat(vform);
  VIXL_ASSERT(IsUintN(lane_size, value));
  dst.ClearForWrite(vform);

  // Left-justified copy of `value`, computed once for all lanes.
  const uint64_t addend = value << (64 - lane_size);

  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t lhs = src1.UintLeftJustified(vform, lane);
    const uint64_t sum = lhs + addend;

    // Unsigned overflow: the wrapped sum is smaller than the operand.
    if (sum < lhs) {
      dst.SetUnsignedSat(lane, true);
    }

    // Signed overflow: `value` is always positive, so the signed sum may
    // only be smaller than the first operand if it overflowed.
    if (RawbitsToInt64(sum) < RawbitsToInt64(lhs)) {
      dst.SetSignedSat(lane, true);
    }

    dst.SetInt(vform, lane, sum >> (64 - lane_size));
  }
  return dst;
}
660
661
// Pairwise add built from existing primitives: the uzp1 and uzp2 results of
// (`src1`, `src2`) are computed into scratch registers and added lane-wise
// into `dst`. For SVE formats the sum is additionally passed through
// interleave_top_bottom in place.
LogicVRegister Simulator::addp(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  SimVRegister uzp1_result;
  SimVRegister uzp2_result;
  uzp1(vform, uzp1_result, src1, src2);
  uzp2(vform, uzp2_result, src1, src2);
  add(vform, dst, uzp1_result, uzp2_result);

  const bool is_sve = IsSVEFormat(vform);
  if (is_sve) {
    interleave_top_bottom(vform, dst, dst);
  }
  return dst;
}
674
675
// Lane-wise signed division `src1 / src2`, restricted (by assertion) to the
// kFormatVnS and kFormatVnD formats.
//
// Two cases are handled without performing the host division:
//  - a zero divisor produces a quotient of 0;
//  - the lane minimum divided by -1 produces the lane minimum.
LogicVRegister Simulator::sdiv(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));

  const int64_t min_int = (vform == kFormatVnD) ? kXMinInt : kWMinInt;
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const int64_t dividend = src1.Int(vform, lane);
    const int64_t divisor = src2.Int(vform, lane);

    int64_t quotient;
    if (divisor == 0) {
      quotient = 0;
    } else if ((dividend == min_int) && (divisor == -1)) {
      quotient = min_int;
    } else {
      quotient = dividend / divisor;
    }
    dst.SetInt(vform, lane, quotient);
  }

  return dst;
}
696
697
// Lane-wise unsigned division `src1 / src2`, restricted (by assertion) to the
// kFormatVnS and kFormatVnD formats. A zero divisor produces a quotient of 0.
LogicVRegister Simulator::udiv(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));

  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t dividend = src1.Uint(vform, lane);
    const uint64_t divisor = src2.Uint(vform, lane);
    const uint64_t quotient = (divisor != 0) ? (dividend / divisor) : 0;
    dst.SetUint(vform, lane, quotient);
  }

  return dst;
}
715
716
717
// Multiply-accumulate: dst = srca + (src1 * src2), lane by lane.
// The product is formed in a scratch register before the addition.
LogicVRegister Simulator::mla(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& srca,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  SimVRegister product;
  mul(vform, product, src1, src2);
  add(vform, dst, srca, product);
  return dst;
}
727
728
729
// Multiply-subtract: dst = srca - (src1 * src2), lane by lane.
// The product is formed in a scratch register before the subtraction.
LogicVRegister Simulator::mls(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& srca,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  SimVRegister product;
  mul(vform, product, src1, src2);
  sub(vform, dst, srca, product);
  return dst;
}
739
740
741
// Lane-wise multiplication: each lane of `dst` receives the product of the
// corresponding unsigned lanes of `src1` and `src2`, as written by SetUint.
LogicVRegister Simulator::mul(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  dst.ClearForWrite(vform);

  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t product = src1.Uint(vform, lane) * src2.Uint(vform, lane);
    dst.SetUint(vform, lane, product);
  }
  return dst;
}
752
753
754
// By-element multiply: element `index` of `src2` is duplicated with
// dup_element (using the VectorFormatFillQ variant of `vform`) into a scratch
// register, which then serves as the second operand of the vector multiply.
LogicVRegister Simulator::mul(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2,
                              int index) {
  SimVRegister scratch;
  const VectorFormat indexform = VectorFormatFillQ(vform);
  LogicVRegister element = dup_element(indexform, scratch, src2, index);
  return mul(vform, dst, src1, element);
}
763
764
765
// Signed multiply-high: each lane of `dst` receives the result of
// internal::MultiplyHigh<N> applied to the signed lanes of `src1` and `src2`,
// where N is the lane width in bits (8, 16, 32 or 64). Any other lane width
// is flagged with VIXL_UNREACHABLE.
LogicVRegister Simulator::smulh(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const int64_t lhs = src1.Int(vform, lane);
    const int64_t rhs = src2.Int(vform, lane);

    // Recognisable placeholder; only survives an unsupported lane width.
    int64_t high_part = 0xbadbeef;
    switch (LaneSizeInBitsFromFormat(vform)) {
      case 8:
        high_part = internal::MultiplyHigh<8>(lhs, rhs);
        break;
      case 16:
        high_part = internal::MultiplyHigh<16>(lhs, rhs);
        break;
      case 32:
        high_part = internal::MultiplyHigh<32>(lhs, rhs);
        break;
      case 64:
        high_part = internal::MultiplyHigh<64>(lhs, rhs);
        break;
      default:
        VIXL_UNREACHABLE();
        break;
    }
    dst.SetInt(vform, lane, high_part);
  }
  return dst;
}
794
795
796
// Unsigned multiply-high: each lane of `dst` receives the result of
// internal::MultiplyHigh<N> applied to the unsigned lanes of `src1` and
// `src2`, where N is the lane width in bits (8, 16, 32 or 64). Any other lane
// width is flagged with VIXL_UNREACHABLE.
LogicVRegister Simulator::umulh(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t lhs = src1.Uint(vform, lane);
    const uint64_t rhs = src2.Uint(vform, lane);

    // Recognisable placeholder; only survives an unsupported lane width.
    uint64_t high_part = 0xbadbeef;
    switch (LaneSizeInBitsFromFormat(vform)) {
      case 8:
        high_part = internal::MultiplyHigh<8>(lhs, rhs);
        break;
      case 16:
        high_part = internal::MultiplyHigh<16>(lhs, rhs);
        break;
      case 32:
        high_part = internal::MultiplyHigh<32>(lhs, rhs);
        break;
      case 64:
        high_part = internal::MultiplyHigh<64>(lhs, rhs);
        break;
      default:
        VIXL_UNREACHABLE();
        break;
    }
    dst.SetUint(vform, lane, high_part);
  }
  return dst;
}
825
826
827
// By-element multiply-accumulate: element `index` of `src2` is duplicated
// with dup_element (using the VectorFormatFillQ variant of `vform`) into a
// scratch register, and the vector mla is applied with `dst` as both the
// accumulator and the destination.
LogicVRegister Simulator::mla(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2,
                              int index) {
  SimVRegister scratch;
  const VectorFormat indexform = VectorFormatFillQ(vform);
  LogicVRegister element = dup_element(indexform, scratch, src2, index);
  return mla(vform, dst, dst, src1, element);
}
836
837
838
// By-element multiply-subtract: element `index` of `src2` is duplicated with
// dup_element (using the VectorFormatFillQ variant of `vform`) into a scratch
// register, and the vector mls is applied with `dst` as both the minuend and
// the destination.
LogicVRegister Simulator::mls(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2,
                              int index) {
  SimVRegister scratch;
  const VectorFormat indexform = VectorFormatFillQ(vform);
  LogicVRegister element = dup_element(indexform, scratch, src2, index);
  return mls(vform, dst, dst, src1, element);
}
847
848
// By-element sqdmull: element `index` of `src2` is duplicated with
// dup_element into a scratch register and the vector form is applied to it.
// The duplication uses the half-width, double-lane variant of the
// VectorFormatFillQ form of `vform`.
LogicVRegister Simulator::sqdmull(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2,
                                  int index) {
  SimVRegister scratch;
  const VectorFormat filled_q = VectorFormatFillQ(vform);
  const VectorFormat indexform = VectorFormatHalfWidthDoubleLanes(filled_q);
  LogicVRegister element = dup_element(indexform, scratch, src2, index);
  return sqdmull(vform, dst, src1, element);
}
858
859
// By-element sqdmlal: element `index` of `src2` is duplicated with
// dup_element into a scratch register and the vector form is applied to it.
// The duplication uses the half-width, double-lane variant of the
// VectorFormatFillQ form of `vform`.
LogicVRegister Simulator::sqdmlal(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2,
                                  int index) {
  SimVRegister scratch;
  const VectorFormat filled_q = VectorFormatFillQ(vform);
  const VectorFormat indexform = VectorFormatHalfWidthDoubleLanes(filled_q);
  LogicVRegister element = dup_element(indexform, scratch, src2, index);
  return sqdmlal(vform, dst, src1, element);
}
869
870
// By-element sqdmlsl: element `index` of `src2` is duplicated with
// dup_element into a scratch register and the vector form is applied to it.
// The duplication uses the half-width, double-lane variant of the
// VectorFormatFillQ form of `vform`.
LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2,
                                  int index) {
  SimVRegister scratch;
  const VectorFormat filled_q = VectorFormatFillQ(vform);
  const VectorFormat indexform = VectorFormatHalfWidthDoubleLanes(filled_q);
  LogicVRegister element = dup_element(indexform, scratch, src2, index);
  return sqdmlsl(vform, dst, src1, element);
}
880
881
// By-element sqdmulh: element `index` of `src2` is duplicated with
// dup_element (using the VectorFormatFillQ variant of `vform`) into a scratch
// register and the vector form is applied to it.
LogicVRegister Simulator::sqdmulh(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2,
                                  int index) {
  SimVRegister scratch;
  const VectorFormat indexform = VectorFormatFillQ(vform);
  LogicVRegister element = dup_element(indexform, scratch, src2, index);
  return sqdmulh(vform, dst, src1, element);
}
890
891
892
// By-element sqrdmulh: element `index` of `src2` is duplicated with
// dup_element (using the VectorFormatFillQ variant of `vform`) into a scratch
// register and the vector form is applied to it.
LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2,
                                   int index) {
  SimVRegister scratch;
  const VectorFormat indexform = VectorFormatFillQ(vform);
  LogicVRegister element = dup_element(indexform, scratch, src2, index);
  return sqrdmulh(vform, dst, src1, element);
}
901
902
903
// By-element sqrdmlah: element `index` of `src2` is duplicated with
// dup_element (using the VectorFormatFillQ variant of `vform`) into a scratch
// register and the vector form is applied to it.
LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2,
                                   int index) {
  SimVRegister scratch;
  const VectorFormat indexform = VectorFormatFillQ(vform);
  LogicVRegister element = dup_element(indexform, scratch, src2, index);
  return sqrdmlah(vform, dst, src1, element);
}
912
913
914
// By-element sqrdmlsh: element `index` of `src2` is duplicated with
// dup_element (using the VectorFormatFillQ variant of `vform`) into a scratch
// register and the vector form is applied to it.
LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2,
                                   int index) {
  SimVRegister scratch;
  const VectorFormat indexform = VectorFormatFillQ(vform);
  LogicVRegister element = dup_element(indexform, scratch, src2, index);
  return sqrdmlsh(vform, dst, src1, element);
}
923
924
uint64_t Simulator::PolynomialMult(uint64_t op1,
925
uint64_t op2,
926
int lane_size_in_bits) const {
927
return PolynomialMult128(op1, op2, lane_size_in_bits).second;
928
}
929
930
// Lane-wise polynomial multiply: each lane of `dst` receives
// PolynomialMult(src1 lane, src2 lane, lane width in bits).
LogicVRegister Simulator::pmul(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t product = PolynomialMult(src1.Uint(vform, lane),
                                            src2.Uint(vform, lane),
                                            LaneSizeInBitsFromFormat(vform));
    dst.SetUint(vform, lane, product);
  }
  return dst;
}
944
945
946
// Widening polynomial multiply: lane i of `dst` (in format `vform`) receives
// PolynomialMult128 of lane i of `src1` and `src2`, where the sources are
// read in the half-width format of `vform`.
LogicVRegister Simulator::pmull(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  const VectorFormat vform_src = VectorFormatHalfWidth(vform);

  // Walk the lanes from the highest index down to 0: processing in reverse
  // avoids problems when the destination register is the same as a source.
  int lane = LaneCountFromFormat(vform);
  while (lane-- > 0) {
    dst.SetUint(vform,
                lane,
                PolynomialMult128(src1.Uint(vform_src, lane),
                                  src2.Uint(vform_src, lane),
                                  LaneSizeInBitsFromFormat(vform_src)));
  }

  return dst;
}
965
966
967
// Widening polynomial multiply on the upper source lanes: lane i of `dst`
// receives PolynomialMult128 of source lane (lane_count + i) of `src1` and
// `src2`, where the sources are read in the half-width, double-lane format of
// `vform` and lane_count is the lane count of `vform`.
LogicVRegister Simulator::pmull2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  const VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);

  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const int src_lane = lane_count + lane;
    dst.SetUint(vform,
                lane,
                PolynomialMult128(src1.Uint(vform_src, src_lane),
                                  src2.Uint(vform_src, src_lane),
                                  LaneSizeInBitsFromFormat(vform_src)));
  }

  return dst;
}
985
986
987
// Lane-wise subtraction `src1 - src2` into `dst`, recording saturation
// information on `dst` for each lane.
//
// The operands are read left-justified in 64 bits so that a borrow out of the
// lane shows up as 64-bit wrap-around and the lane's sign bit is bit 63. The
// difference is shifted back down to lane width before being written.
LogicVRegister Simulator::sub(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  const int lane_size = LaneSizeInBitsFromFormat(vform);
  dst.ClearForWrite(vform);

  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t lhs = src1.UintLeftJustified(vform, lane);
    const uint64_t rhs = src2.UintLeftJustified(vform, lane);
    const uint64_t diff = lhs - rhs;

    // Unsigned underflow: the subtrahend is larger than the minuend.
    if (rhs > lhs) {
      dst.SetUnsignedSat(lane, false);
    }

    // Signed overflow: the operands have different signs and the sign of the
    // result does not match the first operand. The flag passed on is whether
    // the first operand was non-negative.
    const bool lhs_negative = (lhs >> 63) != 0;
    const bool rhs_negative = (rhs >> 63) != 0;
    const bool diff_negative = (diff >> 63) != 0;
    if ((lhs_negative != rhs_negative) && (diff_negative != lhs_negative)) {
      dst.SetSignedSat(lane, !lhs_negative);
    }

    dst.SetInt(vform, lane, diff >> (64 - lane_size));
  }
  return dst;
}
1016
1017
// Subtracts the unsigned scalar `value` from every lane of `src1`, recording
// saturation information on `dst` for each lane.
//
// `value` must fit in one lane (asserted). Both operands are handled
// left-justified in 64 bits; the difference is shifted back down to lane
// width before being written.
LogicVRegister Simulator::sub_uint(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   uint64_t value) {
  const int lane_size = LaneSizeInBitsFromFormat(vform);
  VIXL_ASSERT(IsUintN(lane_size, value));
  dst.ClearForWrite(vform);

  // Left-justified copy of `value`, computed once for all lanes.
  const uint64_t subtrahend = value << (64 - lane_size);

  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t lhs = src1.UintLeftJustified(vform, lane);
    const uint64_t diff = lhs - subtrahend;

    // Unsigned underflow: the subtrahend is larger than the minuend.
    if (subtrahend > lhs) {
      dst.SetUnsignedSat(lane, false);
    }

    // Signed overflow: `value` is always positive, so the signed result may
    // only be greater than the first operand if it overflowed.
    if (RawbitsToInt64(diff) > RawbitsToInt64(lhs)) {
      dst.SetSignedSat(lane, false);
    }

    dst.SetInt(vform, lane, diff >> (64 - lane_size));
  }
  return dst;
}
1045
1046
// Lane-wise bitwise AND: dst = src1 & src2.
LogicVRegister Simulator::and_(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t lhs = src1.Uint(vform, lane);
    const uint64_t rhs = src2.Uint(vform, lane);
    dst.SetUint(vform, lane, lhs & rhs);
  }
  return dst;
}
1056
1057
1058
// Lane-wise bitwise OR: dst = src1 | src2.
LogicVRegister Simulator::orr(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t lhs = src1.Uint(vform, lane);
    const uint64_t rhs = src2.Uint(vform, lane);
    dst.SetUint(vform, lane, lhs | rhs);
  }
  return dst;
}
1068
1069
1070
// Lane-wise bitwise OR-NOT: dst = src1 | ~src2.
LogicVRegister Simulator::orn(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t lhs = src1.Uint(vform, lane);
    const uint64_t inverted_rhs = ~src2.Uint(vform, lane);
    dst.SetUint(vform, lane, lhs | inverted_rhs);
  }
  return dst;
}
1080
1081
1082
// Lane-wise bitwise exclusive OR: dst = src1 ^ src2.
LogicVRegister Simulator::eor(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t lhs = src1.Uint(vform, lane);
    const uint64_t rhs = src2.Uint(vform, lane);
    dst.SetUint(vform, lane, lhs ^ rhs);
  }
  return dst;
}
1092
1093
1094
// Lane-wise bit clear: dst = src1 & ~src2.
LogicVRegister Simulator::bic(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t lhs = src1.Uint(vform, lane);
    const uint64_t inverted_rhs = ~src2.Uint(vform, lane);
    dst.SetUint(vform, lane, lhs & inverted_rhs);
  }
  return dst;
}
1104
1105
1106
// Lane-wise bit clear (immediate form): every lane of dst receives
// src & ~imm.
//
// The results are staged in a local buffer before dst is written; presumably
// this is so that dst may refer to the same register as src.
LogicVRegister Simulator::bic(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src,
                              uint64_t imm) {
  // Ensure that we can store one result per lane, whatever the vector format.
  // This matches the sizing of the other per-lane staging buffers in this file
  // rather than assuming at most 16 lanes.
  uint64_t result[kZRegMaxSizeInBytes];
  int lane_count = LaneCountFromFormat(vform);
  VIXL_ASSERT(static_cast<size_t>(lane_count) <= ArrayLength(result));

  for (int i = 0; i < lane_count; ++i) {
    result[i] = src.Uint(vform, i) & ~imm;
  }

  dst.ClearForWrite(vform);
  for (int i = 0; i < lane_count; ++i) {
    dst.SetUint(vform, i, result[i]);
  }
  return dst;
}
1121
1122
1123
// Bitwise insert if false: for each bit, dst takes the src1 bit where the
// corresponding src2 bit is clear, and keeps its current value otherwise.
LogicVRegister Simulator::bif(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t current = dst.Uint(vform, lane);
    const uint64_t select = ~src2.Uint(vform, lane);
    const uint64_t incoming = src1.Uint(vform, lane);
    // Bits that differ between `current` and `incoming` are flipped only where
    // `select` is set.
    const uint64_t flip = (current ^ incoming) & select;
    dst.SetUint(vform, lane, current ^ flip);
  }
  return dst;
}
1137
1138
1139
// Bitwise insert if true: for each bit, dst takes the src1 bit where the
// corresponding src2 bit is set, and keeps its current value otherwise.
LogicVRegister Simulator::bit(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t current = dst.Uint(vform, lane);
    const uint64_t select = src2.Uint(vform, lane);
    const uint64_t incoming = src1.Uint(vform, lane);
    // Bits that differ between `current` and `incoming` are flipped only where
    // `select` is set.
    const uint64_t flip = (current ^ incoming) & select;
    dst.SetUint(vform, lane, current ^ flip);
  }
  return dst;
}
1153
1154
1155
// Bitwise select: for each bit, dst takes the src1 bit where src_mask is set
// and the src2 bit where src_mask is clear.
LogicVRegister Simulator::bsl(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src_mask,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t when_clear = src2.Uint(vform, lane);
    const uint64_t select = src_mask.Uint(vform, lane);
    const uint64_t when_set = src1.Uint(vform, lane);
    const uint64_t flip = (when_clear ^ when_set) & select;
    dst.SetUint(vform, lane, when_clear ^ flip);
  }
  return dst;
}
1170
1171
1172
// Lane-wise signed minimum or maximum of src1 and src2. `max` selects the
// maximum when true and the minimum when false.
LogicVRegister Simulator::sminmax(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2,
                                  bool max) {
  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const int64_t lhs = src1.Int(vform, lane);
    const int64_t rhs = src2.Int(vform, lane);
    // src2's value is the fallback, so ties resolve to rhs.
    const bool take_lhs = max ? (lhs > rhs) : (lhs < rhs);
    dst.SetInt(vform, lane, take_lhs ? lhs : rhs);
  }
  return dst;
}
1191
1192
1193
// Lane-wise signed maximum; forwards to sminmax() with the maximum selected.
LogicVRegister Simulator::smax(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  const bool select_max = true;
  return sminmax(vform, dst, src1, src2, select_max);
}
1199
1200
1201
// Lane-wise signed minimum; forwards to sminmax() with the minimum selected.
LogicVRegister Simulator::smin(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  const bool select_max = false;
  return sminmax(vform, dst, src1, src2, select_max);
}
1207
1208
1209
// Pairwise signed minimum or maximum. Adjacent lane pairs of src1 are reduced
// into the first half of a staging array and adjacent pairs of src2 into the
// second half; the array is then written to dst. `max` selects the maximum
// when true and the minimum when false.
LogicVRegister Simulator::sminmaxp(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2,
                                   bool max) {
  unsigned lanes = LaneCountFromFormat(vform);
  int64_t result[kZRegMaxSizeInBytes];
  const LogicVRegister* const inputs[2] = {&src1, &src2};

  for (unsigned half = 0; half < 2; half++) {
    const LogicVRegister& input = *inputs[half];
    for (unsigned i = 0; i < lanes; i += 2) {
      const int64_t even_val = input.Int(vform, i);
      const int64_t odd_val = input.Int(vform, i + 1);
      const bool take_even =
          max ? (even_val > odd_val) : (even_val < odd_val);

      const unsigned out_index = (i >> 1) + (half * lanes / 2);
      VIXL_ASSERT(out_index < ArrayLength(result));
      result[out_index] = take_even ? even_val : odd_val;
    }
  }

  dst.SetIntArray(vform, result);
  // SVE formats additionally pass the result through interleave_top_bottom().
  if (IsSVEFormat(vform)) {
    interleave_top_bottom(vform, dst, dst);
  }
  return dst;
}
1238
1239
1240
// Pairwise signed maximum; forwards to sminmaxp() with the maximum selected.
LogicVRegister Simulator::smaxp(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  const bool select_max = true;
  return sminmaxp(vform, dst, src1, src2, select_max);
}
1246
1247
1248
// Pairwise signed minimum; forwards to sminmaxp() with the minimum selected.
LogicVRegister Simulator::sminp(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  const bool select_max = false;
  return sminmaxp(vform, dst, src1, src2, select_max);
}
1254
1255
1256
// Scalar pairwise add: sums the two 64-bit lanes of src (read as kFormat2D)
// and writes the sum to lane 0 of dst. Only kFormatD is accepted.
LogicVRegister Simulator::addp(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src) {
  VIXL_ASSERT(vform == kFormatD);

  // Both source lanes are read before dst is modified.
  const uint64_t low_lane = src.Uint(kFormat2D, 0);
  const uint64_t high_lane = src.Uint(kFormat2D, 1);
  const uint64_t sum = low_lane + high_lane;

  dst.ClearForWrite(vform);
  dst.SetUint(vform, 0, sum);
  return dst;
}
1266
1267
1268
// Add across vector: sums every lane of src (as signed values) and writes the
// total to lane 0 of dst, using the scalar format with the same lane size.
LogicVRegister Simulator::addv(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src) {
  const VectorFormat scalar_form =
      ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));

  // Accumulate before touching dst.
  int64_t total = 0;
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    total += src.Int(vform, lane);
  }

  dst.ClearForWrite(scalar_form);
  dst.SetInt(scalar_form, 0, total);
  return dst;
}
1284
1285
1286
// Signed add long across vector: sums every lane of src as signed values and
// writes the total to lane 0 of dst, using a scalar format twice as wide as
// the source lanes.
LogicVRegister Simulator::saddlv(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src) {
  const VectorFormat wide_scalar_form =
      ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);

  // Accumulate before touching dst.
  int64_t total = 0;
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    total += src.Int(vform, lane);
  }

  dst.ClearForWrite(wide_scalar_form);
  dst.SetInt(wide_scalar_form, 0, total);
  return dst;
}
1301
1302
1303
// Unsigned add long across vector: sums every lane of src as unsigned values
// and writes the total to lane 0 of dst, using a scalar format twice as wide
// as the source lanes.
LogicVRegister Simulator::uaddlv(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src) {
  const VectorFormat wide_scalar_form =
      ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);

  // Accumulate before touching dst.
  uint64_t total = 0;
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    total += src.Uint(vform, lane);
  }

  dst.ClearForWrite(wide_scalar_form);
  dst.SetUint(wide_scalar_form, 0, total);
  return dst;
}
1318
1319
1320
// Signed minimum or maximum across the lanes of src that are active in pg.
// The reduction starts from INT64_MIN (max) or INT64_MAX (min), and the result
// is written to lane 0 of dst.
LogicVRegister Simulator::sminmaxv(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicPRegister& pg,
                                   const LogicVRegister& src,
                                   bool max) {
  int64_t best = max ? INT64_MIN : INT64_MAX;
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    if (pg.IsActive(vform, lane)) {
      const int64_t candidate = src.Int(vform, lane);
      const bool is_better = max ? (candidate > best) : (candidate < best);
      if (is_better) best = candidate;
    }
  }
  dst.ClearForWrite(ScalarFormatFromFormat(vform));
  dst.SetInt(vform, 0, best);
  return dst;
}
1340
1341
1342
// Signed maximum across vector, unpredicated: runs sminmaxv() with the
// predicate obtained from GetPTrue() and the maximum selected.
LogicVRegister Simulator::smaxv(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  const bool select_max = true;
  sminmaxv(vform, dst, GetPTrue(), src, select_max);
  return dst;
}
1348
1349
1350
// Signed minimum across vector, unpredicated: runs sminmaxv() with the
// predicate obtained from GetPTrue() and the minimum selected.
LogicVRegister Simulator::sminv(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  const bool select_max = false;
  sminmaxv(vform, dst, GetPTrue(), src, select_max);
  return dst;
}
1356
1357
1358
// Signed maximum across the lanes active in pg. Only SVE formats are
// accepted.
LogicVRegister Simulator::smaxv(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicPRegister& pg,
                                const LogicVRegister& src) {
  VIXL_ASSERT(IsSVEFormat(vform));
  const bool select_max = true;
  sminmaxv(vform, dst, pg, src, select_max);
  return dst;
}
1366
1367
1368
// Signed minimum across the lanes active in pg. Only SVE formats are
// accepted.
LogicVRegister Simulator::sminv(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicPRegister& pg,
                                const LogicVRegister& src) {
  VIXL_ASSERT(IsSVEFormat(vform));
  const bool select_max = false;
  sminmaxv(vform, dst, pg, src, select_max);
  return dst;
}
1376
1377
1378
// Lane-wise unsigned minimum or maximum of src1 and src2. `max` selects the
// maximum when true and the minimum when false.
LogicVRegister Simulator::uminmax(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2,
                                  bool max) {
  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t lhs = src1.Uint(vform, lane);
    const uint64_t rhs = src2.Uint(vform, lane);
    // src2's value is the fallback, so ties resolve to rhs.
    const bool take_lhs = max ? (lhs > rhs) : (lhs < rhs);
    dst.SetUint(vform, lane, take_lhs ? lhs : rhs);
  }
  return dst;
}
1397
1398
1399
// Lane-wise unsigned maximum; forwards to uminmax() with the maximum selected.
LogicVRegister Simulator::umax(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  const bool select_max = true;
  return uminmax(vform, dst, src1, src2, select_max);
}
1405
1406
1407
// Lane-wise unsigned minimum; forwards to uminmax() with the minimum selected.
LogicVRegister Simulator::umin(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  const bool select_max = false;
  return uminmax(vform, dst, src1, src2, select_max);
}
1413
1414
1415
// Pairwise unsigned minimum or maximum. Adjacent lane pairs of src1 are
// reduced into the first half of a staging array and adjacent pairs of src2
// into the second half; the array is then written to dst. `max` selects the
// maximum when true and the minimum when false.
LogicVRegister Simulator::uminmaxp(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2,
                                   bool max) {
  unsigned lanes = LaneCountFromFormat(vform);
  uint64_t result[kZRegMaxSizeInBytes];
  const LogicVRegister* const inputs[2] = {&src1, &src2};

  for (unsigned half = 0; half < 2; half++) {
    const LogicVRegister& input = *inputs[half];
    for (unsigned i = 0; i < lanes; i += 2) {
      const uint64_t even_val = input.Uint(vform, i);
      const uint64_t odd_val = input.Uint(vform, i + 1);
      const bool take_even =
          max ? (even_val > odd_val) : (even_val < odd_val);

      const unsigned out_index = (i >> 1) + (half * lanes / 2);
      VIXL_ASSERT(out_index < ArrayLength(result));
      result[out_index] = take_even ? even_val : odd_val;
    }
  }

  dst.SetUintArray(vform, result);
  // SVE formats additionally pass the result through interleave_top_bottom().
  if (IsSVEFormat(vform)) {
    interleave_top_bottom(vform, dst, dst);
  }
  return dst;
}
1444
1445
1446
// Pairwise unsigned maximum; forwards to uminmaxp() with the maximum selected.
LogicVRegister Simulator::umaxp(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  const bool select_max = true;
  return uminmaxp(vform, dst, src1, src2, select_max);
}
1452
1453
1454
// Pairwise unsigned minimum; forwards to uminmaxp() with the minimum selected.
LogicVRegister Simulator::uminp(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  const bool select_max = false;
  return uminmaxp(vform, dst, src1, src2, select_max);
}
1460
1461
1462
// Unsigned minimum or maximum across the lanes of src that are active in pg.
// The reduction starts from 0 (max) or UINT64_MAX (min), and the result is
// written to lane 0 of dst.
LogicVRegister Simulator::uminmaxv(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicPRegister& pg,
                                   const LogicVRegister& src,
                                   bool max) {
  uint64_t best = max ? 0 : UINT64_MAX;
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    if (pg.IsActive(vform, lane)) {
      const uint64_t candidate = src.Uint(vform, lane);
      const bool is_better = max ? (candidate > best) : (candidate < best);
      if (is_better) best = candidate;
    }
  }
  dst.ClearForWrite(ScalarFormatFromFormat(vform));
  dst.SetUint(vform, 0, best);
  return dst;
}
1482
1483
1484
// Unsigned maximum across vector, unpredicated: runs uminmaxv() with the
// predicate obtained from GetPTrue() and the maximum selected.
LogicVRegister Simulator::umaxv(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  const bool select_max = true;
  uminmaxv(vform, dst, GetPTrue(), src, select_max);
  return dst;
}
1490
1491
1492
// Unsigned minimum across vector, unpredicated: runs uminmaxv() with the
// predicate obtained from GetPTrue() and the minimum selected.
LogicVRegister Simulator::uminv(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  const bool select_max = false;
  uminmaxv(vform, dst, GetPTrue(), src, select_max);
  return dst;
}
1498
1499
1500
// Unsigned maximum across the lanes active in pg. Only SVE formats are
// accepted.
LogicVRegister Simulator::umaxv(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicPRegister& pg,
                                const LogicVRegister& src) {
  VIXL_ASSERT(IsSVEFormat(vform));
  const bool select_max = true;
  uminmaxv(vform, dst, pg, src, select_max);
  return dst;
}
1508
1509
1510
// Unsigned minimum across the lanes active in pg. Only SVE formats are
// accepted.
LogicVRegister Simulator::uminv(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicPRegister& pg,
                                const LogicVRegister& src) {
  VIXL_ASSERT(IsSVEFormat(vform));
  const bool select_max = false;
  uminmaxv(vform, dst, pg, src, select_max);
  return dst;
}
1518
1519
1520
// Shift left by immediate: broadcasts `shift` into a temporary register and
// applies the register-controlled ushl(). `shift` must be non-negative.
LogicVRegister Simulator::shl(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src,
                              int shift) {
  VIXL_ASSERT(shift >= 0);
  SimVRegister scratch;
  LogicVRegister shift_amounts = dup_immediate(vform, scratch, shift);
  return ushl(vform, dst, src, shift_amounts);
}
1529
1530
1531
// Signed shift left long: widens src with sxtl(), then shifts the widened
// lanes left by `shift` using sshl(). `shift` must be non-negative.
LogicVRegister Simulator::sshll(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                int shift) {
  VIXL_ASSERT(shift >= 0);
  SimVRegister shift_scratch, widen_scratch;
  LogicVRegister shift_amounts = dup_immediate(vform, shift_scratch, shift);
  LogicVRegister widened = sxtl(vform, widen_scratch, src);
  return sshl(vform, dst, widened, shift_amounts);
}
1541
1542
1543
// Signed shift left long (second part): widens src with sxtl2(), then shifts
// the widened lanes left by `shift` using sshl(). `shift` must be
// non-negative.
LogicVRegister Simulator::sshll2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src,
                                 int shift) {
  VIXL_ASSERT(shift >= 0);
  SimVRegister shift_scratch, widen_scratch;
  LogicVRegister shift_amounts = dup_immediate(vform, shift_scratch, shift);
  LogicVRegister widened = sxtl2(vform, widen_scratch, src);
  return sshl(vform, dst, widened, shift_amounts);
}
1553
1554
1555
// Shift left long by half the lane size of vform; implemented via sshll().
LogicVRegister Simulator::shll(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src) {
  const int half_lane_bits = LaneSizeInBitsFromFormat(vform) / 2;
  return sshll(vform, dst, src, half_lane_bits);
}
1561
1562
1563
// Shift left long (second part) by half the lane size of vform; implemented
// via sshll2().
LogicVRegister Simulator::shll2(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  const int half_lane_bits = LaneSizeInBitsFromFormat(vform) / 2;
  return sshll2(vform, dst, src, half_lane_bits);
}
1569
1570
1571
// Unsigned shift left long: widens src with uxtl(), then shifts the widened
// lanes left by `shift` using ushl(). `shift` must be non-negative.
LogicVRegister Simulator::ushll(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                int shift) {
  VIXL_ASSERT(shift >= 0);
  SimVRegister shift_scratch, widen_scratch;
  LogicVRegister shift_amounts = dup_immediate(vform, shift_scratch, shift);
  LogicVRegister widened = uxtl(vform, widen_scratch, src);
  return ushl(vform, dst, widened, shift_amounts);
}
1581
1582
1583
// Unsigned shift left long (second part): widens src with uxtl2(), then
// shifts the widened lanes left by `shift` using ushl(). `shift` must be
// non-negative.
LogicVRegister Simulator::ushll2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src,
                                 int shift) {
  VIXL_ASSERT(shift >= 0);
  SimVRegister shift_scratch, widen_scratch;
  LogicVRegister shift_amounts = dup_immediate(vform, shift_scratch, shift);
  LogicVRegister widened = uxtl2(vform, widen_scratch, src);
  return ushl(vform, dst, widened, shift_amounts);
}
1593
1594
std::pair<bool, uint64_t> Simulator::clast(VectorFormat vform,
1595
const LogicPRegister& pg,
1596
const LogicVRegister& src,
1597
int offset_from_last_active) {
1598
// Untested for any other values.
1599
VIXL_ASSERT((offset_from_last_active == 0) || (offset_from_last_active == 1));
1600
1601
int last_active = GetLastActive(vform, pg);
1602
int lane_count = LaneCountFromFormat(vform);
1603
int index =
1604
((last_active + offset_from_last_active) + lane_count) % lane_count;
1605
return std::make_pair(last_active >= 0, src.Uint(vform, index));
1606
}
1607
1608
// Packs the lanes of src that are active in pg into the lowest-numbered lanes
// of dst, preserving their order, and zeroes the remaining lanes.
LogicVRegister Simulator::compact(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicPRegister& pg,
                                  const LogicVRegister& src) {
  const int lane_count = LaneCountFromFormat(vform);
  int write_index = 0;

  for (int lane = 0; lane < lane_count; ++lane) {
    if (!pg.IsActive(vform, lane)) continue;
    dst.SetUint(vform, write_index, src.Uint(vform, lane));
    ++write_index;
  }

  // Zero-fill whatever was not written above.
  while (write_index < lane_count) {
    dst.SetUint(vform, write_index, 0);
    ++write_index;
  }
  return dst;
}
1623
1624
// Builds dst from the run of src1 lanes between the first and last active
// lanes of pg (inclusive), followed by the lowest-numbered lanes of src2 to
// fill the rest of the vector.
LogicVRegister Simulator::splice(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicPRegister& pg,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  const int lane_count = LaneCountFromFormat(vform);
  const int first_active = GetFirstActive(vform, pg);
  const int last_active = GetLastActive(vform, pg);
  uint64_t result[kZRegMaxSizeInBytes];
  int filled = 0;

  // A negative first_active skips the src1 segment entirely.
  if (first_active >= 0) {
    VIXL_ASSERT(last_active >= first_active);
    VIXL_ASSERT(last_active < lane_count);
    for (int lane = first_active; lane <= last_active; ++lane) {
      result[filled] = src1.Uint(vform, lane);
      ++filled;
    }
  }

  VIXL_ASSERT(filled <= lane_count);
  for (int out = filled, in = 0; out < lane_count; ++out, ++in) {
    result[out] = src2.Uint(vform, in);
  }

  dst.SetUintArray(vform, result);

  return dst;
}
1652
1653
// Lane-wise select: each lane of dst is taken from src1 when the predicate
// bit for that lane is set, and from src2 otherwise. The predicate bit for a
// lane is the one at index lane * (lane size / kZRegBitsPerPRegBit).
LogicVRegister Simulator::sel(VectorFormat vform,
                              LogicVRegister dst,
                              const SimPRegister& pg,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  const int pred_bits_per_lane =
      LaneSizeInBitsFromFormat(vform) / kZRegBitsPerPRegBit;
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    uint64_t chosen;
    if (pg.GetBit(lane * pred_bits_per_lane)) {
      chosen = src1.Uint(vform, lane);
    } else {
      chosen = src2.Uint(vform, lane);
    }
    dst.SetUint(vform, lane, chosen);
  }
  return dst;
}
1668
1669
1670
// Predicate select: each bit of dst is taken from src1 where the
// corresponding pg bit is set, and from src2 where it is clear. Processed one
// chunk at a time.
LogicPRegister Simulator::sel(LogicPRegister dst,
                              const LogicPRegister& pg,
                              const LogicPRegister& src1,
                              const LogicPRegister& src2) {
  for (int chunk = 0; chunk < dst.GetChunkCount(); chunk++) {
    const LogicPRegister::ChunkType select = pg.GetChunk(chunk);
    const LogicPRegister::ChunkType when_set = src1.GetChunk(chunk);
    const LogicPRegister::ChunkType when_clear = src2.GetChunk(chunk);
    LogicPRegister::ChunkType merged =
        (select & when_set) | (~select & when_clear);
    dst.SetChunk(chunk, merged);
  }
  return dst;
}
1682
1683
1684
// Shift left and insert: each lane of src is shifted left by `shift` and
// merged into dst, leaving the low `shift` bits of each dst lane unchanged.
LogicVRegister Simulator::sli(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src,
                              int shift) {
  dst.ClearForWrite(vform);
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    const uint64_t incoming = src.Uint(vform, lane) << shift;
    const uint64_t existing = dst.Uint(vform, lane);
    // Bits covered by this mask are replaced by the shifted source.
    const uint64_t insert_mask = MaxUintFromFormat(vform) << shift;
    const uint64_t preserved = existing & ~insert_mask;
    dst.SetUint(vform, lane, preserved | incoming);
  }
  return dst;
}
1699
1700
1701
// Signed saturating shift left by immediate: broadcasts `shift`, applies
// sshl(), then applies SignedSaturate() to the result. `shift` must be
// non-negative.
LogicVRegister Simulator::sqshl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                int shift) {
  VIXL_ASSERT(shift >= 0);
  SimVRegister scratch;
  LogicVRegister shift_amounts = dup_immediate(vform, scratch, shift);
  return sshl(vform, dst, src, shift_amounts).SignedSaturate(vform);
}
1710
1711
1712
// Unsigned saturating shift left by immediate: broadcasts `shift`, applies
// ushl(), then applies UnsignedSaturate() to the result. `shift` must be
// non-negative.
LogicVRegister Simulator::uqshl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                int shift) {
  VIXL_ASSERT(shift >= 0);
  SimVRegister scratch;
  LogicVRegister shift_amounts = dup_immediate(vform, scratch, shift);
  return ushl(vform, dst, src, shift_amounts).UnsignedSaturate(vform);
}
1721
1722
1723
// Signed saturating shift left unsigned by immediate: broadcasts `shift`,
// applies the signed sshl(), then applies UnsignedSaturate() to the result.
// `shift` must be non-negative.
LogicVRegister Simulator::sqshlu(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src,
                                 int shift) {
  VIXL_ASSERT(shift >= 0);
  SimVRegister scratch;
  LogicVRegister shift_amounts = dup_immediate(vform, scratch, shift);
  return sshl(vform, dst, src, shift_amounts).UnsignedSaturate(vform);
}
1732
1733
1734
// Shift right and insert: each lane of src is shifted right by `shift` and
// merged into dst, leaving the high `shift` bits of each dst lane unchanged.
// `shift` must be in the range 1..lane size.
LogicVRegister Simulator::sri(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src,
                              int shift) {
  dst.ClearForWrite(vform);
  const int lanes = LaneCountFromFormat(vform);
  VIXL_ASSERT((shift > 0) &&
              (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));

  // A 64-bit shift of a 64-bit value is not a valid C++ shift, so that case is
  // handled explicitly: nothing is inserted and dst is left as it is.
  const bool shifts_everything_out = (shift == 64);

  for (int lane = 0; lane < lanes; ++lane) {
    const uint64_t source_bits = src.Uint(vform, lane);
    const uint64_t existing = dst.Uint(vform, lane);
    uint64_t incoming = 0;
    uint64_t insert_mask = 0;
    if (!shifts_everything_out) {
      incoming = source_bits >> shift;
      insert_mask = MaxUintFromFormat(vform) >> shift;
    }
    dst.SetUint(vform, lane, (existing & ~insert_mask) | incoming);
  }
  return dst;
}
1758
1759
1760
// Unsigned shift right by immediate: broadcasts -shift into a temporary
// register and applies ushl(), which treats negative distances as right
// shifts. `shift` must be non-negative.
LogicVRegister Simulator::ushr(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src,
                               int shift) {
  VIXL_ASSERT(shift >= 0);
  SimVRegister scratch;
  LogicVRegister negated_amounts = dup_immediate(vform, scratch, -shift);
  return ushl(vform, dst, src, negated_amounts);
}
1769
1770
1771
// Signed shift right by immediate: broadcasts -shift into a temporary
// register and applies sshl(), which treats negative distances as right
// shifts. `shift` must be non-negative.
LogicVRegister Simulator::sshr(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src,
                               int shift) {
  VIXL_ASSERT(shift >= 0);
  SimVRegister scratch;
  LogicVRegister negated_amounts = dup_immediate(vform, scratch, -shift);
  return sshl(vform, dst, src, negated_amounts);
}
1780
1781
1782
// Signed shift right and accumulate: dst += (src >> shift), with the shift
// performed by sshr() into a temporary register.
LogicVRegister Simulator::ssra(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src,
                               int shift) {
  SimVRegister scratch;
  LogicVRegister addend = sshr(vform, scratch, src, shift);
  return add(vform, dst, dst, addend);
}
1790
1791
1792
// Unsigned shift right and accumulate: dst += (src >> shift), with the shift
// performed by ushr() into a temporary register.
LogicVRegister Simulator::usra(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src,
                               int shift) {
  SimVRegister scratch;
  LogicVRegister addend = ushr(vform, scratch, src, shift);
  return add(vform, dst, dst, addend);
}
1800
1801
1802
// Signed rounding shift right and accumulate: the sshr() result has Round()
// applied before being added to dst.
LogicVRegister Simulator::srsra(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                int shift) {
  SimVRegister scratch;
  LogicVRegister addend = sshr(vform, scratch, src, shift).Round(vform);
  return add(vform, dst, dst, addend);
}
1810
1811
1812
// Unsigned rounding shift right and accumulate: the ushr() result has Round()
// applied before being added to dst.
LogicVRegister Simulator::ursra(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                int shift) {
  SimVRegister scratch;
  LogicVRegister addend = ushr(vform, scratch, src, shift).Round(vform);
  return add(vform, dst, dst, addend);
}
1820
1821
1822
// Count leading sign bits: each lane of dst receives
// CountLeadingSignBits() of the corresponding src lane, evaluated at the lane
// width of vform.
LogicVRegister Simulator::cls(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src) {
  const int lane_bits = LaneSizeInBitsFromFormat(vform);
  const int lanes = LaneCountFromFormat(vform);

  // All counts are computed before dst is written. The buffer is large enough
  // to hold one entry per lane.
  int counts[kZRegMaxSizeInBytes];
  for (int lane = 0; lane < lanes; ++lane) {
    counts[lane] = CountLeadingSignBits(src.Int(vform, lane), lane_bits);
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    dst.SetUint(vform, lane, counts[lane]);
  }
  return dst;
}
1841
1842
1843
// Count leading zeros: each lane of dst receives CountLeadingZeros() of the
// corresponding src lane, evaluated at the lane width of vform.
LogicVRegister Simulator::clz(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src) {
  const int lane_bits = LaneSizeInBitsFromFormat(vform);
  const int lanes = LaneCountFromFormat(vform);

  // All counts are computed before dst is written. The buffer is large enough
  // to hold one entry per lane.
  int counts[kZRegMaxSizeInBytes];
  for (int lane = 0; lane < lanes; ++lane) {
    counts[lane] = CountLeadingZeros(src.Uint(vform, lane), lane_bits);
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    dst.SetUint(vform, lane, counts[lane]);
  }
  return dst;
}
1862
1863
1864
// Logical NOT per lane: a dst lane becomes 1 when the src lane is zero, and 0
// otherwise.
LogicVRegister Simulator::cnot(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    uint64_t flag = 0;
    if (src.Uint(vform, lane) == 0) {
      flag = 1;
    }
    dst.SetUint(vform, lane, flag);
  }
  return dst;
}
1874
1875
1876
// Population count: each lane of dst receives CountSetBits() of the
// corresponding src lane, evaluated at the lane width of vform.
LogicVRegister Simulator::cnt(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src) {
  const int lane_bits = LaneSizeInBitsFromFormat(vform);
  const int lanes = LaneCountFromFormat(vform);

  // All counts are computed before dst is written. The buffer is large enough
  // to hold one entry per lane.
  int counts[kZRegMaxSizeInBytes];
  for (int lane = 0; lane < lanes; ++lane) {
    counts[lane] = CountSetBits(src.Uint(vform, lane), lane_bits);
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    dst.SetUint(vform, lane, counts[lane]);
  }
  return dst;
}
1895
1896
static int64_t CalculateSignedShiftDistance(int64_t shift_val,
1897
int esize,
1898
bool shift_in_ls_byte) {
1899
if (shift_in_ls_byte) {
1900
// Neon uses the least-significant byte of the lane as the shift distance.
1901
shift_val = ExtractSignedBitfield64(7, 0, shift_val);
1902
} else {
1903
// SVE uses a saturated shift distance in the range
1904
// -(esize + 1) ... (esize + 1).
1905
if (shift_val > (esize + 1)) shift_val = esize + 1;
1906
if (shift_val < -(esize + 1)) shift_val = -(esize + 1);
1907
}
1908
return shift_val;
1909
}
1910
1911
// Signed register-controlled shift. Each lane of src1 is shifted by the
// signed distance derived from the matching lane of src2: positive distances
// shift left, negative distances shift right arithmetically. The saturation
// and rounding state of dst is recorded per lane alongside the shifted value.
LogicVRegister Simulator::sshl(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2,
                               bool shift_in_ls_byte) {
  dst.ClearForWrite(vform);
  const int lane_bits = LaneSizeInBitsFromFormat(vform);
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    const int64_t distance =
        CalculateSignedShiftDistance(src2.Int(vform, lane),
                                     lane_bits,
                                     shift_in_ls_byte);

    const int64_t justified = src1.IntLeftJustified(vform, lane);

    // Signed saturation: a non-zero value shifted left past its leading sign
    // bits saturates, in the direction given by its sign.
    if ((distance > CountLeadingSignBits(justified)) && (justified != 0)) {
      dst.SetSignedSat(lane, justified >= 0);
    }

    // Unsigned saturation: negative inputs are flagged with `false`; a
    // non-zero, non-negative value shifted left past its leading zeros is
    // flagged with `true`.
    if (justified < 0) {
      dst.SetUnsignedSat(lane, false);
    } else if ((distance > CountLeadingZeros(justified)) &&
               (justified != 0)) {
      dst.SetUnsignedSat(lane, true);
    }

    const int64_t value = src1.Int(vform, lane);
    const bool negative = value < 0;

    if (distance > 63) {
      // Everything is shifted out to the left.
      dst.SetInt(vform, lane, 0);
      continue;
    }
    if (distance < -63) {
      // Everything is shifted out to the right; only the sign remains.
      dst.SetRounding(lane, negative);
      dst.SetInt(vform, lane, negative ? -1 : 0);
      continue;
    }

    // Use unsigned types for shifts, as behaviour is undefined for signed
    // lhs.
    uint64_t bits = static_cast<uint64_t>(value);
    if (distance >= 0) {
      bits <<= distance;
    } else {
      // Convert to right shift.
      const int64_t right = -distance;

      // Set rounding state by testing most-significant bit shifted out.
      // Rounding only needed on right shifts.
      if (((bits >> (right - 1)) & 1) == 1) {
        dst.SetRounding(lane, true);
      }

      bits >>= right;

      if (negative) {
        // Simulate sign-extension.
        bits |= (~UINT64_C(0) << (64 - right));
      }
    }
    dst.SetUint(vform, lane, bits);
  }
  return dst;
}
1974
1975
1976
// Unsigned register-controlled shift. Each lane of src1 is shifted by the
// signed distance derived from the matching lane of src2: positive distances
// shift left, negative distances shift right logically. The saturation and
// rounding state of dst is recorded per lane alongside the shifted value.
LogicVRegister Simulator::ushl(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2,
                               bool shift_in_ls_byte) {
  dst.ClearForWrite(vform);
  const int lane_bits = LaneSizeInBitsFromFormat(vform);
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    const int64_t distance =
        CalculateSignedShiftDistance(src2.Int(vform, lane),
                                     lane_bits,
                                     shift_in_ls_byte);

    const uint64_t justified = src1.UintLeftJustified(vform, lane);

    // Saturation: a non-zero value shifted left past its leading zeros.
    if ((distance > CountLeadingZeros(justified)) && (justified != 0)) {
      dst.SetUnsignedSat(lane, true);
    }

    uint64_t bits = src1.Uint(vform, lane);
    if ((distance > 63) || (distance < -64)) {
      // Everything is shifted out.
      dst.SetUint(vform, lane, 0);
      continue;
    }

    if (distance >= 0) {
      bits <<= distance;
    } else {
      // Set rounding state. Rounding only needed on right shifts.
      if (((bits >> (-distance - 1)) & 1) == 1) {
        dst.SetRounding(lane, true);
      }

      // A right shift by exactly 64 is not a valid C++ shift, so produce the
      // zero result directly.
      if (distance == -64) {
        bits = 0;
      } else {
        bits >>= -distance;
      }
    }
    dst.SetUint(vform, lane, bits);
  }
  return dst;
}
2018
2019
// Signed shift right by a per-lane register amount: negates src2 into a
// temporary and performs sshl() with it, passing false for shift_in_ls_byte.
LogicVRegister Simulator::sshr(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  SimVRegister negated_amounts;
  // Saturate to sidestep the min-int problem.
  neg(vform, negated_amounts, src2).SignedSaturate(vform);
  const bool shift_in_ls_byte = false;
  sshl(vform, dst, src1, negated_amounts, shift_in_ls_byte);
  return dst;
}
2029
2030
// Unsigned shift right by a per-lane register amount: negates src2 into a
// temporary and performs ushl() with it, passing false for shift_in_ls_byte.
LogicVRegister Simulator::ushr(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  SimVRegister negated_amounts;
  // Saturate to sidestep the min-int problem.
  neg(vform, negated_amounts, src2).SignedSaturate(vform);
  const bool shift_in_ls_byte = false;
  ushl(vform, dst, src1, negated_amounts, shift_in_ls_byte);
  return dst;
}
2040
2041
// Lane-wise negation. A lane holding the minimum value for vform is flagged
// as signed-saturated. INT64_MIN is stored unchanged rather than negated,
// since negating it would overflow.
LogicVRegister Simulator::neg(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    const int64_t value = src.Int(vform, lane);

    // Test for signed saturation.
    if (value == MinIntFromFormat(vform)) {
      dst.SetSignedSat(lane, true);
    }

    int64_t negated = value;
    if (value != INT64_MIN) {
      negated = -value;
    }
    dst.SetInt(vform, lane, negated);
  }
  return dst;
}
2055
2056
2057
// Signed saturating accumulate of an unsigned value: each lane adds the
// unsigned src2 lane to the signed src1 lane. Overflow is detected on the
// left-justified operands; an overflowing lane is set to the maximum signed
// value for vform.
LogicVRegister Simulator::suqadd(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    const int64_t signed_lhs = src1.IntLeftJustified(vform, lane);
    const uint64_t unsigned_rhs = src2.UintLeftJustified(vform, lane);
    const uint64_t raw_sum = signed_lhs + unsigned_rhs;

    // Reinterpret the sum's bits as a signed value.
    int64_t signed_sum;
    memcpy(&signed_sum, &raw_sum, sizeof(signed_sum));

    const bool overflowed = signed_sum < signed_lhs;
    if (overflowed) {
      // Signed positive saturation.
      dst.SetInt(vform, lane, MaxIntFromFormat(vform));
    } else {
      dst.SetUint(vform,
                  lane,
                  src1.Int(vform, lane) + src2.Uint(vform, lane));
    }
  }
  return dst;
}
2077
2078
2079
// Unsigned saturating accumulate of a signed value: each lane adds the signed
// src2 lane to the unsigned src1 lane. Wrap-around is detected on the
// left-justified operands; the lane saturates to the maximum unsigned value
// for vform or to zero accordingly.
LogicVRegister Simulator::usqadd(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    const uint64_t unsigned_lhs = src1.UintLeftJustified(vform, lane);
    const int64_t signed_rhs = src2.IntLeftJustified(vform, lane);
    const uint64_t raw_sum = unsigned_lhs + signed_rhs;

    const bool wrapped_up = (signed_rhs > 0) && (raw_sum <= unsigned_lhs);
    const bool wrapped_down = (signed_rhs < 0) && (raw_sum >= unsigned_lhs);

    if (wrapped_up) {
      // Positive saturation.
      dst.SetUint(vform, lane, MaxUintFromFormat(vform));
    } else if (wrapped_down) {
      // Negative saturation.
      dst.SetUint(vform, lane, 0);
    } else {
      dst.SetUint(vform,
                  lane,
                  src1.Uint(vform, lane) + src2.Int(vform, lane));
    }
  }
  return dst;
}
2099
2100
2101
// Lane-wise absolute value. A lane holding the minimum value for vform is
// flagged as signed-saturated. INT64_MIN is stored unchanged rather than
// negated, since negating it would overflow.
LogicVRegister Simulator::abs(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    const int64_t value = src.Int(vform, lane);

    // Test for signed saturation.
    if (value == MinIntFromFormat(vform)) {
      dst.SetSignedSat(lane, true);
    }

    int64_t magnitude = value;
    if ((value < 0) && (value != INT64_MIN)) {
      magnitude = -value;
    }
    dst.SetInt(vform, lane, magnitude);
  }
  return dst;
}
2119
2120
2121
// Bitwise AND reduction over the lanes of src that are active in pg. The
// accumulator starts as the all-ones mask for the lane size, and the result is
// written to lane 0 of dst in the scalar format of the same lane size. Only
// SVE formats are accepted.
LogicVRegister Simulator::andv(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicPRegister& pg,
                               const LogicVRegister& src) {
  VIXL_ASSERT(IsSVEFormat(vform));

  uint64_t accumulator = GetUintMask(LaneSizeInBitsFromFormat(vform));
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    if (pg.IsActive(vform, lane)) {
      accumulator &= src.Uint(vform, lane);
    }
  }

  const VectorFormat scalar_form =
      ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
  dst.ClearForWrite(scalar_form);
  dst.SetUint(scalar_form, 0, accumulator);
  return dst;
}
2138
2139
2140
// Bitwise exclusive-OR reduction over the lanes of src that are active in pg.
// The accumulator starts at zero, and the result is written to lane 0 of dst
// in the scalar format of the same lane size. Only SVE formats are accepted.
LogicVRegister Simulator::eorv(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicPRegister& pg,
                               const LogicVRegister& src) {
  VIXL_ASSERT(IsSVEFormat(vform));

  uint64_t accumulator = 0;
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    if (pg.IsActive(vform, lane)) {
      accumulator ^= src.Uint(vform, lane);
    }
  }

  const VectorFormat scalar_form =
      ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
  dst.ClearForWrite(scalar_form);
  dst.SetUint(scalar_form, 0, accumulator);
  return dst;
}
2157
2158
2159
// Bitwise OR reduction over the lanes of src that are active in pg. The
// accumulator starts at zero, and the result is written to lane 0 of dst in
// the scalar format of the same lane size. Only SVE formats are accepted.
LogicVRegister Simulator::orv(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicPRegister& pg,
                              const LogicVRegister& src) {
  VIXL_ASSERT(IsSVEFormat(vform));

  uint64_t accumulator = 0;
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    if (pg.IsActive(vform, lane)) {
      accumulator |= src.Uint(vform, lane);
    }
  }

  const VectorFormat scalar_form =
      ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
  dst.ClearForWrite(scalar_form);
  dst.SetUint(scalar_form, 0, accumulator);
  return dst;
}
2176
2177
2178
// Signed add reduction over the lanes of src that are active in pg. The sum
// is written to lane 0 of dst as a D-sized value. Only SVE formats with lanes
// of at most kSRegSize bits are accepted.
LogicVRegister Simulator::saddv(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicPRegister& pg,
                                const LogicVRegister& src) {
  VIXL_ASSERT(IsSVEFormat(vform));
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) <= kSRegSize);

  int64_t total = 0;
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    if (pg.IsActive(vform, lane)) {
      // The destination register always has D-lane sizes and the source
      // register always has S-lanes or smaller, so signed integer overflow --
      // undefined behaviour -- can't occur.
      total += src.Int(vform, lane);
    }
  }

  dst.ClearForWrite(kFormatD);
  dst.SetInt(kFormatD, 0, total);
  return dst;
}
2198
2199
2200
// Unsigned add reduction. Sums the unsigned values of the lanes of `src` that
// are active in `pg` (modulo 2^64) and stores the total in lane 0 of `dst`
// (D format).
LogicVRegister Simulator::uaddv(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicPRegister& pg,
                                const LogicVRegister& src) {
  VIXL_ASSERT(IsSVEFormat(vform));
  const int lane_count = LaneCountFromFormat(vform);
  uint64_t sum = 0;
  for (int lane = 0; lane < lane_count; lane++) {
    if (pg.IsActive(vform, lane)) {
      sum += src.Uint(vform, lane);
    }
  }

  dst.ClearForWrite(kFormatD);
  dst.SetUint(kFormatD, 0, sum);
  return dst;
}
2216
2217
2218
// Narrows each lane of `src`, read at twice the lane width of `dstform`, down
// to the lane width of `dstform` by truncation. Signed and unsigned saturation
// conditions are recorded per lane on `dst` (SetSignedSat / SetUnsignedSat) so
// that a caller can apply saturation afterwards. For the 16B, 8H and 4S
// destination formats, results are written to the upper half of the
// destination lanes.
LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
                                        LogicVRegister dst,
                                        bool dst_is_signed,
                                        const LogicVRegister& src,
                                        bool src_is_signed) {
  const bool upperhalf = (dstform == kFormat16B) || (dstform == kFormat8H) ||
                         (dstform == kFormat4S);
  // Upper-half forms go through VectorFormatHalfLanes before the lane width
  // is doubled to obtain the source format.
  const VectorFormat srcform = VectorFormatDoubleWidth(
      upperhalf ? VectorFormatHalfLanes(dstform) : dstform);
  // First destination lane to write.
  const int offset = upperhalf ? (LaneCountFromFormat(dstform) / 2) : 0;

  LogicVRegister src_copy = src;

  const int src_lane_count = LaneCountFromFormat(srcform);
  for (int i = 0; i < src_lane_count; i++) {
    const int dst_lane = offset + i;
    const int64_t ssrc = src_copy.Int(srcform, i);
    const uint64_t usrc = src_copy.Uint(srcform, i);

    // Test for signed saturation
    if (ssrc > MaxIntFromFormat(dstform)) {
      dst.SetSignedSat(dst_lane, true);
    } else if (ssrc < MinIntFromFormat(dstform)) {
      dst.SetSignedSat(dst_lane, false);
    }

    // Test for unsigned saturation
    if (!src_is_signed) {
      if (usrc > MaxUintFromFormat(dstform)) {
        dst.SetUnsignedSat(dst_lane, true);
      }
    } else if (ssrc > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
      dst.SetUnsignedSat(dst_lane, true);
    } else if (ssrc < 0) {
      dst.SetUnsignedSat(dst_lane, false);
    }

    // Truncate to the destination lane width.
    const int64_t result = src_is_signed
                               ? (ssrc & MaxUintFromFormat(dstform))
                               : (usrc & MaxUintFromFormat(dstform));

    if (dst_is_signed) {
      dst.SetInt(dstform, dst_lane, result);
    } else {
      dst.SetUint(dstform, dst_lane, result);
    }
  }

  // Clear any bits beyond a Q register for upper-half forms; otherwise clear
  // according to the destination format.
  dst.ClearForWrite(upperhalf ? kFormat16B : dstform);
  return dst;
}
2287
2288
2289
// Narrowing extract with both destination and source treated as signed. No
// saturation is applied to the result.
LogicVRegister Simulator::xtn(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src) {
  const bool dst_is_signed = true;
  const bool src_is_signed = true;
  return extractnarrow(vform, dst, dst_is_signed, src, src_is_signed);
}
2294
2295
2296
// Narrowing extract with signed destination and signed source, followed by
// signed saturation of the result.
LogicVRegister Simulator::sqxtn(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  const bool dst_is_signed = true;
  const bool src_is_signed = true;
  return extractnarrow(vform, dst, dst_is_signed, src, src_is_signed)
      .SignedSaturate(vform);
}
2301
2302
2303
// Narrowing extract with unsigned destination and signed source, followed by
// unsigned saturation of the result.
LogicVRegister Simulator::sqxtun(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src) {
  const bool dst_is_signed = false;
  const bool src_is_signed = true;
  return extractnarrow(vform, dst, dst_is_signed, src, src_is_signed)
      .UnsignedSaturate(vform);
}
2308
2309
2310
// Narrowing extract with unsigned destination and unsigned source, followed by
// unsigned saturation of the result.
LogicVRegister Simulator::uqxtn(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  const bool dst_is_signed = false;
  const bool src_is_signed = false;
  return extractnarrow(vform, dst, dst_is_signed, src, src_is_signed)
      .UnsignedSaturate(vform);
}
2315
2316
2317
// Lane-wise absolute difference of `src1` and `src2`. `is_signed` selects
// whether the lanes are compared as signed or unsigned values when deciding
// which operand is larger.
LogicVRegister Simulator::absdiff(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2,
                                  bool is_signed) {
  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; lane++) {
    bool src1_is_larger;
    if (is_signed) {
      src1_is_larger = src1.Int(vform, lane) > src2.Int(vform, lane);
    } else {
      src1_is_larger = src1.Uint(vform, lane) > src2.Uint(vform, lane);
    }
    // The subtraction itself is always unsigned, so it cannot trigger signed
    // overflow; the larger operand is the minuend.
    const uint64_t a = src1.Uint(vform, lane);
    const uint64_t b = src2.Uint(vform, lane);
    dst.SetUint(vform, lane, src1_is_larger ? (a - b) : (b - a));
  }
  return dst;
}
2336
2337
2338
// Signed absolute difference and accumulate: computes the lane-wise absolute
// difference of `src1` and `src2` (signed comparison) and adds it to `dst`.
LogicVRegister Simulator::saba(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  const bool is_signed = true;
  SimVRegister abs_diff;
  dst.ClearForWrite(vform);
  absdiff(vform, abs_diff, src1, src2, is_signed);
  add(vform, dst, dst, abs_diff);
  return dst;
}
2348
2349
2350
// Unsigned absolute difference and accumulate: computes the lane-wise absolute
// difference of `src1` and `src2` (unsigned comparison) and adds it to `dst`.
LogicVRegister Simulator::uaba(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  const bool is_signed = false;
  SimVRegister abs_diff;
  dst.ClearForWrite(vform);
  absdiff(vform, abs_diff, src1, src2, is_signed);
  add(vform, dst, dst, abs_diff);
  return dst;
}
2360
2361
2362
// Lane-wise bitwise NOT of `src`, written to `dst`.
LogicVRegister Simulator::not_(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; lane++) {
    const uint64_t inverted = ~src.Uint(vform, lane);
    dst.SetUint(vform, lane, inverted);
  }
  return dst;
}
2371
2372
2373
// Reverses the order of the bits within each lane of `src`. All results are
// computed into a scratch buffer before `dst` is written.
LogicVRegister Simulator::rbit(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src) {
  uint64_t result[kZRegMaxSizeInBytes];
  const int lane_count = LaneCountFromFormat(vform);
  const int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
  for (int lane = 0; lane < lane_count; lane++) {
    uint64_t remaining = src.Uint(vform, lane);
    uint64_t reversed = 0;
    // Move bits one at a time from the bottom of `remaining` to the bottom of
    // `reversed`, shifting the bits already collected upwards.
    for (int bit = 0; bit < lane_size_in_bits; bit++) {
      reversed = (reversed << 1) | (remaining & 1);
      remaining >>= 1;
    }
    result[lane] = reversed;
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, result[lane]);
  }
  return dst;
}
2397
2398
2399
// Reverses the order of the lanes of `src` by exchanging lane i with lane
// (lane_count - 1 - i) for the first half of the lanes.
LogicVRegister Simulator::rev(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src) {
  VIXL_ASSERT(IsSVEFormat(vform));
  const int lane_count = LaneCountFromFormat(vform);
  for (int lo = 0, hi = lane_count - 1; lo < lane_count / 2; lo++, hi--) {
    // Read both lanes before writing either.
    const uint64_t lo_value = src.Uint(vform, lo);
    const uint64_t hi_value = src.Uint(vform, hi);
    dst.SetUint(vform, lo, hi_value);
    dst.SetUint(vform, hi, lo_value);
  }
  return dst;
}
2411
2412
2413
// Reverses the order of the lanes within each group of `rev_size` bytes.
// `rev_size` is divided by the lane size in bytes to get the number of lanes
// per group. Results are staged in a scratch buffer before `dst` is written.
LogicVRegister Simulator::rev_byte(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src,
                                   int rev_size) {
  uint64_t result[kZRegMaxSizeInBytes] = {};
  const int lane_count = LaneCountFromFormat(vform);
  const int lane_size = LaneSizeInBytesFromFormat(vform);
  const int lanes_per_group = rev_size / lane_size;
  for (int base = 0; base < lane_count; base += lanes_per_group) {
    const int last = base + lanes_per_group - 1;
    for (int j = 0; j < lanes_per_group; j++) {
      result[last - j] = src.Uint(vform, base + j);
    }
  }
  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, result[lane]);
  }
  return dst;
}
2432
2433
2434
// Reverses the lanes within each 2-byte group of `src`.
LogicVRegister Simulator::rev16(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  const int group_size_in_bytes = 2;
  return rev_byte(vform, dst, src, group_size_in_bytes);
}
2439
2440
2441
// Reverses the lanes within each 4-byte group of `src`.
LogicVRegister Simulator::rev32(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  const int group_size_in_bytes = 4;
  return rev_byte(vform, dst, src, group_size_in_bytes);
}
2446
2447
2448
// Reverses the lanes within each 8-byte group of `src`.
LogicVRegister Simulator::rev64(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  const int group_size_in_bytes = 8;
  return rev_byte(vform, dst, src, group_size_in_bytes);
}
2453
2454
// Pairwise long add. Each destination lane (format `vform`) receives the sum
// of two adjacent source lanes, read at half the lane width. `is_signed`
// selects signed or unsigned reads of the source lanes; `do_accumulate` adds
// the existing destination lane to the sum.
LogicVRegister Simulator::addlp(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                bool is_signed,
                                bool do_accumulate) {
  const VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vformsrc) <= kSRegSize);

  uint64_t result[kZRegMaxSizeInBytes];
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; lane++) {
    const int even = 2 * lane;
    const int odd = even + 1;
    if (!is_signed) {
      result[lane] = src.Uint(vformsrc, even) + src.Uint(vformsrc, odd);
    } else {
      result[lane] = static_cast<uint64_t>(src.Int(vformsrc, even) +
                                           src.Int(vformsrc, odd));
    }
  }

  // All pair sums are computed above, before any destination lane is written.
  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    uint64_t value = result[lane];
    if (do_accumulate) {
      value += dst.Uint(vform, lane);
    }
    dst.SetUint(vform, lane, value);
  }

  return dst;
}
2483
2484
2485
// Signed pairwise long add, without accumulation.
LogicVRegister Simulator::saddlp(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src) {
  const bool is_signed = true;
  const bool do_accumulate = false;
  return addlp(vform, dst, src, is_signed, do_accumulate);
}
2490
2491
2492
// Unsigned pairwise long add, without accumulation.
LogicVRegister Simulator::uaddlp(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src) {
  const bool is_signed = false;
  const bool do_accumulate = false;
  return addlp(vform, dst, src, is_signed, do_accumulate);
}
2497
2498
2499
// Signed pairwise long add, accumulating into the destination.
LogicVRegister Simulator::sadalp(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src) {
  const bool is_signed = true;
  const bool do_accumulate = true;
  return addlp(vform, dst, src, is_signed, do_accumulate);
}
2504
2505
2506
// Unsigned pairwise long add, accumulating into the destination.
LogicVRegister Simulator::uadalp(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src) {
  const bool is_signed = false;
  const bool do_accumulate = true;
  return addlp(vform, dst, src, is_signed, do_accumulate);
}
2511
2512
// Rotates each lane of `src` right by `rotation` bits, within the lane width
// of `vform`.
LogicVRegister Simulator::ror(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src,
                              int rotation) {
  const int lane_width = LaneSizeInBitsFromFormat(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; lane++) {
    const uint64_t original = src.Uint(vform, lane);
    dst.SetUint(vform, lane, RotateRight(original, rotation, lane_width));
  }
  return dst;
}
2523
2524
// Extracts a vector from the concatenation of `src1` and `src2`. The result
// takes lanes [index, lane_count) of `src1` followed by lanes [0, index) of
// `src2`. `index` is counted in lanes of `vform`.
//
// Results are staged in a scratch buffer so that `dst` may alias either
// source. The buffer holds full 64-bit lane values, so formats with lanes
// wider than a byte are not truncated.
LogicVRegister Simulator::ext(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2,
                              int index) {
  uint64_t result[kZRegMaxSizeInBytes] = {};
  const int lane_count = LaneCountFromFormat(vform);
  // Number of lanes taken from the top of src1.
  const int from_src1 = lane_count - index;
  for (int i = 0; i < from_src1; ++i) {
    result[i] = src1.Uint(vform, i + index);
  }
  // The remaining lanes come from the bottom of src2.
  for (int i = 0; i < index; ++i) {
    result[from_src1 + i] = src2.Uint(vform, i);
  }
  dst.ClearForWrite(vform);
  for (int i = 0; i < lane_count; ++i) {
    dst.SetUint(vform, i, result[i]);
  }
  return dst;
}
2543
2544
// Rotates the lanes of `src` by `index` lanes, implemented as a byte-wise
// `ext` of `src` with itself. A negative `index` is first biased by the lane
// count, so it must lie in (-lane_count, lane_count).
LogicVRegister Simulator::rotate_elements_right(VectorFormat vform,
                                                LogicVRegister dst,
                                                const LogicVRegister& src,
                                                int index) {
  const int lane_count = LaneCountFromFormat(vform);
  if (index < 0) {
    index += lane_count;
  }
  VIXL_ASSERT((index >= 0) && (index < lane_count));
  // Convert the lane index into a byte index for the byte-wise extract.
  const int byte_index = index * LaneSizeInBytesFromFormat(vform);
  return ext(kFormatVnB, dst, src, src, byte_index);
}
2553
2554
2555
template <typename T>
2556
LogicVRegister Simulator::fadda(VectorFormat vform,
2557
LogicVRegister acc,
2558
const LogicPRegister& pg,
2559
const LogicVRegister& src) {
2560
T result = acc.Float<T>(0);
2561
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2562
if (!pg.IsActive(vform, i)) continue;
2563
2564
result = FPAdd(result, src.Float<T>(i));
2565
}
2566
VectorFormat vform_dst =
2567
ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2568
acc.ClearForWrite(vform_dst);
2569
acc.SetFloat(0, result);
2570
return acc;
2571
}
2572
2573
// Dispatches the floating-point add reduction to the instantiation matching
// the lane size of `vform` (half, single or double precision).
LogicVRegister Simulator::fadda(VectorFormat vform,
                                LogicVRegister acc,
                                const LogicPRegister& pg,
                                const LogicVRegister& src) {
  const unsigned lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
  if (lane_size_in_bits == kHRegSize) {
    fadda<SimFloat16>(vform, acc, pg, src);
  } else if (lane_size_in_bits == kSRegSize) {
    fadda<float>(vform, acc, pg, src);
  } else if (lane_size_in_bits == kDRegSize) {
    fadda<double>(vform, acc, pg, src);
  } else {
    VIXL_UNREACHABLE();
  }
  return acc;
}
2592
2593
template <typename T>
2594
LogicVRegister Simulator::fcadd(VectorFormat vform,
2595
LogicVRegister dst, // d
2596
const LogicVRegister& src1, // n
2597
const LogicVRegister& src2, // m
2598
int rot) {
2599
int elements = LaneCountFromFormat(vform);
2600
2601
T element1, element3;
2602
rot = (rot == 1) ? 270 : 90;
2603
2604
// Loop example:
2605
// 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
2606
// 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
2607
2608
for (int e = 0; e <= (elements / 2) - 1; e++) {
2609
switch (rot) {
2610
case 90:
2611
element1 = FPNeg(src2.Float<T>(e * 2 + 1));
2612
element3 = src2.Float<T>(e * 2);
2613
break;
2614
case 270:
2615
element1 = src2.Float<T>(e * 2 + 1);
2616
element3 = FPNeg(src2.Float<T>(e * 2));
2617
break;
2618
default:
2619
VIXL_UNREACHABLE();
2620
return dst; // prevents "element(n) may be unintialized" errors
2621
}
2622
dst.ClearForWrite(vform);
2623
dst.SetFloat<T>(e * 2, FPAdd(src1.Float<T>(e * 2), element1));
2624
dst.SetFloat<T>(e * 2 + 1, FPAdd(src1.Float<T>(e * 2 + 1), element3));
2625
}
2626
return dst;
2627
}
2628
2629
2630
// Dispatches the floating-point complex add to the instantiation matching the
// lane size of `vform`. Any size other than H or S is asserted to be D.
LogicVRegister Simulator::fcadd(VectorFormat vform,
                                LogicVRegister dst,          // d
                                const LogicVRegister& src1,  // n
                                const LogicVRegister& src2,  // m
                                int rot) {
  switch (LaneSizeInBitsFromFormat(vform)) {
    case kHRegSize:
      fcadd<SimFloat16>(vform, dst, src1, src2, rot);
      break;
    case kSRegSize:
      fcadd<float>(vform, dst, src1, src2, rot);
      break;
    default:
      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
      fcadd<double>(vform, dst, src1, src2, rot);
      break;
  }
  return dst;
}
2645
2646
template <typename T>
2647
LogicVRegister Simulator::fcmla(VectorFormat vform,
2648
LogicVRegister dst,
2649
const LogicVRegister& src1,
2650
const LogicVRegister& src2,
2651
const LogicVRegister& acc,
2652
int index,
2653
int rot) {
2654
int elements = LaneCountFromFormat(vform);
2655
2656
T element1, element2, element3, element4;
2657
rot *= 90;
2658
2659
// Loop example:
2660
// 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
2661
// 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
2662
2663
for (int e = 0; e <= (elements / 2) - 1; e++) {
2664
// Index == -1 indicates a vector/vector rather than vector/indexed-element
2665
// operation.
2666
int f = (index < 0) ? e : index;
2667
2668
switch (rot) {
2669
case 0:
2670
element1 = src2.Float<T>(f * 2);
2671
element2 = src1.Float<T>(e * 2);
2672
element3 = src2.Float<T>(f * 2 + 1);
2673
element4 = src1.Float<T>(e * 2);
2674
break;
2675
case 90:
2676
element1 = FPNeg(src2.Float<T>(f * 2 + 1));
2677
element2 = src1.Float<T>(e * 2 + 1);
2678
element3 = src2.Float<T>(f * 2);
2679
element4 = src1.Float<T>(e * 2 + 1);
2680
break;
2681
case 180:
2682
element1 = FPNeg(src2.Float<T>(f * 2));
2683
element2 = src1.Float<T>(e * 2);
2684
element3 = FPNeg(src2.Float<T>(f * 2 + 1));
2685
element4 = src1.Float<T>(e * 2);
2686
break;
2687
case 270:
2688
element1 = src2.Float<T>(f * 2 + 1);
2689
element2 = src1.Float<T>(e * 2 + 1);
2690
element3 = FPNeg(src2.Float<T>(f * 2));
2691
element4 = src1.Float<T>(e * 2 + 1);
2692
break;
2693
default:
2694
VIXL_UNREACHABLE();
2695
return dst; // prevents "element(n) may be unintialized" errors
2696
}
2697
dst.ClearForWrite(vform);
2698
dst.SetFloat<T>(vform,
2699
e * 2,
2700
FPMulAdd(acc.Float<T>(e * 2), element2, element1));
2701
dst.SetFloat<T>(vform,
2702
e * 2 + 1,
2703
FPMulAdd(acc.Float<T>(e * 2 + 1), element4, element3));
2704
}
2705
return dst;
2706
}
2707
2708
// Vector/vector floating-point complex multiply-accumulate with an explicit
// accumulator. Dispatches on the lane size of `vform`; any size other than H
// or S uses the double-precision instantiation.
LogicVRegister Simulator::fcmla(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2,
                                const LogicVRegister& acc,
                                int rot) {
  // A negative index selects the vector/vector form of the template.
  const int no_index = -1;
  switch (LaneSizeInBitsFromFormat(vform)) {
    case kHRegSize:
      fcmla<SimFloat16>(vform, dst, src1, src2, acc, no_index, rot);
      break;
    case kSRegSize:
      fcmla<float>(vform, dst, src1, src2, acc, no_index, rot);
      break;
    default:
      fcmla<double>(vform, dst, src1, src2, acc, no_index, rot);
      break;
  }
  return dst;
}
2723
2724
2725
// Indexed floating-point complex multiply-accumulate. `dst` is used as both
// accumulator and destination. Dispatches on the lane size of `vform`; any
// size other than H or S uses the double-precision instantiation.
LogicVRegister Simulator::fcmla(VectorFormat vform,
                                LogicVRegister dst,          // d
                                const LogicVRegister& src1,  // n
                                const LogicVRegister& src2,  // m
                                int index,
                                int rot) {
  switch (LaneSizeInBitsFromFormat(vform)) {
    case kHRegSize:
      fcmla<SimFloat16>(vform, dst, src1, src2, dst, index, rot);
      break;
    case kSRegSize:
      fcmla<float>(vform, dst, src1, src2, dst, index, rot);
      break;
    default:
      fcmla<double>(vform, dst, src1, src2, dst, index, rot);
      break;
  }
  return dst;
}
2740
2741
// Integer complex add with rotation. Lanes are treated as (real, imaginary)
// pairs, which are split with uzp1/uzp2, combined, and re-interleaved with
// zip1. `rot` must be 90 or 270:
//
//   90:  real = src1.real - src2.imag;  imag = src1.imag + src2.real
//   270: real = src1.real + src2.imag;  imag = src1.imag - src2.real
//
// When `saturate` is set, each result is signed-saturated.
LogicVRegister Simulator::cadd(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2,
                               int rot,
                               bool saturate) {
  SimVRegister src1_r, src1_i;
  SimVRegister src2_r, src2_i;
  SimVRegister zero;
  zero.Clear();

  // De-interleave the real (even) and imaginary (odd) lanes of each source.
  uzp1(vform, src1_r, src1, zero);
  uzp2(vform, src1_i, src1, zero);
  uzp1(vform, src2_r, src2, zero);
  uzp2(vform, src2_i, src2, zero);

  const bool is_rot_90 = (rot == 90);
  VIXL_ASSERT(is_rot_90 || (rot == 270));

  if (is_rot_90) {
    LogicVRegister real = sub(vform, src1_r, src1_r, src2_i);
    if (saturate) real.SignedSaturate(vform);
    LogicVRegister imag = add(vform, src1_i, src1_i, src2_r);
    if (saturate) imag.SignedSaturate(vform);
  } else {
    LogicVRegister real = add(vform, src1_r, src1_r, src2_i);
    if (saturate) real.SignedSaturate(vform);
    LogicVRegister imag = sub(vform, src1_i, src1_i, src2_r);
    if (saturate) imag.SignedSaturate(vform);
  }

  // Re-interleave the real and imaginary results.
  zip1(vform, dst, src1_r, src1_i);
  return dst;
}
2778
2779
// Integer complex multiply-accumulate with rotation. Lanes are treated as
// (real, imaginary) pairs, split with uzp1/uzp2 and re-interleaved with zip1.
// For rotations 0 and 180 the real component of `src1` is the multiplicand;
// otherwise its imaginary component is. The products are added to or
// subtracted from the real and imaginary parts of `srca`:
//
//   real part: subtracted for 90 and 180, added otherwise
//   imag part: subtracted for 180 and 270, added otherwise
LogicVRegister Simulator::cmla(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& srca,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2,
                               int rot) {
  SimVRegister src1_a;
  SimVRegister src2_a, src2_b;
  SimVRegister srca_i, srca_r;
  SimVRegister zero, temp;
  zero.Clear();

  const bool use_real_of_src1 = (rot == 0) || (rot == 180);
  if (use_real_of_src1) {
    uzp1(vform, src1_a, src1, zero);
    uzp1(vform, src2_a, src2, zero);
    uzp2(vform, src2_b, src2, zero);
  } else {
    uzp2(vform, src1_a, src1, zero);
    uzp2(vform, src2_a, src2, zero);
    uzp1(vform, src2_b, src2, zero);
  }

  uzp1(vform, srca_r, srca, zero);
  uzp2(vform, srca_i, srca, zero);

  const bool sub_r = (rot == 90) || (rot == 180);
  const bool sub_i = (rot == 180) || (rot == 270);

  // Real part of the result.
  mul(vform, temp, src1_a, src2_a);
  if (!sub_r) {
    add(vform, srca_r, srca_r, temp);
  } else {
    sub(vform, srca_r, srca_r, temp);
  }

  // Imaginary part of the result.
  mul(vform, temp, src1_a, src2_b);
  if (!sub_i) {
    add(vform, srca_i, srca_i, temp);
  } else {
    sub(vform, srca_i, srca_i, temp);
  }

  zip1(vform, dst, srca_r, srca_i);
  return dst;
}
2824
2825
// Indexed integer complex multiply-accumulate. The element of `src2` selected
// by `index` is duplicated across each segment at double the lane width of
// `vform`, and the vector form of cmla is applied to the result.
LogicVRegister Simulator::cmla(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& srca,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2,
                               int index,
                               int rot) {
  SimVRegister duplicated;
  const VectorFormat pair_form = VectorFormatDoubleWidth(vform);
  dup_elements_to_segments(pair_form, duplicated, src2, index);
  return cmla(vform, dst, srca, src1, duplicated, rot);
}
2836
2837
// Bit group / bit extract, applied to each lane independently.
//
// The bits of `src1` are split into two groups according to the mask in
// `src2`. Bits whose mask bit is set are packed, in order, into the low end of
// a "low" group; bits whose mask bit is clear are packed into a "high" group.
//
//   do_bext == false: the high group is placed directly above the low group.
//   do_bext == true:  only the low group is written; the rest is zero.
LogicVRegister Simulator::bgrp(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2,
                               bool do_bext) {
  const unsigned lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    uint64_t value = src1.Uint(vform, i);
    uint64_t mask = src2.Uint(vform, i);
    int high_pos = 0;
    int low_pos = 0;
    uint64_t result_high = 0;
    uint64_t result_low = 0;
    for (unsigned j = 0; j < lane_size_in_bits; j++) {
      if ((mask & 1) == 0) {
        result_high |= (value & 1) << high_pos;
        high_pos++;
      } else {
        result_low |= (value & 1) << low_pos;
        low_pos++;
      }
      mask >>= 1;
      value >>= 1;
    }

    // Merge the high group above the low group. If every mask bit of a 64-bit
    // lane is set, low_pos is 64 and shifting a uint64_t by 64 is undefined
    // behaviour. In that case the high group is empty (zero), so skipping the
    // merge whenever it is zero avoids the out-of-range shift without
    // changing any result.
    if (!do_bext && (result_high != 0)) {
      result_low |= result_high << low_pos;
    }

    dst.SetUint(vform, i, result_low);
  }
  return dst;
}
2869
2870
// Bit deposit, applied to each lane independently. The low-order bits of
// `src1` are scattered, in order, to the bit positions where the mask in
// `src2` is set. Result bits at positions where the mask is clear are zero.
LogicVRegister Simulator::bdep(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  const int lane_count = LaneCountFromFormat(vform);
  const unsigned lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
  for (int lane = 0; lane < lane_count; lane++) {
    uint64_t pending = src1.Uint(vform, lane);
    uint64_t mask = src2.Uint(vform, lane);
    uint64_t deposited = 0;
    for (unsigned bit = 0; bit < lane_size_in_bits; bit++, mask >>= 1) {
      if ((mask & 1) != 1) continue;
      // Consume the next source bit and place it at this mask position.
      deposited |= (pending & 1) << bit;
      pending >>= 1;
    }
    dst.SetUint(vform, lane, deposited);
  }
  return dst;
}
2889
2890
// Histogram count. For each lane i of `src1`, counts the lanes of `src2` that
// hold the same value and writes the count to lane i of `dst`.
//
//   do_segmented == false: lanes 0..i of `src2` are searched.
//   do_segmented == true:  the lanes of `src2` in the same 128-bit segment as
//                          lane i are searched.
//
// A candidate lane j (relative to the start of the searched range) is only
// counted if `pg.IsActive(vform, j)`.
LogicVRegister Simulator::histogram(VectorFormat vform,
                                    LogicVRegister dst,
                                    const LogicPRegister& pg,
                                    const LogicVRegister& src1,
                                    const LogicVRegister& src2,
                                    bool do_segmented) {
  const int elements_per_segment = kQRegSize / LaneSizeInBitsFromFormat(vform);
  uint64_t result[kZRegMaxSizeInBytes];

  const int lane_count = LaneCountFromFormat(vform);
  for (int i = 0; i < lane_count; i++) {
    const uint64_t needle = src1.Uint(vform, i);

    // Work out which lanes of src2 to search for this element.
    int first_lane = 0;
    int search_length = i + 1;
    if (do_segmented) {
      first_lane = (i / elements_per_segment) * elements_per_segment;
      search_length = elements_per_segment;
    }

    uint64_t matches = 0;
    for (int j = 0; j < search_length; j++) {
      if (!pg.IsActive(vform, j)) continue;
      if (needle == src2.Uint(vform, j + first_lane)) {
        matches++;
      }
    }
    result[i] = matches;
  }
  dst.SetUintArray(vform, result);
  return dst;
}
2917
2918
// Duplicates element `src_index` of `src` into every lane of `dst`. Elements
// wider than 64 bits (VnQ and VnO formats) are handled as groups of 64-bit
// parts.
LogicVRegister Simulator::dup_element(VectorFormat vform,
                                      LogicVRegister dst,
                                      const LogicVRegister& src,
                                      int src_index) {
  const bool is_wider_than_64_bits =
      (vform == kFormatVnQ) || (vform == kFormatVnO);
  if (!is_wider_than_64_bits) {
    const int lane_count = LaneCountFromFormat(vform);
    const uint64_t value = src.Uint(vform, src_index);
    dst.ClearForWrite(vform);
    for (int lane = 0; lane < lane_count; ++lane) {
      dst.SetUint(vform, lane, value);
    }
    return dst;
  }

  // When duplicating an element larger than 64 bits, split the element into
  // 64-bit parts, and duplicate the parts across the destination.
  uint64_t parts[4];
  const int part_count = (vform == kFormatVnQ) ? 2 : 4;
  for (int part = 0; part < part_count; part++) {
    parts[part] = src.Uint(kFormatVnD, (src_index * part_count) + part);
  }
  dst.Clear();
  const int d_lane_count = LaneCountFromFormat(vform) * part_count;
  for (int d_lane = 0; d_lane < d_lane_count; d_lane++) {
    dst.SetUint(kFormatVnD, d_lane, parts[d_lane % part_count]);
  }
  return dst;
}
2944
2945
// Within each segment of `src`, duplicates the lane at position `src_index`
// (relative to the segment start) across every lane of the same segment in
// `dst`. `src_index` must be a valid lane index within a segment.
LogicVRegister Simulator::dup_elements_to_segments(VectorFormat vform,
                                                   LogicVRegister dst,
                                                   const LogicVRegister& src,
                                                   int src_index) {
  // In SVE, a segment is a 128-bit portion of a vector, like a Q register,
  // whereas in NEON, the size of segment is equal to the size of register
  // itself.
  const int segment_size =
      std::min(kQRegSize, RegisterSizeInBitsFromFormat(vform));
  VIXL_ASSERT(IsMultiple(segment_size, LaneSizeInBitsFromFormat(vform)));
  const int lanes_per_segment =
      segment_size / LaneSizeInBitsFromFormat(vform);

  VIXL_ASSERT(src_index >= 0);
  VIXL_ASSERT(src_index < lanes_per_segment);

  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int segment_start = 0; segment_start < lane_count;
       segment_start += lanes_per_segment) {
    // Read the selected lane of this segment before overwriting the segment.
    const uint64_t selected = src.Uint(vform, segment_start + src_index);
    for (int lane = 0; lane < lanes_per_segment; lane++) {
      dst.SetUint(vform, segment_start + lane, selected);
    }
  }
  return dst;
}
2968
2969
// Convenience overload taking a (register code, element index) pair. The
// register is read with ReadVRegister and forwarded, together with the index,
// to the overload that takes a source register.
LogicVRegister Simulator::dup_elements_to_segments(
    VectorFormat vform,
    LogicVRegister dst,
    const std::pair<int, int>& src_and_index) {
  const int src_code = src_and_index.first;
  const int src_index = src_and_index.second;
  return dup_elements_to_segments(vform,
                                  dst,
                                  ReadVRegister(src_code),
                                  src_index);
}
2978
2979
// Writes `imm`, masked with MaxUintFromFormat(vform), to every lane of `dst`.
LogicVRegister Simulator::dup_immediate(VectorFormat vform,
                                        LogicVRegister dst,
                                        uint64_t imm) {
  const uint64_t lane_value = imm & MaxUintFromFormat(vform);
  const int lane_count = LaneCountFromFormat(vform);
  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, lane_value);
  }
  return dst;
}
2990
2991
2992
// Copies lane `src_index` of `src` into lane `dst_index` of `dst`. No other
// lane of `dst` is written.
LogicVRegister Simulator::ins_element(VectorFormat vform,
                                      LogicVRegister dst,
                                      int dst_index,
                                      const LogicVRegister& src,
                                      int src_index) {
  const uint64_t element = src.Uint(vform, src_index);
  dst.SetUint(vform, dst_index, element);
  return dst;
}
3000
3001
3002
// Writes `imm`, masked with MaxUintFromFormat(vform), into lane `dst_index` of
// `dst`. No other lane of `dst` is written.
LogicVRegister Simulator::ins_immediate(VectorFormat vform,
                                        LogicVRegister dst,
                                        int dst_index,
                                        uint64_t imm) {
  dst.SetUint(vform, dst_index, imm & MaxUintFromFormat(vform));
  return dst;
}
3010
3011
3012
// Fills `dst` with an arithmetic sequence: lane 0 holds `start` and each
// following lane holds the previous value plus `step` (modulo 2^64 before
// being stored to the lane).
LogicVRegister Simulator::index(VectorFormat vform,
                                LogicVRegister dst,
                                uint64_t start,
                                uint64_t step) {
  VIXL_ASSERT(IsSVEFormat(vform));
  const int lane_count = LaneCountFromFormat(vform);
  uint64_t next = start;
  for (int lane = 0; lane < lane_count; lane++, next += step) {
    dst.SetUint(vform, lane, next);
  }
  return dst;
}
3024
3025
3026
// Shifts every lane of `dst` up by one position (the top lane is discarded)
// and writes `imm` into lane 0.
LogicVRegister Simulator::insr(VectorFormat vform,
                               LogicVRegister dst,
                               uint64_t imm) {
  VIXL_ASSERT(IsSVEFormat(vform));
  // Walk from the top lane downwards so that each lane is read before it is
  // overwritten.
  int lane = LaneCountFromFormat(vform) - 1;
  while (lane > 0) {
    dst.SetUint(vform, lane, dst.Uint(vform, lane - 1));
    lane--;
  }
  dst.SetUint(vform, 0, imm);
  return dst;
}
3036
3037
3038
// Copies every lane of `src` to the corresponding lane of `dst`.
LogicVRegister Simulator::mov(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int i = 0; i < lane_count; i++) {
    const uint64_t lane_value = src.Uint(vform, i);
    dst.SetUint(vform, i, lane_value);
  }
  return dst;
}
3047
3048
3049
// Copies predicate register `src` to `dst`, chunk by chunk. Nothing is copied
// when the two registers already alias.
LogicPRegister Simulator::mov(LogicPRegister dst, const LogicPRegister& src) {
  if (!dst.Aliases(src)) {
    const int chunk_count = dst.GetChunkCount();
    for (int chunk = 0; chunk < chunk_count; chunk++) {
      dst.SetChunk(chunk, src.GetChunk(chunk));
    }
  }
  return dst;
}
3058
3059
3060
// Predicated move implemented with `sel`: `src` and the existing `dst` are
// passed as the two candidate sources, with `pg` as the governing predicate.
LogicVRegister Simulator::mov_merging(VectorFormat vform,
                                      LogicVRegister dst,
                                      const SimPRegister& pg,
                                      const LogicVRegister& src) {
  const LogicVRegister& previous = dst;
  return sel(vform, dst, pg, src, previous);
}
3066
3067
// Predicated move implemented with `sel`: `src` and an all-zero vector are
// passed as the two candidate sources, with `pg` as the governing predicate.
LogicVRegister Simulator::mov_zeroing(VectorFormat vform,
                                      LogicVRegister dst,
                                      const SimPRegister& pg,
                                      const LogicVRegister& src) {
  SimVRegister zeros;
  dup_immediate(vform, zeros, 0);
  return sel(vform, dst, pg, src, zeros);
}
3075
3076
// Copies every second lane of `src` to the same lane of `dst`, beginning at
// lane `start_at` (0 for the even lanes, 1 for the odd lanes). The other lanes
// of `dst` are not written.
LogicVRegister Simulator::mov_alternating(VectorFormat vform,
                                          LogicVRegister dst,
                                          const LogicVRegister& src,
                                          int start_at) {
  VIXL_ASSERT((start_at == 0) || (start_at == 1));
  const int lane_count = LaneCountFromFormat(vform);
  int lane = start_at;
  while (lane < lane_count) {
    dst.SetUint(vform, lane, src.Uint(vform, lane));
    lane += 2;
  }
  return dst;
}
3086
3087
// Predicated predicate move implemented with `sel`: `src` and the existing
// `dst` are passed as the two candidate sources, with `pg` governing.
LogicPRegister Simulator::mov_merging(LogicPRegister dst,
                                      const LogicPRegister& pg,
                                      const LogicPRegister& src) {
  const LogicPRegister& previous = dst;
  return sel(dst, pg, src, previous);
}
3092
3093
// Predicated predicate move implemented with `sel`: `src` and the result of
// `pfalse` on a scratch predicate are passed as the two candidate sources,
// with `pg` governing.
LogicPRegister Simulator::mov_zeroing(LogicPRegister dst,
                                      const LogicPRegister& pg,
                                      const LogicPRegister& src) {
  SimPRegister scratch;
  return sel(dst, pg, src, pfalse(scratch));
}
3099
3100
// Writes `imm` to every lane of `dst`.
LogicVRegister Simulator::movi(VectorFormat vform,
                               LogicVRegister dst,
                               uint64_t imm) {
  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, imm);
  }
  return dst;
}
3110
3111
3112
// Writes the bitwise inverse of `imm` to every lane of `dst`.
LogicVRegister Simulator::mvni(VectorFormat vform,
                               LogicVRegister dst,
                               uint64_t imm) {
  const uint64_t inverted = ~imm;
  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, inverted);
  }
  return dst;
}
3122
3123
3124
// Lane-wise bitwise OR of `src` with the immediate `imm`.
//
// Results are staged in a scratch buffer before `dst` is written. The buffer
// is sized like the other staging buffers in this file (kZRegMaxSizeInBytes
// entries), so formats with more than 16 lanes cannot overrun it.
LogicVRegister Simulator::orr(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src,
                              uint64_t imm) {
  uint64_t result[kZRegMaxSizeInBytes];
  const int lane_count = LaneCountFromFormat(vform);
  for (int i = 0; i < lane_count; ++i) {
    result[i] = src.Uint(vform, i) | imm;
  }
  dst.ClearForWrite(vform);
  for (int i = 0; i < lane_count; ++i) {
    dst.SetUint(vform, i, result[i]);
  }
  return dst;
}
3139
3140
3141
// Unsigned extend long. Each destination lane (format `vform`) receives the
// unsigned value of a half-width source lane. When `is_2` is set, the source
// lanes are taken starting at lane `lane_count` rather than lane 0.
LogicVRegister Simulator::uxtl(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src,
                               bool is_2) {
  const VectorFormat narrow_form = VectorFormatHalfWidth(vform);
  const int lane_count = LaneCountFromFormat(vform);
  const int first_src_lane = is_2 ? lane_count : 0;

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; lane++) {
    const uint64_t narrow = src.Uint(narrow_form, first_src_lane + lane);
    dst.SetUint(vform, lane, narrow);
  }
  return dst;
}
3155
3156
3157
// Signed extend long. Each destination lane (format `vform`) receives the
// signed value of a half-width source lane. When `is_2` is set, the source
// lanes are taken starting at lane `lane_count` rather than lane 0.
LogicVRegister Simulator::sxtl(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src,
                               bool is_2) {
  const VectorFormat narrow_form = VectorFormatHalfWidth(vform);
  const int lane_count = LaneCountFromFormat(vform);
  const int first_src_lane = is_2 ? lane_count : 0;

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; lane++) {
    const int64_t narrow = src.Int(narrow_form, first_src_lane + lane);
    dst.SetInt(vform, lane, narrow);
  }
  return dst;
}
3171
3172
3173
// Unsigned extend long, reading the second group of source lanes.
LogicVRegister Simulator::uxtl2(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  const bool is_2 = true;
  return uxtl(vform, dst, src, is_2);
}
3178
3179
3180
// Signed extend long, reading the second group of source lanes.
LogicVRegister Simulator::sxtl2(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  const bool is_2 = true;
  return sxtl(vform, dst, src, is_2);
}
3185
3186
3187
// Zero-extends the low `from_size_in_bits` bits of each lane of `src` to the
// full lane width, by masking off the higher bits.
LogicVRegister Simulator::uxt(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src,
                              unsigned from_size_in_bits) {
  const uint64_t low_bits_mask = GetUintMask(from_size_in_bits);
  const int lane_count = LaneCountFromFormat(vform);

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; lane++) {
    dst.SetInt(vform, lane, src.Uint(vform, lane) & low_bits_mask);
  }
  return dst;
}
3200
3201
3202
// Sign-extends the low `from_size_in_bits` bits of each lane of `src` to the
// full lane width.
LogicVRegister Simulator::sxt(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src,
                              unsigned from_size_in_bits) {
  const int lane_count = LaneCountFromFormat(vform);
  // Bit position of the sign bit in the source field.
  const unsigned sign_bit = from_size_in_bits - 1;

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; lane++) {
    const uint64_t extended =
        ExtractSignedBitfield64(sign_bit, 0, src.Uint(vform, lane));
    dst.SetInt(vform, lane, extended);
  }
  return dst;
}
3216
3217
3218
// Shift right narrow. Logically shifts each double-width lane of `src` right
// by `shift`, then narrows the result to `vform` with unsigned source and
// destination and without saturation.
LogicVRegister Simulator::shrn(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src,
                               int shift) {
  SimVRegister scratch;
  const VectorFormat wide_form = VectorFormatDoubleWidth(vform);
  LogicVRegister shifted_src = ushr(wide_form, scratch, src, shift);
  const bool dst_is_signed = false;
  const bool src_is_signed = false;
  return extractnarrow(vform, dst, dst_is_signed, shifted_src, src_is_signed);
}
3228
3229
3230
// Shift right narrow (second part). The source format is obtained by applying
// VectorFormatHalfLanes and then VectorFormatDoubleWidth to `vform`. Each
// source lane is logically shifted right by `shift`, then narrowed to `vform`
// with unsigned source and destination and without saturation.
LogicVRegister Simulator::shrn2(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                int shift) {
  SimVRegister scratch;
  const VectorFormat wide_form =
      VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
  LogicVRegister shifted_src = ushr(wide_form, scratch, src, shift);
  const bool dst_is_signed = false;
  const bool src_is_signed = false;
  return extractnarrow(vform, dst, dst_is_signed, shifted_src, src_is_signed);
}
3240
3241
3242
// Rounding shift right narrow. Logically shifts each double-width lane of
// `src` right by `shift` and applies Round() to the shifted value, then
// narrows the result to `vform` with unsigned source and destination and
// without saturation.
LogicVRegister Simulator::rshrn(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                int shift) {
  SimVRegister scratch;
  const VectorFormat wide_form = VectorFormatDoubleWidth(vform);
  LogicVRegister shifted_src =
      ushr(wide_form, scratch, src, shift).Round(wide_form);
  const bool dst_is_signed = false;
  const bool src_is_signed = false;
  return extractnarrow(vform, dst, dst_is_signed, shifted_src, src_is_signed);
}
3252
3253
3254
// Rounding shift right narrow (second part). The source format is obtained by
// applying VectorFormatHalfLanes and then VectorFormatDoubleWidth to `vform`.
// Each source lane is logically shifted right by `shift` and rounded with
// Round(), then narrowed to `vform` with unsigned source and destination and
// without saturation.
LogicVRegister Simulator::rshrn2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src,
                                 int shift) {
  SimVRegister scratch;
  const VectorFormat wide_form =
      VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
  LogicVRegister shifted_src =
      ushr(wide_form, scratch, src, shift).Round(wide_form);
  const bool dst_is_signed = false;
  const bool src_is_signed = false;
  return extractnarrow(vform, dst, dst_is_signed, shifted_src, src_is_signed);
}
3264
3265
// Shared table-lookup helper behind tbl and tbx.
//
// The lanes of `tab1` and of any non-NULL `tab2`..`tab4` are concatenated
// into one table. Each lane of `ind` then selects a table entry for the same
// lane of `dst`. An index outside the table yields zero when
// `zero_out_of_bounds` is true, and the existing `dst` lane otherwise.
LogicVRegister Simulator::Table(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& ind,
                                bool zero_out_of_bounds,
                                const LogicVRegister* tab1,
                                const LogicVRegister* tab2,
                                const LogicVRegister* tab3,
                                const LogicVRegister* tab4) {
  VIXL_ASSERT(tab1 != NULL);
  const int lanes = LaneCountFromFormat(vform);
  VIXL_ASSERT((tab3 == NULL) || (lanes <= 16));
  uint64_t entries[kZRegMaxSizeInBytes * 2];
  uint64_t selected[kZRegMaxSizeInBytes];

  // For Neon, the table source registers are always 16B, and Neon allows only
  // 8B or 16B vform for the destination, so infer the table format from the
  // destination.
  const VectorFormat vform_tab = (vform == kFormat8B) ? kFormat16B : vform;

  // The first table is mandatory; the remaining ones are appended in order
  // when present.
  uint64_t entry_count = tab1->UintArray(vform_tab, &entries[0]);
  const LogicVRegister* const optional_tables[] = {tab2, tab3, tab4};
  for (int t = 0; t < 3; ++t) {
    const LogicVRegister* extra = optional_tables[t];
    if (extra == NULL) continue;
    entry_count += extra->UintArray(vform_tab, &entries[entry_count]);
  }

  for (int lane = 0; lane < lanes; ++lane) {
    const uint64_t index = ind.Uint(vform, lane);
    if (index < entry_count) {
      selected[lane] = entries[index];
    } else if (zero_out_of_bounds) {
      selected[lane] = 0;
    } else {
      selected[lane] = dst.Uint(vform, lane);
    }
  }
  dst.SetUintArray(vform, selected);
  return dst;
}
3297
3298
// Single-register table lookup; out-of-range indices produce zero.
LogicVRegister Simulator::tbl(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& tab,
                              const LogicVRegister& ind) {
  const bool zero_out_of_bounds = true;
  return Table(vform, dst, ind, zero_out_of_bounds, &tab);
}
3304
3305
3306
// Two-register table lookup; out-of-range indices produce zero.
LogicVRegister Simulator::tbl(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& tab,
                              const LogicVRegister& tab2,
                              const LogicVRegister& ind) {
  const bool zero_out_of_bounds = true;
  return Table(vform, dst, ind, zero_out_of_bounds, &tab, &tab2);
}
3313
3314
3315
// Three-register table lookup; out-of-range indices produce zero.
LogicVRegister Simulator::tbl(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& tab,
                              const LogicVRegister& tab2,
                              const LogicVRegister& tab3,
                              const LogicVRegister& ind) {
  const bool zero_out_of_bounds = true;
  return Table(vform, dst, ind, zero_out_of_bounds, &tab, &tab2, &tab3);
}
3323
3324
3325
// Four-register table lookup; out-of-range indices produce zero.
LogicVRegister Simulator::tbl(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& tab,
                              const LogicVRegister& tab2,
                              const LogicVRegister& tab3,
                              const LogicVRegister& tab4,
                              const LogicVRegister& ind) {
  const bool zero_out_of_bounds = true;
  return Table(vform,
               dst,
               ind,
               zero_out_of_bounds,
               &tab,
               &tab2,
               &tab3,
               &tab4);
}
3334
3335
3336
// Single-register table lookup; out-of-range indices leave the `dst` lane
// unchanged.
LogicVRegister Simulator::tbx(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& tab,
                              const LogicVRegister& ind) {
  const bool zero_out_of_bounds = false;
  return Table(vform, dst, ind, zero_out_of_bounds, &tab);
}
3342
3343
3344
// Two-register table lookup; out-of-range indices leave the `dst` lane
// unchanged.
LogicVRegister Simulator::tbx(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& tab,
                              const LogicVRegister& tab2,
                              const LogicVRegister& ind) {
  const bool zero_out_of_bounds = false;
  return Table(vform, dst, ind, zero_out_of_bounds, &tab, &tab2);
}
3351
3352
3353
// Three-register table lookup; out-of-range indices leave the `dst` lane
// unchanged.
LogicVRegister Simulator::tbx(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& tab,
                              const LogicVRegister& tab2,
                              const LogicVRegister& tab3,
                              const LogicVRegister& ind) {
  const bool zero_out_of_bounds = false;
  return Table(vform, dst, ind, zero_out_of_bounds, &tab, &tab2, &tab3);
}
3361
3362
3363
// Four-register table lookup; out-of-range indices leave the `dst` lane
// unchanged.
LogicVRegister Simulator::tbx(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& tab,
                              const LogicVRegister& tab2,
                              const LogicVRegister& tab3,
                              const LogicVRegister& tab4,
                              const LogicVRegister& ind) {
  const bool zero_out_of_bounds = false;
  return Table(vform,
               dst,
               ind,
               zero_out_of_bounds,
               &tab,
               &tab2,
               &tab3,
               &tab4);
}
3372
3373
3374
// Runs shrn and then applies UnsignedSaturate to its result.
LogicVRegister Simulator::uqshrn(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src,
                                 int shift) {
  LogicVRegister narrowed = shrn(vform, dst, src, shift);
  return narrowed.UnsignedSaturate(vform);
}
3380
3381
3382
// Runs shrn2 and then applies UnsignedSaturate to its result.
LogicVRegister Simulator::uqshrn2(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src,
                                  int shift) {
  LogicVRegister narrowed = shrn2(vform, dst, src, shift);
  return narrowed.UnsignedSaturate(vform);
}
3388
3389
3390
// Runs rshrn and then applies UnsignedSaturate to its result.
LogicVRegister Simulator::uqrshrn(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src,
                                  int shift) {
  LogicVRegister narrowed = rshrn(vform, dst, src, shift);
  return narrowed.UnsignedSaturate(vform);
}
3396
3397
3398
// Runs rshrn2 and then applies UnsignedSaturate to its result.
LogicVRegister Simulator::uqrshrn2(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src,
                                   int shift) {
  LogicVRegister narrowed = rshrn2(vform, dst, src, shift);
  return narrowed.UnsignedSaturate(vform);
}
3404
3405
3406
// Applies sshr by `shift` to `src` in the double-width form of `vform`, then
// narrows the result into `dst` with sqxtn.
LogicVRegister Simulator::sqshrn(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src,
                                 int shift) {
  SimVRegister scratch;
  const VectorFormat wide_form = VectorFormatDoubleWidth(vform);
  LogicVRegister shifted = sshr(wide_form, scratch, src, shift);
  return sqxtn(vform, dst, shifted);
}
3416
3417
3418
// "2" form of sqshrn: the source format is the double-width form of `vform`
// with half its lanes. The sshr result is narrowed into `dst` with sqxtn.
LogicVRegister Simulator::sqshrn2(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src,
                                  int shift) {
  SimVRegister scratch;
  const VectorFormat wide_form =
      VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
  LogicVRegister shifted = sshr(wide_form, scratch, src, shift);
  return sqxtn(vform, dst, shifted);
}
3428
3429
3430
// Rounding form of sqshrn: Round() is applied to the sshr result (in the
// double-width format) before it is narrowed into `dst` with sqxtn.
LogicVRegister Simulator::sqrshrn(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src,
                                  int shift) {
  SimVRegister scratch;
  const VectorFormat wide_form = VectorFormatDoubleWidth(vform);
  LogicVRegister rounded =
      sshr(wide_form, scratch, src, shift).Round(wide_form);
  return sqxtn(vform, dst, rounded);
}
3440
3441
3442
// "2" form of sqrshrn: the source format is the double-width form of `vform`
// with half its lanes. Round() is applied to the sshr result before it is
// narrowed into `dst` with sqxtn.
LogicVRegister Simulator::sqrshrn2(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src,
                                   int shift) {
  SimVRegister scratch;
  const VectorFormat wide_form =
      VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
  LogicVRegister rounded =
      sshr(wide_form, scratch, src, shift).Round(wide_form);
  return sqxtn(vform, dst, rounded);
}
3452
3453
3454
// Applies sshr by `shift` to `src` in the double-width form of `vform`, then
// narrows the result into `dst` with sqxtun.
LogicVRegister Simulator::sqshrun(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src,
                                  int shift) {
  SimVRegister scratch;
  const VectorFormat wide_form = VectorFormatDoubleWidth(vform);
  LogicVRegister shifted = sshr(wide_form, scratch, src, shift);
  return sqxtun(vform, dst, shifted);
}
3464
3465
3466
// "2" form of sqshrun: the source format is the double-width form of `vform`
// with half its lanes. The sshr result is narrowed into `dst` with sqxtun.
LogicVRegister Simulator::sqshrun2(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src,
                                   int shift) {
  SimVRegister scratch;
  const VectorFormat wide_form =
      VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
  LogicVRegister shifted = sshr(wide_form, scratch, src, shift);
  return sqxtun(vform, dst, shifted);
}
3476
3477
3478
// Rounding form of sqshrun: Round() is applied to the sshr result (in the
// double-width format) before it is narrowed into `dst` with sqxtun.
LogicVRegister Simulator::sqrshrun(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src,
                                   int shift) {
  SimVRegister scratch;
  const VectorFormat wide_form = VectorFormatDoubleWidth(vform);
  LogicVRegister rounded =
      sshr(wide_form, scratch, src, shift).Round(wide_form);
  return sqxtun(vform, dst, rounded);
}
3488
3489
3490
// "2" form of sqrshrun: the source format is the double-width form of
// `vform` with half its lanes. Round() is applied to the sshr result before
// it is narrowed into `dst` with sqxtun.
LogicVRegister Simulator::sqrshrun2(VectorFormat vform,
                                    LogicVRegister dst,
                                    const LogicVRegister& src,
                                    int shift) {
  SimVRegister scratch;
  const VectorFormat wide_form =
      VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
  LogicVRegister rounded =
      sshr(wide_form, scratch, src, shift).Round(wide_form);
  return sqxtun(vform, dst, rounded);
}
3500
3501
3502
// Widens both sources with uxtl and writes their sum to `dst`.
LogicVRegister Simulator::uaddl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;
  uxtl(vform, wide_lhs, src1);
  uxtl(vform, wide_rhs, src2);
  add(vform, dst, wide_lhs, wide_rhs);
  return dst;
}
3512
3513
3514
// Widens both sources with uxtl2 and writes their sum to `dst`.
LogicVRegister Simulator::uaddl2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;
  uxtl2(vform, wide_lhs, src1);
  uxtl2(vform, wide_rhs, src2);
  add(vform, dst, wide_lhs, wide_rhs);
  return dst;
}
3524
3525
3526
// Widens `src2` with uxtl and adds it to `src1`, writing the sum to `dst`.
LogicVRegister Simulator::uaddw(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide_rhs;
  uxtl(vform, wide_rhs, src2);
  add(vform, dst, src1, wide_rhs);
  return dst;
}
3535
3536
3537
// Widens `src2` with uxtl2 and adds it to `src1`, writing the sum to `dst`.
LogicVRegister Simulator::uaddw2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_rhs;
  uxtl2(vform, wide_rhs, src2);
  add(vform, dst, src1, wide_rhs);
  return dst;
}
3546
3547
3548
// Widens both sources with sxtl and writes their sum to `dst`.
LogicVRegister Simulator::saddl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;
  sxtl(vform, wide_lhs, src1);
  sxtl(vform, wide_rhs, src2);
  add(vform, dst, wide_lhs, wide_rhs);
  return dst;
}
3558
3559
3560
// Widens both sources with sxtl2 and writes their sum to `dst`.
LogicVRegister Simulator::saddl2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;
  sxtl2(vform, wide_lhs, src1);
  sxtl2(vform, wide_rhs, src2);
  add(vform, dst, wide_lhs, wide_rhs);
  return dst;
}
3570
3571
3572
// Widens `src2` with sxtl and adds it to `src1`, writing the sum to `dst`.
LogicVRegister Simulator::saddw(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide_rhs;
  sxtl(vform, wide_rhs, src2);
  add(vform, dst, src1, wide_rhs);
  return dst;
}
3581
3582
3583
// Widens `src2` with sxtl2 and adds it to `src1`, writing the sum to `dst`.
LogicVRegister Simulator::saddw2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_rhs;
  sxtl2(vform, wide_rhs, src2);
  add(vform, dst, src1, wide_rhs);
  return dst;
}
3592
3593
3594
// Widens both sources with uxtl and writes (src1 - src2) to `dst`.
LogicVRegister Simulator::usubl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;
  uxtl(vform, wide_lhs, src1);
  uxtl(vform, wide_rhs, src2);
  sub(vform, dst, wide_lhs, wide_rhs);
  return dst;
}
3604
3605
3606
// Widens both sources with uxtl2 and writes (src1 - src2) to `dst`.
LogicVRegister Simulator::usubl2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;
  uxtl2(vform, wide_lhs, src1);
  uxtl2(vform, wide_rhs, src2);
  sub(vform, dst, wide_lhs, wide_rhs);
  return dst;
}
3616
3617
3618
// Widens `src2` with uxtl and subtracts it from `src1`, writing to `dst`.
LogicVRegister Simulator::usubw(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide_rhs;
  uxtl(vform, wide_rhs, src2);
  sub(vform, dst, src1, wide_rhs);
  return dst;
}
3627
3628
3629
// Widens `src2` with uxtl2 and subtracts it from `src1`, writing to `dst`.
LogicVRegister Simulator::usubw2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_rhs;
  uxtl2(vform, wide_rhs, src2);
  sub(vform, dst, src1, wide_rhs);
  return dst;
}
3638
3639
3640
// Widens both sources with sxtl and writes (src1 - src2) to `dst`.
LogicVRegister Simulator::ssubl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;
  sxtl(vform, wide_lhs, src1);
  sxtl(vform, wide_rhs, src2);
  sub(vform, dst, wide_lhs, wide_rhs);
  return dst;
}
3650
3651
3652
// Widens both sources with sxtl2 and writes (src1 - src2) to `dst`.
LogicVRegister Simulator::ssubl2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;
  sxtl2(vform, wide_lhs, src1);
  sxtl2(vform, wide_rhs, src2);
  sub(vform, dst, wide_lhs, wide_rhs);
  return dst;
}
3662
3663
3664
// Widens `src2` with sxtl and subtracts it from `src1`, writing to `dst`.
LogicVRegister Simulator::ssubw(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide_rhs;
  sxtl(vform, wide_rhs, src2);
  sub(vform, dst, src1, wide_rhs);
  return dst;
}
3673
3674
3675
// Widens `src2` with sxtl2 and subtracts it from `src1`, writing to `dst`.
LogicVRegister Simulator::ssubw2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_rhs;
  sxtl2(vform, wide_rhs, src2);
  sub(vform, dst, src1, wide_rhs);
  return dst;
}
3684
3685
3686
// Widens both sources with uxtl and passes them to uaba, which updates `dst`.
LogicVRegister Simulator::uabal(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;
  uxtl(vform, wide_lhs, src1);
  uxtl(vform, wide_rhs, src2);
  uaba(vform, dst, wide_lhs, wide_rhs);
  return dst;
}
3696
3697
3698
// Widens both sources with uxtl2 and passes them to uaba, which updates
// `dst`.
LogicVRegister Simulator::uabal2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;
  uxtl2(vform, wide_lhs, src1);
  uxtl2(vform, wide_rhs, src2);
  uaba(vform, dst, wide_lhs, wide_rhs);
  return dst;
}
3708
3709
3710
// Widens both sources with sxtl and passes them to saba, which updates `dst`.
LogicVRegister Simulator::sabal(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;
  sxtl(vform, wide_lhs, src1);
  sxtl(vform, wide_rhs, src2);
  saba(vform, dst, wide_lhs, wide_rhs);
  return dst;
}
3720
3721
3722
// Widens both sources with sxtl2 and passes them to saba, which updates
// `dst`.
LogicVRegister Simulator::sabal2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;
  sxtl2(vform, wide_lhs, src1);
  sxtl2(vform, wide_rhs, src2);
  saba(vform, dst, wide_lhs, wide_rhs);
  return dst;
}
3732
3733
3734
// Widens both sources with uxtl and writes absdiff(..., false) of the
// widened values to `dst`.
LogicVRegister Simulator::uabdl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;
  uxtl(vform, wide_lhs, src1);
  uxtl(vform, wide_rhs, src2);
  absdiff(vform, dst, wide_lhs, wide_rhs, false);
  return dst;
}
3744
3745
3746
// Widens both sources with uxtl2 and writes absdiff(..., false) of the
// widened values to `dst`.
LogicVRegister Simulator::uabdl2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;
  uxtl2(vform, wide_lhs, src1);
  uxtl2(vform, wide_rhs, src2);
  absdiff(vform, dst, wide_lhs, wide_rhs, false);
  return dst;
}
3756
3757
3758
// Widens both sources with sxtl and writes absdiff(..., true) of the
// widened values to `dst`.
LogicVRegister Simulator::sabdl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;
  sxtl(vform, wide_lhs, src1);
  sxtl(vform, wide_rhs, src2);
  absdiff(vform, dst, wide_lhs, wide_rhs, true);
  return dst;
}
3768
3769
3770
// Widens both sources with sxtl2 and writes absdiff(..., true) of the
// widened values to `dst`.
LogicVRegister Simulator::sabdl2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;
  sxtl2(vform, wide_lhs, src1);
  sxtl2(vform, wide_rhs, src2);
  absdiff(vform, dst, wide_lhs, wide_rhs, true);
  return dst;
}
3780
3781
3782
// Widens both sources with uxtl (forwarding `is_2`) and writes their product
// to `dst`.
LogicVRegister Simulator::umull(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2,
                                bool is_2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;
  uxtl(vform, wide_lhs, src1, is_2);
  uxtl(vform, wide_rhs, src2, is_2);
  mul(vform, dst, wide_lhs, wide_rhs);
  return dst;
}
3793
3794
3795
// Forwards to umull with `is_2` set.
LogicVRegister Simulator::umull2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  const bool is_2 = true;
  return umull(vform, dst, src1, src2, is_2);
}
3801
3802
3803
// Widens both sources with sxtl (forwarding `is_2`) and writes their product
// to `dst`.
LogicVRegister Simulator::smull(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2,
                                bool is_2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;
  sxtl(vform, wide_lhs, src1, is_2);
  sxtl(vform, wide_rhs, src2, is_2);
  mul(vform, dst, wide_lhs, wide_rhs);
  return dst;
}
3814
3815
3816
// Forwards to smull with `is_2` set.
LogicVRegister Simulator::smull2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  const bool is_2 = true;
  return smull(vform, dst, src1, src2, is_2);
}
3822
3823
3824
// Widens both sources with uxtl (forwarding `is_2`) and applies mls with
// `dst` as both destination and accumulator.
LogicVRegister Simulator::umlsl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2,
                                bool is_2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;
  uxtl(vform, wide_lhs, src1, is_2);
  uxtl(vform, wide_rhs, src2, is_2);
  mls(vform, dst, dst, wide_lhs, wide_rhs);
  return dst;
}
3835
3836
3837
// Forwards to umlsl with `is_2` set.
LogicVRegister Simulator::umlsl2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  const bool is_2 = true;
  return umlsl(vform, dst, src1, src2, is_2);
}
3843
3844
3845
// Widens both sources with sxtl (forwarding `is_2`) and applies mls with
// `dst` as both destination and accumulator.
LogicVRegister Simulator::smlsl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2,
                                bool is_2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;
  sxtl(vform, wide_lhs, src1, is_2);
  sxtl(vform, wide_rhs, src2, is_2);
  mls(vform, dst, dst, wide_lhs, wide_rhs);
  return dst;
}
3856
3857
3858
// Forwards to smlsl with `is_2` set.
LogicVRegister Simulator::smlsl2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  const bool is_2 = true;
  return smlsl(vform, dst, src1, src2, is_2);
}
3864
3865
3866
// Widens both sources with uxtl (forwarding `is_2`) and applies mla with
// `dst` as both destination and accumulator.
LogicVRegister Simulator::umlal(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2,
                                bool is_2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;
  uxtl(vform, wide_lhs, src1, is_2);
  uxtl(vform, wide_rhs, src2, is_2);
  mla(vform, dst, dst, wide_lhs, wide_rhs);
  return dst;
}
3877
3878
3879
// Forwards to umlal with `is_2` set.
LogicVRegister Simulator::umlal2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  const bool is_2 = true;
  return umlal(vform, dst, src1, src2, is_2);
}
3885
3886
3887
// Widens both sources with sxtl (forwarding `is_2`) and applies mla with
// `dst` as both destination and accumulator.
LogicVRegister Simulator::smlal(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2,
                                bool is_2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;
  sxtl(vform, wide_lhs, src1, is_2);
  sxtl(vform, wide_rhs, src2, is_2);
  mla(vform, dst, dst, wide_lhs, wide_rhs);
  return dst;
}
3898
3899
3900
// Forwards to smlal with `is_2` set.
LogicVRegister Simulator::smlal2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  const bool is_2 = true;
  return smlal(vform, dst, src1, src2, is_2);
}
3906
3907
3908
// Computes sqdmull(src1, src2) into a scratch register, adds it to `dst`,
// and applies SignedSaturate to the sum.
LogicVRegister Simulator::sqdmlal(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2,
                                  bool is_2) {
  SimVRegister scratch;
  LogicVRegister doubled_product = sqdmull(vform, scratch, src1, src2, is_2);
  LogicVRegister sum = add(vform, dst, dst, doubled_product);
  return sum.SignedSaturate(vform);
}
3917
3918
3919
// Forwards to sqdmlal with `is_2` set.
LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2) {
  const bool is_2 = true;
  return sqdmlal(vform, dst, src1, src2, is_2);
}
3925
3926
3927
// Computes sqdmull(src1, src2) into a scratch register, subtracts it from
// `dst`, and applies SignedSaturate to the difference.
LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2,
                                  bool is_2) {
  SimVRegister scratch;
  LogicVRegister doubled_product = sqdmull(vform, scratch, src1, src2, is_2);
  LogicVRegister difference = sub(vform, dst, dst, doubled_product);
  return difference.SignedSaturate(vform);
}
3936
3937
3938
// Forwards to sqdmlsl with `is_2` set.
LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2) {
  const bool is_2 = true;
  return sqdmlsl(vform, dst, src1, src2, is_2);
}
3944
3945
3946
// Computes smull(src1, src2) into a scratch register, doubles it by adding
// the product to itself into `dst`, and applies SignedSaturate.
LogicVRegister Simulator::sqdmull(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2,
                                  bool is_2) {
  SimVRegister scratch;
  LogicVRegister product = smull(vform, scratch, src1, src2, is_2);
  LogicVRegister doubled = add(vform, dst, product, product);
  return doubled.SignedSaturate(vform);
}
3955
3956
3957
// Forwards to sqdmull with `is_2` set.
LogicVRegister Simulator::sqdmull2(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2) {
  const bool is_2 = true;
  return sqdmull(vform, dst, src1, src2, is_2);
}
3963
3964
// Saturating doubling multiply that keeps the high half of each lane's
// product. When `round` is true an extra bit from the low half is added in.
// Lanes where both inputs equal the minimum representable value are set to
// the maximum representable value.
LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2,
                                   bool round) {
  const int top_bit_shift = LaneSizeInBitsFromFormat(vform) - 1;

  SimVRegister product_lo;
  SimVRegister product_hi;

  // Low and high halves of the multiplication result.
  mul(vform, product_lo, src1, src2);
  smulh(vform, product_hi, src1, src2);

  // Double the product: shift the high half left by one and add in the
  // most-significant bit of the low half.
  shl(vform, product_hi, product_hi, 1);
  usra(vform, product_hi, product_lo, top_bit_shift);

  if (round) {
    // After doubling, the rounding bit is the second most-significant bit of
    // the low half; shift it to the top and add it into the result.
    shl(vform, product_lo, product_lo, 1);
    usra(vform, product_hi, product_lo, top_bit_shift);
  }

  SimPRegister not_sat;
  LogicPRegister not_sat_lanes(not_sat);
  const int lanes = LaneCountFromFormat(vform);
  const int64_t min_value = MinIntFromFormat(vform);
  const int64_t max_value = MaxIntFromFormat(vform);

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    // Saturation only occurs when src1 = src2 = minimum representable value,
    // so detect that as a special case.
    const bool saturates = (src1.Int(vform, lane) == min_value) &&
                           (src2.Int(vform, lane) == min_value);
    not_sat_lanes.SetActive(vform, lane, !saturates);
    // Pre-fill with the saturated value; non-saturating lanes are overwritten
    // by the merge below.
    dst.SetInt(vform, lane, max_value);
  }

  mov_merging(vform, dst, not_sat, product_hi);
  return dst;
}
4006
4007
4008
// Four-way dot product with accumulation. For each lane of `dst`, the four
// quarter-width elements of `src1` and `src2` at the same position are
// multiplied pairwise, summed, and added to the existing `dst` lane.
// `is_src1_signed` / `is_src2_signed` choose signed or unsigned reads of the
// respective source elements.
LogicVRegister Simulator::dot(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2,
                              bool is_src1_signed,
                              bool is_src2_signed) {
  const VectorFormat quarter_vform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform));
  const int lanes = LaneCountFromFormat(vform);

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    uint64_t sum = 0;
    for (int k = 0; k < 4; ++k) {
      const int sub_lane = (4 * lane) + k;
      const int64_t lhs =
          is_src1_signed
              ? src1.Int(quarter_vform, sub_lane)
              : static_cast<int64_t>(src1.Uint(quarter_vform, sub_lane));
      const int64_t rhs =
          is_src2_signed
              ? src2.Int(quarter_vform, sub_lane)
              : static_cast<int64_t>(src2.Uint(quarter_vform, sub_lane));
      sum += lhs * rhs;
    }
    dst.SetUint(vform, lane, sum + dst.Uint(vform, lane));
  }
  return dst;
}
4039
4040
4041
// Dot product with both sources read as signed.
LogicVRegister Simulator::sdot(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  const bool is_src1_signed = true;
  const bool is_src2_signed = true;
  return dot(vform, dst, src1, src2, is_src1_signed, is_src2_signed);
}
4047
4048
4049
// Dot product with both sources read as unsigned.
LogicVRegister Simulator::udot(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  const bool is_src1_signed = false;
  const bool is_src2_signed = false;
  return dot(vform, dst, src1, src2, is_src1_signed, is_src2_signed);
}
4055
4056
// Dot product with `src1` read as unsigned and `src2` read as signed.
LogicVRegister Simulator::usdot(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  const bool is_src1_signed = false;
  const bool is_src2_signed = true;
  return dot(vform, dst, src1, src2, is_src1_signed, is_src2_signed);
}
4062
4063
// Dot product over pairs of quarter-width elements with a rotation selector.
// Each `dst` lane is the matching `acc` lane plus, for each of the two pairs
// in that lane, (r1 * r2) + (sign * i1 * i2). `rot` (0, 90, 180 or 270)
// selects which element of each `src2` pair is used as r2 and which as i2,
// and whether `sign` is +1 or -1.
LogicVRegister Simulator::cdot(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& acc,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2,
                               int rot) {
  VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270));
  const VectorFormat quarter_vform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform));

  // Offsets, within each pair, of the src2 elements multiplied with the
  // first (sel_a) and second (sel_b) src1 element.
  const int sel_a = ((rot == 0) || (rot == 180)) ? 0 : 1;
  const int sel_b = 1 - sel_a;
  // Sign applied to the product of the second elements.
  const int sub_i = ((rot == 90) || (rot == 180)) ? 1 : -1;

  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    int64_t total = acc.Int(vform, lane);
    for (int pair = 0; pair < 2; ++pair) {
      const int base = (4 * lane) + (2 * pair);
      const int64_t r1 = src1.Int(quarter_vform, base + 0);
      const int64_t i1 = src1.Int(quarter_vform, base + 1);
      const int64_t r2 = src2.Int(quarter_vform, base + sel_a);
      const int64_t i2 = src2.Int(quarter_vform, base + sel_b);
      total += (r1 * r2) + (sub_i * i1 * i2);
    }
    dst.SetInt(vform, lane, total);
  }
  return dst;
}
4090
4091
// Vector form of sqrdcmlah, built from sqrdmlash.
//
// Each source is split with uzp1/uzp2 against a zero register; `rot`
// (0, 90, 180 or 270 -- other values are not checked here) picks which half
// of `src1` and which halves of `src2` are multiplied. The two halves of
// `srca` are accumulated separately with rounding enabled, with `rot` also
// choosing add or subtract for each, and zip1 interleaves them into `dst`.
// NOTE(review): the `_r` / `_i` names suggest the halves are the real and
// imaginary parts of interleaved complex values -- confirm against the
// instruction definition.
LogicVRegister Simulator::sqrdcmlah(VectorFormat vform,
                                    LogicVRegister dst,
                                    const LogicVRegister& srca,
                                    const LogicVRegister& src1,
                                    const LogicVRegister& src2,
                                    int rot) {
  SimVRegister src1_a;
  SimVRegister src2_a, src2_b;
  SimVRegister srca_i, srca_r;
  SimVRegister zero;
  zero.Clear();

  if ((rot == 0) || (rot == 180)) {
    uzp1(vform, src1_a, src1, zero);
    uzp1(vform, src2_a, src2, zero);
    uzp2(vform, src2_b, src2, zero);
  } else {
    uzp2(vform, src1_a, src1, zero);
    uzp2(vform, src2_a, src2, zero);
    uzp1(vform, src2_b, src2, zero);
  }

  uzp1(vform, srca_r, srca, zero);
  uzp2(vform, srca_i, srca, zero);

  // Whether each accumulation subtracts (true) or adds (false) the product.
  bool sub_r = (rot == 90) || (rot == 180);
  bool sub_i = (rot == 180) || (rot == 270);

  const bool round = true;
  sqrdmlash(vform, srca_r, src1_a, src2_a, round, sub_r);
  sqrdmlash(vform, srca_i, src1_a, src2_b, round, sub_i);
  zip1(vform, dst, srca_r, srca_i);
  return dst;
}
4125
4126
// Indexed form of sqrdcmlah: fills a scratch register from `src2` and
// `index` using dup_elements_to_segments (in the double-width format), then
// defers to the vector form with that register as the second operand.
LogicVRegister Simulator::sqrdcmlah(VectorFormat vform,
                                    LogicVRegister dst,
                                    const LogicVRegister& srca,
                                    const LogicVRegister& src1,
                                    const LogicVRegister& src2,
                                    int index,
                                    int rot) {
  SimVRegister broadcast;
  const VectorFormat wide_form = VectorFormatDoubleWidth(vform);
  dup_elements_to_segments(wide_form, broadcast, src2, index);
  return sqrdcmlah(vform, dst, srca, src1, broadcast, rot);
}
4137
4138
// 64-bit-lane implementation of sqrdmlash, using 128-bit intermediate
// arithmetic. `round` adds a rounding constant before the final shift;
// `sub_op` subtracts the product instead of adding it.
LogicVRegister Simulator::sqrdmlash_d(VectorFormat vform,
                                      LogicVRegister dst,
                                      const LogicVRegister& src1,
                                      const LogicVRegister& src2,
                                      bool round,
                                      bool sub_op) {
  // 2 * INT_64_MIN * INT_64_MIN causes INT_128 to overflow.
  // To avoid this, we use:
  //     (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
  // which is same as:
  //     (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize.

  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
  const int esize = kDRegSize;
  const int64_t max_value = MaxIntFromFormat(vform);
  const int64_t min_value = MinIntFromFormat(vform);
  const int lanes = LaneCountFromFormat(vform);

  // `first` is the high 64 bits and `second` the low 64 bits.
  vixl_uint128_t round_const, accum;
  round_const.first = 0;
  round_const.second = round ? (UINT64_C(1) << (esize - 2)) : 0;

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    // Place the accumulator, shifted left by `esize - 1` bits, in the 128-bit
    // value.
    const int64_t addend = dst.Int(vform, lane);
    accum.first = addend >> 1;
    accum.second = addend << (esize - 1);

    vixl_uint128_t product =
        Mul64(src1.Int(vform, lane), src2.Int(vform, lane));
    if (sub_op) {
      product = Neg128(product);
    }

    // Accumulate the product, then the rounding constant.
    accum = Add128(accum, product);
    accum = Add128(accum, round_const);

    // Arithmetic shift the whole value right by `esize - 1` bits.
    accum.second = (accum.first << 1) | (accum.second >> (esize - 1));
    accum.first = -(accum.first >> (esize - 1));

    // Saturate to the lane's signed range. After the shift the high half is
    // zero exactly when the value is non-negative.
    const bool is_pos = (accum.first == 0);
    if (is_pos && (accum.second > static_cast<uint64_t>(max_value))) {
      accum.second = max_value;
    } else if (!is_pos && (accum.second < static_cast<uint64_t>(min_value))) {
      accum.second = min_value;
    }

    dst.SetInt(vform, lane, accum.second);
  }

  return dst;
}
4195
4196
// Saturating doubling multiply-accumulate returning the high half, shared by
// sqrdmlah and sqrdmlsh. `round` adds a rounding constant before the final
// shift; `sub_op` subtracts the product from the accumulator instead of
// adding it. kFormatVnD is handed off to sqrdmlash_d.
LogicVRegister Simulator::sqrdmlash(VectorFormat vform,
                                    LogicVRegister dst,
                                    const LogicVRegister& src1,
                                    const LogicVRegister& src2,
                                    bool round,
                                    bool sub_op) {
  // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
  // To avoid this, we use:
  //     (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
  // which is same as:
  //     (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize.

  if (vform == kFormatVnD) {
    return sqrdmlash_d(vform, dst, src1, src2, round, sub_op);
  }

  const int esize = LaneSizeInBitsFromFormat(vform);
  const int round_const = round ? (1 << (esize - 2)) : 0;
  const int64_t max_value = MaxIntFromFormat(vform);
  const int64_t min_value = MinIntFromFormat(vform);
  const int lanes = LaneCountFromFormat(vform);

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    int64_t accum = dst.Int(vform, lane) << (esize - 1);
    const int64_t product = src1.Int(vform, lane) * src2.Int(vform, lane);
    accum = sub_op ? (accum - product) : (accum + product);
    accum += round_const;
    accum = accum >> (esize - 1);

    // Clamp to the lane's signed range.
    if (accum > max_value) {
      accum = max_value;
    } else if (accum < min_value) {
      accum = min_value;
    }
    dst.SetInt(vform, lane, accum);
  }
  return dst;
}
4236
4237
4238
// Accumulating form: forwards to sqrdmlash with `sub_op` clear.
LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2,
                                   bool round) {
  const bool sub_op = false;
  return sqrdmlash(vform, dst, src1, src2, round, sub_op);
}
4245
4246
4247
// Subtracting form: forwards to sqrdmlash with `sub_op` set.
LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2,
                                   bool round) {
  const bool sub_op = true;
  return sqrdmlash(vform, dst, src1, src2, round, sub_op);
}
4254
4255
4256
// Non-rounding form: forwards to sqrdmulh with `round` clear.
LogicVRegister Simulator::sqdmulh(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2) {
  const bool round = false;
  return sqrdmulh(vform, dst, src1, src2, round);
}
4262
4263
4264
// Adds the sources in the double-width format, then uses shrn with a shift
// of one destination lane width to narrow the sum into `dst`.
LogicVRegister Simulator::addhn(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide_sum;
  const VectorFormat wide_form = VectorFormatDoubleWidth(vform);
  const int narrow_bits = LaneSizeInBitsFromFormat(vform);
  add(wide_form, wide_sum, src1, src2);
  shrn(vform, dst, wide_sum, narrow_bits);
  return dst;
}
4273
4274
4275
// "2" form of addhn: the sum is computed in the double-width form of `vform`
// with half its lanes, then narrowed into `dst` with shrn2.
LogicVRegister Simulator::addhn2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_sum;
  const VectorFormat wide_form =
      VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
  const int narrow_bits = LaneSizeInBitsFromFormat(vform);
  add(wide_form, wide_sum, src1, src2);
  shrn2(vform, dst, wide_sum, narrow_bits);
  return dst;
}
4284
4285
4286
// Rounding form of addhn: the double-width sum is narrowed into `dst` with
// rshrn, shifting by one destination lane width.
LogicVRegister Simulator::raddhn(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_sum;
  const VectorFormat wide_form = VectorFormatDoubleWidth(vform);
  const int narrow_bits = LaneSizeInBitsFromFormat(vform);
  add(wide_form, wide_sum, src1, src2);
  rshrn(vform, dst, wide_sum, narrow_bits);
  return dst;
}
4295
4296
4297
// "2" form of raddhn: the sum is computed in the double-width form of
// `vform` with half its lanes, then narrowed into `dst` with rshrn2.
LogicVRegister Simulator::raddhn2(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2) {
  SimVRegister wide_sum;
  const VectorFormat wide_form =
      VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
  const int narrow_bits = LaneSizeInBitsFromFormat(vform);
  add(wide_form, wide_sum, src1, src2);
  rshrn2(vform, dst, wide_sum, narrow_bits);
  return dst;
}
4306
4307
4308
// Subtracts the sources in the double-width format, then uses shrn with a
// shift of one destination lane width to narrow the difference into `dst`.
LogicVRegister Simulator::subhn(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide_diff;
  const VectorFormat wide_form = VectorFormatDoubleWidth(vform);
  const int narrow_bits = LaneSizeInBitsFromFormat(vform);
  sub(wide_form, wide_diff, src1, src2);
  shrn(vform, dst, wide_diff, narrow_bits);
  return dst;
}
4317
4318
4319
// "2" form of subhn: the difference is computed in the double-width form of
// `vform` with half its lanes, then narrowed into `dst` with shrn2.
LogicVRegister Simulator::subhn2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_diff;
  const VectorFormat wide_form =
      VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
  const int narrow_bits = LaneSizeInBitsFromFormat(vform);
  sub(wide_form, wide_diff, src1, src2);
  shrn2(vform, dst, wide_diff, narrow_bits);
  return dst;
}
4328
4329
4330
// Rounding form of subhn: the double-width difference is narrowed into `dst`
// with rshrn, shifting by one destination lane width.
LogicVRegister Simulator::rsubhn(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_diff;
  const VectorFormat wide_form = VectorFormatDoubleWidth(vform);
  const int narrow_bits = LaneSizeInBitsFromFormat(vform);
  sub(wide_form, wide_diff, src1, src2);
  rshrn(vform, dst, wide_diff, narrow_bits);
  return dst;
}
4339
4340
4341
// Subtracts src2 from src1 in the double-width form of the half-lane-count
// format, then passes the intermediate to rshrn2 with a shift equal to the
// lane size of vform.
LogicVRegister Simulator::rsubhn2(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2) {
  const VectorFormat wide_form =
      VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
  SimVRegister wide_diff;
  sub(wide_form, wide_diff, src1, src2);
  rshrn2(vform, dst, wide_diff, LaneSizeInBitsFromFormat(vform));
  return dst;
}
4350
4351
4352
// Builds the result from the even-numbered lanes of both sources: result lane
// 2k takes src1 lane 2k and result lane 2k+1 takes src2 lane 2k. The lanes are
// staged in a local array and only then written to dst.
LogicVRegister Simulator::trn1(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  uint64_t staged[kZRegMaxSizeInBytes] = {};
  const int lane_count = LaneCountFromFormat(vform);
  const int paired_lanes = 2 * (lane_count / 2);

  for (int even = 0; even < paired_lanes; even += 2) {
    staged[even] = src1.Uint(vform, even);
    staged[even + 1] = src2.Uint(vform, even);
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, staged[lane]);
  }
  return dst;
}
4370
4371
4372
// Builds the result from the odd-numbered lanes of both sources: result lane
// 2k takes src1 lane 2k+1 and result lane 2k+1 takes src2 lane 2k+1. The lanes
// are staged in a local array and only then written to dst.
LogicVRegister Simulator::trn2(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  uint64_t staged[kZRegMaxSizeInBytes] = {};
  const int lane_count = LaneCountFromFormat(vform);
  const int paired_lanes = 2 * (lane_count / 2);

  for (int even = 0; even < paired_lanes; even += 2) {
    const int odd = even + 1;
    staged[even] = src1.Uint(vform, odd);
    staged[odd] = src2.Uint(vform, odd);
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, staged[lane]);
  }
  return dst;
}
4390
4391
4392
// Interleaves the lower halves of the sources: result lane 2k takes src1 lane
// k and result lane 2k+1 takes src2 lane k, for k below half the lane count.
// The lanes are staged in a local array and only then written to dst.
LogicVRegister Simulator::zip1(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  uint64_t staged[kZRegMaxSizeInBytes] = {};
  const int lane_count = LaneCountFromFormat(vform);
  const int half = lane_count / 2;

  for (int k = 0; k < half; ++k) {
    const int out = 2 * k;
    staged[out] = src1.Uint(vform, k);
    staged[out + 1] = src2.Uint(vform, k);
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, staged[lane]);
  }
  return dst;
}
4410
4411
4412
// Interleaves the upper halves of the sources: result lane 2k takes src1 lane
// (half + k) and result lane 2k+1 takes src2 lane (half + k), where half is
// the lane count divided by two. The lanes are staged in a local array and
// only then written to dst.
LogicVRegister Simulator::zip2(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  uint64_t staged[kZRegMaxSizeInBytes] = {};
  const int lane_count = LaneCountFromFormat(vform);
  const int half = lane_count / 2;

  for (int k = 0; k < half; ++k) {
    const int out = 2 * k;
    const int in = half + k;
    staged[out] = src1.Uint(vform, in);
    staged[out + 1] = src2.Uint(vform, in);
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, staged[lane]);
  }
  return dst;
}
4430
4431
4432
// Concatenates src1 (low part) and src2 (high part) into a local array, then
// writes every even-indexed element of that concatenation to dst.
LogicVRegister Simulator::uzp1(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  // Every element that is read back below is written in the first loop.
  uint64_t combined[kZRegMaxSizeInBytes * 2];
  const int lane_count = LaneCountFromFormat(vform);
  uint64_t* const high_part = combined + lane_count;

  for (int lane = 0; lane < lane_count; ++lane) {
    combined[lane] = src1.Uint(vform, lane);
    high_part[lane] = src2.Uint(vform, lane);
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, combined[2 * lane]);
  }
  return dst;
}
4449
4450
4451
// Concatenates src1 (low part) and src2 (high part) into a local array, then
// writes every odd-indexed element of that concatenation to dst.
LogicVRegister Simulator::uzp2(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  // Every element that is read back below is written in the first loop.
  uint64_t combined[kZRegMaxSizeInBytes * 2];
  const int lane_count = LaneCountFromFormat(vform);
  uint64_t* const high_part = combined + lane_count;

  for (int lane = 0; lane < lane_count; ++lane) {
    combined[lane] = src1.Uint(vform, lane);
    high_part[lane] = src2.Uint(vform, lane);
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, combined[(2 * lane) + 1]);
  }
  return dst;
}
4468
4469
LogicVRegister Simulator::interleave_top_bottom(VectorFormat vform,
                                                LogicVRegister dst,
                                                const LogicVRegister& src) {
  // Interleave the bottom and top halves of a single vector. Given:
  //
  //   [ ... | F | D | B | ... | E | C | A ]
  //
  // with B being the first element of the top half, the result is:
  //
  //   [ ... | ... | F | E | D | C | B | A ]
  //
  // That is, result lane 2p comes from lane p and result lane 2p+1 comes from
  // lane (lane_count / 2) + p.
  uint64_t interleaved[kZRegMaxSizeInBytes] = {};
  const int lane_count = LaneCountFromFormat(vform);
  const int half = lane_count / 2;

  for (int pair = 0; (2 * pair) < lane_count; ++pair) {
    interleaved[2 * pair] = src.Uint(vform, pair);
    interleaved[(2 * pair) + 1] = src.Uint(vform, half + pair);
  }

  dst.SetUintArray(vform, interleaved);
  return dst;
}
4490
4491
template <typename T>
4492
T Simulator::FPNeg(T op) {
4493
return -op;
4494
}
4495
4496
template <typename T>
4497
T Simulator::FPAdd(T op1, T op2) {
4498
T result = FPProcessNaNs(op1, op2);
4499
if (IsNaN(result)) {
4500
return result;
4501
}
4502
4503
if (IsInf(op1) && IsInf(op2) && (op1 != op2)) {
4504
// inf + -inf returns the default NaN.
4505
FPProcessException();
4506
return FPDefaultNaN<T>();
4507
} else {
4508
// Other cases should be handled by standard arithmetic.
4509
return op1 + op2;
4510
}
4511
}
4512
4513
4514
template <typename T>
4515
T Simulator::FPSub(T op1, T op2) {
4516
// NaNs should be handled elsewhere.
4517
VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4518
4519
if (IsInf(op1) && IsInf(op2) && (op1 == op2)) {
4520
// inf - inf returns the default NaN.
4521
FPProcessException();
4522
return FPDefaultNaN<T>();
4523
} else {
4524
// Other cases should be handled by standard arithmetic.
4525
return op1 - op2;
4526
}
4527
}
4528
4529
template <typename T>
4530
T Simulator::FPMulNaNs(T op1, T op2) {
4531
T result = FPProcessNaNs(op1, op2);
4532
return IsNaN(result) ? result : FPMul(op1, op2);
4533
}
4534
4535
template <typename T>
4536
T Simulator::FPMul(T op1, T op2) {
4537
// NaNs should be handled elsewhere.
4538
VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4539
4540
if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
4541
// inf * 0.0 returns the default NaN.
4542
FPProcessException();
4543
return FPDefaultNaN<T>();
4544
} else {
4545
// Other cases should be handled by standard arithmetic.
4546
return op1 * op2;
4547
}
4548
}
4549
4550
4551
template <typename T>
4552
T Simulator::FPMulx(T op1, T op2) {
4553
if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
4554
// inf * 0.0 returns +/-2.0.
4555
T two = 2.0;
4556
return copysign(1.0, op1) * copysign(1.0, op2) * two;
4557
}
4558
return FPMul(op1, op2);
4559
}
4560
4561
4562
template <typename T>
4563
T Simulator::FPMulAdd(T a, T op1, T op2) {
4564
T result = FPProcessNaNs3(a, op1, op2);
4565
4566
T sign_a = copysign(1.0, a);
4567
T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
4568
bool isinf_prod = IsInf(op1) || IsInf(op2);
4569
bool operation_generates_nan =
4570
(IsInf(op1) && (op2 == 0.0)) || // inf * 0.0
4571
(IsInf(op2) && (op1 == 0.0)) || // 0.0 * inf
4572
(IsInf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf
4573
4574
if (IsNaN(result)) {
4575
// Generated NaNs override quiet NaNs propagated from a.
4576
if (operation_generates_nan && IsQuietNaN(a)) {
4577
FPProcessException();
4578
return FPDefaultNaN<T>();
4579
} else {
4580
return result;
4581
}
4582
}
4583
4584
// If the operation would produce a NaN, return the default NaN.
4585
if (operation_generates_nan) {
4586
FPProcessException();
4587
return FPDefaultNaN<T>();
4588
}
4589
4590
// Work around broken fma implementations for exact zero results: The sign of
4591
// exact 0.0 results is positive unless both a and op1 * op2 are negative.
4592
if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
4593
return ((sign_a < T(0.0)) && (sign_prod < T(0.0))) ? -0.0 : 0.0;
4594
}
4595
4596
result = FusedMultiplyAdd(op1, op2, a);
4597
VIXL_ASSERT(!IsNaN(result));
4598
4599
// Work around broken fma implementations for rounded zero results: If a is
4600
// 0.0, the sign of the result is the sign of op1 * op2 before rounding.
4601
if ((a == 0.0) && (result == 0.0)) {
4602
return copysign(0.0, sign_prod);
4603
}
4604
4605
return result;
4606
}
4607
4608
4609
template <typename T>
4610
T Simulator::FPDiv(T op1, T op2) {
4611
// NaNs should be handled elsewhere.
4612
VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4613
4614
if ((IsInf(op1) && IsInf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
4615
// inf / inf and 0.0 / 0.0 return the default NaN.
4616
FPProcessException();
4617
return FPDefaultNaN<T>();
4618
} else {
4619
if (op2 == 0.0) {
4620
FPProcessException();
4621
if (!IsNaN(op1)) {
4622
double op1_sign = copysign(1.0, op1);
4623
double op2_sign = copysign(1.0, op2);
4624
return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
4625
}
4626
}
4627
4628
// Other cases should be handled by standard arithmetic.
4629
return op1 / op2;
4630
}
4631
}
4632
4633
4634
template <typename T>
4635
T Simulator::FPSqrt(T op) {
4636
if (IsNaN(op)) {
4637
return FPProcessNaN(op);
4638
} else if (op < T(0.0)) {
4639
FPProcessException();
4640
return FPDefaultNaN<T>();
4641
} else {
4642
return sqrt(op);
4643
}
4644
}
4645
4646
4647
template <typename T>
4648
T Simulator::FPMax(T a, T b) {
4649
T result = FPProcessNaNs(a, b);
4650
if (IsNaN(result)) return result;
4651
4652
if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
4653
// a and b are zero, and the sign differs: return +0.0.
4654
return 0.0;
4655
} else {
4656
return (a > b) ? a : b;
4657
}
4658
}
4659
4660
4661
template <typename T>
4662
T Simulator::FPMaxNM(T a, T b) {
4663
if (IsQuietNaN(a) && !IsQuietNaN(b)) {
4664
a = kFP64NegativeInfinity;
4665
} else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
4666
b = kFP64NegativeInfinity;
4667
}
4668
4669
T result = FPProcessNaNs(a, b);
4670
return IsNaN(result) ? result : FPMax(a, b);
4671
}
4672
4673
4674
template <typename T>
4675
T Simulator::FPMin(T a, T b) {
4676
T result = FPProcessNaNs(a, b);
4677
if (IsNaN(result)) return result;
4678
4679
if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
4680
// a and b are zero, and the sign differs: return -0.0.
4681
return -0.0;
4682
} else {
4683
return (a < b) ? a : b;
4684
}
4685
}
4686
4687
4688
template <typename T>
4689
T Simulator::FPMinNM(T a, T b) {
4690
if (IsQuietNaN(a) && !IsQuietNaN(b)) {
4691
a = kFP64PositiveInfinity;
4692
} else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
4693
b = kFP64PositiveInfinity;
4694
}
4695
4696
T result = FPProcessNaNs(a, b);
4697
return IsNaN(result) ? result : FPMin(a, b);
4698
}
4699
4700
4701
template <typename T>
4702
T Simulator::FPRecipStepFused(T op1, T op2) {
4703
const T two = 2.0;
4704
if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
4705
return two;
4706
} else if (IsInf(op1) || IsInf(op2)) {
4707
// Return +inf if signs match, otherwise -inf.
4708
return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
4709
: kFP64NegativeInfinity;
4710
} else {
4711
return FusedMultiplyAdd(op1, op2, two);
4712
}
4713
}
4714
4715
// Reports whether value is a normal floating-point number, i.e. not zero,
// subnormal, infinite or NaN.
template <typename T>
bool IsNormal(T value) {
  return std::fpclassify(value) == FP_NORMAL;
}
4719
4720
template <>
4721
bool IsNormal(SimFloat16 value) {
4722
uint16_t rawbits = Float16ToRawbits(value);
4723
uint16_t exp_mask = 0x7c00;
4724
// Check that the exponent is neither all zeroes or all ones.
4725
return ((rawbits & exp_mask) != 0) && ((~rawbits & exp_mask) != 0);
4726
}
4727
4728
4729
template <typename T>
4730
T Simulator::FPRSqrtStepFused(T op1, T op2) {
4731
const T one_point_five = 1.5;
4732
const T two = 2.0;
4733
4734
if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
4735
return one_point_five;
4736
} else if (IsInf(op1) || IsInf(op2)) {
4737
// Return +inf if signs match, otherwise -inf.
4738
return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
4739
: kFP64NegativeInfinity;
4740
} else {
4741
// The multiply-add-halve operation must be fully fused, so avoid interim
4742
// rounding by checking which operand can be losslessly divided by two
4743
// before doing the multiply-add.
4744
if (IsNormal(op1 / two)) {
4745
return FusedMultiplyAdd(op1 / two, op2, one_point_five);
4746
} else if (IsNormal(op2 / two)) {
4747
return FusedMultiplyAdd(op1, op2 / two, one_point_five);
4748
} else {
4749
// Neither operand is normal after halving: the result is dominated by
4750
// the addition term, so just return that.
4751
return one_point_five;
4752
}
4753
}
4754
}
4755
4756
int32_t Simulator::FPToFixedJS(double value) {
4757
// The Z-flag is set when the conversion from double precision floating-point
4758
// to 32-bit integer is exact. If the source value is +/-Infinity, -0.0, NaN,
4759
// outside the bounds of a 32-bit integer, or isn't an exact integer then the
4760
// Z-flag is unset.
4761
int Z = 1;
4762
int32_t result;
4763
4764
if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
4765
(value == kFP64NegativeInfinity)) {
4766
// +/- zero and infinity all return zero, however -0 and +/- Infinity also
4767
// unset the Z-flag.
4768
result = 0.0;
4769
if ((value != 0.0) || std::signbit(value)) {
4770
Z = 0;
4771
}
4772
} else if (std::isnan(value)) {
4773
// NaN values unset the Z-flag and set the result to 0.
4774
FPProcessNaN(value);
4775
result = 0;
4776
Z = 0;
4777
} else {
4778
// All other values are converted to an integer representation, rounded
4779
// toward zero.
4780
double int_result = std::floor(value);
4781
double error = value - int_result;
4782
4783
if ((error != 0.0) && (int_result < 0.0)) {
4784
int_result++;
4785
}
4786
4787
// Constrain the value into the range [INT32_MIN, INT32_MAX]. We can almost
4788
// write a one-liner with std::round, but the behaviour on ties is incorrect
4789
// for our purposes.
4790
double mod_const = static_cast<double>(UINT64_C(1) << 32);
4791
double mod_error =
4792
(int_result / mod_const) - std::floor(int_result / mod_const);
4793
double constrained;
4794
if (mod_error == 0.5) {
4795
constrained = INT32_MIN;
4796
} else {
4797
constrained = int_result - mod_const * round(int_result / mod_const);
4798
}
4799
4800
VIXL_ASSERT(std::floor(constrained) == constrained);
4801
VIXL_ASSERT(constrained >= INT32_MIN);
4802
VIXL_ASSERT(constrained <= INT32_MAX);
4803
4804
// Take the bottom 32 bits of the result as a 32-bit integer.
4805
result = static_cast<int32_t>(constrained);
4806
4807
if ((int_result < INT32_MIN) || (int_result > INT32_MAX) ||
4808
(error != 0.0)) {
4809
// If the integer result is out of range or the conversion isn't exact,
4810
// take exception and unset the Z-flag.
4811
FPProcessException();
4812
Z = 0;
4813
}
4814
}
4815
4816
ReadNzcv().SetN(0);
4817
ReadNzcv().SetZ(Z);
4818
ReadNzcv().SetC(0);
4819
ReadNzcv().SetV(0);
4820
4821
return result;
4822
}
4823
4824
double Simulator::FPRoundIntCommon(double value, FPRounding round_mode) {
4825
VIXL_ASSERT((value != kFP64PositiveInfinity) &&
4826
(value != kFP64NegativeInfinity));
4827
VIXL_ASSERT(!IsNaN(value));
4828
4829
double int_result = std::floor(value);
4830
double error = value - int_result;
4831
switch (round_mode) {
4832
case FPTieAway: {
4833
// Take care of correctly handling the range ]-0.5, -0.0], which must
4834
// yield -0.0.
4835
if ((-0.5 < value) && (value < 0.0)) {
4836
int_result = -0.0;
4837
4838
} else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
4839
// If the error is greater than 0.5, or is equal to 0.5 and the integer
4840
// result is positive, round up.
4841
int_result++;
4842
}
4843
break;
4844
}
4845
case FPTieEven: {
4846
// Take care of correctly handling the range [-0.5, -0.0], which must
4847
// yield -0.0.
4848
if ((-0.5 <= value) && (value < 0.0)) {
4849
int_result = -0.0;
4850
4851
// If the error is greater than 0.5, or is equal to 0.5 and the integer
4852
// result is odd, round up.
4853
} else if ((error > 0.5) ||
4854
((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
4855
int_result++;
4856
}
4857
break;
4858
}
4859
case FPZero: {
4860
// If value>0 then we take floor(value)
4861
// otherwise, ceil(value).
4862
if (value < 0) {
4863
int_result = ceil(value);
4864
}
4865
break;
4866
}
4867
case FPNegativeInfinity: {
4868
// We always use floor(value).
4869
break;
4870
}
4871
case FPPositiveInfinity: {
4872
// Take care of correctly handling the range ]-1.0, -0.0], which must
4873
// yield -0.0.
4874
if ((-1.0 < value) && (value < 0.0)) {
4875
int_result = -0.0;
4876
4877
// If the error is non-zero, round up.
4878
} else if (error > 0.0) {
4879
int_result++;
4880
}
4881
break;
4882
}
4883
default:
4884
VIXL_UNIMPLEMENTED();
4885
}
4886
return int_result;
4887
}
4888
4889
double Simulator::FPRoundInt(double value, FPRounding round_mode) {
4890
if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
4891
(value == kFP64NegativeInfinity)) {
4892
return value;
4893
} else if (IsNaN(value)) {
4894
return FPProcessNaN(value);
4895
}
4896
return FPRoundIntCommon(value, round_mode);
4897
}
4898
4899
double Simulator::FPRoundInt(double value,
4900
FPRounding round_mode,
4901
FrintMode frint_mode) {
4902
if (frint_mode == kFrintToInteger) {
4903
return FPRoundInt(value, round_mode);
4904
}
4905
4906
VIXL_ASSERT((frint_mode == kFrintToInt32) || (frint_mode == kFrintToInt64));
4907
4908
if (value == 0.0) {
4909
return value;
4910
}
4911
4912
if ((value == kFP64PositiveInfinity) || (value == kFP64NegativeInfinity) ||
4913
IsNaN(value)) {
4914
if (frint_mode == kFrintToInt32) {
4915
return INT32_MIN;
4916
} else {
4917
return INT64_MIN;
4918
}
4919
}
4920
4921
double result = FPRoundIntCommon(value, round_mode);
4922
4923
// We want to compare `result > INT64_MAX` below, but INT64_MAX isn't exactly
4924
// representable as a double, and is rounded to (INT64_MAX + 1) when
4925
// converted. To avoid this, we compare `result >= int64_max_plus_one`
4926
// instead; this is safe because `result` is known to be integral, and
4927
// `int64_max_plus_one` is exactly representable as a double.
4928
constexpr uint64_t int64_max_plus_one = static_cast<uint64_t>(INT64_MAX) + 1;
4929
VIXL_STATIC_ASSERT(static_cast<uint64_t>(static_cast<double>(
4930
int64_max_plus_one)) == int64_max_plus_one);
4931
4932
if (frint_mode == kFrintToInt32) {
4933
if ((result > INT32_MAX) || (result < INT32_MIN)) {
4934
return INT32_MIN;
4935
}
4936
} else if ((result >= int64_max_plus_one) || (result < INT64_MIN)) {
4937
return INT64_MIN;
4938
}
4939
4940
return result;
4941
}
4942
4943
int16_t Simulator::FPToInt16(double value, FPRounding rmode) {
4944
value = FPRoundInt(value, rmode);
4945
if (value >= kHMaxInt) {
4946
return kHMaxInt;
4947
} else if (value < kHMinInt) {
4948
return kHMinInt;
4949
}
4950
return IsNaN(value) ? 0 : static_cast<int16_t>(value);
4951
}
4952
4953
4954
int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
4955
value = FPRoundInt(value, rmode);
4956
if (value >= kWMaxInt) {
4957
return kWMaxInt;
4958
} else if (value < kWMinInt) {
4959
return kWMinInt;
4960
}
4961
return IsNaN(value) ? 0 : static_cast<int32_t>(value);
4962
}
4963
4964
4965
int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
4966
value = FPRoundInt(value, rmode);
4967
// This is equivalent to "if (value >= kXMaxInt)" but avoids rounding issues
4968
// as a result of kMaxInt not being representable as a double.
4969
if (value >= 9223372036854775808.) {
4970
return kXMaxInt;
4971
} else if (value < kXMinInt) {
4972
return kXMinInt;
4973
}
4974
return IsNaN(value) ? 0 : static_cast<int64_t>(value);
4975
}
4976
4977
4978
uint16_t Simulator::FPToUInt16(double value, FPRounding rmode) {
4979
value = FPRoundInt(value, rmode);
4980
if (value >= kHMaxUInt) {
4981
return kHMaxUInt;
4982
} else if (value < 0.0) {
4983
return 0;
4984
}
4985
return IsNaN(value) ? 0 : static_cast<uint16_t>(value);
4986
}
4987
4988
4989
uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
4990
value = FPRoundInt(value, rmode);
4991
if (value >= kWMaxUInt) {
4992
return kWMaxUInt;
4993
} else if (value < 0.0) {
4994
return 0;
4995
}
4996
return IsNaN(value) ? 0 : static_cast<uint32_t>(value);
4997
}
4998
4999
5000
uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
5001
value = FPRoundInt(value, rmode);
5002
// This is equivalent to "if (value >= kXMaxUInt)" but avoids rounding issues
5003
// as a result of kMaxUInt not being representable as a double.
5004
if (value >= 18446744073709551616.) {
5005
return kXMaxUInt;
5006
} else if (value < 0.0) {
5007
return 0;
5008
}
5009
return IsNaN(value) ? 0 : static_cast<uint64_t>(value);
5010
}
5011
5012
5013
#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \
5014
template <typename T> \
5015
LogicVRegister Simulator::FN(VectorFormat vform, \
5016
LogicVRegister dst, \
5017
const LogicVRegister& src1, \
5018
const LogicVRegister& src2) { \
5019
dst.ClearForWrite(vform); \
5020
for (int i = 0; i < LaneCountFromFormat(vform); i++) { \
5021
T op1 = src1.Float<T>(i); \
5022
T op2 = src2.Float<T>(i); \
5023
T result; \
5024
if (PROCNAN) { \
5025
result = FPProcessNaNs(op1, op2); \
5026
if (!IsNaN(result)) { \
5027
result = OP(op1, op2); \
5028
} \
5029
} else { \
5030
result = OP(op1, op2); \
5031
} \
5032
dst.SetFloat(vform, i, result); \
5033
} \
5034
return dst; \
5035
} \
5036
\
5037
LogicVRegister Simulator::FN(VectorFormat vform, \
5038
LogicVRegister dst, \
5039
const LogicVRegister& src1, \
5040
const LogicVRegister& src2) { \
5041
if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { \
5042
FN<SimFloat16>(vform, dst, src1, src2); \
5043
} else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { \
5044
FN<float>(vform, dst, src1, src2); \
5045
} else { \
5046
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); \
5047
FN<double>(vform, dst, src1, src2); \
5048
} \
5049
return dst; \
5050
}
5051
NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
5052
#undef DEFINE_NEON_FP_VECTOR_OP
5053
5054
5055
// Multiplies src1 by src2 into a scratch register with fmul, then writes the
// fneg of that product to dst.
LogicVRegister Simulator::fnmul(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister scratch;
  return fneg(vform, dst, fmul(vform, scratch, src1, src2));
}
5063
5064
5065
template <typename T>
5066
LogicVRegister Simulator::frecps(VectorFormat vform,
5067
LogicVRegister dst,
5068
const LogicVRegister& src1,
5069
const LogicVRegister& src2) {
5070
dst.ClearForWrite(vform);
5071
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5072
T op1 = -src1.Float<T>(i);
5073
T op2 = src2.Float<T>(i);
5074
T result = FPProcessNaNs(op1, op2);
5075
dst.SetFloat(vform, i, IsNaN(result) ? result : FPRecipStepFused(op1, op2));
5076
}
5077
return dst;
5078
}
5079
5080
5081
// Selects the lane-typed frecps implementation from the lane size of vform.
LogicVRegister Simulator::frecps(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  const auto lane_bits = LaneSizeInBitsFromFormat(vform);
  if (lane_bits == kHRegSize) {
    frecps<SimFloat16>(vform, dst, src1, src2);
  } else if (lane_bits == kSRegSize) {
    frecps<float>(vform, dst, src1, src2);
  } else {
    VIXL_ASSERT(lane_bits == kDRegSize);
    frecps<double>(vform, dst, src1, src2);
  }
  return dst;
}
5095
5096
5097
template <typename T>
5098
LogicVRegister Simulator::frsqrts(VectorFormat vform,
5099
LogicVRegister dst,
5100
const LogicVRegister& src1,
5101
const LogicVRegister& src2) {
5102
dst.ClearForWrite(vform);
5103
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5104
T op1 = -src1.Float<T>(i);
5105
T op2 = src2.Float<T>(i);
5106
T result = FPProcessNaNs(op1, op2);
5107
dst.SetFloat(vform, i, IsNaN(result) ? result : FPRSqrtStepFused(op1, op2));
5108
}
5109
return dst;
5110
}
5111
5112
5113
// Selects the lane-typed frsqrts implementation from the lane size of vform.
LogicVRegister Simulator::frsqrts(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2) {
  const auto lane_bits = LaneSizeInBitsFromFormat(vform);
  if (lane_bits == kHRegSize) {
    frsqrts<SimFloat16>(vform, dst, src1, src2);
  } else if (lane_bits == kSRegSize) {
    frsqrts<float>(vform, dst, src1, src2);
  } else {
    VIXL_ASSERT(lane_bits == kDRegSize);
    frsqrts<double>(vform, dst, src1, src2);
  }
  return dst;
}
5127
5128
5129
template <typename T>
5130
LogicVRegister Simulator::fcmp(VectorFormat vform,
5131
LogicVRegister dst,
5132
const LogicVRegister& src1,
5133
const LogicVRegister& src2,
5134
Condition cond) {
5135
dst.ClearForWrite(vform);
5136
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5137
bool result = false;
5138
T op1 = src1.Float<T>(i);
5139
T op2 = src2.Float<T>(i);
5140
bool unordered = IsNaN(FPProcessNaNs(op1, op2));
5141
5142
switch (cond) {
5143
case eq:
5144
result = (op1 == op2);
5145
break;
5146
case ge:
5147
result = (op1 >= op2);
5148
break;
5149
case gt:
5150
result = (op1 > op2);
5151
break;
5152
case le:
5153
result = (op1 <= op2);
5154
break;
5155
case lt:
5156
result = (op1 < op2);
5157
break;
5158
case ne:
5159
result = (op1 != op2);
5160
break;
5161
case uo:
5162
result = unordered;
5163
break;
5164
default:
5165
// Other conditions are defined in terms of those above.
5166
VIXL_UNREACHABLE();
5167
break;
5168
}
5169
5170
if (result && unordered) {
5171
// Only `uo` and `ne` can be true for unordered comparisons.
5172
VIXL_ASSERT((cond == uo) || (cond == ne));
5173
}
5174
5175
dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
5176
}
5177
return dst;
5178
}
5179
5180
5181
// Selects the lane-typed fcmp implementation from the lane size of vform.
LogicVRegister Simulator::fcmp(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2,
                               Condition cond) {
  const auto lane_bits = LaneSizeInBitsFromFormat(vform);
  if (lane_bits == kHRegSize) {
    fcmp<SimFloat16>(vform, dst, src1, src2, cond);
  } else if (lane_bits == kSRegSize) {
    fcmp<float>(vform, dst, src1, src2, cond);
  } else {
    VIXL_ASSERT(lane_bits == kDRegSize);
    fcmp<double>(vform, dst, src1, src2, cond);
  }
  return dst;
}
5196
5197
5198
// Compares every lane of src against zero: fills a scratch register with the
// raw bits of 0.0 for the lane type and runs the matching lane-typed fcmp.
LogicVRegister Simulator::fcmp_zero(VectorFormat vform,
                                    LogicVRegister dst,
                                    const LogicVRegister& src,
                                    Condition cond) {
  SimVRegister scratch;
  const auto lane_bits = LaneSizeInBitsFromFormat(vform);
  if (lane_bits == kHRegSize) {
    LogicVRegister zeroes =
        dup_immediate(vform, scratch, Float16ToRawbits(SimFloat16(0.0)));
    fcmp<SimFloat16>(vform, dst, src, zeroes, cond);
  } else if (lane_bits == kSRegSize) {
    LogicVRegister zeroes = dup_immediate(vform, scratch, FloatToRawbits(0.0));
    fcmp<float>(vform, dst, src, zeroes, cond);
  } else {
    VIXL_ASSERT(lane_bits == kDRegSize);
    LogicVRegister zeroes = dup_immediate(vform, scratch, DoubleToRawbits(0.0));
    fcmp<double>(vform, dst, src, zeroes, cond);
  }
  return dst;
}
5217
5218
5219
// Compares the absolute values of the sources: applies fabs_ to each source
// into a scratch register, then runs the matching lane-typed fcmp.
LogicVRegister Simulator::fabscmp(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2,
                                  Condition cond) {
  SimVRegister scratch1, scratch2;
  const auto lane_bits = LaneSizeInBitsFromFormat(vform);
  if (lane_bits == kHRegSize) {
    LogicVRegister magnitude1 = fabs_<SimFloat16>(vform, scratch1, src1);
    LogicVRegister magnitude2 = fabs_<SimFloat16>(vform, scratch2, src2);
    fcmp<SimFloat16>(vform, dst, magnitude1, magnitude2, cond);
  } else if (lane_bits == kSRegSize) {
    LogicVRegister magnitude1 = fabs_<float>(vform, scratch1, src1);
    LogicVRegister magnitude2 = fabs_<float>(vform, scratch2, src2);
    fcmp<float>(vform, dst, magnitude1, magnitude2, cond);
  } else {
    VIXL_ASSERT(lane_bits == kDRegSize);
    LogicVRegister magnitude1 = fabs_<double>(vform, scratch1, src1);
    LogicVRegister magnitude2 = fabs_<double>(vform, scratch2, src2);
    fcmp<double>(vform, dst, magnitude1, magnitude2, cond);
  }
  return dst;
}
5241
5242
5243
template <typename T>
5244
LogicVRegister Simulator::fmla(VectorFormat vform,
5245
LogicVRegister dst,
5246
const LogicVRegister& srca,
5247
const LogicVRegister& src1,
5248
const LogicVRegister& src2) {
5249
dst.ClearForWrite(vform);
5250
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5251
T op1 = src1.Float<T>(i);
5252
T op2 = src2.Float<T>(i);
5253
T acc = srca.Float<T>(i);
5254
T result = FPMulAdd(acc, op1, op2);
5255
dst.SetFloat(vform, i, result);
5256
}
5257
return dst;
5258
}
5259
5260
5261
// Selects the lane-typed fmla implementation from the lane size of vform.
LogicVRegister Simulator::fmla(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& srca,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  const auto lane_bits = LaneSizeInBitsFromFormat(vform);
  if (lane_bits == kHRegSize) {
    fmla<SimFloat16>(vform, dst, srca, src1, src2);
  } else if (lane_bits == kSRegSize) {
    fmla<float>(vform, dst, srca, src1, src2);
  } else {
    VIXL_ASSERT(lane_bits == kDRegSize);
    fmla<double>(vform, dst, srca, src1, src2);
  }
  return dst;
}
5276
5277
5278
template <typename T>
5279
LogicVRegister Simulator::fmls(VectorFormat vform,
5280
LogicVRegister dst,
5281
const LogicVRegister& srca,
5282
const LogicVRegister& src1,
5283
const LogicVRegister& src2) {
5284
dst.ClearForWrite(vform);
5285
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5286
T op1 = -src1.Float<T>(i);
5287
T op2 = src2.Float<T>(i);
5288
T acc = srca.Float<T>(i);
5289
T result = FPMulAdd(acc, op1, op2);
5290
dst.SetFloat(i, result);
5291
}
5292
return dst;
5293
}
5294
5295
5296
// Selects the lane-typed fmls implementation from the lane size of vform.
LogicVRegister Simulator::fmls(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& srca,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  const auto lane_bits = LaneSizeInBitsFromFormat(vform);
  if (lane_bits == kHRegSize) {
    fmls<SimFloat16>(vform, dst, srca, src1, src2);
  } else if (lane_bits == kSRegSize) {
    fmls<float>(vform, dst, srca, src1, src2);
  } else {
    VIXL_ASSERT(lane_bits == kDRegSize);
    fmls<double>(vform, dst, srca, src1, src2);
  }
  return dst;
}
5311
5312
5313
// Widening multiply-accumulate: for each single-precision lane i of dst, the
// half-precision lanes i of src1 and src2 are converted to float and
// accumulated into dst lane i with FPMulAdd.
LogicVRegister Simulator::fmlal(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    float multiplicand =
        FPToFloat(src1.Float<SimFloat16>(lane), kIgnoreDefaultNaN);
    float multiplier =
        FPToFloat(src2.Float<SimFloat16>(lane), kIgnoreDefaultNaN);
    float accumulator = dst.Float<float>(lane);
    dst.SetFloat(lane, FPMulAdd(accumulator, multiplicand, multiplier));
  }
  return dst;
}
5328
5329
5330
// Widening multiply-accumulate on the upper source lanes: for each
// single-precision lane i of dst, the half-precision lanes (i + lane count) of
// src1 and src2 are converted to float and accumulated into dst lane i with
// FPMulAdd.
LogicVRegister Simulator::fmlal2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const int src_lane = lane + lane_count;
    float multiplicand =
        FPToFloat(src1.Float<SimFloat16>(src_lane), kIgnoreDefaultNaN);
    float multiplier =
        FPToFloat(src2.Float<SimFloat16>(src_lane), kIgnoreDefaultNaN);
    float accumulator = dst.Float<float>(lane);
    dst.SetFloat(lane, FPMulAdd(accumulator, multiplicand, multiplier));
  }
  return dst;
}
5346
5347
5348
// Widening multiply-subtract: for each single-precision lane i of dst, the
// half-precision lanes i of src1 and src2 are converted to float, the src1
// value is negated, and the product is accumulated into dst lane i with
// FPMulAdd.
LogicVRegister Simulator::fmlsl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    float negated_multiplicand =
        -FPToFloat(src1.Float<SimFloat16>(lane), kIgnoreDefaultNaN);
    float multiplier =
        FPToFloat(src2.Float<SimFloat16>(lane), kIgnoreDefaultNaN);
    float accumulator = dst.Float<float>(lane);
    dst.SetFloat(lane,
                 FPMulAdd(accumulator, negated_multiplicand, multiplier));
  }
  return dst;
}
5363
5364
5365
// Widening multiply-subtract on the upper source lanes: for each
// single-precision lane i of dst, the half-precision lanes (i + lane count) of
// src1 and src2 are converted to float, the src1 value is negated, and the
// product is accumulated into dst lane i with FPMulAdd.
LogicVRegister Simulator::fmlsl2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const int src_lane = lane + lane_count;
    float negated_multiplicand =
        -FPToFloat(src1.Float<SimFloat16>(src_lane), kIgnoreDefaultNaN);
    float multiplier =
        FPToFloat(src2.Float<SimFloat16>(src_lane), kIgnoreDefaultNaN);
    float accumulator = dst.Float<float>(lane);
    dst.SetFloat(lane,
                 FPMulAdd(accumulator, negated_multiplicand, multiplier));
  }
  return dst;
}
5381
5382
5383
// By-element widening multiply-accumulate on the low FP16 lanes. FP16 lane
// `index` of src2 is converted to float once; for every 32-bit lane i of
// `vform`, FP16 lane i of src1 is converted to float and its product with that
// element is accumulated into float lane i of dst using FPMulAdd. Returns dst.
LogicVRegister Simulator::fmlal(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2,
                                int index) {
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
  dst.ClearForWrite(vform);
  // The indexed element is read before any lane of dst is written.
  float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
  // Walk the lanes from the top down. A float stored to lane i occupies the
  // storage of FP16 lanes 2i and 2i+1, so when dst refers to the same register
  // as src1, an ascending walk would overwrite FP16 lane 1 (and later lanes)
  // before it is read. Descending, every FP16 lane still to be read has an
  // index below i and therefore lies below the bytes just written.
  for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
    float op1 = FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
    float acc = dst.Float<float>(i);
    float result = FPMulAdd(acc, op1, op2);
    dst.SetFloat(i, result);
  }
  return dst;
}
5399
5400
5401
// By-element widening multiply-accumulate on the high FP16 lanes. FP16 lane
// `index` of src2 is converted to float once; float lane i of dst accumulates
// its product with FP16 lane (i + lane count) of src1. Returns dst.
LogicVRegister Simulator::fmlal2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2,
                                 int index) {
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
  dst.ClearForWrite(vform);
  const float element =
      FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
  int lane = 0;
  while (lane < LaneCountFromFormat(vform)) {
    const int upper = lane + LaneCountFromFormat(vform);
    const float multiplicand =
        FPToFloat(src1.Float<SimFloat16>(upper), kIgnoreDefaultNaN);
    const float addend = dst.Float<float>(lane);
    const float sum = FPMulAdd(addend, multiplicand, element);
    dst.SetFloat(lane, sum);
    lane++;
  }
  return dst;
}
5418
5419
5420
// By-element widening multiply-subtract on the low FP16 lanes. FP16 lane
// `index` of src2 is converted to float once; for every 32-bit lane i of
// `vform`, FP16 lane i of src1 is converted to float and negated, and its
// product with that element is accumulated into float lane i of dst using
// FPMulAdd. Returns dst.
LogicVRegister Simulator::fmlsl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2,
                                int index) {
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
  dst.ClearForWrite(vform);
  // The indexed element is read before any lane of dst is written.
  float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
  // Walk the lanes from the top down. A float stored to lane i occupies the
  // storage of FP16 lanes 2i and 2i+1, so when dst refers to the same register
  // as src1, an ascending walk would overwrite FP16 lane 1 (and later lanes)
  // before it is read. Descending, every FP16 lane still to be read has an
  // index below i and therefore lies below the bytes just written.
  for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
    float op1 = -FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
    float acc = dst.Float<float>(i);
    float result = FPMulAdd(acc, op1, op2);
    dst.SetFloat(i, result);
  }
  return dst;
}
5436
5437
5438
// By-element widening multiply-subtract on the high FP16 lanes. FP16 lane
// `index` of src2 is converted to float once; float lane i of dst accumulates
// its product with the negated FP16 lane (i + lane count) of src1.
// Returns dst.
LogicVRegister Simulator::fmlsl2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2,
                                 int index) {
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
  dst.ClearForWrite(vform);
  const float element =
      FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
  int lane = 0;
  while (lane < LaneCountFromFormat(vform)) {
    const int upper = lane + LaneCountFromFormat(vform);
    const float multiplicand =
        -FPToFloat(src1.Float<SimFloat16>(upper), kIgnoreDefaultNaN);
    const float addend = dst.Float<float>(lane);
    const float sum = FPMulAdd(addend, multiplicand, element);
    dst.SetFloat(lane, sum);
    lane++;
  }
  return dst;
}
5455
5456
5457
template <typename T>
5458
LogicVRegister Simulator::fneg(VectorFormat vform,
5459
LogicVRegister dst,
5460
const LogicVRegister& src) {
5461
dst.ClearForWrite(vform);
5462
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5463
T op = src.Float<T>(i);
5464
op = -op;
5465
dst.SetFloat(i, op);
5466
}
5467
return dst;
5468
}
5469
5470
5471
// Negates every lane of src into dst, selecting the lane type (SimFloat16,
// float or double) from the lane size of `vform`.
LogicVRegister Simulator::fneg(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src) {
  switch (LaneSizeInBitsFromFormat(vform)) {
    case kHRegSize:
      fneg<SimFloat16>(vform, dst, src);
      break;
    case kSRegSize:
      fneg<float>(vform, dst, src);
      break;
    default:
      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
      fneg<double>(vform, dst, src);
      break;
  }
  return dst;
}
5484
5485
5486
template <typename T>
5487
LogicVRegister Simulator::fabs_(VectorFormat vform,
5488
LogicVRegister dst,
5489
const LogicVRegister& src) {
5490
dst.ClearForWrite(vform);
5491
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5492
T op = src.Float<T>(i);
5493
if (copysign(1.0, op) < 0.0) {
5494
op = -op;
5495
}
5496
dst.SetFloat(i, op);
5497
}
5498
return dst;
5499
}
5500
5501
5502
// Writes the absolute value of every lane of src into dst, selecting the lane
// type (SimFloat16, float or double) from the lane size of `vform`.
LogicVRegister Simulator::fabs_(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  switch (LaneSizeInBitsFromFormat(vform)) {
    case kHRegSize:
      fabs_<SimFloat16>(vform, dst, src);
      break;
    case kSRegSize:
      fabs_<float>(vform, dst, src);
      break;
    default:
      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
      fabs_<double>(vform, dst, src);
      break;
  }
  return dst;
}
5515
5516
5517
// Absolute difference: computes src1 - src2 with fsub into a scratch register,
// then stores the absolute value of that difference in dst.
LogicVRegister Simulator::fabd(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  SimVRegister difference;
  fsub(vform, difference, src1, src2);
  fabs_(vform, dst, difference);
  return dst;
}
5526
5527
5528
// Lane-wise square root. Each lane of src is passed through FPSqrt using the
// lane type (SimFloat16, float or double) that matches the lane size of
// `vform`, and the result is stored to the same lane of dst.
LogicVRegister Simulator::fsqrt(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  switch (LaneSizeInBitsFromFormat(vform)) {
    case kHRegSize:
      for (int lane = 0; lane < lane_count; ++lane) {
        SimFloat16 root = FPSqrt(src.Float<SimFloat16>(lane));
        dst.SetFloat(lane, root);
      }
      break;
    case kSRegSize:
      for (int lane = 0; lane < lane_count; ++lane) {
        float root = FPSqrt(src.Float<float>(lane));
        dst.SetFloat(lane, root);
      }
      break;
    default:
      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
      for (int lane = 0; lane < lane_count; ++lane) {
        double root = FPSqrt(src.Float<double>(lane));
        dst.SetFloat(lane, root);
      }
      break;
  }
  return dst;
}
5551
5552
5553
#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \
5554
LogicVRegister Simulator::FNP(VectorFormat vform, \
5555
LogicVRegister dst, \
5556
const LogicVRegister& src1, \
5557
const LogicVRegister& src2) { \
5558
SimVRegister temp1, temp2; \
5559
uzp1(vform, temp1, src1, src2); \
5560
uzp2(vform, temp2, src1, src2); \
5561
FN(vform, dst, temp1, temp2); \
5562
if (IsSVEFormat(vform)) { \
5563
interleave_top_bottom(vform, dst, dst); \
5564
} \
5565
return dst; \
5566
} \
5567
\
5568
LogicVRegister Simulator::FNP(VectorFormat vform, \
5569
LogicVRegister dst, \
5570
const LogicVRegister& src) { \
5571
if (vform == kFormatH) { \
5572
SimFloat16 result(OP(SimFloat16(RawbitsToFloat16(src.Uint(vform, 0))), \
5573
SimFloat16(RawbitsToFloat16(src.Uint(vform, 1))))); \
5574
dst.SetUint(vform, 0, Float16ToRawbits(result)); \
5575
} else if (vform == kFormatS) { \
5576
float result = OP(src.Float<float>(0), src.Float<float>(1)); \
5577
dst.SetFloat(0, result); \
5578
} else { \
5579
VIXL_ASSERT(vform == kFormatD); \
5580
double result = OP(src.Float<double>(0), src.Float<double>(1)); \
5581
dst.SetFloat(0, result); \
5582
} \
5583
dst.ClearForWrite(vform); \
5584
return dst; \
5585
}
5586
NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
5587
#undef DEFINE_NEON_FP_PAIR_OP
5588
5589
template <typename T>
5590
LogicVRegister Simulator::FPPairedAcrossHelper(VectorFormat vform,
5591
LogicVRegister dst,
5592
const LogicVRegister& src,
5593
typename TFPPairOp<T>::type fn,
5594
uint64_t inactive_value) {
5595
int lane_count = LaneCountFromFormat(vform);
5596
T result[kZRegMaxSizeInBytes / sizeof(T)];
5597
// Copy the source vector into a working array. Initialise the unused elements
5598
// at the end of the array to the same value that a false predicate would set.
5599
for (int i = 0; i < static_cast<int>(ArrayLength(result)); i++) {
5600
result[i] = (i < lane_count)
5601
? src.Float<T>(i)
5602
: RawbitsWithSizeToFP<T>(sizeof(T) * 8, inactive_value);
5603
}
5604
5605
// Pairwise reduce the elements to a single value, using the pair op function
5606
// argument.
5607
for (int step = 1; step < lane_count; step *= 2) {
5608
for (int i = 0; i < lane_count; i += step * 2) {
5609
result[i] = (this->*fn)(result[i], result[i + step]);
5610
}
5611
}
5612
dst.ClearForWrite(ScalarFormatFromFormat(vform));
5613
dst.SetFloat<T>(0, result[0]);
5614
return dst;
5615
}
5616
5617
// Dispatches an across-lanes pairwise reduction to the templated helper whose
// element type matches the lane size of `vform`, forwarding the matching
// pair-operation pointer (fn16, fn32 or fn64) and `inactive_value`.
LogicVRegister Simulator::FPPairedAcrossHelper(
    VectorFormat vform,
    LogicVRegister dst,
    const LogicVRegister& src,
    typename TFPPairOp<SimFloat16>::type fn16,
    typename TFPPairOp<float>::type fn32,
    typename TFPPairOp<double>::type fn64,
    uint64_t inactive_value) {
  const unsigned lane_bits = LaneSizeInBitsFromFormat(vform);
  if (lane_bits == kHRegSize) {
    return FPPairedAcrossHelper<SimFloat16>(vform,
                                            dst,
                                            src,
                                            fn16,
                                            inactive_value);
  }
  if (lane_bits == kSRegSize) {
    return FPPairedAcrossHelper<float>(vform, dst, src, fn32, inactive_value);
  }
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
  return FPPairedAcrossHelper<double>(vform, dst, src, fn64, inactive_value);
}
5643
5644
// Across-lanes floating-point add: reduces src with FPAdd, using raw value 0
// for working slots beyond the lanes of `vform`.
LogicVRegister Simulator::faddv(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  const uint64_t inactive_value = 0;
  return FPPairedAcrossHelper(vform,
                              dst,
                              src,
                              &Simulator::FPAdd<SimFloat16>,
                              &Simulator::FPAdd<float>,
                              &Simulator::FPAdd<double>,
                              inactive_value);
}
5655
5656
// Across-lanes floating-point maximum: reduces src with FPMax. Working slots
// beyond the lanes of `vform` are filled with negative infinity encoded at
// the lane size.
LogicVRegister Simulator::fmaxv(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  int lane_size = LaneSizeInBitsFromFormat(vform);
  const uint64_t padding_bits =
      FPToRawbitsWithSize(lane_size, kFP64NegativeInfinity);
  return FPPairedAcrossHelper(vform,
                              dst,
                              src,
                              &Simulator::FPMax<SimFloat16>,
                              &Simulator::FPMax<float>,
                              &Simulator::FPMax<double>,
                              padding_bits);
}
5670
5671
5672
// Across-lanes floating-point minimum: reduces src with FPMin. Working slots
// beyond the lanes of `vform` are filled with positive infinity encoded at
// the lane size.
LogicVRegister Simulator::fminv(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  int lane_size = LaneSizeInBitsFromFormat(vform);
  const uint64_t padding_bits =
      FPToRawbitsWithSize(lane_size, kFP64PositiveInfinity);
  return FPPairedAcrossHelper(vform,
                              dst,
                              src,
                              &Simulator::FPMin<SimFloat16>,
                              &Simulator::FPMin<float>,
                              &Simulator::FPMin<double>,
                              padding_bits);
}
5686
5687
5688
// Across-lanes reduction of src with FPMaxNM. Working slots beyond the lanes
// of `vform` are filled with the default NaN encoded at the lane size.
LogicVRegister Simulator::fmaxnmv(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src) {
  int lane_size = LaneSizeInBitsFromFormat(vform);
  const uint64_t padding_bits =
      FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
  return FPPairedAcrossHelper(vform,
                              dst,
                              src,
                              &Simulator::FPMaxNM<SimFloat16>,
                              &Simulator::FPMaxNM<float>,
                              &Simulator::FPMaxNM<double>,
                              padding_bits);
}
5701
5702
5703
// Across-lanes reduction of src with FPMinNM. Working slots beyond the lanes
// of `vform` are filled with the default NaN encoded at the lane size.
LogicVRegister Simulator::fminnmv(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src) {
  int lane_size = LaneSizeInBitsFromFormat(vform);
  const uint64_t padding_bits =
      FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
  return FPPairedAcrossHelper(vform,
                              dst,
                              src,
                              &Simulator::FPMinNM<SimFloat16>,
                              &Simulator::FPMinNM<float>,
                              &Simulator::FPMinNM<double>,
                              padding_bits);
}
5716
5717
5718
// By-element multiply: lane `index` of src2 is replicated into a scratch
// register with dup_element, then the lane-wise fmul for the matching lane
// type is applied to src1 and that register.
LogicVRegister Simulator::fmul(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2,
                               int index) {
  dst.ClearForWrite(vform);
  SimVRegister scratch;
  switch (LaneSizeInBitsFromFormat(vform)) {
    case kHRegSize: {
      LogicVRegister element = dup_element(kFormat8H, scratch, src2, index);
      fmul<SimFloat16>(vform, dst, src1, element);
      break;
    }
    case kSRegSize: {
      LogicVRegister element = dup_element(kFormat4S, scratch, src2, index);
      fmul<float>(vform, dst, src1, element);
      break;
    }
    default: {
      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
      LogicVRegister element = dup_element(kFormat2D, scratch, src2, index);
      fmul<double>(vform, dst, src1, element);
      break;
    }
  }
  return dst;
}
5738
5739
5740
// By-element multiply-accumulate: lane `index` of src2 is replicated into a
// scratch register with dup_element, then the lane-wise fmla for the matching
// lane type is called with dst as both destination and accumulator.
LogicVRegister Simulator::fmla(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2,
                               int index) {
  dst.ClearForWrite(vform);
  SimVRegister scratch;
  switch (LaneSizeInBitsFromFormat(vform)) {
    case kHRegSize: {
      LogicVRegister element = dup_element(kFormat8H, scratch, src2, index);
      fmla<SimFloat16>(vform, dst, dst, src1, element);
      break;
    }
    case kSRegSize: {
      LogicVRegister element = dup_element(kFormat4S, scratch, src2, index);
      fmla<float>(vform, dst, dst, src1, element);
      break;
    }
    default: {
      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
      LogicVRegister element = dup_element(kFormat2D, scratch, src2, index);
      fmla<double>(vform, dst, dst, src1, element);
      break;
    }
  }
  return dst;
}
5760
5761
5762
// By-element multiply-subtract: lane `index` of src2 is replicated into a
// scratch register with dup_element, then the lane-wise fmls for the matching
// lane type is called with dst as both destination and accumulator.
LogicVRegister Simulator::fmls(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2,
                               int index) {
  dst.ClearForWrite(vform);
  SimVRegister scratch;
  switch (LaneSizeInBitsFromFormat(vform)) {
    case kHRegSize: {
      LogicVRegister element = dup_element(kFormat8H, scratch, src2, index);
      fmls<SimFloat16>(vform, dst, dst, src1, element);
      break;
    }
    case kSRegSize: {
      LogicVRegister element = dup_element(kFormat4S, scratch, src2, index);
      fmls<float>(vform, dst, dst, src1, element);
      break;
    }
    default: {
      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
      LogicVRegister element = dup_element(kFormat2D, scratch, src2, index);
      fmls<double>(vform, dst, dst, src1, element);
      break;
    }
  }
  return dst;
}
5782
5783
5784
// By-element fmulx: lane `index` of src2 is replicated into a scratch register
// with dup_element, then the lane-wise fmulx for the matching lane type is
// applied to src1 and that register.
LogicVRegister Simulator::fmulx(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2,
                                int index) {
  dst.ClearForWrite(vform);
  SimVRegister scratch;
  switch (LaneSizeInBitsFromFormat(vform)) {
    case kHRegSize: {
      LogicVRegister element = dup_element(kFormat8H, scratch, src2, index);
      fmulx<SimFloat16>(vform, dst, src1, element);
      break;
    }
    case kSRegSize: {
      LogicVRegister element = dup_element(kFormat4S, scratch, src2, index);
      fmulx<float>(vform, dst, src1, element);
      break;
    }
    default: {
      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
      LogicVRegister element = dup_element(kFormat2D, scratch, src2, index);
      fmulx<double>(vform, dst, src1, element);
      break;
    }
  }
  return dst;
}
5804
5805
5806
// Rounds every lane of src to an integral floating-point value with
// FPRoundInt and stores it in dst. When `inexact_exception` is set,
// FPProcessException is called for each non-NaN lane whose rounded value
// differs from its input. Half-precision lanes only support
// frint_mode == kFrintToInteger; `frint_mode` is forwarded to FPRoundInt for
// the other lane sizes.
LogicVRegister Simulator::frint(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                FPRounding rounding_mode,
                                bool inexact_exception,
                                FrintMode frint_mode) {
  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  switch (LaneSizeInBitsFromFormat(vform)) {
    case kHRegSize:
      VIXL_ASSERT(frint_mode == kFrintToInteger);
      for (int lane = 0; lane < lane_count; ++lane) {
        SimFloat16 value = src.Float<SimFloat16>(lane);
        SimFloat16 integral = FPRoundInt(value, rounding_mode);
        if (inexact_exception) {
          if (!IsNaN(value) && (value != integral)) {
            FPProcessException();
          }
        }
        dst.SetFloat<SimFloat16>(lane, integral);
      }
      break;
    case kSRegSize:
      for (int lane = 0; lane < lane_count; ++lane) {
        float value = src.Float<float>(lane);
        float integral = FPRoundInt(value, rounding_mode, frint_mode);
        if (inexact_exception) {
          if (!IsNaN(value) && (value != integral)) {
            FPProcessException();
          }
        }
        dst.SetFloat<float>(lane, integral);
      }
      break;
    default:
      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
      for (int lane = 0; lane < lane_count; ++lane) {
        double value = src.Float<double>(lane);
        double integral = FPRoundInt(value, rounding_mode, frint_mode);
        if (inexact_exception) {
          if (!IsNaN(value) && (value != integral)) {
            FPProcessException();
          }
        }
        dst.SetFloat<double>(lane, integral);
      }
      break;
  }
  return dst;
}
5846
5847
// Predicated floating-point precision conversion. Lanes are iterated using the
// SVE format of the wider of the source and destination lane sizes. For each
// lane active in `pg`, the low source-size bits are interpreted as a
// floating-point value, widened to double, re-encoded at the destination size
// and stored. Inactive lanes of dst are left untouched.
LogicVRegister Simulator::fcvt(VectorFormat dst_vform,
                               VectorFormat src_vform,
                               LogicVRegister dst,
                               const LogicPRegister& pg,
                               const LogicVRegister& src) {
  const unsigned dst_bits = LaneSizeInBitsFromFormat(dst_vform);
  const unsigned src_bits = LaneSizeInBitsFromFormat(src_vform);
  const VectorFormat vform =
      SVEFormatFromLaneSizeInBits(std::max(dst_bits, src_bits));

  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    if (pg.IsActive(vform, lane)) {
      const uint64_t input_bits =
          ExtractUnsignedBitfield64(src_bits - 1, 0, src.Uint(vform, lane));
      const double value = RawbitsWithSizeToFP<double>(src_bits, input_bits);
      const uint64_t output_bits = FPToRawbitsWithSize(dst_bits, value);
      dst.SetUint(vform, lane, output_bits);
    }
  }

  return dst;
}
5874
5875
// Predicated floating-point to signed integer conversion. For each lane
// active in `pg`, the low `src_data_size_in_bits` bits are interpreted as a
// floating-point value, multiplied by 2^fbits and converted with rounding mode
// `round` to a signed integer of `dst_data_size_in_bits` bits. Both sizes must
// not exceed the lane size of `vform`. Inactive lanes of dst are left
// untouched.
LogicVRegister Simulator::fcvts(VectorFormat vform,
                                unsigned dst_data_size_in_bits,
                                unsigned src_data_size_in_bits,
                                LogicVRegister dst,
                                const LogicPRegister& pg,
                                const LogicVRegister& src,
                                FPRounding round,
                                int fbits) {
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);

  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    if (pg.IsActive(vform, lane)) {
      const uint64_t input_bits =
          ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
                                    0,
                                    src.Uint(vform, lane));
      // Apply the fixed-point scaling before converting.
      const double scaled =
          RawbitsWithSizeToFP<double>(src_data_size_in_bits, input_bits) *
          std::pow(2.0, fbits);

      if (dst_data_size_in_bits == kHRegSize) {
        dst.SetInt(vform, lane, FPToInt16(scaled, round));
      } else if (dst_data_size_in_bits == kSRegSize) {
        dst.SetInt(vform, lane, FPToInt32(scaled, round));
      } else if (dst_data_size_in_bits == kDRegSize) {
        dst.SetInt(vform, lane, FPToInt64(scaled, round));
      } else {
        VIXL_UNIMPLEMENTED();
      }
    }
  }

  return dst;
}
5913
5914
// Unpredicated floating-point to signed integer conversion: forwards to the
// predicated overload with the all-true predicate from GetPTrue() and with
// source and destination sizes both equal to the lane size of `vform`.
LogicVRegister Simulator::fcvts(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                FPRounding round,
                                int fbits) {
  dst.ClearForWrite(vform);
  const unsigned lane_bits = LaneSizeInBitsFromFormat(vform);
  return fcvts(vform, lane_bits, lane_bits, dst, GetPTrue(), src, round, fbits);
}
5929
5930
// Predicated floating-point to unsigned integer conversion. For each lane
// active in `pg`, the low `src_data_size_in_bits` bits are interpreted as a
// floating-point value, multiplied by 2^fbits and converted with rounding mode
// `round` to an unsigned integer of `dst_data_size_in_bits` bits. Both sizes
// must not exceed the lane size of `vform`. Inactive lanes of dst are left
// untouched.
LogicVRegister Simulator::fcvtu(VectorFormat vform,
                                unsigned dst_data_size_in_bits,
                                unsigned src_data_size_in_bits,
                                LogicVRegister dst,
                                const LogicPRegister& pg,
                                const LogicVRegister& src,
                                FPRounding round,
                                int fbits) {
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);

  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    if (pg.IsActive(vform, lane)) {
      const uint64_t input_bits =
          ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
                                    0,
                                    src.Uint(vform, lane));
      // Apply the fixed-point scaling before converting.
      const double scaled =
          RawbitsWithSizeToFP<double>(src_data_size_in_bits, input_bits) *
          std::pow(2.0, fbits);

      if (dst_data_size_in_bits == kHRegSize) {
        dst.SetUint(vform, lane, FPToUInt16(scaled, round));
      } else if (dst_data_size_in_bits == kSRegSize) {
        dst.SetUint(vform, lane, FPToUInt32(scaled, round));
      } else if (dst_data_size_in_bits == kDRegSize) {
        dst.SetUint(vform, lane, FPToUInt64(scaled, round));
      } else {
        VIXL_UNIMPLEMENTED();
      }
    }
  }

  return dst;
}
5968
5969
// Unpredicated floating-point to unsigned integer conversion: forwards to the
// predicated overload with the all-true predicate from GetPTrue() and with
// source and destination sizes both equal to the lane size of `vform`.
LogicVRegister Simulator::fcvtu(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                FPRounding round,
                                int fbits) {
  dst.ClearForWrite(vform);
  const unsigned lane_bits = LaneSizeInBitsFromFormat(vform);
  return fcvtu(vform, lane_bits, lane_bits, dst, GetPTrue(), src, round, fbits);
}
5984
5985
// Widening conversion of the low source lanes. `vform` describes the
// destination: for 32-bit lanes, source lane i is read as raw FP16 bits and
// converted to float; for 64-bit lanes, float source lane i is converted to
// double. Lanes are processed from the highest index down to 0.
LogicVRegister Simulator::fcvtl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  int lane = LaneCountFromFormat(vform);
  if (LaneSizeInBitsFromFormat(vform) != kSRegSize) {
    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
    while (lane-- > 0) {
      dst.SetFloat(lane, FPToDouble(src.Float<float>(lane), ReadDN()));
    }
  } else {
    while (lane-- > 0) {
      // TODO: Full support for SimFloat16 in SimRegister(s).
      dst.SetFloat(lane,
                   FPToFloat(RawbitsToFloat16(src.Float<uint16_t>(lane)),
                             ReadDN()));
    }
  }
  return dst;
}
6004
6005
6006
// Widening conversion of the high source lanes. `vform` describes the
// destination: destination lane i is converted from source lane
// (i + lane count), from raw FP16 bits to float for 32-bit lanes or from float
// to double for 64-bit lanes. Lanes are processed in ascending order.
LogicVRegister Simulator::fcvtl2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  const int lanes = LaneCountFromFormat(vform);
  if (LaneSizeInBitsFromFormat(vform) != kSRegSize) {
    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
    for (int lane = 0; lane < lanes; ++lane) {
      dst.SetFloat(lane, FPToDouble(src.Float<float>(lane + lanes), ReadDN()));
    }
  } else {
    for (int lane = 0; lane < lanes; ++lane) {
      // TODO: Full support for SimFloat16 in SimRegister(s).
      dst.SetFloat(lane,
                   FPToFloat(RawbitsToFloat16(
                                 src.Float<uint16_t>(lane + lanes)),
                             ReadDN()));
    }
  }
  return dst;
}
6027
6028
6029
// Narrowing conversion with FPTieEven rounding. `vform` describes the
// destination: for 16-bit lanes, float source lanes are converted to FP16 and
// stored as raw bits; for 32-bit lanes, double source lanes are converted to
// float. The source is first copied into a scratch register with
// mov(kFormat2D, ...), so the lanes are read from that copy rather than from
// src.
LogicVRegister Simulator::fcvtn(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  SimVRegister scratch;
  LogicVRegister src_copy = mov(kFormat2D, scratch, src);
  dst.ClearForWrite(vform);
  const int lanes = LaneCountFromFormat(vform);
  if (LaneSizeInBitsFromFormat(vform) != kHRegSize) {
    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
    for (int lane = 0; lane < lanes; ++lane) {
      dst.SetFloat(lane,
                   FPToFloat(src_copy.Float<double>(lane),
                             FPTieEven,
                             ReadDN()));
    }
  } else {
    for (int lane = 0; lane < lanes; ++lane) {
      dst.SetFloat(lane,
                   Float16ToRawbits(FPToFloat16(src_copy.Float<float>(lane),
                                                FPTieEven,
                                                ReadDN())));
    }
  }
  return dst;
}
6050
6051
6052
// Narrowing conversion into the high half of dst with FPTieEven rounding.
// `vform` describes the destination; with `half` being half its lane count,
// source lane i is narrowed and stored to destination lane (i + half). Lanes
// are processed from the highest index down to 0.
LogicVRegister Simulator::fcvtn2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  const int half = LaneCountFromFormat(vform) / 2;
  int lane = half;
  if (LaneSizeInBitsFromFormat(vform) != kHRegSize) {
    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
    while (lane-- > 0) {
      dst.SetFloat(lane + half,
                   FPToFloat(src.Float<double>(lane), FPTieEven, ReadDN()));
    }
  } else {
    while (lane-- > 0) {
      dst.SetFloat(lane + half,
                   Float16ToRawbits(
                       FPToFloat16(src.Float<float>(lane),
                                   FPTieEven,
                                   ReadDN())));
    }
  }
  return dst;
}
6072
6073
6074
// Narrows double lanes to float using FPRoundOdd rounding. `vform` is the
// destination format and must have 32-bit lanes. The source is read from a
// scratch copy; for SVE formats the whole vector is copied and only half as
// many lanes as `vform` holds are converted.
LogicVRegister Simulator::fcvtxn(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src) {
  SimVRegister scratch;
  LogicVRegister src_copy = mov(kFormat2D, scratch, src);
  const bool is_sve = IsSVEFormat(vform);
  int lanes_to_convert = LaneCountFromFormat(vform);
  if (is_sve) {
    // Re-copy using the byte-lane SVE format (kFormatVnB).
    mov(kFormatVnB, scratch, src);
    lanes_to_convert /= 2;
  }

  dst.ClearForWrite(vform);
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);

  for (int lane = 0; lane < lanes_to_convert; ++lane) {
    dst.SetFloat(lane,
                 FPToFloat(src_copy.Float<double>(lane),
                           FPRoundOdd,
                           ReadDN()));
  }
  return dst;
}
6093
6094
6095
// Narrows double lanes to float using FPRoundOdd rounding and stores them in
// the high half of dst. With `half` being half the lane count of `vform`,
// source lane i goes to destination lane (i + half). Lanes are processed from
// the highest index down to 0.
LogicVRegister Simulator::fcvtxn2(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src) {
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
  dst.ClearForWrite(vform);
  const int half = LaneCountFromFormat(vform) / 2;
  int lane = half;
  while (lane-- > 0) {
    dst.SetFloat(lane + half,
                 FPToFloat(src.Float<double>(lane), FPRoundOdd, ReadDN()));
  }
  return dst;
}
6107
6108
6109
// Based on reference C function recip_sqrt_estimate from ARM ARM.
6110
double Simulator::recip_sqrt_estimate(double a) {
6111
int quot0, quot1, s;
6112
double r;
6113
if (a < 0.5) {
6114
quot0 = static_cast<int>(a * 512.0);
6115
r = 1.0 / sqrt((static_cast<double>(quot0) + 0.5) / 512.0);
6116
} else {
6117
quot1 = static_cast<int>(a * 256.0);
6118
r = 1.0 / sqrt((static_cast<double>(quot1) + 0.5) / 256.0);
6119
}
6120
s = static_cast<int>(256.0 * r + 0.5);
6121
return static_cast<double>(s) / 256.0;
6122
}
6123
6124
6125
static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
6126
return ExtractUnsignedBitfield64(start_bit, end_bit, val);
6127
}
6128
6129
6130
template <typename T>
6131
T Simulator::FPRecipSqrtEstimate(T op) {
6132
if (IsNaN(op)) {
6133
return FPProcessNaN(op);
6134
} else if (op == 0.0) {
6135
if (copysign(1.0, op) < 0.0) {
6136
return kFP64NegativeInfinity;
6137
} else {
6138
return kFP64PositiveInfinity;
6139
}
6140
} else if (copysign(1.0, op) < 0.0) {
6141
FPProcessException();
6142
return FPDefaultNaN<T>();
6143
} else if (IsInf(op)) {
6144
return 0.0;
6145
} else {
6146
uint64_t fraction;
6147
int exp, result_exp;
6148
6149
if (IsFloat16<T>()) {
6150
exp = Float16Exp(op);
6151
fraction = Float16Mantissa(op);
6152
fraction <<= 42;
6153
} else if (IsFloat32<T>()) {
6154
exp = FloatExp(op);
6155
fraction = FloatMantissa(op);
6156
fraction <<= 29;
6157
} else {
6158
VIXL_ASSERT(IsFloat64<T>());
6159
exp = DoubleExp(op);
6160
fraction = DoubleMantissa(op);
6161
}
6162
6163
if (exp == 0) {
6164
while (Bits(fraction, 51, 51) == 0) {
6165
fraction = Bits(fraction, 50, 0) << 1;
6166
exp -= 1;
6167
}
6168
fraction = Bits(fraction, 50, 0) << 1;
6169
}
6170
6171
double scaled;
6172
if (Bits(exp, 0, 0) == 0) {
6173
scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
6174
} else {
6175
scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44);
6176
}
6177
6178
if (IsFloat16<T>()) {
6179
result_exp = (44 - exp) / 2;
6180
} else if (IsFloat32<T>()) {
6181
result_exp = (380 - exp) / 2;
6182
} else {
6183
VIXL_ASSERT(IsFloat64<T>());
6184
result_exp = (3068 - exp) / 2;
6185
}
6186
6187
uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled));
6188
6189
if (IsFloat16<T>()) {
6190
uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
6191
uint16_t est_bits = static_cast<uint16_t>(Bits(estimate, 51, 42));
6192
return Float16Pack(0, exp_bits, est_bits);
6193
} else if (IsFloat32<T>()) {
6194
uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
6195
uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
6196
return FloatPack(0, exp_bits, est_bits);
6197
} else {
6198
VIXL_ASSERT(IsFloat64<T>());
6199
return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
6200
}
6201
}
6202
}
6203
6204
6205
// Lane-wise reciprocal square root estimate: each lane of src is passed to
// FPRecipSqrtEstimate for the lane type matching the lane size of `vform`, and
// the result is stored to the same lane of dst.
LogicVRegister Simulator::frsqrte(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  switch (LaneSizeInBitsFromFormat(vform)) {
    case kHRegSize:
      for (int lane = 0; lane < lane_count; ++lane) {
        SimFloat16 value = src.Float<SimFloat16>(lane);
        dst.SetFloat(vform, lane, FPRecipSqrtEstimate<SimFloat16>(value));
      }
      break;
    case kSRegSize:
      for (int lane = 0; lane < lane_count; ++lane) {
        float value = src.Float<float>(lane);
        dst.SetFloat(vform, lane, FPRecipSqrtEstimate<float>(value));
      }
      break;
    default:
      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
      for (int lane = 0; lane < lane_count; ++lane) {
        double value = src.Float<double>(lane);
        dst.SetFloat(vform, lane, FPRecipSqrtEstimate<double>(value));
      }
      break;
  }
  return dst;
}
6228
6229
template <typename T>
6230
T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
6231
uint32_t sign;
6232
6233
if (IsFloat16<T>()) {
6234
sign = Float16Sign(op);
6235
} else if (IsFloat32<T>()) {
6236
sign = FloatSign(op);
6237
} else {
6238
VIXL_ASSERT(IsFloat64<T>());
6239
sign = DoubleSign(op);
6240
}
6241
6242
if (IsNaN(op)) {
6243
return FPProcessNaN(op);
6244
} else if (IsInf(op)) {
6245
return (sign == 1) ? -0.0 : 0.0;
6246
} else if (op == 0.0) {
6247
FPProcessException(); // FPExc_DivideByZero exception.
6248
return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
6249
} else if ((IsFloat16<T>() && (std::fabs(op) < std::pow(2.0, -16.0))) ||
6250
(IsFloat32<T>() && (std::fabs(op) < std::pow(2.0, -128.0))) ||
6251
(IsFloat64<T>() && (std::fabs(op) < std::pow(2.0, -1024.0)))) {
6252
bool overflow_to_inf = false;
6253
switch (rounding) {
6254
case FPTieEven:
6255
overflow_to_inf = true;
6256
break;
6257
case FPPositiveInfinity:
6258
overflow_to_inf = (sign == 0);
6259
break;
6260
case FPNegativeInfinity:
6261
overflow_to_inf = (sign == 1);
6262
break;
6263
case FPZero:
6264
overflow_to_inf = false;
6265
break;
6266
default:
6267
break;
6268
}
6269
FPProcessException(); // FPExc_Overflow and FPExc_Inexact.
6270
if (overflow_to_inf) {
6271
return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
6272
} else {
6273
// Return FPMaxNormal(sign).
6274
if (IsFloat16<T>()) {
6275
return Float16Pack(sign, 0x1f, 0x3ff);
6276
} else if (IsFloat32<T>()) {
6277
return FloatPack(sign, 0xfe, 0x07fffff);
6278
} else {
6279
VIXL_ASSERT(IsFloat64<T>());
6280
return DoublePack(sign, 0x7fe, 0x0fffffffffffffl);
6281
}
6282
}
6283
} else {
6284
uint64_t fraction;
6285
int exp, result_exp;
6286
6287
if (IsFloat16<T>()) {
6288
sign = Float16Sign(op);
6289
exp = Float16Exp(op);
6290
fraction = Float16Mantissa(op);
6291
fraction <<= 42;
6292
} else if (IsFloat32<T>()) {
6293
sign = FloatSign(op);
6294
exp = FloatExp(op);
6295
fraction = FloatMantissa(op);
6296
fraction <<= 29;
6297
} else {
6298
VIXL_ASSERT(IsFloat64<T>());
6299
sign = DoubleSign(op);
6300
exp = DoubleExp(op);
6301
fraction = DoubleMantissa(op);
6302
}
6303
6304
if (exp == 0) {
6305
if (Bits(fraction, 51, 51) == 0) {
6306
exp -= 1;
6307
fraction = Bits(fraction, 49, 0) << 2;
6308
} else {
6309
fraction = Bits(fraction, 50, 0) << 1;
6310
}
6311
}
6312
6313
double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
6314
6315
if (IsFloat16<T>()) {
6316
result_exp = (29 - exp); // In range 29-30 = -1 to 29+1 = 30.
6317
} else if (IsFloat32<T>()) {
6318
result_exp = (253 - exp); // In range 253-254 = -1 to 253+1 = 254.
6319
} else {
6320
VIXL_ASSERT(IsFloat64<T>());
6321
result_exp = (2045 - exp); // In range 2045-2046 = -1 to 2045+1 = 2046.
6322
}
6323
6324
double estimate = recip_estimate(scaled);
6325
6326
fraction = DoubleMantissa(estimate);
6327
if (result_exp == 0) {
6328
fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
6329
} else if (result_exp == -1) {
6330
fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
6331
result_exp = 0;
6332
}
6333
if (IsFloat16<T>()) {
6334
uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
6335
uint16_t frac_bits = static_cast<uint16_t>(Bits(fraction, 51, 42));
6336
return Float16Pack(sign, exp_bits, frac_bits);
6337
} else if (IsFloat32<T>()) {
6338
uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
6339
uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
6340
return FloatPack(sign, exp_bits, frac_bits);
6341
} else {
6342
VIXL_ASSERT(IsFloat64<T>());
6343
return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
6344
}
6345
}
6346
}
6347
6348
6349
// Lane-wise reciprocal estimate: each lane of src is passed to FPRecipEstimate
// (with rounding mode `round`) for the lane type matching the lane size of
// `vform`, and the result is stored to the same lane of dst.
LogicVRegister Simulator::frecpe(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src,
                                 FPRounding round) {
  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  switch (LaneSizeInBitsFromFormat(vform)) {
    case kHRegSize:
      for (int lane = 0; lane < lane_count; ++lane) {
        SimFloat16 value = src.Float<SimFloat16>(lane);
        dst.SetFloat(vform, lane, FPRecipEstimate<SimFloat16>(value, round));
      }
      break;
    case kSRegSize:
      for (int lane = 0; lane < lane_count; ++lane) {
        float value = src.Float<float>(lane);
        dst.SetFloat(vform, lane, FPRecipEstimate<float>(value, round));
      }
      break;
    default:
      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
      for (int lane = 0; lane < lane_count; ++lane) {
        double value = src.Float<double>(lane);
        dst.SetFloat(vform, lane, FPRecipEstimate<double>(value, round));
      }
      break;
  }
  return dst;
}
6373
6374
6375
// Unsigned reciprocal square root estimate, lane by lane. Each lane is read
// as an unsigned integer; values up to 0x3FFFFFFF produce 0xFFFFFFFF. Larger
// values are scaled by 2^-32, passed through recip_sqrt_estimate, scaled by
// 2^31 and truncated to 32 bits.
LogicVRegister Simulator::ursqrte(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src) {
  dst.ClearForWrite(vform);

  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t input = src.Uint(vform, lane);
    uint32_t estimate = 0xFFFFFFFF;
    if (input > 0x3FFFFFFF) {
      const double scaled_input = input * std::pow(2.0, -32);
      const double scaled_estimate =
          recip_sqrt_estimate(scaled_input) * std::pow(2.0, 31);
      estimate = static_cast<uint32_t>(scaled_estimate);
    }
    dst.SetUint(vform, lane, estimate);
  }
  return dst;
}
6395
6396
6397
// Based on reference C function recip_estimate from ARM ARM.
6398
double Simulator::recip_estimate(double a) {
6399
int q, s;
6400
double r;
6401
q = static_cast<int>(a * 512.0);
6402
r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
6403
s = static_cast<int>(256.0 * r + 0.5);
6404
return static_cast<double>(s) / 256.0;
6405
}
6406
6407
6408
// Unsigned reciprocal estimate, lane by lane. Each lane is read as an
// unsigned integer; values up to 0x7FFFFFFF produce 0xFFFFFFFF. Larger values
// are scaled by 2^-32, passed through recip_estimate, scaled by 2^31 and
// truncated to 32 bits.
LogicVRegister Simulator::urecpe(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src) {
  dst.ClearForWrite(vform);

  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t input = src.Uint(vform, lane);
    uint32_t estimate = 0xFFFFFFFF;
    if (input > 0x7FFFFFFF) {
      const double scaled_input = input * std::pow(2.0, -32);
      const double scaled_estimate =
          recip_estimate(scaled_input) * std::pow(2.0, 31);
      estimate = static_cast<uint32_t>(scaled_estimate);
    }
    dst.SetUint(vform, lane, estimate);
  }
  return dst;
}
6428
6429
// Clears the predicate register `dst` and returns it.
LogicPRegister Simulator::pfalse(LogicPRegister dst) {
  dst.Clear();
  return dst;
}
6433
6434
// Copies `src` into `dst`, then additionally marks as active the byte lane
// reported by GetFirstActive for `pg`. A negative lane index from
// GetFirstActive leaves the copy unmodified.
LogicPRegister Simulator::pfirst(LogicPRegister dst,
                                 const LogicPRegister& pg,
                                 const LogicPRegister& src) {
  // Query `pg` before writing `dst`, matching the original evaluation order.
  const int first_active_lane = GetFirstActive(kFormatVnB, pg);
  VIXL_ASSERT(first_active_lane < LaneCountFromFormat(kFormatVnB));

  mov(dst, src);
  if (first_active_lane >= 0) {
    dst.SetActive(kFormatVnB, first_active_lane, true);
  }
  return dst;
}
6443
6444
// Sets the first N lanes of `dst` active and every remaining lane inactive,
// where N is the value GetPredicateConstraintLaneCount returns for `vform`
// and `pattern`.
LogicPRegister Simulator::ptrue(VectorFormat vform,
                                LogicPRegister dst,
                                int pattern) {
  const int active_lanes = GetPredicateConstraintLaneCount(vform, pattern);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const bool is_active = (lane < active_lanes);
    dst.SetActive(vform, lane, is_active);
  }
  return dst;
}
6453
6454
// Starting from the lane after the one reported by GetLastActive for `src`,
// finds the next lane that is active in `pg`. `dst` is then written so that
// only that lane is active; if no such lane exists, every lane of `dst` is
// made inactive.
LogicPRegister Simulator::pnext(VectorFormat vform,
                                LogicPRegister dst,
                                const LogicPRegister& pg,
                                const LogicPRegister& src) {
  const int lane_count = LaneCountFromFormat(vform);

  // The search completes before `dst` is written.
  int candidate = GetLastActive(vform, src) + 1;
  while ((candidate < lane_count) && !pg.IsActive(vform, candidate)) {
    ++candidate;
  }

  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetActive(vform, lane, (lane == candidate));
  }
  return dst;
}
6469
6470
template <typename T>
6471
LogicVRegister Simulator::frecpx(VectorFormat vform,
6472
LogicVRegister dst,
6473
const LogicVRegister& src) {
6474
dst.ClearForWrite(vform);
6475
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6476
T op = src.Float<T>(i);
6477
T result;
6478
if (IsNaN(op)) {
6479
result = FPProcessNaN(op);
6480
} else {
6481
int exp;
6482
uint32_t sign;
6483
if (IsFloat16<T>()) {
6484
sign = Float16Sign(op);
6485
exp = Float16Exp(op);
6486
exp = (exp == 0) ? (0x1F - 1) : static_cast<int>(Bits(~exp, 4, 0));
6487
result = Float16Pack(sign, exp, 0);
6488
} else if (IsFloat32<T>()) {
6489
sign = FloatSign(op);
6490
exp = FloatExp(op);
6491
exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
6492
result = FloatPack(sign, exp, 0);
6493
} else {
6494
VIXL_ASSERT(IsFloat64<T>());
6495
sign = DoubleSign(op);
6496
exp = DoubleExp(op);
6497
exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
6498
result = DoublePack(sign, exp, 0);
6499
}
6500
}
6501
dst.SetFloat(i, result);
6502
}
6503
return dst;
6504
}
6505
6506
6507
// Dispatches to the frecpx<T> helper matching the lane size of `vform`
// (SimFloat16, float or double) and returns `dst`.
LogicVRegister Simulator::frecpx(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src) {
  const unsigned lane_bits = LaneSizeInBitsFromFormat(vform);
  if (lane_bits == kHRegSize) {
    frecpx<SimFloat16>(vform, dst, src);
    return dst;
  }
  if (lane_bits == kSRegSize) {
    frecpx<float>(vform, dst, src);
    return dst;
  }
  // Anything that is not H- or S-sized must be D-sized.
  VIXL_ASSERT(lane_bits == kDRegSize);
  frecpx<double>(vform, dst, src);
  return dst;
}
6520
6521
// Writes the base-2 exponent of each floating-point lane of `src` to `dst` as
// a signed integer. Every lane is first widened to double. Infinities give
// MaxIntFromFormat(vform); NaNs and zeroes give MinIntFromFormat(vform).
LogicVRegister Simulator::flogb(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    double value = 0.0;
    if (vform == kFormatVnH) {
      value = FPToDouble(src.Float<SimFloat16>(lane), kIgnoreDefaultNaN);
    } else if (vform == kFormatVnS) {
      value = src.Float<float>(lane);
    } else if (vform == kFormatVnD) {
      value = src.Float<double>(lane);
    } else {
      VIXL_UNREACHABLE();
    }

    switch (std::fpclassify(value)) {
      case FP_INFINITE:
        dst.SetInt(vform, lane, MaxIntFromFormat(vform));
        break;
      case FP_NAN:
      case FP_ZERO:
        dst.SetInt(vform, lane, MinIntFromFormat(vform));
        break;
      case FP_SUBNORMAL: {
        // DoubleMantissa leaves 12 zero bits where the sign and exponent
        // would be, so discount them to count only the leading zeroes of the
        // mantissa itself.
        int64_t leading_zeros = CountLeadingZeros(DoubleMantissa(value)) - 12;
        // A subnormal's log2 is the lowest exponent a normal number can
        // represent, lowered further by the mantissa's leading zeroes.
        dst.SetInt(vform, lane, -1023 - leading_zeros);
        break;
      }
      case FP_NORMAL:
        // For a normal number, log2 is the exponent field minus the bias.
        dst.SetInt(vform,
                   lane,
                   static_cast<int64_t>(DoubleExp(value)) - 1023);
        break;
    }
  }
  return dst;
}
6566
6567
// Multiplies `src1` by itself, with the sign of the second operand flipped in
// every lane where the bottom bit of `src2` is set.
LogicVRegister Simulator::ftsmul(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister signed_operand;

  // Move bit 0 of src2 into the sign-bit position, then XOR it into src1 so
  // that the operand is negated exactly when that bit is set.
  const unsigned sign_bit_position = LaneSizeInBitsFromFormat(vform) - 1;
  shl(vform, signed_operand, src2, sign_bit_position);
  eor(vform, signed_operand, signed_operand, src1);

  // For NaN inputs, NaN * -NaN returns the first NaN intact, so the
  // unmodified src1 must be the first multiplicand.
  fmul(vform, dst, src1, signed_operand);

  return dst;
}
6585
6586
// For each lane, picks either the raw bits of `src1` or the encoding of 1.0
// (chosen by bit 0 of the `src2` lane), then flips the sign bit when bit 1 of
// the `src2` lane is set.
LogicVRegister Simulator::ftssel(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  const unsigned lane_size = LaneSizeInBitsFromFormat(vform);
  const uint64_t sign_mask = UINT64_C(1) << (lane_size - 1);

  // Raw encoding of 1.0 at the lane's precision.
  uint64_t one_bits;
  if (lane_size == kHRegSize) {
    one_bits = Float16ToRawbits(Float16(1.0));
  } else if (lane_size == kSRegSize) {
    one_bits = FloatToRawbits(1.0);
  } else {
    VIXL_ASSERT(lane_size == kDRegSize);
    one_bits = DoubleToRawbits(1.0);
  }

  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    // This is pure bit manipulation, so integer lane accessors are used
    // throughout.
    uint64_t value = src1.Uint(vform, lane);

    // Only the two lowest bits of src2 matter: bit 0 substitutes 1.0 for
    // src1, and bit 1 inverts the sign of whichever value was chosen.
    const uint64_t selector = src2.Uint(vform, lane);
    if ((selector & 1) == 1) {
      value = one_bits;
    }
    if ((selector & 2) == 2) {
      value ^= sign_mask;
    }

    dst.SetUint(vform, lane, value);
  }

  return dst;
}
6620
6621
template <typename T>
6622
LogicVRegister Simulator::FTMaddHelper(VectorFormat vform,
6623
LogicVRegister dst,
6624
const LogicVRegister& src1,
6625
const LogicVRegister& src2,
6626
uint64_t coeff_pos,
6627
uint64_t coeff_neg) {
6628
SimVRegister zero;
6629
dup_immediate(kFormatVnB, zero, 0);
6630
6631
SimVRegister cf;
6632
SimVRegister cfn;
6633
dup_immediate(vform, cf, coeff_pos);
6634
dup_immediate(vform, cfn, coeff_neg);
6635
6636
// The specification requires testing the top bit of the raw value, rather
6637
// than the sign of the floating point number, so use an integer comparison
6638
// here.
6639
SimPRegister is_neg;
6640
SVEIntCompareVectorsHelper(lt,
6641
vform,
6642
is_neg,
6643
GetPTrue(),
6644
src2,
6645
zero,
6646
false,
6647
LeaveFlags);
6648
mov_merging(vform, cf, is_neg, cfn);
6649
6650
SimVRegister temp;
6651
fabs_<T>(vform, temp, src2);
6652
fmla<T>(vform, cf, cf, src1, temp);
6653
mov(vform, dst, cf);
6654
return dst;
6655
}
6656
6657
6658
// Looks up a pair of coefficients by `index` and forwards them to
// FTMaddHelper at the precision selected by the lane size of `vform`. Each
// table holds sixteen entries: entry `index` is passed as the helper's
// coeff_pos argument and entry `index + 8` as its coeff_neg argument.
LogicVRegister Simulator::ftmad(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2,
                                unsigned index) {
  static const uint64_t ftmad_coeff16[] = {
      0x3c00, 0xb155, 0x2030, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
      0x3c00, 0xb800, 0x293a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000};

  static const uint64_t ftmad_coeff32[] = {
      0x3f800000, 0xbe2aaaab, 0x3c088886, 0xb95008b9,
      0x36369d6d, 0x00000000, 0x00000000, 0x00000000,
      0x3f800000, 0xbf000000, 0x3d2aaaa6, 0xbab60705,
      0x37cd37cc, 0x00000000, 0x00000000, 0x00000000};

  static const uint64_t ftmad_coeff64[] = {
      0x3ff0000000000000, 0xbfc5555555555543,
      0x3f8111111110f30c, 0xbf2a01a019b92fc6,
      0x3ec71de351f3d22b, 0xbe5ae5e2b60f7b91,
      0x3de5d8408868552f, 0x0000000000000000,
      0x3ff0000000000000, 0xbfe0000000000000,
      0x3fa5555555555536, 0xbf56c16c16c13a0b,
      0x3efa01a019b1e8d8, 0xbe927e4f7282f468,
      0x3e21ee96d2641b13, 0xbda8f76380fbb401};

  // Position of the second coefficient of the pair within each table.
  const unsigned neg_index = index + 8;
  VIXL_ASSERT(neg_index < ArrayLength(ftmad_coeff64));
  VIXL_ASSERT(ArrayLength(ftmad_coeff16) == ArrayLength(ftmad_coeff64));
  VIXL_ASSERT(ArrayLength(ftmad_coeff32) == ArrayLength(ftmad_coeff64));

  const unsigned lane_bits = LaneSizeInBitsFromFormat(vform);
  if (lane_bits == kHRegSize) {
    FTMaddHelper<SimFloat16>(vform,
                             dst,
                             src1,
                             src2,
                             ftmad_coeff16[index],
                             ftmad_coeff16[neg_index]);
  } else if (lane_bits == kSRegSize) {
    FTMaddHelper<float>(vform,
                        dst,
                        src1,
                        src2,
                        ftmad_coeff32[index],
                        ftmad_coeff32[neg_index]);
  } else {
    VIXL_ASSERT(lane_bits == kDRegSize);
    FTMaddHelper<double>(vform,
                         dst,
                         src1,
                         src2,
                         ftmad_coeff64[index],
                         ftmad_coeff64[neg_index]);
  }
  return dst;
}
6742
6743
// Builds each result lane from the raw bits of the `src` lane: the low bits
// index a per-precision coefficient table, and the bits immediately above
// them are shifted up by a per-precision amount and ORed into the table
// entry.
LogicVRegister Simulator::fexpa(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  static const uint64_t fexpa_coeff16[] = {0x0000, 0x0016, 0x002d, 0x0045,
                                           0x005d, 0x0075, 0x008e, 0x00a8,
                                           0x00c2, 0x00dc, 0x00f8, 0x0114,
                                           0x0130, 0x014d, 0x016b, 0x0189,
                                           0x01a8, 0x01c8, 0x01e8, 0x0209,
                                           0x022b, 0x024e, 0x0271, 0x0295,
                                           0x02ba, 0x02e0, 0x0306, 0x032e,
                                           0x0356, 0x037f, 0x03a9, 0x03d4};

  static const uint64_t fexpa_coeff32[] =
      {0x000000, 0x0164d2, 0x02cd87, 0x043a29, 0x05aac3, 0x071f62, 0x08980f,
       0x0a14d5, 0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc, 0x11c3d3, 0x135a2b,
       0x14f4f0, 0x16942d, 0x1837f0, 0x19e046, 0x1b8d3a, 0x1d3eda, 0x1ef532,
       0x20b051, 0x227043, 0x243516, 0x25fed7, 0x27cd94, 0x29a15b, 0x2b7a3a,
       0x2d583f, 0x2f3b79, 0x3123f6, 0x3311c4, 0x3504f3, 0x36fd92, 0x38fbaf,
       0x3aff5b, 0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd, 0x45672a, 0x478d75,
       0x49b9be, 0x4bec15, 0x4e248c, 0x506334, 0x52a81e, 0x54f35b, 0x5744fd,
       0x599d16, 0x5bfbb8, 0x5e60f5, 0x60ccdf, 0x633f89, 0x65b907, 0x68396a,
       0x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177, 0x75257d, 0x77d0df, 0x7a83b3,
       0x7d3e0c};

  static const uint64_t fexpa_coeff64[] =
      {0X0000000000000, 0X02c9a3e778061, 0X059b0d3158574, 0X0874518759bc8,
       0X0b5586cf9890f, 0X0e3ec32d3d1a2, 0X11301d0125b51, 0X1429aaea92de0,
       0X172b83c7d517b, 0X1a35beb6fcb75, 0X1d4873168b9aa, 0X2063b88628cd6,
       0X2387a6e756238, 0X26b4565e27cdd, 0X29e9df51fdee1, 0X2d285a6e4030b,
       0X306fe0a31b715, 0X33c08b26416ff, 0X371a7373aa9cb, 0X3a7db34e59ff7,
       0X3dea64c123422, 0X4160a21f72e2a, 0X44e086061892d, 0X486a2b5c13cd0,
       0X4bfdad5362a27, 0X4f9b2769d2ca7, 0X5342b569d4f82, 0X56f4736b527da,
       0X5ab07dd485429, 0X5e76f15ad2148, 0X6247eb03a5585, 0X6623882552225,
       0X6a09e667f3bcd, 0X6dfb23c651a2f, 0X71f75e8ec5f74, 0X75feb564267c9,
       0X7a11473eb0187, 0X7e2f336cf4e62, 0X82589994cce13, 0X868d99b4492ed,
       0X8ace5422aa0db, 0X8f1ae99157736, 0X93737b0cdc5e5, 0X97d829fde4e50,
       0X9c49182a3f090, 0Xa0c667b5de565, 0Xa5503b23e255d, 0Xa9e6b5579fdbf,
       0Xae89f995ad3ad, 0Xb33a2b84f15fb, 0Xb7f76f2fb5e47, 0Xbcc1e904bc1d2,
       0Xc199bdd85529c, 0Xc67f12e57d14b, 0Xcb720dcef9069, 0Xd072d4a07897c,
       0Xd5818dcfba487, 0Xda9e603db3285, 0Xdfc97337b9b5f, 0Xe502ee78b3ff6,
       0Xea4afa2a490da, 0Xefa1bee615a27, 0Xf50765b6e4540, 0Xfa7c1819e90d8};

  const unsigned lane_bits = LaneSizeInBitsFromFormat(vform);

  // Per-precision parameters: the top bit of the table index, the top bit of
  // the field placed above it, and how far that field is shifted up.
  int index_msb = 5;
  int field_msb = 0;
  int field_shift = 0;
  const uint64_t* table = nullptr;

  if (lane_bits == kHRegSize) {
    index_msb = 4;
    VIXL_ASSERT(ArrayLength(fexpa_coeff16) == (1U << (index_msb + 1)));
    table = fexpa_coeff16;
    field_msb = 9;
    field_shift = 10;
  } else if (lane_bits == kSRegSize) {
    VIXL_ASSERT(ArrayLength(fexpa_coeff32) == (1U << (index_msb + 1)));
    table = fexpa_coeff32;
    field_msb = 13;
    field_shift = 23;
  } else {
    VIXL_ASSERT(lane_bits == kDRegSize);
    VIXL_ASSERT(ArrayLength(fexpa_coeff64) == (1U << (index_msb + 1)));
    table = fexpa_coeff64;
    field_msb = 16;
    field_shift = 52;
  }

  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t raw = src.Uint(vform, lane);
    uint64_t encoded = table[Bits(raw, index_msb, 0)];
    encoded |= (Bits(raw, field_msb, index_msb + 1) << field_shift);
    dst.SetUint(vform, lane, encoded);
  }
  return dst;
}
6817
6818
template <typename T>
6819
LogicVRegister Simulator::fscale(VectorFormat vform,
6820
LogicVRegister dst,
6821
const LogicVRegister& src1,
6822
const LogicVRegister& src2) {
6823
T two = T(2.0);
6824
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6825
T src1_val = src1.Float<T>(i);
6826
if (!IsNaN(src1_val)) {
6827
int64_t scale = src2.Int(vform, i);
6828
// TODO: this is a low-performance implementation, but it's simple and
6829
// less likely to be buggy. Consider replacing it with something faster.
6830
6831
// Scales outside of these bounds become infinity or zero, so there's no
6832
// point iterating further.
6833
scale = std::min<int64_t>(std::max<int64_t>(scale, -2048), 2048);
6834
6835
// Compute src1_val * 2 ^ scale. If scale is positive, multiply by two and
6836
// decrement scale until it's zero.
6837
while (scale-- > 0) {
6838
src1_val = FPMul(src1_val, two);
6839
}
6840
6841
// If scale is negative, divide by two and increment scale until it's
6842
// zero. Initially, scale is (src2 - 1), so we pre-increment.
6843
while (++scale < 0) {
6844
src1_val = FPDiv(src1_val, two);
6845
}
6846
}
6847
dst.SetFloat<T>(i, src1_val);
6848
}
6849
return dst;
6850
}
6851
6852
// Dispatches to the fscale<T> helper matching the lane size of `vform`
// (SimFloat16, float or double) and returns `dst`.
LogicVRegister Simulator::fscale(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  const unsigned lane_bits = LaneSizeInBitsFromFormat(vform);
  if (lane_bits == kHRegSize) {
    fscale<SimFloat16>(vform, dst, src1, src2);
    return dst;
  }
  if (lane_bits == kSRegSize) {
    fscale<float>(vform, dst, src1, src2);
    return dst;
  }
  // Anything that is not H- or S-sized must be D-sized.
  VIXL_ASSERT(lane_bits == kDRegSize);
  fscale<double>(vform, dst, src1, src2);
  return dst;
}
6866
6867
// Signed fixed-point to floating-point conversion for the lanes active in
// `pg`. The low `src_data_size_in_bits` bits of each source lane are
// sign-extended, converted with `fbits` fractional bits and rounding mode
// `round` to a float of `dst_data_size_in_bits` bits, and stored as raw bits.
// Inactive lanes keep whatever ClearForWrite left in them.
LogicVRegister Simulator::scvtf(VectorFormat vform,
                                unsigned dst_data_size_in_bits,
                                unsigned src_data_size_in_bits,
                                LogicVRegister dst,
                                const LogicPRegister& pg,
                                const LogicVRegister& src,
                                FPRounding round,
                                int fbits) {
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
  dst.ClearForWrite(vform);

  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    if (pg.IsActive(vform, lane)) {
      const int64_t fixed = ExtractSignedBitfield64(src_data_size_in_bits - 1,
                                                    0,
                                                    src.Uint(vform, lane));

      switch (dst_data_size_in_bits) {
        case kHRegSize: {
          SimFloat16 converted = FixedToFloat16(fixed, fbits, round);
          dst.SetUint(vform, lane, Float16ToRawbits(converted));
          break;
        }
        case kSRegSize: {
          float converted = FixedToFloat(fixed, fbits, round);
          dst.SetUint(vform, lane, FloatToRawbits(converted));
          break;
        }
        case kDRegSize: {
          double converted = FixedToDouble(fixed, fbits, round);
          dst.SetUint(vform, lane, DoubleToRawbits(converted));
          break;
        }
        default:
          VIXL_UNIMPLEMENTED();
          break;
      }
    }
  }

  return dst;
}
6910
6911
// Unpredicated convenience overload: converts every lane, with the source and
// destination data sizes both equal to the lane size of `vform`.
LogicVRegister Simulator::scvtf(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                int fbits,
                                FPRounding round) {
  const unsigned lane_bits = LaneSizeInBitsFromFormat(vform);
  return scvtf(vform, lane_bits, lane_bits, dst, GetPTrue(), src, round, fbits);
}
6925
6926
// Unsigned fixed-point to floating-point conversion for the lanes active in
// `pg`. The low `src_data_size_in_bits` bits of each source lane are
// zero-extended, converted with `fbits` fractional bits and rounding mode
// `round` to a float of `dst_data_size_in_bits` bits, and stored as raw bits.
// Inactive lanes keep whatever ClearForWrite left in them.
LogicVRegister Simulator::ucvtf(VectorFormat vform,
                                unsigned dst_data_size_in_bits,
                                unsigned src_data_size_in_bits,
                                LogicVRegister dst,
                                const LogicPRegister& pg,
                                const LogicVRegister& src,
                                FPRounding round,
                                int fbits) {
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
  dst.ClearForWrite(vform);

  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    if (pg.IsActive(vform, lane)) {
      const uint64_t fixed =
          ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
                                    0,
                                    src.Uint(vform, lane));

      switch (dst_data_size_in_bits) {
        case kHRegSize: {
          SimFloat16 converted = UFixedToFloat16(fixed, fbits, round);
          dst.SetUint(vform, lane, Float16ToRawbits(converted));
          break;
        }
        case kSRegSize: {
          float converted = UFixedToFloat(fixed, fbits, round);
          dst.SetUint(vform, lane, FloatToRawbits(converted));
          break;
        }
        case kDRegSize: {
          double converted = UFixedToDouble(fixed, fbits, round);
          dst.SetUint(vform, lane, DoubleToRawbits(converted));
          break;
        }
        default:
          VIXL_UNIMPLEMENTED();
          break;
      }
    }
  }

  return dst;
}
6969
6970
// Unpredicated convenience overload: converts every lane, with the source and
// destination data sizes both equal to the lane size of `vform`.
LogicVRegister Simulator::ucvtf(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                int fbits,
                                FPRounding round) {
  const unsigned lane_bits = LaneSizeInBitsFromFormat(vform);
  return ucvtf(vform, lane_bits, lane_bits, dst, GetPTrue(), src, round, fbits);
}
6984
6985
// Widens half-width lanes of `src` into full-width lanes of `dst`, using sign
// or zero extension according to `extend_type`. For kLoHalf the half-width
// lanes are read starting at lane 0; otherwise they are read starting at
// half-width lane `lane_count`. All source lanes are read into a temporary
// buffer before `dst` is written.
LogicVRegister Simulator::unpk(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src,
                               UnpackType unpack_type,
                               ExtendType extend_type) {
  const VectorFormat half_vform = VectorFormatHalfWidth(vform);
  const int lane_count = LaneCountFromFormat(vform);
  const int first_src_lane = (unpack_type == kLoHalf) ? 0 : lane_count;

  if (extend_type == kSignedExtend) {
    int64_t widened[kZRegMaxSizeInBytes];
    for (int lane = 0; lane < lane_count; ++lane) {
      widened[lane] = src.Int(half_vform, first_src_lane + lane);
    }
    for (int lane = 0; lane < lane_count; ++lane) {
      dst.SetInt(vform, lane, widened[lane]);
    }
  } else if (extend_type == kUnsignedExtend) {
    uint64_t widened[kZRegMaxSizeInBytes];
    for (int lane = 0; lane < lane_count; ++lane) {
      widened[lane] = src.Uint(half_vform, first_src_lane + lane);
    }
    for (int lane = 0; lane < lane_count; ++lane) {
      dst.SetUint(vform, lane, widened[lane]);
    }
  } else {
    VIXL_UNREACHABLE();
  }
  return dst;
}
7020
7021
// Compares `src1` with `src2` lane by lane under `cond` and records the
// outcome in `dst`. Lanes that are inactive in `mask` are written as false.
// When `is_wide_elements` is set, the second operand comes from the D-sized
// lane of `src2` that covers the current lane. If `flags` is SetFlags,
// PredTest is run on the result afterwards.
LogicPRegister Simulator::SVEIntCompareVectorsHelper(Condition cond,
                                                     VectorFormat vform,
                                                     LogicPRegister dst,
                                                     const LogicPRegister& mask,
                                                     const LogicVRegister& src1,
                                                     const LogicVRegister& src2,
                                                     bool is_wide_elements,
                                                     FlagsUpdate flags) {
  const int lane_count = LaneCountFromFormat(vform);
  const unsigned lane_bits = LaneSizeInBitsFromFormat(vform);

  for (int lane = 0; lane < lane_count; ++lane) {
    bool outcome = false;
    if (mask.IsActive(vform, lane)) {
      // Placeholder values, overwritten by every valid condition below.
      int64_t lhs = 0xbadbeef;
      int64_t rhs = 0xbadbeef;
      const int wide_lane = (lane * lane_bits) / kDRegSize;

      // Signed conditions read the operands as signed integers; unsigned
      // conditions read them as unsigned integers.
      switch (cond) {
        case eq:
        case ge:
        case gt:
        case lt:
        case le:
        case ne:
          lhs = src1.Int(vform, lane);
          if (is_wide_elements) {
            rhs = src2.Int(kFormatVnD, wide_lane);
          } else {
            rhs = src2.Int(vform, lane);
          }
          break;
        case hi:
        case hs:
        case ls:
        case lo:
          lhs = src1.Uint(vform, lane);
          if (is_wide_elements) {
            rhs = src2.Uint(kFormatVnD, wide_lane);
          } else {
            rhs = src2.Uint(vform, lane);
          }
          break;
        default:
          VIXL_UNREACHABLE();
      }

      const uint64_t unsigned_lhs = static_cast<uint64_t>(lhs);
      const uint64_t unsigned_rhs = static_cast<uint64_t>(rhs);
      switch (cond) {
        case eq:
          outcome = (lhs == rhs);
          break;
        case ne:
          outcome = (lhs != rhs);
          break;
        case ge:
          outcome = (lhs >= rhs);
          break;
        case gt:
          outcome = (lhs > rhs);
          break;
        case le:
          outcome = (lhs <= rhs);
          break;
        case lt:
          outcome = (lhs < rhs);
          break;
        case hs:
          outcome = (unsigned_lhs >= unsigned_rhs);
          break;
        case hi:
          outcome = (unsigned_lhs > unsigned_rhs);
          break;
        case ls:
          outcome = (unsigned_lhs <= unsigned_rhs);
          break;
        case lo:
          outcome = (unsigned_lhs < unsigned_rhs);
          break;
        default:
          VIXL_UNREACHABLE();
      }
    }
    dst.SetActive(vform, lane, outcome);
  }

  if (flags == SetFlags) {
    PredTest(vform, mask, dst);
  }

  return dst;
}
7100
7101
// Shifts each lane of `src1` with `shift_op` by the amount held in `src2`.
// With `is_wide_elements`, the amount is taken from the D-sized lane of
// `src2` covering the lane being shifted; otherwise it comes from the
// same-numbered lane. Amounts are saturated to the lane size.
LogicVRegister Simulator::SVEBitwiseShiftHelper(Shift shift_op,
                                                VectorFormat vform,
                                                LogicVRegister dst,
                                                const LogicVRegister& src1,
                                                const LogicVRegister& src2,
                                                bool is_wide_elements) {
  const unsigned lane_bits = LaneSizeInBitsFromFormat(vform);
  const VectorFormat amount_vform = is_wide_elements ? kFormatVnD : vform;
  const int lane_count = LaneCountFromFormat(vform);

  for (int lane = 0; lane < lane_count; ++lane) {
    // For wide elements, pick the D-sized lane that overlaps this lane.
    const int amount_lane =
        is_wide_elements ? static_cast<int>((lane * lane_bits) / kDRegSize)
                         : lane;

    uint64_t amount = src2.Uint(amount_vform, amount_lane);
    // Saturate the amount to the size of the lane being shifted.
    if (amount > lane_bits) {
      amount = lane_bits;
    }

    const uint64_t operand = src1.Uint(vform, lane);
    int64_t shifted = ShiftOperand(lane_bits,
                                   operand,
                                   shift_op,
                                   static_cast<unsigned>(amount));
    dst.SetUint(vform, lane, shifted);
  }

  return dst;
}
7132
7133
// Arithmetic shift right of each signed lane by `shift`. Negative values have
// GetUintMask(shift) added before shifting; assuming that mask is
// 2^shift - 1, this makes the result round towards zero. Shifts above 63
// produce zero.
LogicVRegister Simulator::asrd(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               int shift) {
  VIXL_ASSERT((shift > 0) && (static_cast<unsigned>(shift) <=
                              LaneSizeInBitsFromFormat(vform)));

  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const int64_t operand = src1.Int(vform, lane);
    int64_t quotient = 0;
    if (shift <= 63) {
      int64_t biased = operand;
      if (biased < 0) {
        // The largest possible mask is 0x7fff'ffff'ffff'ffff, which fits in
        // an int64_t, and adding it to a negative value cannot overflow.
        biased = biased + GetUintMask(shift);
      }
      quotient = ShiftOperand(kDRegSize, biased, ASR, shift);
    }
    dst.SetInt(vform, lane, quotient);
  }
  return dst;
}
7156
7157
// Applies the bitwise operation `logical_op` (AND, BIC, EOR or ORR) to every
// lane of `zn` and `zm`, writing the result to `zd`. Any other operation hits
// VIXL_UNIMPLEMENTED, and the lane is then written as zero.
LogicVRegister Simulator::SVEBitwiseLogicalUnpredicatedHelper(
    LogicalOp logical_op,
    VectorFormat vform,
    LogicVRegister zd,
    const LogicVRegister& zn,
    const LogicVRegister& zm) {
  VIXL_ASSERT(IsSVEFormat(vform));

  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t lhs = zn.Uint(vform, lane);
    const uint64_t rhs = zm.Uint(vform, lane);
    uint64_t combined = 0;
    switch (logical_op) {
      case AND:
        combined = lhs & rhs;
        break;
      case BIC:
        combined = lhs & ~rhs;
        break;
      case EOR:
        combined = lhs ^ rhs;
        break;
      case ORR:
        combined = lhs | rhs;
        break;
      default:
        VIXL_UNIMPLEMENTED();
    }
    zd.SetUint(vform, lane, combined);
  }

  return zd;
}
7189
7190
// Combines the predicates `pn` and `pm` chunk by chunk with the logical
// operation selected by `op`, writing each result chunk to `pd`. The
// flag-setting (...S) variants compute the same chunk value as their
// non-flag-setting counterparts here.
LogicPRegister Simulator::SVEPredicateLogicalHelper(SVEPredicateLogicalOp op,
                                                    LogicPRegister pd,
                                                    const LogicPRegister& pn,
                                                    const LogicPRegister& pm) {
  for (int chunk = 0; chunk < pn.GetChunkCount(); ++chunk) {
    const LogicPRegister::ChunkType lhs = pn.GetChunk(chunk);
    const LogicPRegister::ChunkType rhs = pm.GetChunk(chunk);
    LogicPRegister::ChunkType combined = 0;
    switch (op) {
      case ANDS_p_p_pp_z:
      case AND_p_p_pp_z:
        combined = lhs & rhs;
        break;
      case BICS_p_p_pp_z:
      case BIC_p_p_pp_z:
        combined = lhs & ~rhs;
        break;
      case EORS_p_p_pp_z:
      case EOR_p_p_pp_z:
        combined = lhs ^ rhs;
        break;
      case NANDS_p_p_pp_z:
      case NAND_p_p_pp_z:
        combined = ~(lhs & rhs);
        break;
      case NORS_p_p_pp_z:
      case NOR_p_p_pp_z:
        combined = ~(lhs | rhs);
        break;
      case ORNS_p_p_pp_z:
      case ORN_p_p_pp_z:
        combined = lhs | ~rhs;
        break;
      case ORRS_p_p_pp_z:
      case ORR_p_p_pp_z:
        combined = lhs | rhs;
        break;
      default:
        VIXL_UNIMPLEMENTED();
    }
    pd.SetChunk(chunk, combined);
  }
  return pd;
}
7234
7235
// Combines every lane of `zd`, in place, with the immediate `imm` using the
// AND, EOR or ORR operation selected by `op`. Any other operation hits
// VIXL_UNIMPLEMENTED, and the lane is then written as zero.
LogicVRegister Simulator::SVEBitwiseImmHelper(
    SVEBitwiseLogicalWithImm_UnpredicatedOp op,
    VectorFormat vform,
    LogicVRegister zd,
    uint64_t imm) {
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t current = zd.Uint(vform, lane);
    uint64_t combined = 0;
    switch (op) {
      case AND_z_zi:
        combined = current & imm;
        break;
      case EOR_z_zi:
        combined = current ^ imm;
        break;
      case ORR_z_zi:
        combined = current | imm;
        break;
      default:
        VIXL_UNIMPLEMENTED();
    }
    zd.SetUint(vform, lane, combined);
  }

  return zd;
}
7261
7262
void Simulator::SVEStructuredStoreHelper(VectorFormat vform,
7263
const LogicPRegister& pg,
7264
unsigned zt_code,
7265
const LogicSVEAddressVector& addr) {
7266
VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7267
7268
int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
7269
int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();
7270
int msize_in_bytes = addr.GetMsizeInBytes();
7271
int reg_count = addr.GetRegCount();
7272
7273
VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
7274
VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));
7275
7276
unsigned zt_codes[4] = {zt_code,
7277
(zt_code + 1) % kNumberOfZRegisters,
7278
(zt_code + 2) % kNumberOfZRegisters,
7279
(zt_code + 3) % kNumberOfZRegisters};
7280
7281
LogicVRegister zt[4] = {
7282
ReadVRegister(zt_codes[0]),
7283
ReadVRegister(zt_codes[1]),
7284
ReadVRegister(zt_codes[2]),
7285
ReadVRegister(zt_codes[3]),
7286
};
7287
7288
// For unpacked forms (e.g. `st1b { z0.h }, ...`, the upper parts of the lanes
7289
// are ignored, so read the source register using the VectorFormat that
7290
// corresponds with the storage format, and multiply the index accordingly.
7291
VectorFormat unpack_vform =
7292
SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2);
7293
int unpack_shift = esize_in_bytes_log2 - msize_in_bytes_log2;
7294
7295
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7296
if (!pg.IsActive(vform, i)) continue;
7297
7298
for (int r = 0; r < reg_count; r++) {
7299
uint64_t element_address = addr.GetElementAddress(i, r);
7300
if (!StoreLane(zt[r], unpack_vform, i << unpack_shift, element_address)) {
7301
return;
7302
}
7303
}
7304
}
7305
7306
if (ShouldTraceWrites()) {
7307
PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7308
if (esize_in_bytes_log2 == msize_in_bytes_log2) {
7309
// Use an FP format where it's likely that we're accessing FP data.
7310
format = GetPrintRegisterFormatTryFP(format);
7311
}
7312
// Stores don't represent a change to the source register's value, so only
7313
// print the relevant part of the value.
7314
format = GetPrintRegPartial(format);
7315
7316
PrintZStructAccess(zt_code,
7317
reg_count,
7318
pg,
7319
format,
7320
msize_in_bytes,
7321
"->",
7322
addr);
7323
}
7324
}
7325
7326
bool Simulator::SVEStructuredLoadHelper(VectorFormat vform,
7327
const LogicPRegister& pg,
7328
unsigned zt_code,
7329
const LogicSVEAddressVector& addr,
7330
bool is_signed) {
7331
int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
7332
int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();
7333
int msize_in_bytes = addr.GetMsizeInBytes();
7334
int reg_count = addr.GetRegCount();
7335
7336
VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7337
VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
7338
VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));
7339
7340
unsigned zt_codes[4] = {zt_code,
7341
(zt_code + 1) % kNumberOfZRegisters,
7342
(zt_code + 2) % kNumberOfZRegisters,
7343
(zt_code + 3) % kNumberOfZRegisters};
7344
LogicVRegister zt[4] = {
7345
ReadVRegister(zt_codes[0]),
7346
ReadVRegister(zt_codes[1]),
7347
ReadVRegister(zt_codes[2]),
7348
ReadVRegister(zt_codes[3]),
7349
};
7350
7351
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7352
for (int r = 0; r < reg_count; r++) {
7353
uint64_t element_address = addr.GetElementAddress(i, r);
7354
7355
if (!pg.IsActive(vform, i)) {
7356
zt[r].SetUint(vform, i, 0);
7357
continue;
7358
}
7359
7360
if (is_signed) {
7361
if (!LoadIntToLane(zt[r], vform, msize_in_bytes, i, element_address)) {
7362
return false;
7363
}
7364
} else {
7365
if (!LoadUintToLane(zt[r], vform, msize_in_bytes, i, element_address)) {
7366
return false;
7367
}
7368
}
7369
}
7370
}
7371
7372
if (ShouldTraceVRegs()) {
7373
PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7374
if ((esize_in_bytes_log2 == msize_in_bytes_log2) && !is_signed) {
7375
// Use an FP format where it's likely that we're accessing FP data.
7376
format = GetPrintRegisterFormatTryFP(format);
7377
}
7378
PrintZStructAccess(zt_code,
7379
reg_count,
7380
pg,
7381
format,
7382
msize_in_bytes,
7383
"<-",
7384
addr);
7385
}
7386
return true;
7387
}
7388
7389
// Walks the byte lanes in order. Each lane active in `pg` is set in `pd` to
// true until a break has been seen, and false afterwards; a break is recorded
// when `pn` is active in a lane, AFTER that lane has been written. Lanes
// inactive in `pg` are not written.
LogicPRegister Simulator::brka(LogicPRegister pd,
                               const LogicPRegister& pg,
                               const LogicPRegister& pn) {
  const int lane_count = LaneCountFromFormat(kFormatVnB);
  bool seen_break = false;
  for (int lane = 0; lane < lane_count; ++lane) {
    if (!pg.IsActive(kFormatVnB, lane)) continue;

    // Write first, then test `pn`, preserving the original order.
    pd.SetActive(kFormatVnB, lane, !seen_break);
    seen_break |= pn.IsActive(kFormatVnB, lane);
  }

  return pd;
}
7402
7403
// Walks the byte lanes in order. Each lane active in `pg` is set in `pd` to
// true until a break has been seen, and false afterwards; a break is recorded
// when `pn` is active in a lane, BEFORE that lane is written. Lanes inactive
// in `pg` are not written.
LogicPRegister Simulator::brkb(LogicPRegister pd,
                               const LogicPRegister& pg,
                               const LogicPRegister& pn) {
  const int lane_count = LaneCountFromFormat(kFormatVnB);
  bool seen_break = false;
  for (int lane = 0; lane < lane_count; ++lane) {
    if (!pg.IsActive(kFormatVnB, lane)) continue;

    // Test `pn` first, then write, preserving the original order.
    seen_break |= pn.IsActive(kFormatVnB, lane);
    pd.SetActive(kFormatVnB, lane, !seen_break);
  }

  return pd;
}
7416
7417
// Leaves `pdm` untouched when IsLastActive(kFormatVnB, pg, pn) holds, and
// clears it (via pfalse) otherwise.
LogicPRegister Simulator::brkn(LogicPRegister pdm,
                               const LogicPRegister& pg,
                               const LogicPRegister& pn) {
  const bool keep = IsLastActive(kFormatVnB, pg, pn);
  if (!keep) {
    pfalse(pdm);
  }
  return pdm;
}
7425
7426
// Propagating break-after helper, operating on byte-sized predicate lanes.
//
// The running state starts as IsLastActive(kFormatVnB, pg, pn). For each
// pg-active lane, `pd` receives the current state, and the state is then
// cleared if `pm` is set for that lane. Lanes inactive in `pg` are written as
// inactive in `pd`.
//
// Returns `pd`.
LogicPRegister Simulator::brkpa(LogicPRegister pd,
                                const LogicPRegister& pg,
                                const LogicPRegister& pn,
                                const LogicPRegister& pm) {
  const VectorFormat vform = kFormatVnB;
  const int lane_count = LaneCountFromFormat(vform);

  bool propagating = IsLastActive(vform, pg, pn);
  for (int lane = 0; lane < lane_count; lane++) {
    bool lane_result = false;
    if (pg.IsActive(vform, lane)) {
      // Record the state first: the breaking lane is still active.
      lane_result = propagating;
      if (propagating && pm.IsActive(vform, lane)) propagating = false;
    }
    pd.SetActive(vform, lane, lane_result);
  }

  return pd;
}
7443
7444
// Propagating break-before helper, operating on byte-sized predicate lanes.
//
// The running state starts as IsLastActive(kFormatVnB, pg, pn). For each
// pg-active lane, the state is first cleared if `pm` is set for that lane,
// and `pd` then receives the updated state. Lanes inactive in `pg` are
// written as inactive in `pd`.
//
// Returns `pd`.
LogicPRegister Simulator::brkpb(LogicPRegister pd,
                                const LogicPRegister& pg,
                                const LogicPRegister& pn,
                                const LogicPRegister& pm) {
  const VectorFormat vform = kFormatVnB;
  const int lane_count = LaneCountFromFormat(vform);

  bool propagating = IsLastActive(vform, pg, pn);
  for (int lane = 0; lane < lane_count; lane++) {
    bool lane_result = false;
    if (pg.IsActive(vform, lane)) {
      // Update the state first: the breaking lane is already inactive.
      if (propagating && pm.IsActive(vform, lane)) propagating = false;
      lane_result = propagating;
    }
    pd.SetActive(vform, lane, lane_result);
  }

  return pd;
}
7461
7462
void Simulator::SVEFaultTolerantLoadHelper(VectorFormat vform,
7463
const LogicPRegister& pg,
7464
unsigned zt_code,
7465
const LogicSVEAddressVector& addr,
7466
SVEFaultTolerantLoadType type,
7467
bool is_signed) {
7468
int esize_in_bytes = LaneSizeInBytesFromFormat(vform);
7469
int msize_in_bits = addr.GetMsizeInBits();
7470
int msize_in_bytes = addr.GetMsizeInBytes();
7471
7472
VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7473
VIXL_ASSERT(esize_in_bytes >= msize_in_bytes);
7474
VIXL_ASSERT(addr.GetRegCount() == 1);
7475
7476
LogicVRegister zt = ReadVRegister(zt_code);
7477
LogicPRegister ffr = ReadFFR();
7478
7479
// Non-faulting loads are allowed to fail arbitrarily. To stress user
7480
// code, fail a random element in roughly one in eight full-vector loads.
7481
uint32_t rnd = static_cast<uint32_t>(jrand48(rand_state_));
7482
int fake_fault_at_lane = rnd % (LaneCountFromFormat(vform) * 8);
7483
7484
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7485
uint64_t value = 0;
7486
7487
if (pg.IsActive(vform, i)) {
7488
uint64_t element_address = addr.GetElementAddress(i, 0);
7489
7490
if (type == kSVEFirstFaultLoad) {
7491
// First-faulting loads always load the first active element, regardless
7492
// of FFR. The result will be discarded if its FFR lane is inactive, but
7493
// it could still generate a fault.
7494
VIXL_DEFINE_OR_RETURN(mem_result,
7495
MemReadUint(msize_in_bytes, element_address));
7496
value = mem_result;
7497
// All subsequent elements have non-fault semantics.
7498
type = kSVENonFaultLoad;
7499
7500
} else if (ffr.IsActive(vform, i)) {
7501
// Simulation of fault-tolerant loads relies on system calls, and is
7502
// likely to be relatively slow, so we only actually perform the load if
7503
// its FFR lane is active.
7504
7505
bool can_read = (i < fake_fault_at_lane) &&
7506
CanReadMemory(element_address, msize_in_bytes);
7507
if (can_read) {
7508
VIXL_DEFINE_OR_RETURN(mem_result,
7509
MemReadUint(msize_in_bytes, element_address));
7510
value = mem_result;
7511
} else {
7512
// Propagate the fault to the end of FFR.
7513
for (int j = i; j < LaneCountFromFormat(vform); j++) {
7514
ffr.SetActive(vform, j, false);
7515
}
7516
}
7517
}
7518
}
7519
7520
// The architecture permits a few possible results for inactive FFR lanes
7521
// (including those caused by a fault in this instruction). We choose to
7522
// leave the register value unchanged (like merging predication) because
7523
// no other input to this instruction can have the same behaviour.
7524
//
7525
// Note that this behaviour takes precedence over pg's zeroing predication.
7526
7527
if (ffr.IsActive(vform, i)) {
7528
int msb = msize_in_bits - 1;
7529
if (is_signed) {
7530
zt.SetInt(vform, i, ExtractSignedBitfield64(msb, 0, value));
7531
} else {
7532
zt.SetUint(vform, i, ExtractUnsignedBitfield64(msb, 0, value));
7533
}
7534
}
7535
}
7536
7537
if (ShouldTraceVRegs()) {
7538
PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7539
if ((esize_in_bytes == msize_in_bytes) && !is_signed) {
7540
// Use an FP format where it's likely that we're accessing FP data.
7541
format = GetPrintRegisterFormatTryFP(format);
7542
}
7543
// Log accessed lanes that are active in both pg and ffr. PrintZStructAccess
7544
// expects a single mask, so combine the two predicates.
7545
SimPRegister mask;
7546
SVEPredicateLogicalHelper(AND_p_p_pp_z, mask, pg, ffr);
7547
PrintZStructAccess(zt_code, 1, mask, format, msize_in_bytes, "<-", addr);
7548
}
7549
}
7550
7551
void Simulator::SVEGatherLoadScalarPlusVectorHelper(const Instruction* instr,
7552
VectorFormat vform,
7553
SVEOffsetModifier mod) {
7554
bool is_signed = instr->ExtractBit(14) == 0;
7555
bool is_ff = instr->ExtractBit(13) == 1;
7556
// Note that these instructions don't use the Dtype encoding.
7557
int msize_in_bytes_log2 = instr->ExtractBits(24, 23);
7558
int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
7559
uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
7560
LogicSVEAddressVector addr(base,
7561
&ReadVRegister(instr->GetRm()),
7562
vform,
7563
mod,
7564
scale);
7565
addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
7566
if (is_ff) {
7567
SVEFaultTolerantLoadHelper(vform,
7568
ReadPRegister(instr->GetPgLow8()),
7569
instr->GetRt(),
7570
addr,
7571
kSVEFirstFaultLoad,
7572
is_signed);
7573
} else {
7574
SVEStructuredLoadHelper(vform,
7575
ReadPRegister(instr->GetPgLow8()),
7576
instr->GetRt(),
7577
addr,
7578
is_signed);
7579
}
7580
}
7581
7582
int Simulator::GetFirstActive(VectorFormat vform,
7583
const LogicPRegister& pg) const {
7584
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7585
if (pg.IsActive(vform, i)) return i;
7586
}
7587
return -1;
7588
}
7589
7590
int Simulator::GetLastActive(VectorFormat vform,
7591
const LogicPRegister& pg) const {
7592
for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
7593
if (pg.IsActive(vform, i)) return i;
7594
}
7595
return -1;
7596
}
7597
7598
int Simulator::CountActiveLanes(VectorFormat vform,
7599
const LogicPRegister& pg) const {
7600
int count = 0;
7601
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7602
count += pg.IsActive(vform, i) ? 1 : 0;
7603
}
7604
return count;
7605
}
7606
7607
int Simulator::CountActiveAndTrueLanes(VectorFormat vform,
7608
const LogicPRegister& pg,
7609
const LogicPRegister& pn) const {
7610
int count = 0;
7611
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7612
count += (pg.IsActive(vform, i) && pn.IsActive(vform, i)) ? 1 : 0;
7613
}
7614
return count;
7615
}
7616
7617
int Simulator::GetPredicateConstraintLaneCount(VectorFormat vform,
7618
int pattern) const {
7619
VIXL_ASSERT(IsSVEFormat(vform));
7620
int all = LaneCountFromFormat(vform);
7621
VIXL_ASSERT(all > 0);
7622
7623
switch (pattern) {
7624
case SVE_VL1:
7625
case SVE_VL2:
7626
case SVE_VL3:
7627
case SVE_VL4:
7628
case SVE_VL5:
7629
case SVE_VL6:
7630
case SVE_VL7:
7631
case SVE_VL8:
7632
// VL1-VL8 are encoded directly.
7633
VIXL_STATIC_ASSERT(SVE_VL1 == 1);
7634
VIXL_STATIC_ASSERT(SVE_VL8 == 8);
7635
return (pattern <= all) ? pattern : 0;
7636
case SVE_VL16:
7637
case SVE_VL32:
7638
case SVE_VL64:
7639
case SVE_VL128:
7640
case SVE_VL256: {
7641
// VL16-VL256 are encoded as log2(N) + c.
7642
int min = 16 << (pattern - SVE_VL16);
7643
return (min <= all) ? min : 0;
7644
}
7645
// Special cases.
7646
case SVE_POW2:
7647
return 1 << HighestSetBitPosition(all);
7648
case SVE_MUL4:
7649
return all - (all % 4);
7650
case SVE_MUL3:
7651
return all - (all % 3);
7652
case SVE_ALL:
7653
return all;
7654
}
7655
// Unnamed cases architecturally return 0.
7656
return 0;
7657
}
7658
7659
// Set each lane of `dst` according to whether the corresponding `haystack`
// element equals any `needles` element at the same position range of its
// 128-bit segment.
//
// For every element index within a segment, that needle is broadcast across
// its segment, compared for equality against `haystack` under an all-true
// predicate, and the comparison result is ORed into `dst`. When
// `negate_match` is set, the accumulated result is finally inverted by
// exclusive-ORing it with an all-true predicate for `vform`.
//
// Returns `dst`.
LogicPRegister Simulator::match(VectorFormat vform,
                                LogicPRegister dst,
                                const LogicVRegister& haystack,
                                const LogicVRegister& needles,
                                bool negate_match) {
  SimVRegister broadcast_needle;
  SimPRegister scratch_pred;

  pfalse(dst);

  const int lanes_per_segment = kQRegSize / LaneSizeInBitsFromFormat(vform);
  for (int needle = 0; needle < lanes_per_segment; needle++) {
    dup_elements_to_segments(vform, broadcast_needle, needles, needle);
    SVEIntCompareVectorsHelper(eq,
                               vform,
                               scratch_pred,
                               GetPTrue(),
                               haystack,
                               broadcast_needle,
                               false,
                               LeaveFlags);
    SVEPredicateLogicalHelper(ORR_p_p_pp_z, dst, dst, scratch_pred);
  }

  if (!negate_match) return dst;

  ptrue(vform, scratch_pred, SVE_ALL);
  SVEPredicateLogicalHelper(EOR_p_p_pp_z, dst, dst, scratch_pred);
  return dst;
}
7687
7688
uint64_t LogicSVEAddressVector::GetStructAddress(int lane) const {
7689
if (IsContiguous()) {
7690
return base_ + (lane * GetRegCount()) * GetMsizeInBytes();
7691
}
7692
7693
VIXL_ASSERT(IsScatterGather());
7694
VIXL_ASSERT(vector_ != NULL);
7695
7696
// For scatter-gather accesses, we need to extract the offset from vector_,
7697
// and apply modifiers.
7698
7699
uint64_t offset = 0;
7700
switch (vector_form_) {
7701
case kFormatVnS:
7702
offset = vector_->GetLane<uint32_t>(lane);
7703
break;
7704
case kFormatVnD:
7705
offset = vector_->GetLane<uint64_t>(lane);
7706
break;
7707
default:
7708
VIXL_UNIMPLEMENTED();
7709
break;
7710
}
7711
7712
switch (vector_mod_) {
7713
case SVE_MUL_VL:
7714
VIXL_UNIMPLEMENTED();
7715
break;
7716
case SVE_LSL:
7717
// We apply the shift below. There's nothing to do here.
7718
break;
7719
case NO_SVE_OFFSET_MODIFIER:
7720
VIXL_ASSERT(vector_shift_ == 0);
7721
break;
7722
case SVE_UXTW:
7723
offset = ExtractUnsignedBitfield64(kWRegSize - 1, 0, offset);
7724
break;
7725
case SVE_SXTW:
7726
offset = ExtractSignedBitfield64(kWRegSize - 1, 0, offset);
7727
break;
7728
}
7729
7730
return base_ + (offset << vector_shift_);
7731
}
7732
7733
// Implemented as uzp2() of `src` with a cleared register as the second
// operand. Returns the value returned by uzp2().
LogicVRegister Simulator::pack_odd_elements(VectorFormat vform,
                                            LogicVRegister dst,
                                            const LogicVRegister& src) {
  SimVRegister all_zeroes;
  all_zeroes.Clear();

  return uzp2(vform, dst, src, all_zeroes);
}
7740
7741
// Implemented as uzp1() of `src` with a cleared register as the second
// operand. Returns the value returned by uzp1().
LogicVRegister Simulator::pack_even_elements(VectorFormat vform,
                                             LogicVRegister dst,
                                             const LogicVRegister& src) {
  SimVRegister all_zeroes;
  all_zeroes.Clear();

  return uzp1(vform, dst, src, all_zeroes);
}
7748
7749
// Add-with-carry over pairs of lanes (S- or D-sized lanes only).
//
// For each even/odd lane pair:
//  - the addend comes from the even (`top` == false) or odd (`top` == true)
//    lane of `src1`;
//  - the accumulator is the even lane of `dst`;
//  - the carry-in is bit 0 of the odd lane of `src2`.
// The sum is written to the even lane of `dst`, and the carry-out (bit 1 of
// the flags returned by AddWithCarry) is written to the odd lane of `dst`.
//
// Returns `dst`.
LogicVRegister Simulator::adcl(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2,
                               bool top) {
  const unsigned lane_bits = LaneSizeInBitsFromFormat(vform);
  VIXL_ASSERT((lane_bits == kSRegSize) || (lane_bits == kDRegSize));

  const int lane_count = LaneCountFromFormat(vform);
  const int addend_select = top ? 1 : 0;

  for (int even = 0; even < lane_count; even += 2) {
    const int odd = even + 1;

    // Read every input before writing, as dst may share storage with a
    // source.
    const uint64_t addend = src1.Uint(vform, even + addend_select);
    const uint64_t accumulator = dst.Uint(vform, even);
    const unsigned carry_in = src2.Uint(vform, odd) & 1;

    const std::pair<uint64_t, uint8_t> sum_and_flags =
        AddWithCarry(lane_bits, addend, accumulator, carry_in);

    // Set even lanes to the result of the addition.
    dst.SetUint(vform, even, sum_and_flags.first);

    // Set odd lanes to the carry flag from the addition.
    const uint64_t carry_out = (sum_and_flags.second >> 1) & 1;
    dst.SetUint(vform, odd, carry_out);
  }

  return dst;
}
7773
7774
// Multiply the 2x8 8-bit matrix in src1 by the 8x2 8-bit matrix in src2, add
7775
// the 2x2 32-bit result to the matrix in srcdst, and write back to srcdst.
7776
//
7777
// Matrices of the form:
7778
//
7779
// src1 = ( a b c d e f g h ) src2 = ( A B )
7780
// ( i j k l m n o p ) ( C D )
7781
// ( E F )
7782
// ( G H )
7783
// ( I J )
7784
// ( K L )
7785
// ( M N )
7786
// ( O P )
7787
//
7788
// Are stored in the input vector registers as:
7789
//
7790
// 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
7791
// src1 = [ p | o | n | m | l | k | j | i | h | g | f | e | d | c | b | a ]
7792
// src2 = [ P | N | L | J | H | F | D | B | O | M | K | I | G | E | C | A ]
7793
//
7794
LogicVRegister Simulator::matmul(VectorFormat vform_dst,
7795
LogicVRegister srcdst,
7796
const LogicVRegister& src1,
7797
const LogicVRegister& src2,
7798
bool src1_signed,
7799
bool src2_signed) {
7800
// Two destination forms are supported: Q register containing four S-sized
7801
// elements (4S) and Z register containing n S-sized elements (VnS).
7802
VIXL_ASSERT((vform_dst == kFormat4S) || (vform_dst == kFormatVnS));
7803
VectorFormat vform_src = kFormatVnB;
7804
int b_per_segment = kQRegSize / kBRegSize;
7805
int s_per_segment = kQRegSize / kSRegSize;
7806
int64_t result[kZRegMaxSizeInBytes / kSRegSizeInBytes] = {};
7807
int segment_count = LaneCountFromFormat(vform_dst) / 4;
7808
for (int seg = 0; seg < segment_count; seg++) {
7809
for (int i = 0; i < 2; i++) {
7810
for (int j = 0; j < 2; j++) {
7811
int dstidx = (2 * i) + j + (seg * s_per_segment);
7812
int64_t sum = srcdst.Int(vform_dst, dstidx);
7813
for (int k = 0; k < 8; k++) {
7814
int idx1 = (8 * i) + k + (seg * b_per_segment);
7815
int idx2 = (8 * j) + k + (seg * b_per_segment);
7816
int64_t e1 = src1_signed ? src1.Int(vform_src, idx1)
7817
: src1.Uint(vform_src, idx1);
7818
int64_t e2 = src2_signed ? src2.Int(vform_src, idx2)
7819
: src2.Uint(vform_src, idx2);
7820
sum += e1 * e2;
7821
}
7822
result[dstidx] = sum;
7823
}
7824
}
7825
}
7826
srcdst.SetIntArray(vform_dst, result);
7827
return srcdst;
7828
}
7829
7830
// Multiply the 2x2 FP matrix in src1 by the 2x2 FP matrix in src2, add the 2x2
7831
// result to the matrix in srcdst, and write back to srcdst.
7832
//
7833
// Matrices of the form:
7834
//
7835
// src1 = ( a b ) src2 = ( A B )
7836
// ( c d ) ( C D )
7837
//
7838
// Are stored in the input vector registers as:
7839
//
7840
// 3 2 1 0
7841
// src1 = [ d | c | b | a ]
7842
// src2 = [ D | B | C | A ]
7843
//
7844
template <typename T>
7845
LogicVRegister Simulator::fmatmul(VectorFormat vform,
7846
LogicVRegister srcdst,
7847
const LogicVRegister& src1,
7848
const LogicVRegister& src2) {
7849
T result[kZRegMaxSizeInBytes / sizeof(T)];
7850
int T_per_segment = 4;
7851
int segment_count = GetVectorLengthInBytes() / (T_per_segment * sizeof(T));
7852
for (int seg = 0; seg < segment_count; seg++) {
7853
int segoff = seg * T_per_segment;
7854
for (int i = 0; i < 2; i++) {
7855
for (int j = 0; j < 2; j++) {
7856
T prod0 = FPMulNaNs(src1.Float<T>(2 * i + 0 + segoff),
7857
src2.Float<T>(2 * j + 0 + segoff));
7858
T prod1 = FPMulNaNs(src1.Float<T>(2 * i + 1 + segoff),
7859
src2.Float<T>(2 * j + 1 + segoff));
7860
T sum = FPAdd(srcdst.Float<T>(2 * i + j + segoff), prod0);
7861
result[2 * i + j + segoff] = FPAdd(sum, prod1);
7862
}
7863
}
7864
}
7865
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7866
// Elements outside a multiple of 4T are set to zero. This happens only
7867
// for double precision operations, when the VL is a multiple of 128 bits,
7868
// but not a multiple of 256 bits.
7869
T value = (i < (T_per_segment * segment_count)) ? result[i] : 0;
7870
srcdst.SetFloat<T>(vform, i, value);
7871
}
7872
return srcdst;
7873
}
7874
7875
// Dispatch the FP matrix multiply-accumulate to the single- or
// double-precision implementation, according to the lane size of `vform`.
// Only S- and D-sized lanes are supported. Returns `dst`.
LogicVRegister Simulator::fmatmul(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2) {
  const unsigned lane_bits = LaneSizeInBitsFromFormat(vform);

  if (lane_bits != kSRegSize) {
    VIXL_ASSERT(lane_bits == kDRegSize);
    fmatmul<double>(vform, dst, src1, src2);
    return dst;
  }

  fmatmul<float>(vform, dst, src1, src2);
  return dst;
}
7887
7888
} // namespace aarch64
7889
} // namespace vixl
7890
7891
#endif // VIXL_INCLUDE_SIMULATOR_AARCH64
7892
7893