CoCalc -- logic-aarch64.cc

GitHub Repository: stenzek/duckstation
Path: blob/master/dep/vixl/src/aarch64/logic-aarch64.cc
⁴²⁶¹ views
1
// Copyright 2015, VIXL authors
2
// All rights reserved.
3
//
4
// Redistribution and use in source and binary forms, with or without
5
// modification, are permitted provided that the following conditions are met:
6
//
7
//   * Redistributions of source code must retain the above copyright notice,
8
//     this list of conditions and the following disclaimer.
9
//   * Redistributions in binary form must reproduce the above copyright notice,
10
//     this list of conditions and the following disclaimer in the documentation
11
//     and/or other materials provided with the distribution.
12
//   * Neither the name of ARM Limited nor the names of its contributors may be
13
//     used to endorse or promote products derived from this software without
14
//     specific prior written permission.
15
//
16
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26

27
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
28

29
#include <cmath>
30

31
#include "simulator-aarch64.h"
32

33
namespace vixl {
34
namespace aarch64 {
35

36
using vixl::internal::SimFloat16;
37

38
// Type predicate: answers whether the template argument is `double`.
// The primary template covers every type that has no explicit specialization
// and reports "not a 64-bit float".
template <typename T>
bool IsFloat64() {
  const bool is_fp64 = false;
  return is_fp64;
}
// `double` is the only type for which the predicate holds.
template <>
bool IsFloat64<double>() {
  const bool is_fp64 = true;
  return is_fp64;
}
46

47
// Type predicate: answers whether the template argument is `float`.
// The primary template covers every type that has no explicit specialization
// and reports "not a 32-bit float".
template <typename T>
bool IsFloat32() {
  const bool is_fp32 = false;
  return is_fp32;
}
// `float` is the only type for which the predicate holds.
template <>
bool IsFloat32<float>() {
  const bool is_fp32 = true;
  return is_fp32;
}
55

56
template <typename T>
57
bool IsFloat16() {
58
  return false;
59
}
60
template <>
61
bool IsFloat16<Float16>() {
62
  return true;
63
}
64
template <>
65
bool IsFloat16<SimFloat16>() {
66
  return true;
67
}
68

69
template <>
70
double Simulator::FPDefaultNaN<double>() {
71
  return kFP64DefaultNaN;
72
}
73

74

75
template <>
76
float Simulator::FPDefaultNaN<float>() {
77
  return kFP32DefaultNaN;
78
}
79

80

81
template <>
82
SimFloat16 Simulator::FPDefaultNaN<SimFloat16>() {
83
  return SimFloat16(kFP16DefaultNaN);
84
}
85

86

87
double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
88
  if (src >= 0) {
89
    return UFixedToDouble(src, fbits, round);
90
  } else if (src == INT64_MIN) {
91
    return -UFixedToDouble(src, fbits, round);
92
  } else {
93
    return -UFixedToDouble(-src, fbits, round);
94
  }
95
}
96

97

98
double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
99
  // An input of 0 is a special case because the result is effectively
100
  // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
101
  if (src == 0) {
102
    return 0.0;
103
  }
104

105
  // Calculate the exponent. The highest significant bit will have the value
106
  // 2^exponent.
107
  const int highest_significant_bit = 63 - CountLeadingZeros(src);
108
  const int64_t exponent = highest_significant_bit - fbits;
109

110
  return FPRoundToDouble(0, exponent, src, round);
111
}
112

113

114
float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
115
  if (src >= 0) {
116
    return UFixedToFloat(src, fbits, round);
117
  } else if (src == INT64_MIN) {
118
    return -UFixedToFloat(src, fbits, round);
119
  } else {
120
    return -UFixedToFloat(-src, fbits, round);
121
  }
122
}
123

124

125
float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
126
  // An input of 0 is a special case because the result is effectively
127
  // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
128
  if (src == 0) {
129
    return 0.0f;
130
  }
131

132
  // Calculate the exponent. The highest significant bit will have the value
133
  // 2^exponent.
134
  const int highest_significant_bit = 63 - CountLeadingZeros(src);
135
  const int32_t exponent = highest_significant_bit - fbits;
136

137
  return FPRoundToFloat(0, exponent, src, round);
138
}
139

140

141
// Convert a signed fixed-point value with `fbits` fractional bits to
// half precision.
//
// Non-negative inputs go straight to UFixedToFloat16. Negative inputs are
// converted by magnitude and the result is negated.
SimFloat16 Simulator::FixedToFloat16(int64_t src, int fbits, FPRounding round) {
  if (src >= 0) {
    return UFixedToFloat16(src, fbits, round);
  }

  // INT64_MIN cannot be negated as a signed value; its bit pattern, read as
  // unsigned, is already the magnitude we need.
  const uint64_t magnitude = (src == INT64_MIN) ? static_cast<uint64_t>(src)
                                                : static_cast<uint64_t>(-src);
  return -UFixedToFloat16(magnitude, fbits, round);
}
150

151

152
// Convert an unsigned fixed-point value with `fbits` fractional bits to
// half precision, rounding via FPRoundToFloat16 with the requested mode.
SimFloat16 Simulator::UFixedToFloat16(uint64_t src,
                                      int fbits,
                                      FPRounding round) {
  if (src != 0) {
    // The most significant set bit of `src` carries the weight 2^exponent.
    const int top_bit = 63 - CountLeadingZeros(src);
    const int16_t exponent = top_bit - fbits;
    return FPRoundToFloat16(0, exponent, src, round);
  }

  // Zero is handled separately because the result is effectively subnormal:
  // the exponent is encoded as 0 and there is no implicit 1 bit.
  return 0.0f;
}
168

169

170
uint64_t Simulator::GenerateRandomTag(uint16_t exclude) {
171
  uint64_t rtag = nrand48(rand_state_) >> 28;
172
  VIXL_ASSERT(IsUint4(rtag));
173

174
  if (exclude == 0) {
175
    exclude = nrand48(rand_state_) >> 27;
176
  }
177

178
  // TODO: implement this to better match the specification, which calls for a
179
  // true random mode, and a pseudo-random mode with state (EL1.TAG) modified by
180
  // PRNG.
181
  return ChooseNonExcludedTag(rtag, 0, exclude);
182
}
183

184

185
// Load every lane of `dst` from consecutive memory starting at `addr`.
// Returns false as soon as a lane load fails, true once all lanes are loaded.
bool Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    if (!LoadLane(dst, vform, lane, addr)) return false;
    // Step to the next element in memory.
    addr += LaneSizeInBytesFromFormat(vform);
  }
  return true;
}
195

196

197
bool Simulator::ld1(VectorFormat vform,
198
                    LogicVRegister dst,
199
                    int index,
200
                    uint64_t addr) {
201
  dst.ClearForWrite(vform);
202
  return LoadLane(dst, vform, index, addr);
203
}
204

205

206
bool Simulator::ld1r(VectorFormat vform,
207
                     VectorFormat unpack_vform,
208
                     LogicVRegister dst,
209
                     uint64_t addr,
210
                     bool is_signed) {
211
  unsigned unpack_size = LaneSizeInBytesFromFormat(unpack_vform);
212
  dst.ClearForWrite(vform);
213
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
214
    if (is_signed) {
215
      if (!LoadIntToLane(dst, vform, unpack_size, i, addr)) {
216
        return false;
217
      }
218
    } else {
219
      if (!LoadUintToLane(dst, vform, unpack_size, i, addr)) {
220
        return false;
221
      }
222
    }
223
  }
224
  return true;
225
}
226

227

228
// Convenience overload: replicate-load where the in-memory element has the
// same format as the destination lanes. Remaining arguments of the general
// overload take their defaults.
bool Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
  const VectorFormat unpack_vform = vform;
  return ld1r(vform, unpack_vform, dst, addr);
}
231

232

233
bool Simulator::ld2(VectorFormat vform,
234
                    LogicVRegister dst1,
235
                    LogicVRegister dst2,
236
                    uint64_t addr1) {
237
  dst1.ClearForWrite(vform);
238
  dst2.ClearForWrite(vform);
239
  int esize = LaneSizeInBytesFromFormat(vform);
240
  uint64_t addr2 = addr1 + esize;
241
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
242
    if (!LoadLane(dst1, vform, i, addr1) || !LoadLane(dst2, vform, i, addr2)) {
243
      return false;
244
    }
245
    addr1 += 2 * esize;
246
    addr2 += 2 * esize;
247
  }
248
  return true;
249
}
250

251

252
bool Simulator::ld2(VectorFormat vform,
253
                    LogicVRegister dst1,
254
                    LogicVRegister dst2,
255
                    int index,
256
                    uint64_t addr1) {
257
  dst1.ClearForWrite(vform);
258
  dst2.ClearForWrite(vform);
259
  uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
260
  return (LoadLane(dst1, vform, index, addr1) &&
261
          LoadLane(dst2, vform, index, addr2));
262
}
263

264

265
bool Simulator::ld2r(VectorFormat vform,
266
                     LogicVRegister dst1,
267
                     LogicVRegister dst2,
268
                     uint64_t addr) {
269
  dst1.ClearForWrite(vform);
270
  dst2.ClearForWrite(vform);
271
  uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
272
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
273
    if (!LoadLane(dst1, vform, i, addr) || !LoadLane(dst2, vform, i, addr2)) {
274
      return false;
275
    }
276
  }
277
  return true;
278
}
279

280

281
bool Simulator::ld3(VectorFormat vform,
282
                    LogicVRegister dst1,
283
                    LogicVRegister dst2,
284
                    LogicVRegister dst3,
285
                    uint64_t addr1) {
286
  dst1.ClearForWrite(vform);
287
  dst2.ClearForWrite(vform);
288
  dst3.ClearForWrite(vform);
289
  int esize = LaneSizeInBytesFromFormat(vform);
290
  uint64_t addr2 = addr1 + esize;
291
  uint64_t addr3 = addr2 + esize;
292
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
293
    if (!LoadLane(dst1, vform, i, addr1) || !LoadLane(dst2, vform, i, addr2) ||
294
        !LoadLane(dst3, vform, i, addr3)) {
295
      return false;
296
    }
297
    addr1 += 3 * esize;
298
    addr2 += 3 * esize;
299
    addr3 += 3 * esize;
300
  }
301
  return true;
302
}
303

304

305
bool Simulator::ld3(VectorFormat vform,
306
                    LogicVRegister dst1,
307
                    LogicVRegister dst2,
308
                    LogicVRegister dst3,
309
                    int index,
310
                    uint64_t addr1) {
311
  dst1.ClearForWrite(vform);
312
  dst2.ClearForWrite(vform);
313
  dst3.ClearForWrite(vform);
314
  uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
315
  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
316
  return (LoadLane(dst1, vform, index, addr1) &&
317
          LoadLane(dst2, vform, index, addr2) &&
318
          LoadLane(dst3, vform, index, addr3));
319
}
320

321

322
bool Simulator::ld3r(VectorFormat vform,
323
                     LogicVRegister dst1,
324
                     LogicVRegister dst2,
325
                     LogicVRegister dst3,
326
                     uint64_t addr) {
327
  dst1.ClearForWrite(vform);
328
  dst2.ClearForWrite(vform);
329
  dst3.ClearForWrite(vform);
330
  uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
331
  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
332
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
333
    if (!LoadLane(dst1, vform, i, addr) || !LoadLane(dst2, vform, i, addr2) ||
334
        !LoadLane(dst3, vform, i, addr3)) {
335
      return false;
336
    }
337
  }
338
  return true;
339
}
340

341

342
bool Simulator::ld4(VectorFormat vform,
343
                    LogicVRegister dst1,
344
                    LogicVRegister dst2,
345
                    LogicVRegister dst3,
346
                    LogicVRegister dst4,
347
                    uint64_t addr1) {
348
  dst1.ClearForWrite(vform);
349
  dst2.ClearForWrite(vform);
350
  dst3.ClearForWrite(vform);
351
  dst4.ClearForWrite(vform);
352
  int esize = LaneSizeInBytesFromFormat(vform);
353
  uint64_t addr2 = addr1 + esize;
354
  uint64_t addr3 = addr2 + esize;
355
  uint64_t addr4 = addr3 + esize;
356
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
357
    if (!LoadLane(dst1, vform, i, addr1) || !LoadLane(dst2, vform, i, addr2) ||
358
        !LoadLane(dst3, vform, i, addr3) || !LoadLane(dst4, vform, i, addr4)) {
359
      return false;
360
    }
361
    addr1 += 4 * esize;
362
    addr2 += 4 * esize;
363
    addr3 += 4 * esize;
364
    addr4 += 4 * esize;
365
  }
366
  return true;
367
}
368

369

370
bool Simulator::ld4(VectorFormat vform,
371
                    LogicVRegister dst1,
372
                    LogicVRegister dst2,
373
                    LogicVRegister dst3,
374
                    LogicVRegister dst4,
375
                    int index,
376
                    uint64_t addr1) {
377
  dst1.ClearForWrite(vform);
378
  dst2.ClearForWrite(vform);
379
  dst3.ClearForWrite(vform);
380
  dst4.ClearForWrite(vform);
381
  uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
382
  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
383
  uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
384
  return (LoadLane(dst1, vform, index, addr1) &&
385
          LoadLane(dst2, vform, index, addr2) &&
386
          LoadLane(dst3, vform, index, addr3) &&
387
          LoadLane(dst4, vform, index, addr4));
388
}
389

390

391
bool Simulator::ld4r(VectorFormat vform,
392
                     LogicVRegister dst1,
393
                     LogicVRegister dst2,
394
                     LogicVRegister dst3,
395
                     LogicVRegister dst4,
396
                     uint64_t addr) {
397
  dst1.ClearForWrite(vform);
398
  dst2.ClearForWrite(vform);
399
  dst3.ClearForWrite(vform);
400
  dst4.ClearForWrite(vform);
401
  uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
402
  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
403
  uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
404
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
405
    if (!LoadLane(dst1, vform, i, addr) || !LoadLane(dst2, vform, i, addr2) ||
406
        !LoadLane(dst3, vform, i, addr3) || !LoadLane(dst4, vform, i, addr4)) {
407
      return false;
408
    }
409
  }
410
  return true;
411
}
412

413

414
// Store every lane of `src` to consecutive memory starting at `addr`.
// Returns false as soon as a lane store fails, true once all are stored.
bool Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    if (!StoreLane(src, vform, lane, addr)) {
      return false;
    }
    // Step to the next element in memory.
    addr += LaneSizeInBytesFromFormat(vform);
  }
  return true;
}
421

422

423
bool Simulator::st1(VectorFormat vform,
424
                    LogicVRegister src,
425
                    int index,
426
                    uint64_t addr) {
427
  return StoreLane(src, vform, index, addr);
428
}
429

430

431
bool Simulator::st2(VectorFormat vform,
432
                    LogicVRegister src,
433
                    LogicVRegister src2,
434
                    uint64_t addr) {
435
  int esize = LaneSizeInBytesFromFormat(vform);
436
  uint64_t addr2 = addr + esize;
437
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
438
    if (!StoreLane(src, vform, i, addr) || !StoreLane(src2, vform, i, addr2)) {
439
      return false;
440
    }
441
    addr += 2 * esize;
442
    addr2 += 2 * esize;
443
  }
444
  return true;
445
}
446

447

448
bool Simulator::st2(VectorFormat vform,
449
                    LogicVRegister src,
450
                    LogicVRegister src2,
451
                    int index,
452
                    uint64_t addr) {
453
  int esize = LaneSizeInBytesFromFormat(vform);
454
  return (StoreLane(src, vform, index, addr) &&
455
          StoreLane(src2, vform, index, addr + 1 * esize));
456
}
457

458

459
bool Simulator::st3(VectorFormat vform,
460
                    LogicVRegister src,
461
                    LogicVRegister src2,
462
                    LogicVRegister src3,
463
                    uint64_t addr) {
464
  int esize = LaneSizeInBytesFromFormat(vform);
465
  uint64_t addr2 = addr + esize;
466
  uint64_t addr3 = addr2 + esize;
467
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
468
    if (!StoreLane(src, vform, i, addr) || !StoreLane(src2, vform, i, addr2) ||
469
        !StoreLane(src3, vform, i, addr3)) {
470
      return false;
471
    }
472
    addr += 3 * esize;
473
    addr2 += 3 * esize;
474
    addr3 += 3 * esize;
475
  }
476
  return true;
477
}
478

479

480
bool Simulator::st3(VectorFormat vform,
481
                    LogicVRegister src,
482
                    LogicVRegister src2,
483
                    LogicVRegister src3,
484
                    int index,
485
                    uint64_t addr) {
486
  int esize = LaneSizeInBytesFromFormat(vform);
487
  return (StoreLane(src, vform, index, addr) &&
488
          StoreLane(src2, vform, index, addr + 1 * esize) &&
489
          StoreLane(src3, vform, index, addr + 2 * esize));
490
}
491

492

493
bool Simulator::st4(VectorFormat vform,
494
                    LogicVRegister src,
495
                    LogicVRegister src2,
496
                    LogicVRegister src3,
497
                    LogicVRegister src4,
498
                    uint64_t addr) {
499
  int esize = LaneSizeInBytesFromFormat(vform);
500
  uint64_t addr2 = addr + esize;
501
  uint64_t addr3 = addr2 + esize;
502
  uint64_t addr4 = addr3 + esize;
503
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
504
    if (!StoreLane(src, vform, i, addr) || !StoreLane(src2, vform, i, addr2) ||
505
        !StoreLane(src3, vform, i, addr3) ||
506
        !StoreLane(src4, vform, i, addr4)) {
507
      return false;
508
    }
509
    addr += 4 * esize;
510
    addr2 += 4 * esize;
511
    addr3 += 4 * esize;
512
    addr4 += 4 * esize;
513
  }
514
  return true;
515
}
516

517

518
bool Simulator::st4(VectorFormat vform,
519
                    LogicVRegister src,
520
                    LogicVRegister src2,
521
                    LogicVRegister src3,
522
                    LogicVRegister src4,
523
                    int index,
524
                    uint64_t addr) {
525
  int esize = LaneSizeInBytesFromFormat(vform);
526
  return (StoreLane(src, vform, index, addr) &&
527
          StoreLane(src2, vform, index, addr + 1 * esize) &&
528
          StoreLane(src3, vform, index, addr + 2 * esize) &&
529
          StoreLane(src4, vform, index, addr + 3 * esize));
530
}
531

532

533
// Lane-wise comparison of `src1` against `src2` under condition `cond`.
//
// Each destination lane becomes all ones (MaxUintFromFormat) when the
// comparison holds and zero otherwise. `eq`, `hi` and `hs` compare the lanes
// as unsigned values; `ge`, `gt`, `lt` and `le` compare them as signed
// values. Any other condition is unreachable.
LogicVRegister Simulator::cmp(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2,
                              Condition cond) {
  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const int64_t signed_lhs = src1.Int(vform, lane);
    const int64_t signed_rhs = src2.Int(vform, lane);
    const uint64_t unsigned_lhs = src1.Uint(vform, lane);
    const uint64_t unsigned_rhs = src2.Uint(vform, lane);

    bool holds = false;
    switch (cond) {
      case eq:
        holds = (unsigned_lhs == unsigned_rhs);
        break;
      case ge:
        holds = (signed_lhs >= signed_rhs);
        break;
      case gt:
        holds = (signed_lhs > signed_rhs);
        break;
      case hi:
        holds = (unsigned_lhs > unsigned_rhs);
        break;
      case hs:
        holds = (unsigned_lhs >= unsigned_rhs);
        break;
      case lt:
        holds = (signed_lhs < signed_rhs);
        break;
      case le:
        holds = (signed_lhs <= signed_rhs);
        break;
      default:
        VIXL_UNREACHABLE();
        break;
    }

    if (holds) {
      dst.SetUint(vform, lane, MaxUintFromFormat(vform));
    } else {
      dst.SetUint(vform, lane, 0);
    }
  }
  return dst;
}
575

576

577
// Lane-wise comparison of `src1` against the immediate `imm`.
// The immediate is first duplicated into a scratch register and the
// register-register comparison does the rest.
LogicVRegister Simulator::cmp(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              int imm,
                              Condition cond) {
  SimVRegister scratch;
  return cmp(vform, dst, src1, dup_immediate(vform, scratch, imm), cond);
}
586

587

588
// Lane-wise bit test: a destination lane becomes all ones when the
// corresponding lanes of `src1` and `src2` share at least one set bit, and
// zero otherwise.
LogicVRegister Simulator::cmptst(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t common_bits = src1.Uint(vform, lane) & src2.Uint(vform, lane);
    if (common_bits != 0) {
      dst.SetUint(vform, lane, MaxUintFromFormat(vform));
    } else {
      dst.SetUint(vform, lane, 0);
    }
  }
  return dst;
}
600

601

602
// Lane-wise addition of `src1` and `src2`.
//
// The operands are added left-justified in 64 bits, so that carry-out and
// sign changes of a narrow lane appear at bit 63/64 of the 64-bit sum. The
// saturation flags of `dst` are recorded as a side effect; the stored result
// is the plain (wrapping) sum.
LogicVRegister Simulator::add(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  const int lane_bits = LaneSizeInBitsFromFormat(vform);
  dst.ClearForWrite(vform);

  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t lhs = src1.UintLeftJustified(vform, lane);
    const uint64_t rhs = src2.UintLeftJustified(vform, lane);
    const uint64_t sum = lhs + rhs;

    // Unsigned overflow: the 64-bit sum wrapped around.
    if (sum < lhs) {
      dst.SetUnsignedSat(lane, true);
    }

    // Signed overflow: both operands have the same sign, but the sum's sign
    // differs from it.
    const bool lhs_negative = (lhs >> 63) != 0;
    const bool rhs_negative = (rhs >> 63) != 0;
    const bool sum_negative = (sum >> 63) != 0;
    if ((lhs_negative == rhs_negative) && (lhs_negative != sum_negative)) {
      dst.SetSignedSat(lane, !lhs_negative);
    }

    // Shift the result back down to the lane's natural position.
    dst.SetInt(vform, lane, sum >> (64 - lane_bits));
  }
  return dst;
}
631

632
// Add the unsigned scalar `value` to every lane of `src1`.
//
// `value` must fit in the lane width (asserted). As in the vector form, the
// addition is done left-justified in 64 bits and the saturation flags of
// `dst` are recorded; the stored result is the plain (wrapping) sum.
LogicVRegister Simulator::add_uint(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   uint64_t value) {
  const int lane_bits = LaneSizeInBitsFromFormat(vform);
  VIXL_ASSERT(IsUintN(lane_bits, value));
  dst.ClearForWrite(vform);

  // Move `value` to the top of the 64-bit word, matching the lane operands.
  const uint64_t addend = value << (64 - lane_bits);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t lhs = src1.UintLeftJustified(vform, lane);
    const uint64_t sum = lhs + addend;

    // Unsigned overflow: the 64-bit sum wrapped around.
    if (sum < lhs) {
      dst.SetUnsignedSat(lane, true);
    }

    // Signed overflow: `value` is always positive, so the signed result may
    // only be smaller than the first operand if the addition overflowed.
    if (RawbitsToInt64(sum) < RawbitsToInt64(lhs)) {
      dst.SetSignedSat(lane, true);
    }

    // Shift the result back down to the lane's natural position.
    dst.SetInt(vform, lane, sum >> (64 - lane_bits));
  }
  return dst;
}
660

661
// Pairwise add, built from existing primitives: the uzp1 and uzp2 results of
// (`src1`, `src2`) are added lane-wise. For SVE formats the sum is
// additionally passed through interleave_top_bottom.
LogicVRegister Simulator::addp(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  SimVRegister uzp1_result;
  SimVRegister uzp2_result;
  uzp1(vform, uzp1_result, src1, src2);
  uzp2(vform, uzp2_result, src1, src2);
  add(vform, dst, uzp1_result, uzp2_result);

  if (!IsSVEFormat(vform)) {
    return dst;
  }
  interleave_top_bottom(vform, dst, dst);
  return dst;
}
674

675
// Lane-wise signed division of `src1` by `src2`; only the kFormatVnS and
// kFormatVnD formats are accepted (asserted).
//
// Division by zero yields 0. Dividing the minimum representable value by -1
// yields that minimum value again instead of overflowing.
LogicVRegister Simulator::sdiv(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));

  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const int64_t dividend = src1.Int(vform, lane);
    const int64_t divisor = src2.Int(vform, lane);
    const int64_t min_int = (vform == kFormatVnD) ? kXMinInt : kWMinInt;

    int64_t quotient;
    if (divisor == 0) {
      quotient = 0;
    } else if ((dividend == min_int) && (divisor == -1)) {
      // The true quotient is not representable in the lane.
      quotient = min_int;
    } else {
      quotient = dividend / divisor;
    }
    dst.SetInt(vform, lane, quotient);
  }

  return dst;
}
696

697
// Lane-wise unsigned division of `src1` by `src2`; only the kFormatVnS and
// kFormatVnD formats are accepted (asserted). Division by zero yields 0.
LogicVRegister Simulator::udiv(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));

  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t dividend = src1.Uint(vform, lane);
    const uint64_t divisor = src2.Uint(vform, lane);
    const uint64_t quotient = (divisor == 0) ? 0 : (dividend / divisor);
    dst.SetUint(vform, lane, quotient);
  }

  return dst;
}
715

716

717
// Multiply-accumulate: dst = srca + (src1 * src2), lane by lane.
// The product goes through a scratch register so that `dst` may be the same
// register as any of the sources.
LogicVRegister Simulator::mla(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& srca,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  SimVRegister product;
  mul(vform, product, src1, src2);
  add(vform, dst, srca, product);
  return dst;
}
727

728

729
// Multiply-subtract: dst = srca - (src1 * src2), lane by lane.
// The product goes through a scratch register so that `dst` may be the same
// register as any of the sources.
LogicVRegister Simulator::mls(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& srca,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  SimVRegister product;
  mul(vform, product, src1, src2);
  sub(vform, dst, srca, product);
  return dst;
}
739

740

741
// Lane-wise multiplication of `src1` and `src2`. The product is computed on
// the unsigned lane values and written back with SetUint.
LogicVRegister Simulator::mul(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  dst.ClearForWrite(vform);

  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t lhs = src1.Uint(vform, lane);
    const uint64_t rhs = src2.Uint(vform, lane);
    dst.SetUint(vform, lane, lhs * rhs);
  }
  return dst;
}
752

753

754
// Multiply by element: element `index` of `src2` is duplicated into a scratch
// register (using the Q-filled variant of `vform`) and the vector form of mul
// is applied to `src1` and that scratch register.
LogicVRegister Simulator::mul(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2,
                              int index) {
  SimVRegister scratch;
  const VectorFormat dup_form = VectorFormatFillQ(vform);
  const LogicVRegister element = dup_element(dup_form, scratch, src2, index);
  return mul(vform, dst, src1, element);
}
763

764

765
// Signed multiply-high: each destination lane receives the value returned by
// internal::MultiplyHigh<N> for the signed lane values of `src1` and `src2`,
// where N is the lane width in bits (8, 16, 32 or 64).
LogicVRegister Simulator::smulh(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    // Recognisable placeholder; overwritten by every supported lane size.
    int64_t high_part = 0xbadbeef;
    const int64_t lhs = src1.Int(vform, lane);
    const int64_t rhs = src2.Int(vform, lane);
    switch (LaneSizeInBitsFromFormat(vform)) {
      case 8:
        high_part = internal::MultiplyHigh<8>(lhs, rhs);
        break;
      case 16:
        high_part = internal::MultiplyHigh<16>(lhs, rhs);
        break;
      case 32:
        high_part = internal::MultiplyHigh<32>(lhs, rhs);
        break;
      case 64:
        high_part = internal::MultiplyHigh<64>(lhs, rhs);
        break;
      default:
        VIXL_UNREACHABLE();
        break;
    }
    dst.SetInt(vform, lane, high_part);
  }
  return dst;
}
794

795

796
// Unsigned multiply-high: each destination lane receives the value returned
// by internal::MultiplyHigh<N> for the unsigned lane values of `src1` and
// `src2`, where N is the lane width in bits (8, 16, 32 or 64).
LogicVRegister Simulator::umulh(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    // Recognisable placeholder; overwritten by every supported lane size.
    uint64_t high_part = 0xbadbeef;
    const uint64_t lhs = src1.Uint(vform, lane);
    const uint64_t rhs = src2.Uint(vform, lane);
    switch (LaneSizeInBitsFromFormat(vform)) {
      case 8:
        high_part = internal::MultiplyHigh<8>(lhs, rhs);
        break;
      case 16:
        high_part = internal::MultiplyHigh<16>(lhs, rhs);
        break;
      case 32:
        high_part = internal::MultiplyHigh<32>(lhs, rhs);
        break;
      case 64:
        high_part = internal::MultiplyHigh<64>(lhs, rhs);
        break;
      default:
        VIXL_UNREACHABLE();
        break;
    }
    dst.SetUint(vform, lane, high_part);
  }
  return dst;
}
825

826

827
// Multiply-accumulate by element: element `index` of `src2` is duplicated
// into a scratch register (using the Q-filled variant of `vform`), then the
// vector form of mla is applied with `dst` as the accumulator.
LogicVRegister Simulator::mla(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2,
                              int index) {
  SimVRegister scratch;
  const VectorFormat dup_form = VectorFormatFillQ(vform);
  const LogicVRegister element = dup_element(dup_form, scratch, src2, index);
  return mla(vform, dst, dst, src1, element);
}
836

837

838
// Multiply-subtract by element: element `index` of `src2` is duplicated into
// a scratch register (using the Q-filled variant of `vform`), then the vector
// form of mls is applied with `dst` as the accumulator.
LogicVRegister Simulator::mls(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2,
                              int index) {
  SimVRegister scratch;
  const VectorFormat dup_form = VectorFormatFillQ(vform);
  const LogicVRegister element = dup_element(dup_form, scratch, src2, index);
  return mls(vform, dst, dst, src1, element);
}
847

848
// sqdmull by element: element `index` of `src2` is duplicated into a scratch
// register and the vector form of sqdmull is applied. The duplication uses
// the half-width, double-lane variant of the Q-filled `vform`.
LogicVRegister Simulator::sqdmull(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2,
                                  int index) {
  SimVRegister scratch;
  const VectorFormat filled_form = VectorFormatFillQ(vform);
  const VectorFormat dup_form = VectorFormatHalfWidthDoubleLanes(filled_form);
  const LogicVRegister element = dup_element(dup_form, scratch, src2, index);
  return sqdmull(vform, dst, src1, element);
}
858

859
// sqdmlal by element: element `index` of `src2` is duplicated into a scratch
// register and the vector form of sqdmlal is applied. The duplication uses
// the half-width, double-lane variant of the Q-filled `vform`.
LogicVRegister Simulator::sqdmlal(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2,
                                  int index) {
  SimVRegister scratch;
  const VectorFormat filled_form = VectorFormatFillQ(vform);
  const VectorFormat dup_form = VectorFormatHalfWidthDoubleLanes(filled_form);
  const LogicVRegister element = dup_element(dup_form, scratch, src2, index);
  return sqdmlal(vform, dst, src1, element);
}
869

870
// sqdmlsl by element: element `index` of `src2` is duplicated into a scratch
// register and the vector form of sqdmlsl is applied. The duplication uses
// the half-width, double-lane variant of the Q-filled `vform`.
LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2,
                                  int index) {
  SimVRegister scratch;
  const VectorFormat filled_form = VectorFormatFillQ(vform);
  const VectorFormat dup_form = VectorFormatHalfWidthDoubleLanes(filled_form);
  const LogicVRegister element = dup_element(dup_form, scratch, src2, index);
  return sqdmlsl(vform, dst, src1, element);
}
880

881
// sqdmulh by element: element `index` of `src2` is duplicated into a scratch
// register (using the Q-filled variant of `vform`) and the vector form of
// sqdmulh is applied.
LogicVRegister Simulator::sqdmulh(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2,
                                  int index) {
  SimVRegister scratch;
  const VectorFormat dup_form = VectorFormatFillQ(vform);
  const LogicVRegister element = dup_element(dup_form, scratch, src2, index);
  return sqdmulh(vform, dst, src1, element);
}
890

891

892
// sqrdmulh by element: element `index` of `src2` is duplicated into a scratch
// register (using the Q-filled variant of `vform`) and the vector form of
// sqrdmulh is applied.
LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2,
                                   int index) {
  SimVRegister scratch;
  const VectorFormat dup_form = VectorFormatFillQ(vform);
  const LogicVRegister element = dup_element(dup_form, scratch, src2, index);
  return sqrdmulh(vform, dst, src1, element);
}
901

902

903
// sqrdmlah by element: element `index` of `src2` is duplicated into a scratch
// register (using the Q-filled variant of `vform`) and the vector form of
// sqrdmlah is applied.
LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2,
                                   int index) {
  SimVRegister scratch;
  const VectorFormat dup_form = VectorFormatFillQ(vform);
  const LogicVRegister element = dup_element(dup_form, scratch, src2, index);
  return sqrdmlah(vform, dst, src1, element);
}
912

913

914
// By-element overload of sqrdmlsh(): the second multiplicand comes from lane
// `index` of `src2`.
//
// dup_element() expands that lane into a scratch register using
// VectorFormatFillQ(vform); the four-argument sqrdmlsh() overload does the
// rest.
LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2,
                                   int index) {
  const VectorFormat element_form = VectorFormatFillQ(vform);
  SimVRegister scratch;
  LogicVRegister multiplier = dup_element(element_form, scratch, src2, index);
  return sqrdmlsh(vform, dst, src1, multiplier);
}
923

924
uint64_t Simulator::PolynomialMult(uint64_t op1,
925
                                   uint64_t op2,
926
                                   int lane_size_in_bits) const {
927
  return PolynomialMult128(op1, op2, lane_size_in_bits).second;
928
}
929

930
// Lane-wise polynomial multiply: each destination lane receives
// PolynomialMult() of the matching lanes of `src1` and `src2`, evaluated at
// the lane width of `vform`.
LogicVRegister Simulator::pmul(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    const uint64_t lhs = src1.Uint(vform, lane);
    const uint64_t rhs = src2.Uint(vform, lane);
    dst.SetUint(vform,
                lane,
                PolynomialMult(lhs, rhs, LaneSizeInBitsFromFormat(vform)));
  }
  return dst;
}
944

945

946
// Widening polynomial multiply. `vform` describes the (wide) destination
// lanes; the sources are read with the half-width format, lane i of each
// source feeding destination lane i.
LogicVRegister Simulator::pmull(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  const VectorFormat narrow_form = VectorFormatHalfWidth(vform);

  // Walk from the highest lane down to lane 0 so that a destination which
  // aliases one of the sources is not overwritten before it has been read.
  int lane = LaneCountFromFormat(vform);
  while (lane-- > 0) {
    const uint64_t lhs = src1.Uint(narrow_form, lane);
    const uint64_t rhs = src2.Uint(narrow_form, lane);
    dst.SetUint(vform,
                lane,
                PolynomialMult128(lhs,
                                  rhs,
                                  LaneSizeInBitsFromFormat(narrow_form)));
  }

  return dst;
}
965

966

967
// Widening polynomial multiply of the upper source lanes. The sources are
// read with the half-width, double-lane-count format, starting at lane
// `LaneCountFromFormat(vform)`, and destination lane i receives the product of
// source lanes (lane count + i).
LogicVRegister Simulator::pmull2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  const VectorFormat narrow_form = VectorFormatHalfWidthDoubleLanes(vform);

  const int wide_lanes = LaneCountFromFormat(vform);
  for (int out = 0; out < wide_lanes; ++out) {
    const int in = wide_lanes + out;
    const uint64_t lhs = src1.Uint(narrow_form, in);
    const uint64_t rhs = src2.Uint(narrow_form, in);
    dst.SetUint(vform,
                out,
                PolynomialMult128(lhs,
                                  rhs,
                                  LaneSizeInBitsFromFormat(narrow_form)));
  }

  return dst;
}
985

986

987
// Lane-wise subtraction (src1 - src2) that also records, per lane, whether
// the result overflowed as an unsigned or as a signed value.
//
// Operands are read left-justified in 64 bits so that the borrow and sign
// checks can use bit 63 regardless of the lane width; the result is shifted
// back down before being stored.
LogicVRegister Simulator::sub(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  const int lane_bits = LaneSizeInBitsFromFormat(vform);
  dst.ClearForWrite(vform);
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    const uint64_t minuend = src1.UintLeftJustified(vform, lane);
    const uint64_t subtrahend = src2.UintLeftJustified(vform, lane);
    const uint64_t difference = minuend - subtrahend;

    // Unsigned: subtracting a larger value from a smaller one wraps.
    if (minuend < subtrahend) {
      dst.SetUnsignedSat(lane, false);
    }

    // Signed: overflow is only possible when the operands have different
    // signs, and it happened if the result's sign differs from the first
    // operand's.
    const bool minuend_non_negative = (minuend >> 63) == 0;
    const bool subtrahend_non_negative = (subtrahend >> 63) == 0;
    const bool difference_non_negative = (difference >> 63) == 0;
    const bool signs_differ = minuend_non_negative != subtrahend_non_negative;
    const bool sign_flipped = minuend_non_negative != difference_non_negative;
    if (signs_differ && sign_flipped) {
      dst.SetSignedSat(lane, minuend_non_negative);
    }

    dst.SetInt(vform, lane, difference >> (64 - lane_bits));
  }
  return dst;
}
1016

1017
// Subtracts the unsigned scalar `value` from every lane of `src1`, recording
// per-lane unsigned and signed overflow. `value` must fit in one lane (this is
// asserted).
LogicVRegister Simulator::sub_uint(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   uint64_t value) {
  const int lane_bits = LaneSizeInBitsFromFormat(vform);
  VIXL_ASSERT(IsUintN(lane_bits, value));
  dst.ClearForWrite(vform);
  // Move `value` to the top of the 64-bit word, matching the left-justified
  // lane reads below.
  const uint64_t subtrahend = value << (64 - lane_bits);
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    const uint64_t minuend = src1.UintLeftJustified(vform, lane);
    const uint64_t difference = minuend - subtrahend;

    // Unsigned: subtracting a larger value from a smaller one wraps.
    if (minuend < subtrahend) {
      dst.SetUnsignedSat(lane, false);
    }

    // Signed: `value` is treated as positive, so the result should never be
    // greater than the first operand; if it is, the subtraction overflowed.
    if (RawbitsToInt64(minuend) < RawbitsToInt64(difference)) {
      dst.SetSignedSat(lane, false);
    }

    dst.SetInt(vform, lane, difference >> (64 - lane_bits));
  }
  return dst;
}
1045

1046
// Lane-wise bitwise AND: dst[i] = src1[i] & src2[i].
LogicVRegister Simulator::and_(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    const uint64_t lhs = src1.Uint(vform, lane);
    const uint64_t rhs = src2.Uint(vform, lane);
    dst.SetUint(vform, lane, lhs & rhs);
  }
  return dst;
}
1056

1057

1058
// Lane-wise bitwise OR: dst[i] = src1[i] | src2[i].
LogicVRegister Simulator::orr(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    const uint64_t lhs = src1.Uint(vform, lane);
    const uint64_t rhs = src2.Uint(vform, lane);
    dst.SetUint(vform, lane, lhs | rhs);
  }
  return dst;
}
1068

1069

1070
// Lane-wise bitwise OR-NOT: dst[i] = src1[i] | ~src2[i].
LogicVRegister Simulator::orn(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    const uint64_t lhs = src1.Uint(vform, lane);
    const uint64_t inverted_rhs = ~src2.Uint(vform, lane);
    dst.SetUint(vform, lane, lhs | inverted_rhs);
  }
  return dst;
}
1080

1081

1082
// Lane-wise bitwise exclusive OR: dst[i] = src1[i] ^ src2[i].
LogicVRegister Simulator::eor(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    const uint64_t lhs = src1.Uint(vform, lane);
    const uint64_t rhs = src2.Uint(vform, lane);
    dst.SetUint(vform, lane, lhs ^ rhs);
  }
  return dst;
}
1092

1093

1094
// Lane-wise bit clear with a register operand: dst[i] = src1[i] & ~src2[i].
LogicVRegister Simulator::bic(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    const uint64_t value = src1.Uint(vform, lane);
    const uint64_t keep = ~src2.Uint(vform, lane);
    dst.SetUint(vform, lane, value & keep);
  }
  return dst;
}
1104

1105

1106
// Lane-wise bit clear with an immediate: dst[i] = src[i] & ~imm.
//
// All lanes are computed into a local buffer before `dst` is cleared and
// written. The buffer holds 16 entries, so `vform` must not have more than 16
// lanes.
LogicVRegister Simulator::bic(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src,
                              uint64_t imm) {
  uint64_t masked[16];
  const int lanes = LaneCountFromFormat(vform);
  const uint64_t keep = ~imm;
  for (int lane = 0; lane < lanes; ++lane) {
    masked[lane] = src.Uint(vform, lane) & keep;
  }
  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    dst.SetUint(vform, lane, masked[lane]);
  }
  return dst;
}
1121

1122

1123
// Bitwise insert-if-false: wherever a bit of `src2` is 0 the corresponding
// bit of `src1` is copied into `dst`; wherever it is 1 the existing `dst` bit
// is kept.
LogicVRegister Simulator::bif(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    const uint64_t current = dst.Uint(vform, lane);
    const uint64_t insert_mask = ~src2.Uint(vform, lane);
    const uint64_t incoming = src1.Uint(vform, lane);
    // Bit-for-bit the same as current ^ ((current ^ incoming) & insert_mask).
    const uint64_t merged =
        (current & ~insert_mask) | (incoming & insert_mask);
    dst.SetUint(vform, lane, merged);
  }
  return dst;
}
1137

1138

1139
// Bitwise insert-if-true: wherever a bit of `src2` is 1 the corresponding bit
// of `src1` is copied into `dst`; wherever it is 0 the existing `dst` bit is
// kept.
LogicVRegister Simulator::bit(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    const uint64_t current = dst.Uint(vform, lane);
    const uint64_t insert_mask = src2.Uint(vform, lane);
    const uint64_t incoming = src1.Uint(vform, lane);
    // Bit-for-bit the same as current ^ ((current ^ incoming) & insert_mask).
    const uint64_t merged =
        (current & ~insert_mask) | (incoming & insert_mask);
    dst.SetUint(vform, lane, merged);
  }
  return dst;
}
1153

1154

1155
// Bitwise select: each result bit is taken from `src1` where the matching bit
// of `src_mask` is 1, and from `src2` where it is 0.
LogicVRegister Simulator::bsl(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src_mask,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    const uint64_t when_clear = src2.Uint(vform, lane);
    const uint64_t selector = src_mask.Uint(vform, lane);
    const uint64_t when_set = src1.Uint(vform, lane);
    // Bit-for-bit the same as when_clear ^ ((when_clear ^ when_set) & selector).
    const uint64_t chosen = (when_clear & ~selector) | (when_set & selector);
    dst.SetUint(vform, lane, chosen);
  }
  return dst;
}
1170

1171

1172
// Lane-wise signed minimum or maximum of `src1` and `src2`.
// `max` selects the maximum when true and the minimum when false.
LogicVRegister Simulator::sminmax(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2,
                                  bool max) {
  dst.ClearForWrite(vform);
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    const int64_t lhs = src1.Int(vform, lane);
    const int64_t rhs = src2.Int(vform, lane);
    // On a tie both operands hold the same value, so taking `rhs` is fine.
    const bool take_lhs = max ? (lhs > rhs) : (lhs < rhs);
    dst.SetInt(vform, lane, take_lhs ? lhs : rhs);
  }
  return dst;
}
1191

1192

1193
// Lane-wise signed maximum; forwards to sminmax() with `max` set.
LogicVRegister Simulator::smax(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  const bool select_max = true;
  return sminmax(vform, dst, src1, src2, select_max);
}
1199

1200

1201
// Lane-wise signed minimum; forwards to sminmax() with `max` cleared.
LogicVRegister Simulator::smin(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  const bool select_max = false;
  return sminmax(vform, dst, src1, src2, select_max);
}
1207

1208

1209
// Pairwise signed minimum/maximum.
//
// Adjacent lane pairs of `src1` are reduced into the lower half of a local
// result buffer and adjacent pairs of `src2` into the upper half. The buffer
// is then written to `dst`; for SVE formats interleave_top_bottom() is applied
// to `dst` afterwards. `max` selects maximum (true) or minimum (false).
LogicVRegister Simulator::sminmaxp(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2,
                                   bool max) {
  const unsigned lanes = LaneCountFromFormat(vform);
  const unsigned half = lanes / 2;
  int64_t result[kZRegMaxSizeInBytes];
  const LogicVRegister* const sources[2] = {&src1, &src2};
  for (unsigned s = 0; s < 2; s++) {
    const LogicVRegister& src = *sources[s];
    for (unsigned pair = 0; (pair * 2) < lanes; pair++) {
      const int64_t even_val = src.Int(vform, pair * 2);
      const int64_t odd_val = src.Int(vform, (pair * 2) + 1);
      const bool take_even = max ? (even_val > odd_val) : (even_val < odd_val);
      const unsigned out = pair + (s * half);
      VIXL_ASSERT(out < ArrayLength(result));
      result[out] = take_even ? even_val : odd_val;
    }
  }
  dst.SetIntArray(vform, result);
  if (IsSVEFormat(vform)) {
    interleave_top_bottom(vform, dst, dst);
  }
  return dst;
}
1238

1239

1240
// Pairwise signed maximum; forwards to sminmaxp() with `max` set.
LogicVRegister Simulator::smaxp(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  const bool select_max = true;
  return sminmaxp(vform, dst, src1, src2, select_max);
}
1246

1247

1248
// Pairwise signed minimum; forwards to sminmaxp() with `max` cleared.
LogicVRegister Simulator::sminp(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  const bool select_max = false;
  return sminmaxp(vform, dst, src1, src2, select_max);
}
1254

1255

1256
// Scalar pairwise add: sums the two 64-bit lanes of `src` (read as kFormat2D)
// and stores the total in lane 0 of `dst`. Only kFormatD is accepted.
LogicVRegister Simulator::addp(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src) {
  VIXL_ASSERT(vform == kFormatD);

  const uint64_t low = src.Uint(kFormat2D, 0);
  const uint64_t high = src.Uint(kFormat2D, 1);
  const uint64_t total = low + high;
  dst.ClearForWrite(vform);
  dst.SetUint(vform, 0, total);
  return dst;
}
1266

1267

1268
// Add across vector: accumulates every lane of `src` (read as signed) and
// writes the total to lane 0 of `dst`, using the scalar format that has the
// same lane size as `vform`.
LogicVRegister Simulator::addv(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src) {
  const VectorFormat scalar_form =
      ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));

  int64_t total = 0;
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    total += src.Int(vform, lane);
  }

  dst.ClearForWrite(scalar_form);
  dst.SetInt(scalar_form, 0, total);
  return dst;
}
1284

1285

1286
// Signed add long across vector: accumulates every lane of `src` as a signed
// value and writes the total to lane 0 of `dst`, using the scalar format
// whose lane is twice as wide as the lanes of `vform`.
LogicVRegister Simulator::saddlv(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src) {
  const VectorFormat wide_scalar_form =
      ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);

  int64_t total = 0;
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    total += src.Int(vform, lane);
  }

  dst.ClearForWrite(wide_scalar_form);
  dst.SetInt(wide_scalar_form, 0, total);
  return dst;
}
1301

1302

1303
// Unsigned add long across vector: accumulates every lane of `src` as an
// unsigned value and writes the total to lane 0 of `dst`, using the scalar
// format whose lane is twice as wide as the lanes of `vform`.
LogicVRegister Simulator::uaddlv(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src) {
  const VectorFormat wide_scalar_form =
      ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);

  uint64_t total = 0;
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    total += src.Uint(vform, lane);
  }

  dst.ClearForWrite(wide_scalar_form);
  dst.SetUint(wide_scalar_form, 0, total);
  return dst;
}
1318

1319

1320
// Signed minimum/maximum across the lanes of `src` that are active in `pg`.
//
// The running value starts at INT64_MIN (for max) or INT64_MAX (for min), so
// that is what gets written if no lane is active. The result goes to lane 0 of
// `dst` after `dst` is cleared for the scalar form of `vform`.
LogicVRegister Simulator::sminmaxv(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicPRegister& pg,
                                   const LogicVRegister& src,
                                   bool max) {
  int64_t best = max ? INT64_MIN : INT64_MAX;
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    if (pg.IsActive(vform, lane)) {
      const int64_t candidate = src.Int(vform, lane);
      const bool improves = max ? (candidate > best) : (candidate < best);
      if (improves) {
        best = candidate;
      }
    }
  }
  dst.ClearForWrite(ScalarFormatFromFormat(vform));
  dst.SetInt(vform, 0, best);
  return dst;
}
1340

1341

1342
// Unpredicated signed maximum across vector: runs sminmaxv() with the
// predicate returned by GetPTrue() and `max` set.
LogicVRegister Simulator::smaxv(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  const bool select_max = true;
  sminmaxv(vform, dst, GetPTrue(), src, select_max);
  return dst;
}
1348

1349

1350
// Unpredicated signed minimum across vector: runs sminmaxv() with the
// predicate returned by GetPTrue() and `max` cleared.
LogicVRegister Simulator::sminv(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  const bool select_max = false;
  sminmaxv(vform, dst, GetPTrue(), src, select_max);
  return dst;
}
1356

1357

1358
// Predicated signed maximum across vector. Only SVE formats are accepted
// (asserted); lanes inactive in `pg` are ignored by sminmaxv().
LogicVRegister Simulator::smaxv(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicPRegister& pg,
                                const LogicVRegister& src) {
  VIXL_ASSERT(IsSVEFormat(vform));
  const bool select_max = true;
  sminmaxv(vform, dst, pg, src, select_max);
  return dst;
}
1366

1367

1368
// Predicated signed minimum across vector. Only SVE formats are accepted
// (asserted); lanes inactive in `pg` are ignored by sminmaxv().
LogicVRegister Simulator::sminv(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicPRegister& pg,
                                const LogicVRegister& src) {
  VIXL_ASSERT(IsSVEFormat(vform));
  const bool select_max = false;
  sminmaxv(vform, dst, pg, src, select_max);
  return dst;
}
1376

1377

1378
// Lane-wise unsigned minimum or maximum of `src1` and `src2`.
// `max` selects the maximum when true and the minimum when false.
LogicVRegister Simulator::uminmax(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2,
                                  bool max) {
  dst.ClearForWrite(vform);
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    const uint64_t lhs = src1.Uint(vform, lane);
    const uint64_t rhs = src2.Uint(vform, lane);
    // On a tie both operands hold the same value, so taking `rhs` is fine.
    const bool take_lhs = max ? (lhs > rhs) : (lhs < rhs);
    dst.SetUint(vform, lane, take_lhs ? lhs : rhs);
  }
  return dst;
}
1397

1398

1399
// Lane-wise unsigned maximum; forwards to uminmax() with `max` set.
LogicVRegister Simulator::umax(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  const bool select_max = true;
  return uminmax(vform, dst, src1, src2, select_max);
}
1405

1406

1407
// Lane-wise unsigned minimum; forwards to uminmax() with `max` cleared.
LogicVRegister Simulator::umin(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  const bool select_max = false;
  return uminmax(vform, dst, src1, src2, select_max);
}
1413

1414

1415
// Pairwise unsigned minimum/maximum.
//
// Adjacent lane pairs of `src1` are reduced into the lower half of a local
// result buffer and adjacent pairs of `src2` into the upper half. The buffer
// is then written to `dst`; for SVE formats interleave_top_bottom() is applied
// to `dst` afterwards. `max` selects maximum (true) or minimum (false).
LogicVRegister Simulator::uminmaxp(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2,
                                   bool max) {
  const unsigned lanes = LaneCountFromFormat(vform);
  const unsigned half = lanes / 2;
  uint64_t result[kZRegMaxSizeInBytes];
  const LogicVRegister* const sources[2] = {&src1, &src2};
  for (unsigned s = 0; s < 2; s++) {
    const LogicVRegister& src = *sources[s];
    for (unsigned pair = 0; (pair * 2) < lanes; pair++) {
      const uint64_t even_val = src.Uint(vform, pair * 2);
      const uint64_t odd_val = src.Uint(vform, (pair * 2) + 1);
      const bool take_even = max ? (even_val > odd_val) : (even_val < odd_val);
      const unsigned out = pair + (s * half);
      VIXL_ASSERT(out < ArrayLength(result));
      result[out] = take_even ? even_val : odd_val;
    }
  }
  dst.SetUintArray(vform, result);
  if (IsSVEFormat(vform)) {
    interleave_top_bottom(vform, dst, dst);
  }
  return dst;
}
1444

1445

1446
// Pairwise unsigned maximum; forwards to uminmaxp() with `max` set.
LogicVRegister Simulator::umaxp(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  const bool select_max = true;
  return uminmaxp(vform, dst, src1, src2, select_max);
}
1452

1453

1454
// Pairwise unsigned minimum; forwards to uminmaxp() with `max` cleared.
LogicVRegister Simulator::uminp(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  const bool select_max = false;
  return uminmaxp(vform, dst, src1, src2, select_max);
}
1460

1461

1462
// Unsigned minimum/maximum across the lanes of `src` that are active in `pg`.
//
// The running value starts at 0 (for max) or UINT64_MAX (for min), so that is
// what gets written if no lane is active. The result goes to lane 0 of `dst`
// after `dst` is cleared for the scalar form of `vform`.
LogicVRegister Simulator::uminmaxv(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicPRegister& pg,
                                   const LogicVRegister& src,
                                   bool max) {
  uint64_t best = max ? 0 : UINT64_MAX;
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    if (pg.IsActive(vform, lane)) {
      const uint64_t candidate = src.Uint(vform, lane);
      const bool improves = max ? (candidate > best) : (candidate < best);
      if (improves) {
        best = candidate;
      }
    }
  }
  dst.ClearForWrite(ScalarFormatFromFormat(vform));
  dst.SetUint(vform, 0, best);
  return dst;
}
1482

1483

1484
// Unpredicated unsigned maximum across vector: runs uminmaxv() with the
// predicate returned by GetPTrue() and `max` set.
LogicVRegister Simulator::umaxv(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  const bool select_max = true;
  uminmaxv(vform, dst, GetPTrue(), src, select_max);
  return dst;
}
1490

1491

1492
// Unpredicated unsigned minimum across vector: runs uminmaxv() with the
// predicate returned by GetPTrue() and `max` cleared.
LogicVRegister Simulator::uminv(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  const bool select_max = false;
  uminmaxv(vform, dst, GetPTrue(), src, select_max);
  return dst;
}
1498

1499

1500
// Predicated unsigned maximum across vector. Only SVE formats are accepted
// (asserted); lanes inactive in `pg` are ignored by uminmaxv().
LogicVRegister Simulator::umaxv(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicPRegister& pg,
                                const LogicVRegister& src) {
  VIXL_ASSERT(IsSVEFormat(vform));
  const bool select_max = true;
  uminmaxv(vform, dst, pg, src, select_max);
  return dst;
}
1508

1509

1510
// Predicated unsigned minimum across vector. Only SVE formats are accepted
// (asserted); lanes inactive in `pg` are ignored by uminmaxv().
LogicVRegister Simulator::uminv(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicPRegister& pg,
                                const LogicVRegister& src) {
  VIXL_ASSERT(IsSVEFormat(vform));
  const bool select_max = false;
  uminmaxv(vform, dst, pg, src, select_max);
  return dst;
}
1518

1519

1520
// Shift left by an immediate. The (non-negative) shift amount is placed in a
// scratch register with dup_immediate() and the register-operand ushl() then
// performs the shift.
LogicVRegister Simulator::shl(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src,
                              int shift) {
  VIXL_ASSERT(shift >= 0);
  SimVRegister scratch;
  LogicVRegister shift_amounts = dup_immediate(vform, scratch, shift);
  return ushl(vform, dst, src, shift_amounts);
}
1529

1530

1531
// Signed shift left long by an immediate: `src` is first widened with sxtl()
// into a scratch register, then shifted with sshl() using a scratch register
// of shift amounts built by dup_immediate(). `shift` must be non-negative.
LogicVRegister Simulator::sshll(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                int shift) {
  VIXL_ASSERT(shift >= 0);
  SimVRegister shift_scratch, widen_scratch;
  LogicVRegister shift_amounts = dup_immediate(vform, shift_scratch, shift);
  LogicVRegister widened = sxtl(vform, widen_scratch, src);
  return sshl(vform, dst, widened, shift_amounts);
}
1541

1542

1543
// Second-part variant of sshll(): identical except that `src` is widened with
// sxtl2() instead of sxtl(). `shift` must be non-negative.
LogicVRegister Simulator::sshll2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src,
                                 int shift) {
  VIXL_ASSERT(shift >= 0);
  SimVRegister shift_scratch, widen_scratch;
  LogicVRegister shift_amounts = dup_immediate(vform, shift_scratch, shift);
  LogicVRegister widened = sxtl2(vform, widen_scratch, src);
  return sshl(vform, dst, widened, shift_amounts);
}
1553

1554

1555
// Shift left long by a fixed amount: half the lane size of `vform`.
// Implemented by calling sshll() with that shift.
LogicVRegister Simulator::shll(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src) {
  const int half_lane_bits = LaneSizeInBitsFromFormat(vform) / 2;
  return sshll(vform, dst, src, half_lane_bits);
}
1561

1562

1563
// Second-part variant of shll(): shifts by half the lane size of `vform`,
// implemented by calling sshll2() with that shift.
LogicVRegister Simulator::shll2(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  const int half_lane_bits = LaneSizeInBitsFromFormat(vform) / 2;
  return sshll2(vform, dst, src, half_lane_bits);
}
1569

1570

1571
// Unsigned shift left long by an immediate: `src` is first widened with uxtl()
// into a scratch register, then shifted with ushl() using a scratch register
// of shift amounts built by dup_immediate(). `shift` must be non-negative.
LogicVRegister Simulator::ushll(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                int shift) {
  VIXL_ASSERT(shift >= 0);
  SimVRegister shift_scratch, widen_scratch;
  LogicVRegister shift_amounts = dup_immediate(vform, shift_scratch, shift);
  LogicVRegister widened = uxtl(vform, widen_scratch, src);
  return ushl(vform, dst, widened, shift_amounts);
}
1581

1582

1583
// Second-part variant of ushll(): identical except that `src` is widened with
// uxtl2() instead of uxtl(). `shift` must be non-negative.
LogicVRegister Simulator::ushll2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src,
                                 int shift) {
  VIXL_ASSERT(shift >= 0);
  SimVRegister shift_scratch, widen_scratch;
  LogicVRegister shift_amounts = dup_immediate(vform, shift_scratch, shift);
  LogicVRegister widened = uxtl2(vform, widen_scratch, src);
  return ushl(vform, dst, widened, shift_amounts);
}
1593

1594
std::pair<bool, uint64_t> Simulator::clast(VectorFormat vform,
1595
                                           const LogicPRegister& pg,
1596
                                           const LogicVRegister& src,
1597
                                           int offset_from_last_active) {
1598
  // Untested for any other values.
1599
  VIXL_ASSERT((offset_from_last_active == 0) || (offset_from_last_active == 1));
1600

1601
  int last_active = GetLastActive(vform, pg);
1602
  int lane_count = LaneCountFromFormat(vform);
1603
  int index =
1604
      ((last_active + offset_from_last_active) + lane_count) % lane_count;
1605
  return std::make_pair(last_active >= 0, src.Uint(vform, index));
1606
}
1607

1608
// Packs the lanes of `src` that are active in `pg` into the lowest lanes of
// `dst`, preserving their order, and zero-fills the remaining lanes.
LogicVRegister Simulator::compact(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicPRegister& pg,
                                  const LogicVRegister& src) {
  const int lanes = LaneCountFromFormat(vform);
  int out = 0;
  for (int in = 0; in < lanes; in++) {
    if (!pg.IsActive(vform, in)) continue;
    dst.SetUint(vform, out, src.Uint(vform, in));
    out++;
  }
  // Everything above the packed lanes becomes zero.
  while (out < lanes) {
    dst.SetUint(vform, out, 0);
    out++;
  }
  return dst;
}
1623

1624
// Builds `dst` from the lanes of `src1` between the first and last active
// lanes of `pg` (inclusive), followed by lanes of `src2` starting from lane 0
// until the vector is full. If no lane is active, `dst` is simply the lanes of
// `src2`.
LogicVRegister Simulator::splice(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicPRegister& pg,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  const int lane_count = LaneCountFromFormat(vform);
  const int first_active = GetFirstActive(vform, pg);
  const int last_active = GetLastActive(vform, pg);
  uint64_t result[kZRegMaxSizeInBytes];
  int filled = 0;

  if (first_active >= 0) {
    VIXL_ASSERT(last_active >= first_active);
    VIXL_ASSERT(last_active < lane_count);
    for (int lane = first_active; lane <= last_active; lane++) {
      result[filled] = src1.Uint(vform, lane);
      filled++;
    }
  }

  VIXL_ASSERT(filled <= lane_count);
  // Top up the remaining slots from the bottom of src2.
  for (int lane = 0; (filled + lane) < lane_count; lane++) {
    result[filled + lane] = src2.Uint(vform, lane);
  }

  dst.SetUintArray(vform, result);

  return dst;
}
1652

1653
// Lane-wise select between two vectors: a lane comes from `src1` when its
// governing predicate bit is set and from `src2` otherwise.
//
// The governing bit for a lane is at index
// lane * (lane size in bits / kZRegBitsPerPRegBit) in `pg`.
LogicVRegister Simulator::sel(VectorFormat vform,
                              LogicVRegister dst,
                              const SimPRegister& pg,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  const int pred_bits_per_lane =
      LaneSizeInBitsFromFormat(vform) / kZRegBitsPerPRegBit;
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; lane++) {
    uint64_t chosen;
    if (pg.GetBit(lane * pred_bits_per_lane)) {
      chosen = src1.Uint(vform, lane);
    } else {
      chosen = src2.Uint(vform, lane);
    }
    dst.SetUint(vform, lane, chosen);
  }
  return dst;
}
1668

1669

1670
// Bitwise select between two predicates, one chunk at a time: each result bit
// comes from `src1` where the matching bit of `pg` is set and from `src2`
// where it is clear.
LogicPRegister Simulator::sel(LogicPRegister dst,
                              const LogicPRegister& pg,
                              const LogicPRegister& src1,
                              const LogicPRegister& src2) {
  for (int chunk = 0; chunk < dst.GetChunkCount(); chunk++) {
    const LogicPRegister::ChunkType selector = pg.GetChunk(chunk);
    const LogicPRegister::ChunkType from_src1 = selector & src1.GetChunk(chunk);
    const LogicPRegister::ChunkType from_src2 =
        ~selector & src2.GetChunk(chunk);
    const LogicPRegister::ChunkType merged = from_src1 | from_src2;
    dst.SetChunk(chunk, merged);
  }
  return dst;
}
1682

1683

1684
// Shift left and insert: each lane of `src` is shifted left by `shift` and
// written over the corresponding bits of `dst`, while the low `shift` bits of
// each `dst` lane keep their previous value.
//
// `shift` must satisfy 0 <= shift < lane size in bits. This is asserted, in
// line with the range check in sri(), because shifting a uint64_t by a
// negative amount or by 64 or more is undefined behaviour.
LogicVRegister Simulator::sli(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src,
                              int shift) {
  dst.ClearForWrite(vform);
  int lane_count = LaneCountFromFormat(vform);
  VIXL_ASSERT((shift >= 0) &&
              (shift < static_cast<int>(LaneSizeInBitsFromFormat(vform))));
  for (int i = 0; i < lane_count; i++) {
    uint64_t src_lane = src.Uint(vform, i);
    uint64_t dst_lane = dst.Uint(vform, i);
    uint64_t shifted = src_lane << shift;
    // Bits at or above `shift` are replaced; bits below it are preserved.
    uint64_t mask = MaxUintFromFormat(vform) << shift;
    dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
  }
  return dst;
}
1699

1700

1701
// Signed saturating shift left by an immediate: performs sshl() with a scratch
// register of shift amounts built by dup_immediate(), then applies
// SignedSaturate() to the result. `shift` must be non-negative.
LogicVRegister Simulator::sqshl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                int shift) {
  VIXL_ASSERT(shift >= 0);
  SimVRegister scratch;
  LogicVRegister shift_amounts = dup_immediate(vform, scratch, shift);
  LogicVRegister shifted = sshl(vform, dst, src, shift_amounts);
  return shifted.SignedSaturate(vform);
}
1710

1711

1712
// Unsigned saturating shift left by an immediate: performs ushl() with a
// scratch register of shift amounts built by dup_immediate(), then applies
// UnsignedSaturate() to the result. `shift` must be non-negative.
LogicVRegister Simulator::uqshl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                int shift) {
  VIXL_ASSERT(shift >= 0);
  SimVRegister scratch;
  LogicVRegister shift_amounts = dup_immediate(vform, scratch, shift);
  LogicVRegister shifted = ushl(vform, dst, src, shift_amounts);
  return shifted.UnsignedSaturate(vform);
}
1721

1722

1723
// Signed shift left by an immediate with unsigned saturation: performs the
// signed sshl() with a scratch register of shift amounts built by
// dup_immediate(), then applies UnsignedSaturate() to the result.
// `shift` must be non-negative.
LogicVRegister Simulator::sqshlu(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src,
                                 int shift) {
  VIXL_ASSERT(shift >= 0);
  SimVRegister scratch;
  LogicVRegister shift_amounts = dup_immediate(vform, scratch, shift);
  LogicVRegister shifted = sshl(vform, dst, src, shift_amounts);
  return shifted.UnsignedSaturate(vform);
}
1732

1733

1734
// Shift right and insert: each lane of `src` is shifted right by `shift` and
// written over the corresponding low bits of `dst`, while the top `shift` bits
// of each `dst` lane keep their previous value.
//
// `shift` must satisfy 0 < shift <= lane size in bits (asserted).
LogicVRegister Simulator::sri(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src,
                              int shift) {
  dst.ClearForWrite(vform);
  const int lanes = LaneCountFromFormat(vform);
  VIXL_ASSERT((shift > 0) &&
              (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
  // A 64-bit shift cannot be expressed with `>>` on uint64_t, so that case is
  // handled explicitly: nothing is inserted and `dst` is left as it was.
  const bool shifts_out_everything = (shift == 64);
  const uint64_t insert_mask =
      shifts_out_everything ? 0 : (MaxUintFromFormat(vform) >> shift);
  for (int lane = 0; lane < lanes; lane++) {
    const uint64_t incoming = src.Uint(vform, lane);
    const uint64_t current = dst.Uint(vform, lane);
    const uint64_t inserted = shifts_out_everything ? 0 : (incoming >> shift);
    dst.SetUint(vform, lane, (current & ~insert_mask) | inserted);
  }
  return dst;
}
1758

1759

1760
// Unsigned right shift by a non-negative immediate. Implemented with the
// register-form ushl, which treats a negative shift amount as a right shift,
// so the immediate is negated before being placed in the shift register.
LogicVRegister Simulator::ushr(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src,
                               int shift) {
  VIXL_ASSERT(shift >= 0);
  const int left_shift = -shift;
  SimVRegister scratch;
  LogicVRegister shift_amounts = dup_immediate(vform, scratch, left_shift);
  return ushl(vform, dst, src, shift_amounts);
}
1769

1770

1771
// Signed right shift by a non-negative immediate. Implemented with the
// register-form sshl, which treats a negative shift amount as a right shift,
// so the immediate is negated before being placed in the shift register.
LogicVRegister Simulator::sshr(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src,
                               int shift) {
  VIXL_ASSERT(shift >= 0);
  const int left_shift = -shift;
  SimVRegister scratch;
  LogicVRegister shift_amounts = dup_immediate(vform, scratch, left_shift);
  return sshl(vform, dst, src, shift_amounts);
}
1780

1781

1782
// Signed shift right and accumulate: src is shifted right (signed) by `shift`
// into a scratch register, and that value is added to dst.
LogicVRegister Simulator::ssra(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src,
                               int shift) {
  SimVRegister scratch;
  LogicVRegister addend = sshr(vform, scratch, src, shift);
  return add(vform, dst, dst, addend);
}
1790

1791

1792
// Unsigned shift right and accumulate: src is shifted right (unsigned) by
// `shift` into a scratch register, and that value is added to dst.
LogicVRegister Simulator::usra(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src,
                               int shift) {
  SimVRegister scratch;
  LogicVRegister addend = ushr(vform, scratch, src, shift);
  return add(vform, dst, dst, addend);
}
1800

1801

1802
// Signed rounding shift right and accumulate: src is shifted right (signed) by
// `shift` into a scratch register, Round is applied to the shifted value, and
// the result is added to dst.
LogicVRegister Simulator::srsra(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                int shift) {
  SimVRegister scratch;
  LogicVRegister shifted = sshr(vform, scratch, src, shift);
  return add(vform, dst, dst, shifted.Round(vform));
}
1810

1811

1812
// Unsigned rounding shift right and accumulate: src is shifted right
// (unsigned) by `shift` into a scratch register, Round is applied to the
// shifted value, and the result is added to dst.
LogicVRegister Simulator::ursra(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                int shift) {
  SimVRegister scratch;
  LogicVRegister shifted = ushr(vform, scratch, src, shift);
  return add(vform, dst, dst, shifted.Round(vform));
}
1820

1821

1822
// Writes, for each lane, the result of CountLeadingSignBits on the signed
// source lane (evaluated at the lane's width) to the matching lane of dst.
LogicVRegister Simulator::cls(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src) {
  const int lanes = LaneCountFromFormat(vform);
  const int bits_per_lane = LaneSizeInBitsFromFormat(vform);

  // All counts are computed into a local buffer (large enough for one entry
  // per lane) before dst is written.
  int counts[kZRegMaxSizeInBytes];
  for (int lane = 0; lane < lanes; ++lane) {
    counts[lane] = CountLeadingSignBits(src.Int(vform, lane), bits_per_lane);
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    dst.SetUint(vform, lane, counts[lane]);
  }
  return dst;
}
1841

1842

1843
// Writes, for each lane, the result of CountLeadingZeros on the unsigned
// source lane (evaluated at the lane's width) to the matching lane of dst.
LogicVRegister Simulator::clz(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src) {
  const int lanes = LaneCountFromFormat(vform);
  const int bits_per_lane = LaneSizeInBitsFromFormat(vform);

  // All counts are computed into a local buffer (large enough for one entry
  // per lane) before dst is written.
  int counts[kZRegMaxSizeInBytes];
  for (int lane = 0; lane < lanes; ++lane) {
    counts[lane] = CountLeadingZeros(src.Uint(vform, lane), bits_per_lane);
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    dst.SetUint(vform, lane, counts[lane]);
  }
  return dst;
}
1862

1863

1864
// Logical NOT per lane: a destination lane becomes 1 when the source lane is
// zero, and 0 otherwise.
LogicVRegister Simulator::cnot(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; lane++) {
    const bool lane_is_zero = (src.Uint(vform, lane) == 0);
    const uint64_t flag = lane_is_zero ? 1 : 0;
    dst.SetUint(vform, lane, flag);
  }
  return dst;
}
1874

1875

1876
// Writes, for each lane, the result of CountSetBits on the unsigned source
// lane (evaluated at the lane's width) to the matching lane of dst.
LogicVRegister Simulator::cnt(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src) {
  const int lanes = LaneCountFromFormat(vform);
  const int bits_per_lane = LaneSizeInBitsFromFormat(vform);

  // All counts are computed into a local buffer (large enough for one entry
  // per lane) before dst is written.
  int counts[kZRegMaxSizeInBytes];
  for (int lane = 0; lane < lanes; ++lane) {
    counts[lane] = CountSetBits(src.Uint(vform, lane), bits_per_lane);
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    dst.SetUint(vform, lane, counts[lane]);
  }
  return dst;
}
1895

1896
// Derives the effective signed shift distance from a raw lane value.
//
//  - shift_in_ls_byte == true (Neon): only the least-significant byte of the
//    lane is used, interpreted as a signed value.
//  - shift_in_ls_byte == false (SVE): the whole value is used, saturated to
//    the range -(esize + 1) ... (esize + 1).
static int64_t CalculateSignedShiftDistance(int64_t shift_val,
                                            int esize,
                                            bool shift_in_ls_byte) {
  if (shift_in_ls_byte) {
    return ExtractSignedBitfield64(7, 0, shift_val);
  }

  const int64_t limit = esize + 1;
  if (shift_val > limit) return limit;
  if (shift_val < -limit) return -limit;
  return shift_val;
}
1910

1911
// Signed shift by register. Each lane of src1 is shifted by the signed
// distance derived from the matching lane of src2 (see
// CalculateSignedShiftDistance): positive distances shift left, negative
// distances shift right with sign extension.
//
// Besides writing the shifted value, this records per-lane signed and unsigned
// saturation state and rounding state on dst, for use by saturating and
// rounding variants built on top of this function.
LogicVRegister Simulator::sshl(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2,
                               bool shift_in_ls_byte) {
  dst.ClearForWrite(vform);
  const int esize = LaneSizeInBitsFromFormat(vform);
  const int lane_count = LaneCountFromFormat(vform);

  for (int lane = 0; lane < lane_count; lane++) {
    const int64_t distance =
        CalculateSignedShiftDistance(src2.Int(vform, lane),
                                     esize,
                                     shift_in_ls_byte);

    // The saturation checks work on the left-justified value so that the
    // leading-bit counts are independent of the lane size.
    const int64_t justified = src1.IntLeftJustified(vform, lane);
    const bool is_nonzero = (justified != 0);

    // Signed saturation state.
    if (is_nonzero && (distance > CountLeadingSignBits(justified))) {
      dst.SetSignedSat(lane, justified >= 0);
    }

    // Unsigned saturation state.
    if (justified < 0) {
      dst.SetUnsignedSat(lane, false);
    } else if (is_nonzero && (distance > CountLeadingZeros(justified))) {
      dst.SetUnsignedSat(lane, true);
    }

    const int64_t value = src1.Int(vform, lane);
    const bool is_negative = (value < 0);

    // Distances that cannot be handed to the C++ shift operators.
    if (distance > 63) {
      dst.SetInt(vform, lane, 0);
      continue;
    }
    if (distance < -63) {
      dst.SetRounding(lane, is_negative);
      dst.SetInt(vform, lane, is_negative ? -1 : 0);
      continue;
    }

    // Shift an unsigned copy, since shifting a negative signed value is not
    // well defined.
    uint64_t bits = static_cast<uint64_t>(value);
    if (distance >= 0) {
      bits <<= distance;
    } else {
      const int64_t right = -distance;

      // Rounding only applies to right shifts: it is determined by the
      // most-significant bit that gets shifted out.
      if (((bits >> (right - 1)) & 1) == 1) {
        dst.SetRounding(lane, true);
      }

      bits >>= right;

      if (is_negative) {
        // Fill the vacated top bits to simulate sign extension.
        bits |= (~UINT64_C(0) << (64 - right));
      }
    }
    dst.SetUint(vform, lane, bits);
  }
  return dst;
}
1974

1975

1976
// Unsigned shift by register. Each lane of src1 is shifted by the signed
// distance derived from the matching lane of src2 (see
// CalculateSignedShiftDistance): positive distances shift left, negative
// distances shift right, filling with zeros.
//
// Besides writing the shifted value, this records per-lane unsigned saturation
// state and rounding state on dst.
LogicVRegister Simulator::ushl(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2,
                               bool shift_in_ls_byte) {
  dst.ClearForWrite(vform);
  const int esize = LaneSizeInBitsFromFormat(vform);
  const int lane_count = LaneCountFromFormat(vform);

  for (int lane = 0; lane < lane_count; lane++) {
    const int64_t distance =
        CalculateSignedShiftDistance(src2.Int(vform, lane),
                                     esize,
                                     shift_in_ls_byte);

    // The saturation check works on the left-justified value so that the
    // leading-zero count is independent of the lane size.
    const uint64_t justified = src1.UintLeftJustified(vform, lane);
    if ((justified != 0) && (distance > CountLeadingZeros(justified))) {
      dst.SetUnsignedSat(lane, true);
    }

    uint64_t bits = src1.Uint(vform, lane);

    // Everything is shifted out, in either direction.
    if ((distance > 63) || (distance < -64)) {
      dst.SetUint(vform, lane, 0);
      continue;
    }

    if (distance >= 0) {
      bits <<= distance;
    } else {
      const int64_t right = -distance;

      // Rounding only applies to right shifts: it is determined by the
      // most-significant bit that gets shifted out.
      if (((bits >> (right - 1)) & 1) == 1) {
        dst.SetRounding(lane, true);
      }

      // A shift by exactly 64 cannot use the C++ shift operator.
      if (right == 64) {
        bits = 0;
      } else {
        bits >>= right;
      }
    }
    dst.SetUint(vform, lane, bits);
  }
  return dst;
}
2018

2019
// Signed right shift by register. The per-lane shift amounts in src2 are
// negated into a scratch register and handed to sshl, which shifts right for
// negative distances. sshl is asked to use the whole lane as the distance
// (shift_in_ls_byte == false).
LogicVRegister Simulator::sshr(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  SimVRegister negated;
  // Negating the minimum integer overflows; saturating the negated value
  // sidesteps that.
  LogicVRegister negated_amounts = neg(vform, negated, src2);
  negated_amounts.SignedSaturate(vform);
  sshl(vform, dst, src1, negated, false);
  return dst;
}
2029

2030
// Unsigned right shift by register. The per-lane shift amounts in src2 are
// negated into a scratch register and handed to ushl, which shifts right for
// negative distances. ushl is asked to use the whole lane as the distance
// (shift_in_ls_byte == false).
LogicVRegister Simulator::ushr(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  SimVRegister negated;
  // Negating the minimum integer overflows; saturating the negated value
  // sidesteps that.
  LogicVRegister negated_amounts = neg(vform, negated, src2);
  negated_amounts.SignedSaturate(vform);
  ushl(vform, dst, src1, negated, false);
  return dst;
}
2040

2041
// Negates every signed lane of src into dst. A lane holding the minimum
// integer for the format is flagged as (positively) signed-saturated, so that
// saturating callers can fix it up.
LogicVRegister Simulator::neg(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; lane++) {
    const int64_t value = src.Int(vform, lane);

    // Record signed saturation.
    if (value == MinIntFromFormat(vform)) {
      dst.SetSignedSat(lane, true);
    }

    // -INT64_MIN cannot be represented, so that value is written unchanged.
    int64_t negated = value;
    if (value != INT64_MIN) {
      negated = -value;
    }
    dst.SetInt(vform, lane, negated);
  }
  return dst;
}
2055

2056

2057
// Adds the unsigned lanes of src2 to the signed lanes of src1, producing a
// signed result that saturates at the maximum integer for the format.
LogicVRegister Simulator::suqadd(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; lane++) {
    // Overflow is detected on the left-justified operands, so that a carry out
    // of the lane shows up as a wrap of the 64-bit sum.
    const int64_t signed_lhs = src1.IntLeftJustified(vform, lane);
    const uint64_t unsigned_rhs = src2.UintLeftJustified(vform, lane);
    const uint64_t wrapped_sum =
        static_cast<uint64_t>(signed_lhs) + unsigned_rhs;

    // Reinterpret the sum as signed without relying on conversion rules.
    int64_t signed_sum;
    memcpy(&signed_sum, &wrapped_sum, sizeof(signed_sum));

    const bool saturates = (signed_sum < signed_lhs);
    if (saturates) {
      // Signed positive saturation.
      dst.SetInt(vform, lane, MaxIntFromFormat(vform));
    } else {
      dst.SetUint(vform, lane, src1.Int(vform, lane) + src2.Uint(vform, lane));
    }
  }
  return dst;
}
2077

2078

2079
// Adds the signed lanes of src2 to the unsigned lanes of src1, producing an
// unsigned result that saturates at zero and at the maximum unsigned value for
// the format.
LogicVRegister Simulator::usqadd(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; lane++) {
    // Overflow is detected on the left-justified operands, so that a carry or
    // borrow out of the lane shows up as a wrap of the 64-bit sum.
    const uint64_t unsigned_lhs = src1.UintLeftJustified(vform, lane);
    const int64_t signed_rhs = src2.IntLeftJustified(vform, lane);
    const uint64_t wrapped_sum = unsigned_lhs + signed_rhs;

    const bool overflows = (signed_rhs > 0) && (wrapped_sum <= unsigned_lhs);
    const bool underflows = (signed_rhs < 0) && (wrapped_sum >= unsigned_lhs);

    if (overflows) {
      // Positive saturation.
      dst.SetUint(vform, lane, MaxUintFromFormat(vform));
    } else if (underflows) {
      // Negative saturation.
      dst.SetUint(vform, lane, 0);
    } else {
      dst.SetUint(vform, lane, src1.Uint(vform, lane) + src2.Int(vform, lane));
    }
  }
  return dst;
}
2099

2100

2101
// Writes the absolute value of every signed lane of src to dst. A lane holding
// the minimum integer for the format is flagged as (positively)
// signed-saturated, so that saturating callers can fix it up.
LogicVRegister Simulator::abs(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; lane++) {
    const int64_t value = src.Int(vform, lane);

    // Record signed saturation.
    if (value == MinIntFromFormat(vform)) {
      dst.SetSignedSat(lane, true);
    }

    // -INT64_MIN cannot be represented, so that value is written unchanged.
    int64_t magnitude = value;
    if ((value < 0) && (value != INT64_MIN)) {
      magnitude = -value;
    }
    dst.SetInt(vform, lane, magnitude);
  }
  return dst;
}
2119

2120

2121
// Predicated bitwise-AND reduction (SVE formats only). The lanes of src that
// are active in pg are ANDed together, starting from an all-ones lane value,
// and the result is written to lane 0 of dst using the scalar format with the
// same lane size.
LogicVRegister Simulator::andv(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicPRegister& pg,
                               const LogicVRegister& src) {
  VIXL_ASSERT(IsSVEFormat(vform));
  uint64_t accumulator = GetUintMask(LaneSizeInBitsFromFormat(vform));
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; lane++) {
    if (pg.IsActive(vform, lane)) {
      accumulator &= src.Uint(vform, lane);
    }
  }

  const VectorFormat scalar_form =
      ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
  dst.ClearForWrite(scalar_form);
  dst.SetUint(scalar_form, 0, accumulator);
  return dst;
}
2138

2139

2140
// Predicated bitwise exclusive-OR reduction (SVE formats only). The lanes of
// src that are active in pg are XORed together, starting from zero, and the
// result is written to lane 0 of dst using the scalar format with the same
// lane size.
LogicVRegister Simulator::eorv(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicPRegister& pg,
                               const LogicVRegister& src) {
  VIXL_ASSERT(IsSVEFormat(vform));
  uint64_t accumulator = 0;
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; lane++) {
    if (pg.IsActive(vform, lane)) {
      accumulator ^= src.Uint(vform, lane);
    }
  }

  const VectorFormat scalar_form =
      ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
  dst.ClearForWrite(scalar_form);
  dst.SetUint(scalar_form, 0, accumulator);
  return dst;
}
2157

2158

2159
// Predicated bitwise-OR reduction (SVE formats only). The lanes of src that
// are active in pg are ORed together, starting from zero, and the result is
// written to lane 0 of dst using the scalar format with the same lane size.
LogicVRegister Simulator::orv(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicPRegister& pg,
                              const LogicVRegister& src) {
  VIXL_ASSERT(IsSVEFormat(vform));
  uint64_t accumulator = 0;
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; lane++) {
    if (pg.IsActive(vform, lane)) {
      accumulator |= src.Uint(vform, lane);
    }
  }

  const VectorFormat scalar_form =
      ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
  dst.ClearForWrite(scalar_form);
  dst.SetUint(scalar_form, 0, accumulator);
  return dst;
}
2176

2177

2178
// Predicated signed add reduction (SVE formats with S-sized lanes or smaller).
// The signed lanes of src that are active in pg are summed and the total is
// written to the D-sized lane 0 of dst.
LogicVRegister Simulator::saddv(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicPRegister& pg,
                                const LogicVRegister& src) {
  VIXL_ASSERT(IsSVEFormat(vform));
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) <= kSRegSize);

  int64_t total = 0;
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; lane++) {
    if (pg.IsActive(vform, lane)) {
      // The accumulator is D-sized while the source lanes are S-sized or
      // smaller, so this signed addition can't overflow (which would be
      // undefined behaviour).
      total += src.Int(vform, lane);
    }
  }

  dst.ClearForWrite(kFormatD);
  dst.SetInt(kFormatD, 0, total);
  return dst;
}
2198

2199

2200
// Predicated unsigned add reduction (SVE formats only). The unsigned lanes of
// src that are active in pg are summed, with 64-bit unsigned arithmetic, and
// the total is written to the D-sized lane 0 of dst.
LogicVRegister Simulator::uaddv(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicPRegister& pg,
                                const LogicVRegister& src) {
  VIXL_ASSERT(IsSVEFormat(vform));

  uint64_t total = 0;
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; lane++) {
    if (pg.IsActive(vform, lane)) {
      total += src.Uint(vform, lane);
    }
  }

  dst.ClearForWrite(kFormatD);
  dst.SetUint(kFormatD, 0, total);
  return dst;
}
2216

2217

2218
// Narrows every lane of src to half its width and writes the results to dst,
// recording per-lane signed and unsigned saturation state on dst so that the
// saturating variants can fix up the truncated values afterwards.
//
//  - dstform: the (narrow) destination format. For kFormat16B, kFormat8H and
//    kFormat4S the results go to the upper half of dst and the lower half is
//    left untouched; for other formats the results start at lane 0.
//  - dst_is_signed: selects SetInt or SetUint for writing the result.
//  - src_is_signed: selects whether the source lanes are treated as signed
//    when truncating and when testing for unsigned saturation.
//
// dst and src may refer to the same register.
LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
                                        LogicVRegister dst,
                                        bool dst_is_signed,
                                        const LogicVRegister& src,
                                        bool src_is_signed) {
  bool upperhalf = false;
  VectorFormat srcform = dstform;
  if ((dstform == kFormat16B) || (dstform == kFormat8H) ||
      (dstform == kFormat4S)) {
    upperhalf = true;
    srcform = VectorFormatHalfLanes(srcform);
  }
  srcform = VectorFormatDoubleWidth(srcform);

  const int src_lane_count = LaneCountFromFormat(srcform);
  const int offset = upperhalf ? (LaneCountFromFormat(dstform) / 2) : 0;

  // Read every source lane before writing anything. When dst and src are the
  // same register and the results go to the upper half, the narrow lanes
  // written early in the loop overlap wide source lanes that are only consumed
  // by later iterations.
  int64_t ssrc_lanes[kZRegMaxSizeInBytes];
  uint64_t usrc_lanes[kZRegMaxSizeInBytes];
  for (int i = 0; i < src_lane_count; i++) {
    ssrc_lanes[i] = src.Int(srcform, i);
    usrc_lanes[i] = src.Uint(srcform, i);
  }

  for (int i = 0; i < src_lane_count; i++) {
    const int64_t ssrc = ssrc_lanes[i];
    const uint64_t usrc = usrc_lanes[i];
    const int dst_lane = offset + i;

    // Test for signed saturation
    if (ssrc > MaxIntFromFormat(dstform)) {
      dst.SetSignedSat(dst_lane, true);
    } else if (ssrc < MinIntFromFormat(dstform)) {
      dst.SetSignedSat(dst_lane, false);
    }

    // Test for unsigned saturation
    if (src_is_signed) {
      if (ssrc > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
        dst.SetUnsignedSat(dst_lane, true);
      } else if (ssrc < 0) {
        dst.SetUnsignedSat(dst_lane, false);
      }
    } else {
      if (usrc > MaxUintFromFormat(dstform)) {
        dst.SetUnsignedSat(dst_lane, true);
      }
    }

    // Truncate to the destination lane width.
    int64_t result;
    if (src_is_signed) {
      result = ssrc & MaxUintFromFormat(dstform);
    } else {
      result = usrc & MaxUintFromFormat(dstform);
    }

    if (dst_is_signed) {
      dst.SetInt(dstform, dst_lane, result);
    } else {
      dst.SetUint(dstform, dst_lane, result);
    }
  }

  if (upperhalf) {
    // Clear any bits beyond a Q register.
    dst.ClearForWrite(kFormat16B);
  } else {
    dst.ClearForWrite(dstform);
  }
  return dst;
}
2287

2288

2289
// Plain narrowing: extractnarrow with both source and destination treated as
// signed, and no saturation applied to the result.
LogicVRegister Simulator::xtn(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src) {
  const bool dst_is_signed = true;
  const bool src_is_signed = true;
  return extractnarrow(vform, dst, dst_is_signed, src, src_is_signed);
}
2294

2295

2296
// Signed saturating narrow: extractnarrow with both source and destination
// treated as signed, followed by SignedSaturate on the result.
LogicVRegister Simulator::sqxtn(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  const bool dst_is_signed = true;
  const bool src_is_signed = true;
  LogicVRegister narrowed =
      extractnarrow(vform, dst, dst_is_signed, src, src_is_signed);
  return narrowed.SignedSaturate(vform);
}
2301

2302

2303
// Signed-to-unsigned saturating narrow: extractnarrow with a signed source and
// an unsigned destination, followed by UnsignedSaturate on the result.
LogicVRegister Simulator::sqxtun(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src) {
  const bool dst_is_signed = false;
  const bool src_is_signed = true;
  LogicVRegister narrowed =
      extractnarrow(vform, dst, dst_is_signed, src, src_is_signed);
  return narrowed.UnsignedSaturate(vform);
}
2308

2309

2310
// Unsigned saturating narrow: extractnarrow with both source and destination
// treated as unsigned, followed by UnsignedSaturate on the result.
LogicVRegister Simulator::uqxtn(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  const bool dst_is_signed = false;
  const bool src_is_signed = false;
  LogicVRegister narrowed =
      extractnarrow(vform, dst, dst_is_signed, src, src_is_signed);
  return narrowed.UnsignedSaturate(vform);
}
2315

2316

2317
// Writes the absolute difference of each pair of lanes of src1 and src2 to
// dst. `is_signed` selects whether the lanes are compared as signed or as
// unsigned values.
LogicVRegister Simulator::absdiff(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2,
                                  bool is_signed) {
  dst.ClearForWrite(vform);
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; lane++) {
    bool first_is_larger;
    if (is_signed) {
      first_is_larger = src1.Int(vform, lane) > src2.Int(vform, lane);
    } else {
      first_is_larger = src1.Uint(vform, lane) > src2.Uint(vform, lane);
    }

    // Only the comparison depends on signedness. The subtraction itself is
    // always done on the unsigned lane values (larger minus smaller), which
    // avoids signed overflow.
    const uint64_t a = src1.Uint(vform, lane);
    const uint64_t b = src2.Uint(vform, lane);
    dst.SetUint(vform, lane, first_is_larger ? (a - b) : (b - a));
  }
  return dst;
}
2336

2337

2338
// Signed absolute difference and accumulate: the signed absolute difference of
// src1 and src2 is computed into a scratch register and added to dst.
LogicVRegister Simulator::saba(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  const bool is_signed = true;
  SimVRegister difference;
  dst.ClearForWrite(vform);
  absdiff(vform, difference, src1, src2, is_signed);
  add(vform, dst, dst, difference);
  return dst;
}
2348

2349

2350
// Unsigned absolute difference and accumulate: the unsigned absolute
// difference of src1 and src2 is computed into a scratch register and added to
// dst.
LogicVRegister Simulator::uaba(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  const bool is_signed = false;
  SimVRegister difference;
  dst.ClearForWrite(vform);
  absdiff(vform, difference, src1, src2, is_signed);
  add(vform, dst, dst, difference);
  return dst;
}
2360

2361

2362
// Bitwise NOT: every lane of dst receives the complement of the matching lane
// of src.
LogicVRegister Simulator::not_(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; lane++) {
    const uint64_t inverted = ~src.Uint(vform, lane);
    dst.SetUint(vform, lane, inverted);
  }
  return dst;
}
2371

2372

2373
// Reverses the order of the bits within every lane of src and writes the
// result to dst.
LogicVRegister Simulator::rbit(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src) {
  const int lanes = LaneCountFromFormat(vform);
  const int bits_per_lane = LaneSizeInBitsFromFormat(vform);

  // All lanes are reversed into a local buffer before dst is written.
  uint64_t reversed[kZRegMaxSizeInBytes];
  for (int lane = 0; lane < lanes; lane++) {
    uint64_t remaining = src.Uint(vform, lane);
    uint64_t accumulated = 0;
    // Move one bit at a time from the bottom of `remaining` to the bottom of
    // `accumulated`, so that the first bit taken ends up at the top.
    for (int bit = bits_per_lane; bit > 0; bit--) {
      accumulated = (accumulated << 1) | (remaining & 1);
      remaining >>= 1;
    }
    reversed[lane] = accumulated;
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    dst.SetUint(vform, lane, reversed[lane]);
  }
  return dst;
}
2397

2398

2399
// Reverses the order of the lanes (SVE formats only): lane i of dst receives
// lane (lane_count - 1 - i) of src. Lanes are exchanged in pairs, working from
// both ends towards the middle, so dst and src may be the same register.
LogicVRegister Simulator::rev(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src) {
  VIXL_ASSERT(IsSVEFormat(vform));
  const int lane_count = LaneCountFromFormat(vform);
  const int pairs = lane_count / 2;
  for (int low = 0, high = lane_count - 1; low < pairs; low++, high--) {
    // Save the low lane first: writing dst may overwrite it.
    const uint64_t low_value = src.Uint(vform, low);
    dst.SetUint(vform, low, src.Uint(vform, high));
    dst.SetUint(vform, high, low_value);
  }
  return dst;
}
2411

2412

2413
// Reverses the order of the lanes within each consecutive group of `rev_size`
// bytes. With lanes of N bytes, each group holds (rev_size / N) lanes.
LogicVRegister Simulator::rev_byte(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src,
                                   int rev_size) {
  const int lane_count = LaneCountFromFormat(vform);
  const int bytes_per_lane = LaneSizeInBytesFromFormat(vform);
  const int lanes_per_group = rev_size / bytes_per_lane;

  // All lanes are reordered into a local buffer before dst is written.
  uint64_t reordered[kZRegMaxSizeInBytes] = {};
  for (int first = 0; first < lane_count; first += lanes_per_group) {
    const int last = first + lanes_per_group - 1;
    for (int step = 0; step < lanes_per_group; step++) {
      reordered[last - step] = src.Uint(vform, first + step);
    }
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, reordered[lane]);
  }
  return dst;
}
2432

2433

2434
// Reverses the lanes within each 2-byte group of src.
LogicVRegister Simulator::rev16(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  const int group_size_in_bytes = 2;
  return rev_byte(vform, dst, src, group_size_in_bytes);
}
2439

2440

2441
// Reverses the lanes within each 4-byte group of src.
LogicVRegister Simulator::rev32(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  const int group_size_in_bytes = 4;
  return rev_byte(vform, dst, src, group_size_in_bytes);
}
2446

2447

2448
// Reverses the lanes within each 8-byte group of src.
LogicVRegister Simulator::rev64(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  const int group_size_in_bytes = 8;
  return rev_byte(vform, dst, src, group_size_in_bytes);
}
2453

2454
// Add long pairwise. `vform` describes the destination; the source is read
// with lanes of half that width and twice the count. Each destination lane
// receives the sum of the two adjacent source lanes it overlaps.
//
//  - is_signed: read the source lanes as signed rather than unsigned.
//  - do_accumulate: add the pair sum to the existing destination lane instead
//    of replacing it.
LogicVRegister Simulator::addlp(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                bool is_signed,
                                bool do_accumulate) {
  const VectorFormat src_form = VectorFormatHalfWidthDoubleLanes(vform);
  VIXL_ASSERT(LaneSizeInBitsFromFormat(src_form) <= kSRegSize);

  const int lanes = LaneCountFromFormat(vform);

  // All pair sums are computed into a local buffer before dst is written.
  uint64_t sums[kZRegMaxSizeInBytes];
  for (int lane = 0; lane < lanes; lane++) {
    const int even = 2 * lane;
    const int odd = even + 1;
    if (is_signed) {
      sums[lane] = static_cast<uint64_t>(src.Int(src_form, even) +
                                         src.Int(src_form, odd));
    } else {
      sums[lane] = src.Uint(src_form, even) + src.Uint(src_form, odd);
    }
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lanes; ++lane) {
    uint64_t value = sums[lane];
    if (do_accumulate) {
      value += dst.Uint(vform, lane);
    }
    dst.SetUint(vform, lane, value);
  }

  return dst;
}
2483

2484

2485
// Signed add long pairwise, without accumulation.
LogicVRegister Simulator::saddlp(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src) {
  const bool is_signed = true;
  const bool do_accumulate = false;
  return addlp(vform, dst, src, is_signed, do_accumulate);
}
2490

2491

2492
// Unsigned add long pairwise, without accumulation.
LogicVRegister Simulator::uaddlp(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src) {
  const bool is_signed = false;
  const bool do_accumulate = false;
  return addlp(vform, dst, src, is_signed, do_accumulate);
}
2497

2498

2499
// Signed add long pairwise, accumulating into the existing contents of dst.
LogicVRegister Simulator::sadalp(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src) {
  const bool is_signed = true;
  const bool do_accumulate = true;
  return addlp(vform, dst, src, is_signed, do_accumulate);
}
2504

2505

2506
// Unsigned add long pairwise, accumulating into the existing contents of dst.
LogicVRegister Simulator::uadalp(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src) {
  const bool is_signed = false;
  const bool do_accumulate = true;
  return addlp(vform, dst, src, is_signed, do_accumulate);
}
2511

2512
// Rotates every lane of src right by `rotation` bits, within the lane's own
// width, and writes the result to dst.
LogicVRegister Simulator::ror(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src,
                              int rotation) {
  const int lane_bits = LaneSizeInBitsFromFormat(vform);
  const int lanes = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lanes; lane++) {
    const uint64_t original = src.Uint(vform, lane);
    const uint64_t rotated = RotateRight(original, rotation, lane_bits);
    dst.SetUint(vform, lane, rotated);
  }
  return dst;
}
2523

2524
// Extracts a vector from the concatenation of src1 (low) and src2 (high),
// starting at lane `index` of src1: the low (lane_count - index) lanes of dst
// come from the top of src1, and the remaining `index` lanes come from the
// bottom of src2.
//
// `index` is measured in lanes of `vform` and must be in the range
// [0, lane_count]. Lane values are staged as 64-bit quantities, so lanes wider
// than a byte are transferred without truncation.
LogicVRegister Simulator::ext(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2,
                              int index) {
  const int lane_count = LaneCountFromFormat(vform);
  // An index outside this range would read and write outside the staging
  // buffer and the registers.
  VIXL_ASSERT((index >= 0) && (index <= lane_count));

  // Stage the whole result first, so that dst may alias either source.
  uint64_t result[kZRegMaxSizeInBytes] = {};
  const int lanes_from_src1 = lane_count - index;
  for (int i = 0; i < lanes_from_src1; ++i) {
    result[i] = src1.Uint(vform, i + index);
  }
  for (int i = 0; i < index; ++i) {
    result[lanes_from_src1 + i] = src2.Uint(vform, i);
  }

  dst.ClearForWrite(vform);
  for (int i = 0; i < lane_count; ++i) {
    dst.SetUint(vform, i, result[i]);
  }
  return dst;
}
2543

2544
// Rotates the lanes of src by `index` positions, implemented as a byte-wise
// ext of src concatenated with itself. A negative `index` is first wrapped by
// adding the lane count; after wrapping it must be in [0, lane_count).
LogicVRegister Simulator::rotate_elements_right(VectorFormat vform,
                                                LogicVRegister dst,
                                                const LogicVRegister& src,
                                                int index) {
  if (index < 0) {
    index += LaneCountFromFormat(vform);
  }
  VIXL_ASSERT((index >= 0) && (index < LaneCountFromFormat(vform)));

  // ext is invoked on byte lanes, so convert the lane index to a byte index.
  const int byte_index = index * LaneSizeInBytesFromFormat(vform);
  return ext(kFormatVnB, dst, src, src, byte_index);
}
2553

2554

2555
template <typename T>
2556
LogicVRegister Simulator::fadda(VectorFormat vform,
2557
                                LogicVRegister acc,
2558
                                const LogicPRegister& pg,
2559
                                const LogicVRegister& src) {
2560
  T result = acc.Float<T>(0);
2561
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2562
    if (!pg.IsActive(vform, i)) continue;
2563

2564
    result = FPAdd(result, src.Float<T>(i));
2565
  }
2566
  VectorFormat vform_dst =
2567
      ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2568
  acc.ClearForWrite(vform_dst);
2569
  acc.SetFloat(0, result);
2570
  return acc;
2571
}
2572

2573
// Dispatches fadda to the template instantiation matching the lane size of
// `vform`: half, single or double precision. Other lane sizes are not
// expected.
LogicVRegister Simulator::fadda(VectorFormat vform,
                                LogicVRegister acc,
                                const LogicPRegister& pg,
                                const LogicVRegister& src) {
  if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
    fadda<SimFloat16>(vform, acc, pg, src);
  } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
    fadda<float>(vform, acc, pg, src);
  } else if (LaneSizeInBitsFromFormat(vform) == kDRegSize) {
    fadda<double>(vform, acc, pg, src);
  } else {
    VIXL_UNREACHABLE();
  }
  return acc;
}
2592

2593
template <typename T>
2594
LogicVRegister Simulator::fcadd(VectorFormat vform,
2595
                                LogicVRegister dst,          // d
2596
                                const LogicVRegister& src1,  // n
2597
                                const LogicVRegister& src2,  // m
2598
                                int rot) {
2599
  int elements = LaneCountFromFormat(vform);
2600

2601
  T element1, element3;
2602
  rot = (rot == 1) ? 270 : 90;
2603

2604
  // Loop example:
2605
  // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
2606
  // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
2607

2608
  for (int e = 0; e <= (elements / 2) - 1; e++) {
2609
    switch (rot) {
2610
      case 90:
2611
        element1 = FPNeg(src2.Float<T>(e * 2 + 1));
2612
        element3 = src2.Float<T>(e * 2);
2613
        break;
2614
      case 270:
2615
        element1 = src2.Float<T>(e * 2 + 1);
2616
        element3 = FPNeg(src2.Float<T>(e * 2));
2617
        break;
2618
      default:
2619
        VIXL_UNREACHABLE();
2620
        return dst;  // prevents "element(n) may be unintialized" errors
2621
    }
2622
    dst.ClearForWrite(vform);
2623
    dst.SetFloat<T>(e * 2, FPAdd(src1.Float<T>(e * 2), element1));
2624
    dst.SetFloat<T>(e * 2 + 1, FPAdd(src1.Float<T>(e * 2 + 1), element3));
2625
  }
2626
  return dst;
2627
}
2628

2629

2630
// Dispatch `fcadd` on the lane size of `vform`. Lane sizes other than half and
// single precision are asserted to be double precision.
LogicVRegister Simulator::fcadd(VectorFormat vform,
                                LogicVRegister dst,          // d
                                const LogicVRegister& src1,  // n
                                const LogicVRegister& src2,  // m
                                int rot) {
  switch (LaneSizeInBitsFromFormat(vform)) {
    case kHRegSize:
      fcadd<SimFloat16>(vform, dst, src1, src2, rot);
      break;
    case kSRegSize:
      fcadd<float>(vform, dst, src1, src2, rot);
      break;
    default:
      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
      fcadd<double>(vform, dst, src1, src2, rot);
      break;
  }
  return dst;
}
2645

2646
template <typename T>
2647
LogicVRegister Simulator::fcmla(VectorFormat vform,
2648
                                LogicVRegister dst,
2649
                                const LogicVRegister& src1,
2650
                                const LogicVRegister& src2,
2651
                                const LogicVRegister& acc,
2652
                                int index,
2653
                                int rot) {
2654
  int elements = LaneCountFromFormat(vform);
2655

2656
  T element1, element2, element3, element4;
2657
  rot *= 90;
2658

2659
  // Loop example:
2660
  // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
2661
  // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
2662

2663
  for (int e = 0; e <= (elements / 2) - 1; e++) {
2664
    // Index == -1 indicates a vector/vector rather than vector/indexed-element
2665
    // operation.
2666
    int f = (index < 0) ? e : index;
2667

2668
    switch (rot) {
2669
      case 0:
2670
        element1 = src2.Float<T>(f * 2);
2671
        element2 = src1.Float<T>(e * 2);
2672
        element3 = src2.Float<T>(f * 2 + 1);
2673
        element4 = src1.Float<T>(e * 2);
2674
        break;
2675
      case 90:
2676
        element1 = FPNeg(src2.Float<T>(f * 2 + 1));
2677
        element2 = src1.Float<T>(e * 2 + 1);
2678
        element3 = src2.Float<T>(f * 2);
2679
        element4 = src1.Float<T>(e * 2 + 1);
2680
        break;
2681
      case 180:
2682
        element1 = FPNeg(src2.Float<T>(f * 2));
2683
        element2 = src1.Float<T>(e * 2);
2684
        element3 = FPNeg(src2.Float<T>(f * 2 + 1));
2685
        element4 = src1.Float<T>(e * 2);
2686
        break;
2687
      case 270:
2688
        element1 = src2.Float<T>(f * 2 + 1);
2689
        element2 = src1.Float<T>(e * 2 + 1);
2690
        element3 = FPNeg(src2.Float<T>(f * 2));
2691
        element4 = src1.Float<T>(e * 2 + 1);
2692
        break;
2693
      default:
2694
        VIXL_UNREACHABLE();
2695
        return dst;  // prevents "element(n) may be unintialized" errors
2696
    }
2697
    dst.ClearForWrite(vform);
2698
    dst.SetFloat<T>(vform,
2699
                    e * 2,
2700
                    FPMulAdd(acc.Float<T>(e * 2), element2, element1));
2701
    dst.SetFloat<T>(vform,
2702
                    e * 2 + 1,
2703
                    FPMulAdd(acc.Float<T>(e * 2 + 1), element4, element3));
2704
  }
2705
  return dst;
2706
}
2707

2708
// Vector-by-vector `fcmla` with an explicit accumulator. Dispatches on the lane
// size of `vform`; lane sizes other than half and single precision use the
// double-precision implementation.
LogicVRegister Simulator::fcmla(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2,
                                const LogicVRegister& acc,
                                int rot) {
  // A negative index selects the vector-by-vector behaviour of the template.
  const int no_index = -1;
  switch (LaneSizeInBitsFromFormat(vform)) {
    case kHRegSize:
      fcmla<SimFloat16>(vform, dst, src1, src2, acc, no_index, rot);
      break;
    case kSRegSize:
      fcmla<float>(vform, dst, src1, src2, acc, no_index, rot);
      break;
    default:
      fcmla<double>(vform, dst, src1, src2, acc, no_index, rot);
      break;
  }
  return dst;
}
2723

2724

2725
// `fcmla` with an explicit `index`, accumulating in place: `dst` is passed as
// both the destination and the accumulator input. Dispatches on the lane size
// of `vform`; lane sizes other than half and single precision use the
// double-precision implementation.
LogicVRegister Simulator::fcmla(VectorFormat vform,
                                LogicVRegister dst,          // d
                                const LogicVRegister& src1,  // n
                                const LogicVRegister& src2,  // m
                                int index,
                                int rot) {
  switch (LaneSizeInBitsFromFormat(vform)) {
    case kHRegSize:
      fcmla<SimFloat16>(vform, dst, src1, src2, dst, index, rot);
      break;
    case kSRegSize:
      fcmla<float>(vform, dst, src1, src2, dst, index, rot);
      break;
    default:
      fcmla<double>(vform, dst, src1, src2, dst, index, rot);
      break;
  }
  return dst;
}
2740

2741
// Integer complex add with rotation. Both sources are de-interleaved with
// uzp1/uzp2 into the parts this function treats as real and imaginary, the
// rotated second operand is added, and the parts are re-interleaved into `dst`.
// `rot` must be 90 or 270. When `saturate` is set, each partial result is
// signed-saturated.
//
//   90:  re = a.re - b.im,  im = a.im + b.re
//   270: re = a.re + b.im,  im = a.im - b.re
LogicVRegister Simulator::cadd(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2,
                               int rot,
                               bool saturate) {
  SimVRegister zero;
  zero.Clear();

  SimVRegister a_re, a_im;
  SimVRegister b_re, b_im;
  uzp1(vform, a_re, src1, zero);
  uzp2(vform, a_im, src1, zero);
  uzp1(vform, b_re, src2, zero);
  uzp2(vform, b_im, src2, zero);

  const bool is_rot_90 = (rot == 90);
  VIXL_ASSERT(is_rot_90 || (rot == 270));

  if (is_rot_90) {
    LogicVRegister re = sub(vform, a_re, a_re, b_im);
    if (saturate) re.SignedSaturate(vform);
    LogicVRegister im = add(vform, a_im, a_im, b_re);
    if (saturate) im.SignedSaturate(vform);
  } else {
    LogicVRegister re = add(vform, a_re, a_re, b_im);
    if (saturate) re.SignedSaturate(vform);
    LogicVRegister im = sub(vform, a_im, a_im, b_re);
    if (saturate) im.SignedSaturate(vform);
  }

  zip1(vform, dst, a_re, a_im);
  return dst;
}
2778

2779
// Integer complex multiply-accumulate with rotation. The accumulator `srca` is
// split with uzp1/uzp2 into the parts this function treats as real and
// imaginary. One part of `src1` (selected by `rot`) is multiplied by each part
// of `src2`, and the products are added to or subtracted from the accumulator
// parts before they are re-interleaved into `dst`.
LogicVRegister Simulator::cmla(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& srca,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2,
                               int rot) {
  SimVRegister zero;
  zero.Clear();

  // Accumulator parts.
  SimVRegister acc_re, acc_im;
  uzp1(vform, acc_re, srca, zero);
  uzp2(vform, acc_im, srca, zero);

  // For rotations of 0 and 180 the uzp1 part of src1 is the multiplier, and
  // src2's uzp1 part feeds the real result. For all other rotations the uzp2
  // part of src1 is used and the roles of src2's parts are swapped.
  const bool even_rotation = (rot == 0) || (rot == 180);
  SimVRegister n_part;
  SimVRegister m_for_re, m_for_im;
  if (!even_rotation) {
    uzp2(vform, n_part, src1, zero);
    uzp2(vform, m_for_re, src2, zero);
    uzp1(vform, m_for_im, src2, zero);
  } else {
    uzp1(vform, n_part, src1, zero);
    uzp1(vform, m_for_re, src2, zero);
    uzp2(vform, m_for_im, src2, zero);
  }

  const bool negate_re = (rot == 90) || (rot == 180);
  const bool negate_im = (rot == 180) || (rot == 270);

  SimVRegister product;
  mul(vform, product, n_part, m_for_re);
  if (negate_re) {
    sub(vform, acc_re, acc_re, product);
  } else {
    add(vform, acc_re, acc_re, product);
  }

  mul(vform, product, n_part, m_for_im);
  if (negate_im) {
    sub(vform, acc_im, acc_im, product);
  } else {
    add(vform, acc_im, acc_im, product);
  }

  zip1(vform, dst, acc_re, acc_im);
  return dst;
}
2824

2825
// Indexed form of `cmla`. Element `index` of `src2`, taken at double the lane
// width of `vform`, is duplicated across each segment of a temporary, which is
// then used as the second multiplicand of the vector form.
LogicVRegister Simulator::cmla(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& srca,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2,
                               int index,
                               int rot) {
  const VectorFormat pair_form = VectorFormatDoubleWidth(vform);
  SimVRegister broadcast;
  dup_elements_to_segments(pair_form, broadcast, src2, index);
  return cmla(vform, dst, srca, src1, broadcast, rot);
}
2836

2837
// For each lane, partitions the bits of `src1` according to the corresponding
// mask lane in `src2`. Bits at set mask positions are packed, in order, from
// bit 0 of the result ("low" group); bits at clear mask positions are packed,
// in order, into a second ("high") group. When `do_bext` is true only the low
// group is written to `dst`; otherwise the high group is placed immediately
// above the low group.
LogicVRegister Simulator::bgrp(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2,
                               bool do_bext) {
  // Width of the uint64_t accumulators below. Shifting a uint64_t by this
  // amount (or more) is undefined behaviour in C++.
  const int kAccumulatorBits = 64;
  const int lane_count = LaneCountFromFormat(vform);
  const unsigned lane_bits = LaneSizeInBitsFromFormat(vform);

  for (int i = 0; i < lane_count; i++) {
    uint64_t value = src1.Uint(vform, i);
    uint64_t mask = src2.Uint(vform, i);
    int high_pos = 0;
    int low_pos = 0;
    uint64_t result_high = 0;
    uint64_t result_low = 0;
    for (unsigned j = 0; j < lane_bits; j++) {
      if ((mask & 1) == 0) {
        result_high |= (value & 1) << high_pos;
        high_pos++;
      } else {
        result_low |= (value & 1) << low_pos;
        low_pos++;
      }
      mask >>= 1;
      value >>= 1;
    }

    // When every mask bit of a 64-bit lane is set, low_pos reaches 64. The
    // high group is necessarily empty in that case, so skip the merge rather
    // than perform an out-of-range shift.
    if (!do_bext && (low_pos < kAccumulatorBits)) {
      result_low |= result_high << low_pos;
    }

    dst.SetUint(vform, i, result_low);
  }
  return dst;
}
2869

2870
// For each lane, deposits the low-order bits of `src1` into the bit positions
// that are set in the corresponding mask lane of `src2`, working from bit 0
// upwards. One source bit is consumed per set mask bit; result bits at clear
// mask positions are zero.
LogicVRegister Simulator::bdep(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  const int lane_count = LaneCountFromFormat(vform);
  const unsigned lane_bits = LaneSizeInBitsFromFormat(vform);

  for (int lane = 0; lane < lane_count; ++lane) {
    uint64_t bits = src1.Uint(vform, lane);
    uint64_t mask = src2.Uint(vform, lane);
    uint64_t deposited = 0;
    for (unsigned pos = 0; pos < lane_bits; ++pos, mask >>= 1) {
      if ((mask & 1) == 0) continue;
      deposited |= (bits & 1) << pos;
      bits >>= 1;
    }
    dst.SetUint(vform, lane, deposited);
  }
  return dst;
}
2889

2890
// For each lane `i` of `src1`, counts the candidate lanes of `src2` that hold
// the same value and whose candidate index is active in `pg`.
//
// In segmented mode the candidates are all lanes of the Q-register-sized
// segment of `src2` that contains lane `i`. Otherwise the candidates are lanes
// 0 to `i` (inclusive) of `src2`. Note that the predicate is consulted at the
// candidate index within the segment, not at the absolute lane index.
LogicVRegister Simulator::histogram(VectorFormat vform,
                                    LogicVRegister dst,
                                    const LogicPRegister& pg,
                                    const LogicVRegister& src1,
                                    const LogicVRegister& src2,
                                    bool do_segmented) {
  const int lanes_per_segment = kQRegSize / LaneSizeInBitsFromFormat(vform);
  const int lane_count = LaneCountFromFormat(vform);
  uint64_t counts[kZRegMaxSizeInBytes];

  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t needle = src1.Uint(vform, lane);

    int first_candidate = 0;
    int candidate_count = lane + 1;
    if (do_segmented) {
      first_candidate = (lane / lanes_per_segment) * lanes_per_segment;
      candidate_count = lanes_per_segment;
    }

    uint64_t matches = 0;
    for (int k = 0; k < candidate_count; ++k) {
      const bool hit = pg.IsActive(vform, k) &&
                       (needle == src2.Uint(vform, k + first_candidate));
      if (hit) ++matches;
    }
    counts[lane] = matches;
  }

  // Results are staged in `counts` and written to dst only after all lanes
  // have been computed.
  dst.SetUintArray(vform, counts);
  return dst;
}
2917

2918
// Broadcast element `src_index` of `src` to every lane of `dst`. For the VnQ
// and VnO formats, whose elements are wider than 64 bits, the element is
// handled as two or four 64-bit parts respectively.
LogicVRegister Simulator::dup_element(VectorFormat vform,
                                      LogicVRegister dst,
                                      const LogicVRegister& src,
                                      int src_index) {
  const bool is_q = (vform == kFormatVnQ);
  if (!is_q && (vform != kFormatVnO)) {
    // The element fits in 64 bits, so it can be copied lane by lane.
    const int lane_count = LaneCountFromFormat(vform);
    const uint64_t value = src.Uint(vform, src_index);
    dst.ClearForWrite(vform);
    for (int lane = 0; lane < lane_count; ++lane) {
      dst.SetUint(vform, lane, value);
    }
    return dst;
  }

  // Read the selected element as 64-bit parts, then repeat those parts across
  // the destination.
  const int part_count = is_q ? 2 : 4;
  uint64_t parts[4];
  for (int p = 0; p < part_count; ++p) {
    parts[p] = src.Uint(kFormatVnD, (src_index * part_count) + p);
  }
  dst.Clear();
  const int d_lane_count = LaneCountFromFormat(vform) * part_count;
  for (int d = 0; d < d_lane_count; ++d) {
    dst.SetUint(kFormatVnD, d, parts[d % part_count]);
  }
  return dst;
}
2944

2945
// Within each segment of `src`, broadcast the lane at position `src_index` of
// that segment to every lane of the same segment in `dst`.
LogicVRegister Simulator::dup_elements_to_segments(VectorFormat vform,
                                                   LogicVRegister dst,
                                                   const LogicVRegister& src,
                                                   int src_index) {
  // An SVE segment is a 128-bit slice of the vector (the size of a Q
  // register). For NEON the segment is simply the whole register.
  const unsigned lane_bits = LaneSizeInBitsFromFormat(vform);
  const int segment_bits =
      std::min(kQRegSize, RegisterSizeInBitsFromFormat(vform));
  VIXL_ASSERT(IsMultiple(segment_bits, lane_bits));
  const int lanes_per_segment = segment_bits / lane_bits;

  VIXL_ASSERT(src_index >= 0);
  VIXL_ASSERT(src_index < lanes_per_segment);

  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int base = 0; base < lane_count; base += lanes_per_segment) {
    // Read the selected lane before overwriting any lane of this segment.
    const uint64_t value = src.Uint(vform, base + src_index);
    const int segment_end = base + lanes_per_segment;
    for (int lane = base; lane < segment_end; ++lane) {
      dst.SetUint(vform, lane, value);
    }
  }
  return dst;
}
2968

2969
// Convenience overload: `src_and_index.first` is passed to ReadVRegister to
// obtain the source register, and `src_and_index.second` is the lane index
// within each segment.
LogicVRegister Simulator::dup_elements_to_segments(
    VectorFormat vform,
    LogicVRegister dst,
    const std::pair<int, int>& src_and_index) {
  const int src_code = src_and_index.first;
  const int lane_index = src_and_index.second;
  return dup_elements_to_segments(vform,
                                  dst,
                                  ReadVRegister(src_code),
                                  lane_index);
}
2978

2979
// Write `imm`, masked with MaxUintFromFormat(vform), to every lane of `dst`.
LogicVRegister Simulator::dup_immediate(VectorFormat vform,
                                        LogicVRegister dst,
                                        uint64_t imm) {
  const uint64_t lane_value = imm & MaxUintFromFormat(vform);
  const int lane_count = LaneCountFromFormat(vform);

  dst.ClearForWrite(vform);
  int lane = 0;
  while (lane < lane_count) {
    dst.SetUint(vform, lane, lane_value);
    ++lane;
  }
  return dst;
}
2990

2991

2992
// Copy lane `src_index` of `src` into lane `dst_index` of `dst`. No other lane
// of `dst` is written by this function.
LogicVRegister Simulator::ins_element(VectorFormat vform,
                                      LogicVRegister dst,
                                      int dst_index,
                                      const LogicVRegister& src,
                                      int src_index) {
  const uint64_t value = src.Uint(vform, src_index);
  dst.SetUint(vform, dst_index, value);
  return dst;
}
3000

3001

3002
// Write `imm`, masked with MaxUintFromFormat(vform), into lane `dst_index` of
// `dst`. No other lane of `dst` is written by this function.
LogicVRegister Simulator::ins_immediate(VectorFormat vform,
                                        LogicVRegister dst,
                                        int dst_index,
                                        uint64_t imm) {
  dst.SetUint(vform, dst_index, imm & MaxUintFromFormat(vform));
  return dst;
}
3010

3011

3012
// Fill `dst` with an arithmetic sequence: lane `i` receives
// `start + i * step`, computed with wrapping 64-bit unsigned arithmetic.
// `vform` must be an SVE format.
LogicVRegister Simulator::index(VectorFormat vform,
                                LogicVRegister dst,
                                uint64_t start,
                                uint64_t step) {
  VIXL_ASSERT(IsSVEFormat(vform));
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, start + (static_cast<uint64_t>(lane) * step));
  }
  return dst;
}
3024

3025

3026
// Shift every lane of `dst` up by one position and write `imm` into lane 0.
// The previous contents of the highest lane are discarded. `vform` must be an
// SVE format.
LogicVRegister Simulator::insr(VectorFormat vform,
                               LogicVRegister dst,
                               uint64_t imm) {
  VIXL_ASSERT(IsSVEFormat(vform));
  // Work from the top lane downwards so that each lane is read before it is
  // overwritten.
  const int top_lane = LaneCountFromFormat(vform) - 1;
  for (int to = top_lane; to >= 1; --to) {
    const int from = to - 1;
    dst.SetUint(vform, to, dst.Uint(vform, from));
  }
  dst.SetUint(vform, 0, imm);
  return dst;
}
3036

3037

3038
// Copy every lane of `src` to the corresponding lane of `dst`.
LogicVRegister Simulator::mov(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int i = 0; i < lane_count; ++i) {
    const uint64_t value = src.Uint(vform, i);
    dst.SetUint(vform, i, value);
  }
  return dst;
}
3047

3048

3049
// Copy predicate register `src` to `dst`, one chunk at a time.
LogicPRegister Simulator::mov(LogicPRegister dst, const LogicPRegister& src) {
  // Nothing needs to be copied when both operands alias the same register.
  if (!dst.Aliases(src)) {
    for (int chunk = 0; chunk < dst.GetChunkCount(); ++chunk) {
      dst.SetChunk(chunk, src.GetChunk(chunk));
    }
  }
  return dst;
}
3058

3059

3060
// Predicated move implemented with `sel`: `src` is the first candidate and the
// current contents of `dst` are the second candidate.
LogicVRegister Simulator::mov_merging(VectorFormat vform,
                                      LogicVRegister dst,
                                      const SimPRegister& pg,
                                      const LogicVRegister& src) {
  const LogicVRegister& fallback = dst;
  return sel(vform, dst, pg, src, fallback);
}
3066

3067
// Predicated move implemented with `sel`: `src` is the first candidate and an
// all-zero vector is the second candidate.
LogicVRegister Simulator::mov_zeroing(VectorFormat vform,
                                      LogicVRegister dst,
                                      const SimPRegister& pg,
                                      const LogicVRegister& src) {
  const uint64_t zero_lane = 0;
  SimVRegister zeros;
  dup_immediate(vform, zeros, zero_lane);
  return sel(vform, dst, pg, src, zeros);
}
3075

3076
// Copy every second lane of `src` to the same lane of `dst`, beginning with
// lane `start_at` (which must be 0 or 1). The remaining lanes of `dst` are not
// written.
LogicVRegister Simulator::mov_alternating(VectorFormat vform,
                                          LogicVRegister dst,
                                          const LogicVRegister& src,
                                          int start_at) {
  VIXL_ASSERT((start_at == 0) || (start_at == 1));
  const int lane_count = LaneCountFromFormat(vform);
  int lane = start_at;
  while (lane < lane_count) {
    dst.SetUint(vform, lane, src.Uint(vform, lane));
    lane += 2;
  }
  return dst;
}
3086

3087
// Predicated predicate move implemented with `sel`: `src` is the first
// candidate and the current contents of `dst` are the second candidate.
LogicPRegister Simulator::mov_merging(LogicPRegister dst,
                                      const LogicPRegister& pg,
                                      const LogicPRegister& src) {
  const LogicPRegister& fallback = dst;
  return sel(dst, pg, src, fallback);
}
3092

3093
// Predicated predicate move implemented with `sel`: `src` is the first
// candidate and the result of `pfalse` on a scratch predicate is the second
// candidate.
LogicPRegister Simulator::mov_zeroing(LogicPRegister dst,
                                      const LogicPRegister& pg,
                                      const LogicPRegister& src) {
  SimPRegister scratch;
  const LogicPRegister& all_false = pfalse(scratch);
  return sel(dst, pg, src, all_false);
}
3099

3100
// Write `imm` to every lane of `dst`.
LogicVRegister Simulator::movi(VectorFormat vform,
                               LogicVRegister dst,
                               uint64_t imm) {
  const int lane_count = LaneCountFromFormat(vform);
  dst.ClearForWrite(vform);
  int lane = 0;
  while (lane < lane_count) {
    dst.SetUint(vform, lane, imm);
    ++lane;
  }
  return dst;
}
3110

3111

3112
// Write the bitwise complement of `imm` to every lane of `dst`.
LogicVRegister Simulator::mvni(VectorFormat vform,
                               LogicVRegister dst,
                               uint64_t imm) {
  const uint64_t inverted = ~imm;
  const int lane_count = LaneCountFromFormat(vform);
  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, inverted);
  }
  return dst;
}
3122

3123

3124
// Bitwise OR of every lane of `src` with `imm`, written to `dst`. All lanes
// are computed into a staging buffer before `dst` is modified.
//
// NOTE(review): the staging buffer holds 16 entries, so this presumably
// expects formats with at most 16 lanes -- confirm against callers.
LogicVRegister Simulator::orr(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src,
                              uint64_t imm) {
  const int lane_count = LaneCountFromFormat(vform);

  uint64_t combined[16];
  for (int lane = 0; lane < lane_count; ++lane) {
    combined[lane] = imm | src.Uint(vform, lane);
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, combined[lane]);
  }
  return dst;
}
3139

3140

3141
// Unsigned lengthen: each lane of `dst` (format `vform`) receives the unsigned
// value of a half-width lane of `src`. When `is_2` is set, the source lanes
// start at index LaneCountFromFormat(vform) rather than 0.
LogicVRegister Simulator::uxtl(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src,
                               bool is_2) {
  const VectorFormat narrow_form = VectorFormatHalfWidth(vform);
  const int lane_count = LaneCountFromFormat(vform);

  dst.ClearForWrite(vform);
  int src_lane = is_2 ? lane_count : 0;
  for (int dst_lane = 0; dst_lane < lane_count; ++dst_lane, ++src_lane) {
    dst.SetUint(vform, dst_lane, src.Uint(narrow_form, src_lane));
  }
  return dst;
}
3155

3156

3157
// Signed lengthen: each lane of `dst` (format `vform`) receives the signed
// value of a half-width lane of `src`. When `is_2` is set, the source lanes
// start at index LaneCountFromFormat(vform) rather than 0.
LogicVRegister Simulator::sxtl(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src,
                               bool is_2) {
  const VectorFormat narrow_form = VectorFormatHalfWidth(vform);
  const int lane_count = LaneCountFromFormat(vform);

  dst.ClearForWrite(vform);
  int src_lane = is_2 ? lane_count : 0;
  for (int dst_lane = 0; dst_lane < lane_count; ++dst_lane, ++src_lane) {
    dst.SetInt(vform, dst_lane, src.Int(narrow_form, src_lane));
  }
  return dst;
}
3171

3172

3173
// The "2" variant of `uxtl`: forwards to `uxtl` with `is_2` set.
LogicVRegister Simulator::uxtl2(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  const bool is_2 = true;
  return uxtl(vform, dst, src, is_2);
}
3178

3179

3180
// The "2" variant of `sxtl`: forwards to `sxtl` with `is_2` set.
LogicVRegister Simulator::sxtl2(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  const bool is_2 = true;
  return sxtl(vform, dst, src, is_2);
}
3185

3186

3187
// In-lane unsigned extend: each lane of `src` is ANDed with
// GetUintMask(from_size_in_bits) and written to the same lane of `dst`.
LogicVRegister Simulator::uxt(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src,
                              unsigned from_size_in_bits) {
  const uint64_t keep_mask = GetUintMask(from_size_in_bits);
  const int lane_count = LaneCountFromFormat(vform);

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t truncated = src.Uint(vform, lane) & keep_mask;
    dst.SetInt(vform, lane, truncated);
  }
  return dst;
}
3200

3201

3202
// In-lane signed extend: for each lane of `src`, the signed bitfield spanning
// bits [from_size_in_bits - 1 : 0] is extracted and written to the same lane
// of `dst`.
LogicVRegister Simulator::sxt(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src,
                              unsigned from_size_in_bits) {
  const unsigned top_bit = from_size_in_bits - 1;
  const int lane_count = LaneCountFromFormat(vform);

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t raw = src.Uint(vform, lane);
    const uint64_t extended = ExtractSignedBitfield64(top_bit, 0, raw);
    dst.SetInt(vform, lane, extended);
  }
  return dst;
}
3216

3217

3218
// Shift right and narrow: `src` is shifted right (unsigned) by `shift` at
// double the lane width of `vform`, then narrowed into `dst` with
// `extractnarrow`.
LogicVRegister Simulator::shrn(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src,
                               int shift) {
  const VectorFormat wide_form = VectorFormatDoubleWidth(vform);
  SimVRegister scratch;
  LogicVRegister shifted = ushr(wide_form, scratch, src, shift);
  return extractnarrow(vform, dst, false, shifted, false);
}
3228

3229

3230
// The "2" variant of `shrn`. The source format is derived from `vform` by
// halving the lane count and then doubling the lane width; `src` is shifted
// right (unsigned) by `shift` in that format and narrowed into `dst` with
// `extractnarrow`.
LogicVRegister Simulator::shrn2(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                int shift) {
  const VectorFormat half_lanes_form = VectorFormatHalfLanes(vform);
  const VectorFormat wide_form = VectorFormatDoubleWidth(half_lanes_form);
  SimVRegister scratch;
  LogicVRegister shifted = ushr(wide_form, scratch, src, shift);
  return extractnarrow(vform, dst, false, shifted, false);
}
3240

3241

3242
// Rounding shift right and narrow: `src` is shifted right (unsigned) by
// `shift` at double the lane width of `vform`, `Round` is applied to the
// shifted value, and the result is narrowed into `dst` with `extractnarrow`.
LogicVRegister Simulator::rshrn(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                int shift) {
  const VectorFormat wide_form = VectorFormatDoubleWidth(vform);
  SimVRegister scratch;
  LogicVRegister rounded =
      ushr(wide_form, scratch, src, shift).Round(wide_form);
  return extractnarrow(vform, dst, false, rounded, false);
}
3252

3253

3254
// The "2" variant of `rshrn`. The source format is derived from `vform` by
// halving the lane count and then doubling the lane width; `src` is shifted
// right (unsigned) by `shift` in that format, `Round` is applied, and the
// result is narrowed into `dst` with `extractnarrow`.
LogicVRegister Simulator::rshrn2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src,
                                 int shift) {
  const VectorFormat half_lanes_form = VectorFormatHalfLanes(vform);
  const VectorFormat wide_form = VectorFormatDoubleWidth(half_lanes_form);
  SimVRegister scratch;
  LogicVRegister rounded =
      ushr(wide_form, scratch, src, shift).Round(wide_form);
  return extractnarrow(vform, dst, false, rounded, false);
}
3264

3265
// Shared implementation of the table lookup helpers. The lanes of `tab1` and
// of any of `tab2`..`tab4` that are non-NULL are concatenated, in that order,
// into one table. For each lane of `ind`, an index below the table size
// selects that table entry; any other index yields zero when
// `zero_out_of_bounds` is set, or leaves the existing lane of `dst` unchanged
// otherwise.
LogicVRegister Simulator::Table(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& ind,
                                bool zero_out_of_bounds,
                                const LogicVRegister* tab1,
                                const LogicVRegister* tab2,
                                const LogicVRegister* tab3,
                                const LogicVRegister* tab4) {
  VIXL_ASSERT(tab1 != NULL);
  const int lane_count = LaneCountFromFormat(vform);
  VIXL_ASSERT((tab3 == NULL) || (lane_count <= 16));

  // Neon table registers are always 16B and the Neon destination is either 8B
  // or 16B, so the table format is inferred from the destination format.
  const VectorFormat table_form = (vform == kFormat8B) ? kFormat16B : vform;

  // Gather every table register into one contiguous array.
  uint64_t entries[kZRegMaxSizeInBytes * 2];
  uint64_t entry_count = tab1->UintArray(table_form, &entries[0]);
  const LogicVRegister* const extra_tables[] = {tab2, tab3, tab4};
  for (const LogicVRegister* tab : extra_tables) {
    if (tab == NULL) continue;
    const uint64_t added = tab->UintArray(table_form, &entries[entry_count]);
    entry_count += added;
  }

  // Results are staged and written to dst only after every lane is computed.
  uint64_t looked_up[kZRegMaxSizeInBytes];
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t selector = ind.Uint(vform, lane);
    if (selector < entry_count) {
      looked_up[lane] = entries[selector];
    } else {
      looked_up[lane] = zero_out_of_bounds ? 0 : dst.Uint(vform, lane);
    }
  }
  dst.SetUintArray(vform, looked_up);
  return dst;
}
3297

3298
// Single-register table lookup. Out-of-range indices produce zero.
LogicVRegister Simulator::tbl(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& tab,
                              const LogicVRegister& ind) {
  const bool zero_out_of_bounds = true;
  return Table(vform, dst, ind, zero_out_of_bounds, &tab);
}
3304

3305

3306
// Two-register table lookup. Out-of-range indices produce zero.
LogicVRegister Simulator::tbl(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& tab,
                              const LogicVRegister& tab2,
                              const LogicVRegister& ind) {
  const bool zero_out_of_bounds = true;
  return Table(vform, dst, ind, zero_out_of_bounds, &tab, &tab2);
}
3313

3314

3315
// Three-register table lookup. Out-of-range indices produce zero.
LogicVRegister Simulator::tbl(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& tab,
                              const LogicVRegister& tab2,
                              const LogicVRegister& tab3,
                              const LogicVRegister& ind) {
  const bool zero_out_of_bounds = true;
  return Table(vform, dst, ind, zero_out_of_bounds, &tab, &tab2, &tab3);
}
3323

3324

3325
// Four-register table lookup. Out-of-range indices produce zero.
LogicVRegister Simulator::tbl(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& tab,
                              const LogicVRegister& tab2,
                              const LogicVRegister& tab3,
                              const LogicVRegister& tab4,
                              const LogicVRegister& ind) {
  const bool zero_out_of_bounds = true;
  return Table(vform, dst, ind, zero_out_of_bounds, &tab, &tab2, &tab3, &tab4);
}
3334

3335

3336
// Single-register table lookup extension. Lanes of `dst` whose index is out of
// range keep their existing value.
LogicVRegister Simulator::tbx(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& tab,
                              const LogicVRegister& ind) {
  const bool zero_out_of_bounds = false;
  return Table(vform, dst, ind, zero_out_of_bounds, &tab);
}
3342

3343

3344
// Two-register table lookup extension. Lanes of `dst` whose index is out of
// range keep their existing value.
LogicVRegister Simulator::tbx(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& tab,
                              const LogicVRegister& tab2,
                              const LogicVRegister& ind) {
  const bool zero_out_of_bounds = false;
  return Table(vform, dst, ind, zero_out_of_bounds, &tab, &tab2);
}
3351

3352

3353
// Three-register table lookup extension. Lanes of `dst` whose index is out of
// range keep their existing value.
LogicVRegister Simulator::tbx(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& tab,
                              const LogicVRegister& tab2,
                              const LogicVRegister& tab3,
                              const LogicVRegister& ind) {
  const bool zero_out_of_bounds = false;
  return Table(vform, dst, ind, zero_out_of_bounds, &tab, &tab2, &tab3);
}
3361

3362

3363
// Four-register table lookup extension. Lanes of `dst` whose index is out of
// range keep their existing value.
LogicVRegister Simulator::tbx(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& tab,
                              const LogicVRegister& tab2,
                              const LogicVRegister& tab3,
                              const LogicVRegister& tab4,
                              const LogicVRegister& ind) {
  const bool zero_out_of_bounds = false;
  return Table(vform, dst, ind, zero_out_of_bounds, &tab, &tab2, &tab3, &tab4);
}
3372

3373

3374
// Unsigned saturating shift right and narrow: performs `shrn`, then applies
// unsigned saturation to the result.
LogicVRegister Simulator::uqshrn(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src,
                                 int shift) {
  LogicVRegister narrowed = shrn(vform, dst, src, shift);
  return narrowed.UnsignedSaturate(vform);
}
3380

3381

3382
// Unsigned saturating shift right and narrow, "2" variant: performs `shrn2`,
// then applies unsigned saturation to the result.
LogicVRegister Simulator::uqshrn2(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src,
                                  int shift) {
  LogicVRegister narrowed = shrn2(vform, dst, src, shift);
  return narrowed.UnsignedSaturate(vform);
}
3388

3389

3390
// Unsigned saturating rounding shift right and narrow: performs `rshrn`, then
// applies unsigned saturation to the result.
LogicVRegister Simulator::uqrshrn(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src,
                                  int shift) {
  LogicVRegister narrowed = rshrn(vform, dst, src, shift);
  return narrowed.UnsignedSaturate(vform);
}
3396

3397

3398
// Unsigned saturating rounding shift right and narrow, "2" variant: performs
// `rshrn2`, then applies unsigned saturation to the result.
LogicVRegister Simulator::uqrshrn2(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src,
                                   int shift) {
  LogicVRegister narrowed = rshrn2(vform, dst, src, shift);
  return narrowed.UnsignedSaturate(vform);
}
3404

3405

3406
// Shifts `src` right with sshr() in the double-width format, then narrows the
// shifted value into `dst` with sqxtn().
LogicVRegister Simulator::sqshrn(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src,
                                 int shift) {
  // The source lanes are twice as wide as the destination lanes.
  const VectorFormat wide_form = VectorFormatDoubleWidth(vform);

  SimVRegister scratch;
  LogicVRegister shifted = sshr(wide_form, scratch, src, shift);
  return sqxtn(vform, dst, shifted);
}
3416

3417

3418
// Second-half variant: shifts `src` right with sshr(), then narrows the
// shifted value into `dst` with sqxtn().
LogicVRegister Simulator::sqshrn2(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src,
                                  int shift) {
  // The source format is the half-lane version of `vform`, at double width.
  const VectorFormat half_lanes = VectorFormatHalfLanes(vform);
  const VectorFormat wide_form = VectorFormatDoubleWidth(half_lanes);

  SimVRegister scratch;
  LogicVRegister shifted = sshr(wide_form, scratch, src, shift);
  return sqxtn(vform, dst, shifted);
}
3428

3429

3430
// Shifts `src` right with sshr() in the double-width format, applies Round()
// to the shifted value, then narrows it into `dst` with sqxtn().
LogicVRegister Simulator::sqrshrn(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src,
                                  int shift) {
  // The source lanes are twice as wide as the destination lanes.
  const VectorFormat wide_form = VectorFormatDoubleWidth(vform);

  SimVRegister scratch;
  LogicVRegister rounded =
      sshr(wide_form, scratch, src, shift).Round(wide_form);
  return sqxtn(vform, dst, rounded);
}
3440

3441

3442
// Second-half variant: shifts `src` right with sshr(), applies Round() to the
// shifted value, then narrows it into `dst` with sqxtn().
LogicVRegister Simulator::sqrshrn2(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src,
                                   int shift) {
  // The source format is the half-lane version of `vform`, at double width.
  const VectorFormat half_lanes = VectorFormatHalfLanes(vform);
  const VectorFormat wide_form = VectorFormatDoubleWidth(half_lanes);

  SimVRegister scratch;
  LogicVRegister rounded =
      sshr(wide_form, scratch, src, shift).Round(wide_form);
  return sqxtn(vform, dst, rounded);
}
3452

3453

3454
// Shifts `src` right with sshr() in the double-width format, then narrows the
// shifted value into `dst` with sqxtun().
LogicVRegister Simulator::sqshrun(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src,
                                  int shift) {
  // The source lanes are twice as wide as the destination lanes.
  const VectorFormat wide_form = VectorFormatDoubleWidth(vform);

  SimVRegister scratch;
  LogicVRegister shifted = sshr(wide_form, scratch, src, shift);
  return sqxtun(vform, dst, shifted);
}
3464

3465

3466
// Second-half variant: shifts `src` right with sshr(), then narrows the
// shifted value into `dst` with sqxtun().
LogicVRegister Simulator::sqshrun2(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src,
                                   int shift) {
  // The source format is the half-lane version of `vform`, at double width.
  const VectorFormat half_lanes = VectorFormatHalfLanes(vform);
  const VectorFormat wide_form = VectorFormatDoubleWidth(half_lanes);

  SimVRegister scratch;
  LogicVRegister shifted = sshr(wide_form, scratch, src, shift);
  return sqxtun(vform, dst, shifted);
}
3476

3477

3478
// Shifts `src` right with sshr() in the double-width format, applies Round()
// to the shifted value, then narrows it into `dst` with sqxtun().
LogicVRegister Simulator::sqrshrun(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src,
                                   int shift) {
  // The source lanes are twice as wide as the destination lanes.
  const VectorFormat wide_form = VectorFormatDoubleWidth(vform);

  SimVRegister scratch;
  LogicVRegister rounded =
      sshr(wide_form, scratch, src, shift).Round(wide_form);
  return sqxtun(vform, dst, rounded);
}
3488

3489

3490
// Second-half variant: shifts `src` right with sshr(), applies Round() to the
// shifted value, then narrows it into `dst` with sqxtun().
LogicVRegister Simulator::sqrshrun2(VectorFormat vform,
                                    LogicVRegister dst,
                                    const LogicVRegister& src,
                                    int shift) {
  // The source format is the half-lane version of `vform`, at double width.
  const VectorFormat half_lanes = VectorFormatHalfLanes(vform);
  const VectorFormat wide_form = VectorFormatDoubleWidth(half_lanes);

  SimVRegister scratch;
  LogicVRegister rounded =
      sshr(wide_form, scratch, src, shift).Round(wide_form);
  return sqxtun(vform, dst, rounded);
}
3500

3501

3502
// Widens both operands with uxtl() into scratch registers, then adds the
// widened values into `dst`.
LogicVRegister Simulator::uaddl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;

  uxtl(vform, wide_lhs, src1);
  uxtl(vform, wide_rhs, src2);

  add(vform, dst, wide_lhs, wide_rhs);
  return dst;
}
3512

3513

3514
// Widens both operands with uxtl2() into scratch registers, then adds the
// widened values into `dst`.
LogicVRegister Simulator::uaddl2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;

  uxtl2(vform, wide_lhs, src1);
  uxtl2(vform, wide_rhs, src2);

  add(vform, dst, wide_lhs, wide_rhs);
  return dst;
}
3524

3525

3526
// Widens only `src2` with uxtl(), then adds it to `src1` (used as-is) and
// writes the sum to `dst`.
LogicVRegister Simulator::uaddw(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide_rhs;
  uxtl(vform, wide_rhs, src2);

  add(vform, dst, src1, wide_rhs);
  return dst;
}
3535

3536

3537
// Widens only `src2` with uxtl2(), then adds it to `src1` (used as-is) and
// writes the sum to `dst`.
LogicVRegister Simulator::uaddw2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_rhs;
  uxtl2(vform, wide_rhs, src2);

  add(vform, dst, src1, wide_rhs);
  return dst;
}
3546

3547

3548
// Widens both operands with sxtl() into scratch registers, then adds the
// widened values into `dst`.
LogicVRegister Simulator::saddl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;

  sxtl(vform, wide_lhs, src1);
  sxtl(vform, wide_rhs, src2);

  add(vform, dst, wide_lhs, wide_rhs);
  return dst;
}
3558

3559

3560
// Widens both operands with sxtl2() into scratch registers, then adds the
// widened values into `dst`.
LogicVRegister Simulator::saddl2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;

  sxtl2(vform, wide_lhs, src1);
  sxtl2(vform, wide_rhs, src2);

  add(vform, dst, wide_lhs, wide_rhs);
  return dst;
}
3570

3571

3572
// Widens only `src2` with sxtl(), then adds it to `src1` (used as-is) and
// writes the sum to `dst`.
LogicVRegister Simulator::saddw(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide_rhs;
  sxtl(vform, wide_rhs, src2);

  add(vform, dst, src1, wide_rhs);
  return dst;
}
3581

3582

3583
// Widens only `src2` with sxtl2(), then adds it to `src1` (used as-is) and
// writes the sum to `dst`.
LogicVRegister Simulator::saddw2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_rhs;
  sxtl2(vform, wide_rhs, src2);

  add(vform, dst, src1, wide_rhs);
  return dst;
}
3592

3593

3594
// Widens both operands with uxtl() into scratch registers, then writes
// (widened src1 - widened src2) to `dst`.
LogicVRegister Simulator::usubl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;

  uxtl(vform, wide_lhs, src1);
  uxtl(vform, wide_rhs, src2);

  sub(vform, dst, wide_lhs, wide_rhs);
  return dst;
}
3604

3605

3606
// Widens both operands with uxtl2() into scratch registers, then writes
// (widened src1 - widened src2) to `dst`.
LogicVRegister Simulator::usubl2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;

  uxtl2(vform, wide_lhs, src1);
  uxtl2(vform, wide_rhs, src2);

  sub(vform, dst, wide_lhs, wide_rhs);
  return dst;
}
3616

3617

3618
// Widens only `src2` with uxtl(), then writes (src1 - widened src2) to `dst`.
LogicVRegister Simulator::usubw(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide_rhs;
  uxtl(vform, wide_rhs, src2);

  sub(vform, dst, src1, wide_rhs);
  return dst;
}
3627

3628

3629
// Widens only `src2` with uxtl2(), then writes (src1 - widened src2) to `dst`.
LogicVRegister Simulator::usubw2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_rhs;
  uxtl2(vform, wide_rhs, src2);

  sub(vform, dst, src1, wide_rhs);
  return dst;
}
3638

3639

3640
// Widens both operands with sxtl() into scratch registers, then writes
// (widened src1 - widened src2) to `dst`.
LogicVRegister Simulator::ssubl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;

  sxtl(vform, wide_lhs, src1);
  sxtl(vform, wide_rhs, src2);

  sub(vform, dst, wide_lhs, wide_rhs);
  return dst;
}
3650

3651

3652
// Widens both operands with sxtl2() into scratch registers, then writes
// (widened src1 - widened src2) to `dst`.
LogicVRegister Simulator::ssubl2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;

  sxtl2(vform, wide_lhs, src1);
  sxtl2(vform, wide_rhs, src2);

  sub(vform, dst, wide_lhs, wide_rhs);
  return dst;
}
3662

3663

3664
// Widens only `src2` with sxtl(), then writes (src1 - widened src2) to `dst`.
LogicVRegister Simulator::ssubw(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide_rhs;
  sxtl(vform, wide_rhs, src2);

  sub(vform, dst, src1, wide_rhs);
  return dst;
}
3673

3674

3675
// Widens only `src2` with sxtl2(), then writes (src1 - widened src2) to `dst`.
LogicVRegister Simulator::ssubw2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_rhs;
  sxtl2(vform, wide_rhs, src2);

  sub(vform, dst, src1, wide_rhs);
  return dst;
}
3684

3685

3686
// Widens both operands with uxtl() into scratch registers, then hands them to
// uaba() with `dst` as the destination.
LogicVRegister Simulator::uabal(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;

  uxtl(vform, wide_lhs, src1);
  uxtl(vform, wide_rhs, src2);

  uaba(vform, dst, wide_lhs, wide_rhs);
  return dst;
}
3696

3697

3698
// Widens both operands with uxtl2() into scratch registers, then hands them
// to uaba() with `dst` as the destination.
LogicVRegister Simulator::uabal2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;

  uxtl2(vform, wide_lhs, src1);
  uxtl2(vform, wide_rhs, src2);

  uaba(vform, dst, wide_lhs, wide_rhs);
  return dst;
}
3708

3709

3710
// Widens both operands with sxtl() into scratch registers, then hands them to
// saba() with `dst` as the destination.
LogicVRegister Simulator::sabal(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;

  sxtl(vform, wide_lhs, src1);
  sxtl(vform, wide_rhs, src2);

  saba(vform, dst, wide_lhs, wide_rhs);
  return dst;
}
3720

3721

3722
// Widens both operands with sxtl2() into scratch registers, then hands them
// to saba() with `dst` as the destination.
LogicVRegister Simulator::sabal2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;

  sxtl2(vform, wide_lhs, src1);
  sxtl2(vform, wide_rhs, src2);

  saba(vform, dst, wide_lhs, wide_rhs);
  return dst;
}
3732

3733

3734
// Widens both operands with uxtl() into scratch registers, then calls
// absdiff() on them with its final flag set to false (the signed sibling
// sabdl passes true).
LogicVRegister Simulator::uabdl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;

  uxtl(vform, wide_lhs, src1);
  uxtl(vform, wide_rhs, src2);

  const bool treat_as_signed = false;
  absdiff(vform, dst, wide_lhs, wide_rhs, treat_as_signed);
  return dst;
}
3744

3745

3746
// Widens both operands with uxtl2() into scratch registers, then calls
// absdiff() on them with its final flag set to false (the signed sibling
// sabdl2 passes true).
LogicVRegister Simulator::uabdl2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;

  uxtl2(vform, wide_lhs, src1);
  uxtl2(vform, wide_rhs, src2);

  const bool treat_as_signed = false;
  absdiff(vform, dst, wide_lhs, wide_rhs, treat_as_signed);
  return dst;
}
3756

3757

3758
// Widens both operands with sxtl() into scratch registers, then calls
// absdiff() on them with its final flag set to true (the unsigned sibling
// uabdl passes false).
LogicVRegister Simulator::sabdl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;

  sxtl(vform, wide_lhs, src1);
  sxtl(vform, wide_rhs, src2);

  const bool treat_as_signed = true;
  absdiff(vform, dst, wide_lhs, wide_rhs, treat_as_signed);
  return dst;
}
3768

3769

3770
// Widens both operands with sxtl2() into scratch registers, then calls
// absdiff() on them with its final flag set to true (the unsigned sibling
// uabdl2 passes false).
LogicVRegister Simulator::sabdl2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;

  sxtl2(vform, wide_lhs, src1);
  sxtl2(vform, wide_rhs, src2);

  const bool treat_as_signed = true;
  absdiff(vform, dst, wide_lhs, wide_rhs, treat_as_signed);
  return dst;
}
3780

3781

3782
// Widens both operands with uxtl() (forwarding `is_2`), then multiplies the
// widened values into `dst`.
LogicVRegister Simulator::umull(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2,
                                bool is_2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;

  uxtl(vform, wide_lhs, src1, is_2);
  uxtl(vform, wide_rhs, src2, is_2);

  mul(vform, dst, wide_lhs, wide_rhs);
  return dst;
}
3793

3794

3795
// Convenience form of umull() with `is_2` fixed to true.
LogicVRegister Simulator::umull2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  const bool is_2 = true;
  return umull(vform, dst, src1, src2, is_2);
}
3801

3802

3803
// Widens both operands with sxtl() (forwarding `is_2`), then multiplies the
// widened values into `dst`.
LogicVRegister Simulator::smull(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2,
                                bool is_2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;

  sxtl(vform, wide_lhs, src1, is_2);
  sxtl(vform, wide_rhs, src2, is_2);

  mul(vform, dst, wide_lhs, wide_rhs);
  return dst;
}
3814

3815

3816
// Convenience form of smull() with `is_2` fixed to true.
LogicVRegister Simulator::smull2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  const bool is_2 = true;
  return smull(vform, dst, src1, src2, is_2);
}
3822

3823

3824
// Widens both operands with uxtl() (forwarding `is_2`), then calls mls() with
// `dst` as both destination and accumulator.
LogicVRegister Simulator::umlsl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2,
                                bool is_2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;

  uxtl(vform, wide_lhs, src1, is_2);
  uxtl(vform, wide_rhs, src2, is_2);

  mls(vform, dst, dst, wide_lhs, wide_rhs);
  return dst;
}
3835

3836

3837
// Convenience form of umlsl() with `is_2` fixed to true.
LogicVRegister Simulator::umlsl2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  const bool is_2 = true;
  return umlsl(vform, dst, src1, src2, is_2);
}
3843

3844

3845
// Widens both operands with sxtl() (forwarding `is_2`), then calls mls() with
// `dst` as both destination and accumulator.
LogicVRegister Simulator::smlsl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2,
                                bool is_2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;

  sxtl(vform, wide_lhs, src1, is_2);
  sxtl(vform, wide_rhs, src2, is_2);

  mls(vform, dst, dst, wide_lhs, wide_rhs);
  return dst;
}
3856

3857

3858
// Convenience form of smlsl() with `is_2` fixed to true.
LogicVRegister Simulator::smlsl2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  const bool is_2 = true;
  return smlsl(vform, dst, src1, src2, is_2);
}
3864

3865

3866
// Widens both operands with uxtl() (forwarding `is_2`), then calls mla() with
// `dst` as both destination and accumulator.
LogicVRegister Simulator::umlal(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2,
                                bool is_2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;

  uxtl(vform, wide_lhs, src1, is_2);
  uxtl(vform, wide_rhs, src2, is_2);

  mla(vform, dst, dst, wide_lhs, wide_rhs);
  return dst;
}
3877

3878

3879
// Convenience form of umlal() with `is_2` fixed to true.
LogicVRegister Simulator::umlal2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  const bool is_2 = true;
  return umlal(vform, dst, src1, src2, is_2);
}
3885

3886

3887
// Widens both operands with sxtl() (forwarding `is_2`), then calls mla() with
// `dst` as both destination and accumulator.
LogicVRegister Simulator::smlal(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2,
                                bool is_2) {
  SimVRegister wide_lhs;
  SimVRegister wide_rhs;

  sxtl(vform, wide_lhs, src1, is_2);
  sxtl(vform, wide_rhs, src2, is_2);

  mla(vform, dst, dst, wide_lhs, wide_rhs);
  return dst;
}
3898

3899

3900
// Convenience form of smlal() with `is_2` fixed to true.
LogicVRegister Simulator::smlal2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  const bool is_2 = true;
  return smlal(vform, dst, src1, src2, is_2);
}
3906

3907

3908
// Computes sqdmull(src1, src2) into a scratch register, adds that product to
// `dst`, and applies SignedSaturate() to the sum.
LogicVRegister Simulator::sqdmlal(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2,
                                  bool is_2) {
  SimVRegister scratch;
  LogicVRegister doubled_product = sqdmull(vform, scratch, src1, src2, is_2);

  LogicVRegister sum = add(vform, dst, dst, doubled_product);
  return sum.SignedSaturate(vform);
}
3917

3918

3919
// Convenience form of sqdmlal() with `is_2` fixed to true.
LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2) {
  const bool is_2 = true;
  return sqdmlal(vform, dst, src1, src2, is_2);
}
3925

3926

3927
// Computes sqdmull(src1, src2) into a scratch register, subtracts that
// product from `dst`, and applies SignedSaturate() to the difference.
LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2,
                                  bool is_2) {
  SimVRegister scratch;
  LogicVRegister doubled_product = sqdmull(vform, scratch, src1, src2, is_2);

  LogicVRegister difference = sub(vform, dst, dst, doubled_product);
  return difference.SignedSaturate(vform);
}
3936

3937

3938
// Convenience form of sqdmlsl() with `is_2` fixed to true.
LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2) {
  const bool is_2 = true;
  return sqdmlsl(vform, dst, src1, src2, is_2);
}
3944

3945

3946
// Computes smull(src1, src2) into a scratch register, doubles it by adding
// the product to itself into `dst`, and applies SignedSaturate() to the
// result.
LogicVRegister Simulator::sqdmull(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2,
                                  bool is_2) {
  SimVRegister scratch;
  LogicVRegister widened_product = smull(vform, scratch, src1, src2, is_2);

  LogicVRegister doubled = add(vform, dst, widened_product, widened_product);
  return doubled.SignedSaturate(vform);
}
3955

3956

3957
// Convenience form of sqdmull() with `is_2` fixed to true.
LogicVRegister Simulator::sqdmull2(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2) {
  const bool is_2 = true;
  return sqdmull(vform, dst, src1, src2, is_2);
}
3963

3964
// Saturating doubling multiply returning the high half, with optional
// rounding.
//
// The doubled high half is built from the mul()/smulh() results of
// src1 * src2. Lanes where both inputs equal the minimum representable value
// are written as the maximum representable value; every other lane takes the
// computed high half.
LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2,
                                   bool round) {
  const int msb_shift = LaneSizeInBitsFromFormat(vform) - 1;

  // Low and high halves of the full-width product.
  SimVRegister product_lo, product_hi;
  mul(vform, product_lo, src1, src2);
  smulh(vform, product_hi, src1, src2);

  // Doubling: shift the high half left by one and bring in the top bit of
  // the low half.
  shl(vform, product_hi, product_hi, 1);
  usra(vform, product_hi, product_lo, msb_shift);

  if (round) {
    // After doubling, the rounding bit is the second most-significant bit of
    // the low half; move it to the top and accumulate it.
    shl(vform, product_lo, product_lo, 1);
    usra(vform, product_hi, product_lo, msb_shift);
  }

  SimPRegister not_sat;
  LogicPRegister lane_mask(not_sat);
  dst.ClearForWrite(vform);

  const int64_t min_value = MinIntFromFormat(vform);
  const int64_t max_value = MaxIntFromFormat(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; lane++) {
    // The only saturating case is src1 == src2 == minimum representable
    // value, so it is detected directly.
    const bool saturates = (src1.Int(vform, lane) == min_value) &&
                           (src2.Int(vform, lane) == min_value);
    lane_mask.SetActive(vform, lane, !saturates);

    // Pre-fill with the saturated value; non-saturating lanes are replaced
    // by the merge below.
    dst.SetInt(vform, lane, max_value);
  }

  mov_merging(vform, dst, not_sat, product_hi);
  return dst;
}
4006

4007

4008
// Accumulating dot product. Each lane of `dst` gains the sum of four products
// of quarter-width elements taken from the matching positions in `src1` and
// `src2`. Each source is read as signed or unsigned according to
// `is_src1_signed` / `is_src2_signed`.
LogicVRegister Simulator::dot(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2,
                              bool is_src1_signed,
                              bool is_src2_signed) {
  const VectorFormat half_vform = VectorFormatHalfWidthDoubleLanes(vform);
  const VectorFormat quarter_vform =
      VectorFormatHalfWidthDoubleLanes(half_vform);

  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; lane++) {
    uint64_t sum = 0;
    for (int k = 0; k < 4; k++) {
      const int index = (4 * lane) + k;
      const int64_t lhs =
          is_src1_signed
              ? src1.Int(quarter_vform, index)
              : static_cast<int64_t>(src1.Uint(quarter_vform, index));
      const int64_t rhs =
          is_src2_signed
              ? src2.Int(quarter_vform, index)
              : static_cast<int64_t>(src2.Uint(quarter_vform, index));
      sum += lhs * rhs;
    }
    // Accumulate on top of the existing destination lane.
    dst.SetUint(vform, lane, sum + dst.Uint(vform, lane));
  }
  return dst;
}
4039

4040

4041
// dot() with both sources read as signed.
LogicVRegister Simulator::sdot(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  const bool is_src1_signed = true;
  const bool is_src2_signed = true;
  return dot(vform, dst, src1, src2, is_src1_signed, is_src2_signed);
}
4047

4048

4049
// dot() with both sources read as unsigned.
LogicVRegister Simulator::udot(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  const bool is_src1_signed = false;
  const bool is_src2_signed = false;
  return dot(vform, dst, src1, src2, is_src1_signed, is_src2_signed);
}
4055

4056
// dot() with `src1` read as unsigned and `src2` read as signed.
LogicVRegister Simulator::usdot(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  const bool is_src1_signed = false;
  const bool is_src2_signed = true;
  return dot(vform, dst, src1, src2, is_src1_signed, is_src2_signed);
}
4062

4063
// Complex dot product with rotation. Each lane of `vform` covers two
// (real, imaginary) pairs of quarter-width signed elements. For every pair
// the lane gains r1 * r2 plus or minus i1 * i2, starting from the matching
// lane of `acc`. `rot` (0, 90, 180 or 270) selects which element of each
// `src2` pair acts as r2 / i2 and the sign of the i1 * i2 term.
LogicVRegister Simulator::cdot(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& acc,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2,
                               int rot) {
  VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270));
  const VectorFormat half_vform = VectorFormatHalfWidthDoubleLanes(vform);
  const VectorFormat quarter_vform =
      VectorFormatHalfWidthDoubleLanes(half_vform);

  // For 0/180 r2 is the first element of the src2 pair; otherwise the second.
  const bool unswapped = (rot == 0) || (rot == 180);
  const int sel_a = unswapped ? 0 : 1;
  const int sel_b = 1 - sel_a;
  // Sign applied to the i1 * i2 term.
  const int sub_i = ((rot == 90) || (rot == 180)) ? 1 : -1;

  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; lane++) {
    int64_t sum = acc.Int(vform, lane);
    for (int pair = 0; pair < 2; pair++) {
      const int base = (4 * lane) + (2 * pair);
      const int64_t r1 = src1.Int(quarter_vform, base + 0);
      const int64_t i1 = src1.Int(quarter_vform, base + 1);
      const int64_t r2 = src2.Int(quarter_vform, base + sel_a);
      const int64_t i2 = src2.Int(quarter_vform, base + sel_b);
      sum += (r1 * r2) + (sub_i * i1 * i2);
    }
    dst.SetInt(vform, lane, sum);
  }
  return dst;
}
4090

4091
// Complex multiply-accumulate with rotation, built on sqrdmlash() with
// rounding enabled.
//
// The interleaved inputs are split with uzp1()/uzp2() (presumably into even
// and odd elements, i.e. real and imaginary parts -- confirm against those
// helpers). One sqrdmlash() call updates the first half of the accumulator
// and another updates the second half, then zip1() interleaves the two
// halves into `dst`.
//
// `rot` is compared against 0, 90, 180 and 270. It selects which half of
// `src1` is used, which halves of `src2` feed each accumulation, and whether
// each product is subtracted instead of added.
LogicVRegister Simulator::sqrdcmlah(VectorFormat vform,
                                    LogicVRegister dst,
                                    const LogicVRegister& srca,
                                    const LogicVRegister& src1,
                                    const LogicVRegister& src2,
                                    int rot) {
  SimVRegister src1_a;
  SimVRegister src2_a, src2_b;
  SimVRegister srca_i, srca_r;
  SimVRegister zero;
  zero.Clear();

  if ((rot == 0) || (rot == 180)) {
    uzp1(vform, src1_a, src1, zero);
    uzp1(vform, src2_a, src2, zero);
    uzp2(vform, src2_b, src2, zero);
  } else {
    uzp2(vform, src1_a, src1, zero);
    uzp2(vform, src2_a, src2, zero);
    uzp1(vform, src2_b, src2, zero);
  }

  // Split the accumulator into its two de-interleaved halves.
  uzp1(vform, srca_r, srca, zero);
  uzp2(vform, srca_i, srca, zero);

  // Whether each half subtracts (true) or adds (false) its product.
  bool sub_r = (rot == 90) || (rot == 180);
  bool sub_i = (rot == 180) || (rot == 270);

  const bool round = true;
  sqrdmlash(vform, srca_r, src1_a, src2_a, round, sub_r);
  sqrdmlash(vform, srca_i, src1_a, src2_b, round, sub_i);

  // Re-interleave the two accumulated halves into the destination.
  zip1(vform, dst, srca_r, srca_i);
  return dst;
}
4125

4126
// Indexed form. Calls dup_elements_to_segments() on `src2` with `index` in
// the double-width format, writing to a scratch register, then passes that
// register as the second multiplicand of the vector form of sqrdcmlah().
LogicVRegister Simulator::sqrdcmlah(VectorFormat vform,
                                    LogicVRegister dst,
                                    const LogicVRegister& srca,
                                    const LogicVRegister& src1,
                                    const LogicVRegister& src2,
                                    int index,
                                    int rot) {
  const VectorFormat pair_form = VectorFormatDoubleWidth(vform);

  SimVRegister broadcast;
  dup_elements_to_segments(pair_form, broadcast, src2, index);
  return sqrdcmlah(vform, dst, srca, src1, broadcast, rot);
}
4137

4138
// 64-bit-lane implementation of the saturating (optionally rounding) doubling
// multiply-accumulate / multiply-subtract that returns the high half.
//
// 2 * INT64_MIN * INT64_MIN does not fit in 128 bits, so instead of
//     (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize
// the equivalent halved form is evaluated in 128-bit arithmetic:
//     (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
// `sub_op` negates the product before it is accumulated.
LogicVRegister Simulator::sqrdmlash_d(VectorFormat vform,
                                      LogicVRegister dst,
                                      const LogicVRegister& src1,
                                      const LogicVRegister& src2,
                                      bool round,
                                      bool sub_op) {
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
  const int esize = kDRegSize;
  const int shift = esize - 1;

  // 128-bit rounding constant: `first` is the high word, `second` the low.
  vixl_uint128_t round_const;
  round_const.first = 0;
  round_const.second = round ? (UINT64_C(1) << (esize - 2)) : 0;

  const uint64_t max_bits = static_cast<uint64_t>(MaxIntFromFormat(vform));
  const uint64_t min_bits = static_cast<uint64_t>(MinIntFromFormat(vform));

  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; lane++) {
    // Place the accumulator, shifted left by `esize - 1` bits, into the
    // 128-bit working value.
    const int64_t acc_in = dst.Int(vform, lane);
    vixl_uint128_t accum;
    accum.first = acc_in >> 1;
    accum.second = acc_in << shift;

    vixl_uint128_t product =
        Mul64(src1.Int(vform, lane), src2.Int(vform, lane));
    if (sub_op) {
      product = Neg128(product);
    }

    accum = Add128(accum, product);
    accum = Add128(accum, round_const);  // Rounding step.

    // Arithmetic shift of the 128-bit value right by `esize - 1` bits; the
    // high word is left as all-zeros or all-ones according to the sign.
    accum.second = (accum.first << 1) | (accum.second >> shift);
    accum.first = -(accum.first >> shift);

    // Saturate to the signed 64-bit range.
    const bool is_pos = (accum.first == 0);
    if (is_pos) {
      if (accum.second > max_bits) {
        accum.second = max_bits;
      }
    } else if (accum.second < min_bits) {
      accum.second = min_bits;
    }

    dst.SetInt(vform, lane, accum.second);
  }

  return dst;
}
4195

4196
// Saturating (optionally rounding) doubling multiply-accumulate
// (`sub_op == false`) or multiply-subtract (`sub_op == true`) that returns
// the high half.
//
// 2 * INT32_MIN * INT32_MIN overflows int64_t, so instead of
//     (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize
// the equivalent halved form is evaluated:
//     (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
// The kFormatVnD case is delegated to sqrdmlash_d().
LogicVRegister Simulator::sqrdmlash(VectorFormat vform,
                                    LogicVRegister dst,
                                    const LogicVRegister& src1,
                                    const LogicVRegister& src2,
                                    bool round,
                                    bool sub_op) {
  if (vform == kFormatVnD) {
    return sqrdmlash_d(vform, dst, src1, src2, round, sub_op);
  }

  const int esize = LaneSizeInBitsFromFormat(vform);
  const int shift = esize - 1;
  const int rounding_bias = round ? (1 << (esize - 2)) : 0;
  const int64_t max_value = MaxIntFromFormat(vform);
  const int64_t min_value = MinIntFromFormat(vform);

  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; lane++) {
    int64_t accum = dst.Int(vform, lane) << shift;
    const int64_t product = src1.Int(vform, lane) * src2.Int(vform, lane);

    accum = sub_op ? (accum - product) : (accum + product);
    accum += rounding_bias;
    accum >>= shift;

    // Clamp to the lane's signed range.
    if (accum > max_value) {
      accum = max_value;
    } else if (accum < min_value) {
      accum = min_value;
    }
    dst.SetInt(vform, lane, accum);
  }
  return dst;
}
4236

4237

4238
// Accumulating variant: forwards to sqrdmlash with sub_op == false.
LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2,
                                   bool round) {
  const bool subtract_product = false;
  return sqrdmlash(vform, dst, src1, src2, round, subtract_product);
}
4245

4246

4247
// Subtracting variant: forwards to sqrdmlash with sub_op == true.
LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2,
                                   bool round) {
  const bool subtract_product = true;
  return sqrdmlash(vform, dst, src1, src2, round, subtract_product);
}
4254

4255

4256
// Non-rounding variant: forwards to sqrdmulh with rounding disabled.
LogicVRegister Simulator::sqdmulh(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2) {
  const bool apply_rounding = false;
  return sqrdmulh(vform, dst, src1, src2, apply_rounding);
}
4262

4263

4264
// Adds src1 and src2 in the double-width format of `vform` into a scratch
// register, then applies shrn with a shift equal to the lane size of `vform`.
LogicVRegister Simulator::addhn(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  const VectorFormat wide_form = VectorFormatDoubleWidth(vform);
  SimVRegister wide_sum;
  add(wide_form, wide_sum, src1, src2);

  const int narrow_bits = LaneSizeInBitsFromFormat(vform);
  shrn(vform, dst, wide_sum, narrow_bits);
  return dst;
}
4273

4274

4275
// Adds src1 and src2 in the double-width format of the half-lane version of
// `vform` into a scratch register, then applies shrn2 with a shift equal to
// the lane size of `vform`.
LogicVRegister Simulator::addhn2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  const VectorFormat wide_form =
      VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
  SimVRegister wide_sum;
  add(wide_form, wide_sum, src1, src2);

  const int narrow_bits = LaneSizeInBitsFromFormat(vform);
  shrn2(vform, dst, wide_sum, narrow_bits);
  return dst;
}
4284

4285

4286
// Adds src1 and src2 in the double-width format of `vform` into a scratch
// register, then applies rshrn with a shift equal to the lane size of `vform`.
LogicVRegister Simulator::raddhn(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  const VectorFormat wide_form = VectorFormatDoubleWidth(vform);
  SimVRegister wide_sum;
  add(wide_form, wide_sum, src1, src2);

  const int narrow_bits = LaneSizeInBitsFromFormat(vform);
  rshrn(vform, dst, wide_sum, narrow_bits);
  return dst;
}
4295

4296

4297
// Adds src1 and src2 in the double-width format of the half-lane version of
// `vform` into a scratch register, then applies rshrn2 with a shift equal to
// the lane size of `vform`.
LogicVRegister Simulator::raddhn2(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2) {
  const VectorFormat wide_form =
      VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
  SimVRegister wide_sum;
  add(wide_form, wide_sum, src1, src2);

  const int narrow_bits = LaneSizeInBitsFromFormat(vform);
  rshrn2(vform, dst, wide_sum, narrow_bits);
  return dst;
}
4306

4307

4308
// Subtracts src2 from src1 in the double-width format of `vform` into a
// scratch register, then applies shrn with a shift equal to the lane size of
// `vform`.
LogicVRegister Simulator::subhn(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  const VectorFormat wide_form = VectorFormatDoubleWidth(vform);
  SimVRegister wide_diff;
  sub(wide_form, wide_diff, src1, src2);

  const int narrow_bits = LaneSizeInBitsFromFormat(vform);
  shrn(vform, dst, wide_diff, narrow_bits);
  return dst;
}
4317

4318

4319
// Subtracts src2 from src1 in the double-width format of the half-lane version
// of `vform` into a scratch register, then applies shrn2 with a shift equal to
// the lane size of `vform`.
LogicVRegister Simulator::subhn2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  const VectorFormat wide_form =
      VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
  SimVRegister wide_diff;
  sub(wide_form, wide_diff, src1, src2);

  const int narrow_bits = LaneSizeInBitsFromFormat(vform);
  shrn2(vform, dst, wide_diff, narrow_bits);
  return dst;
}
4328

4329

4330
// Subtracts src2 from src1 in the double-width format of `vform` into a
// scratch register, then applies rshrn with a shift equal to the lane size of
// `vform`.
LogicVRegister Simulator::rsubhn(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  const VectorFormat wide_form = VectorFormatDoubleWidth(vform);
  SimVRegister wide_diff;
  sub(wide_form, wide_diff, src1, src2);

  const int narrow_bits = LaneSizeInBitsFromFormat(vform);
  rshrn(vform, dst, wide_diff, narrow_bits);
  return dst;
}
4339

4340

4341
// Subtracts src2 from src1 in the double-width format of the half-lane version
// of `vform` into a scratch register, then applies rshrn2 with a shift equal
// to the lane size of `vform`.
LogicVRegister Simulator::rsubhn2(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2) {
  const VectorFormat wide_form =
      VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
  SimVRegister wide_diff;
  sub(wide_form, wide_diff, src1, src2);

  const int narrow_bits = LaneSizeInBitsFromFormat(vform);
  rshrn2(vform, dst, wide_diff, narrow_bits);
  return dst;
}
4350

4351

4352
// For each lane pair (2k, 2k + 1), writes src1[2k] to dst[2k] and src2[2k] to
// dst[2k + 1]. Results are staged in a local buffer so that the sources are
// fully read before dst is written.
LogicVRegister Simulator::trn1(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  uint64_t staged[kZRegMaxSizeInBytes] = {};
  const int lane_count = LaneCountFromFormat(vform);

  // `even` walks the first lane of every complete pair.
  for (int even = 0; (even + 1) < lane_count; even += 2) {
    staged[even] = src1.Uint(vform, even);
    staged[even + 1] = src2.Uint(vform, even);
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, staged[lane]);
  }
  return dst;
}
4370

4371

4372
// For each lane pair (2k, 2k + 1), writes src1[2k + 1] to dst[2k] and
// src2[2k + 1] to dst[2k + 1]. Results are staged in a local buffer so that
// the sources are fully read before dst is written.
LogicVRegister Simulator::trn2(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  uint64_t staged[kZRegMaxSizeInBytes] = {};
  const int lane_count = LaneCountFromFormat(vform);

  // `even` walks the first lane of every complete pair; the odd lane of the
  // pair supplies both outputs.
  for (int even = 0; (even + 1) < lane_count; even += 2) {
    const int odd = even + 1;
    staged[even] = src1.Uint(vform, odd);
    staged[odd] = src2.Uint(vform, odd);
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, staged[lane]);
  }
  return dst;
}
4390

4391

4392
// Interleaves the first lane_count / 2 lanes of src1 and src2:
// dst[2k] = src1[k], dst[2k + 1] = src2[k]. Results are staged in a local
// buffer so that the sources are fully read before dst is written.
LogicVRegister Simulator::zip1(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  uint64_t staged[kZRegMaxSizeInBytes] = {};
  const int lane_count = LaneCountFromFormat(vform);
  const int half = lane_count / 2;

  uint64_t* out = staged;
  for (int k = 0; k < half; ++k) {
    *out++ = src1.Uint(vform, k);
    *out++ = src2.Uint(vform, k);
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, staged[lane]);
  }
  return dst;
}
4410

4411

4412
// Interleaves the lanes of src1 and src2 starting at index lane_count / 2:
// dst[2k] = src1[half + k], dst[2k + 1] = src2[half + k]. Results are staged
// in a local buffer so that the sources are fully read before dst is written.
LogicVRegister Simulator::zip2(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  uint64_t staged[kZRegMaxSizeInBytes] = {};
  const int lane_count = LaneCountFromFormat(vform);
  const int half = lane_count / 2;

  uint64_t* out = staged;
  for (int k = 0; k < half; ++k) {
    const int upper = half + k;
    *out++ = src1.Uint(vform, upper);
    *out++ = src2.Uint(vform, upper);
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, staged[lane]);
  }
  return dst;
}
4430

4431

4432
// Copies all lanes of src1 followed by all lanes of src2 into one buffer, then
// writes the even-indexed elements of that buffer to dst.
LogicVRegister Simulator::uzp1(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  uint64_t combined[kZRegMaxSizeInBytes * 2];
  const int lane_count = LaneCountFromFormat(vform);

  uint64_t* const from_src1 = combined;
  uint64_t* const from_src2 = combined + lane_count;
  for (int lane = 0; lane < lane_count; ++lane) {
    from_src1[lane] = src1.Uint(vform, lane);
    from_src2[lane] = src2.Uint(vform, lane);
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, combined[2 * lane]);
  }
  return dst;
}
4449

4450

4451
// Copies all lanes of src1 followed by all lanes of src2 into one buffer, then
// writes the odd-indexed elements of that buffer to dst.
LogicVRegister Simulator::uzp2(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  uint64_t combined[kZRegMaxSizeInBytes * 2];
  const int lane_count = LaneCountFromFormat(vform);

  uint64_t* const from_src1 = combined;
  uint64_t* const from_src2 = combined + lane_count;
  for (int lane = 0; lane < lane_count; ++lane) {
    from_src1[lane] = src1.Uint(vform, lane);
    from_src2[lane] = src2.Uint(vform, lane);
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, combined[(2 * lane) + 1]);
  }
  return dst;
}
4468

4469
LogicVRegister Simulator::interleave_top_bottom(VectorFormat vform,
                                                LogicVRegister dst,
                                                const LogicVRegister& src) {
  // Interleave the bottom and top halves of `src`. Given the vector:
  //
  //   [ ... | F | D | B | ... | E | C | A ]
  //
  // in which B is the first element of the top half, the result is:
  //
  //   [ ... | ... | F | E | D | C | B | A ]

  uint64_t interleaved[kZRegMaxSizeInBytes] = {};
  const int lane_count = LaneCountFromFormat(vform);
  const int half = lane_count / 2;

  // Output pair `k` takes element k of the bottom half, then element k of the
  // top half.
  for (int k = 0; (2 * k) < lane_count; ++k) {
    interleaved[2 * k] = src.Uint(vform, k);
    interleaved[(2 * k) + 1] = src.Uint(vform, half + k);
  }
  dst.SetUintArray(vform, interleaved);
  return dst;
}
4490

4491
template <typename T>
4492
T Simulator::FPNeg(T op) {
4493
  return -op;
4494
}
4495

4496
template <typename T>
4497
T Simulator::FPAdd(T op1, T op2) {
4498
  T result = FPProcessNaNs(op1, op2);
4499
  if (IsNaN(result)) {
4500
    return result;
4501
  }
4502

4503
  if (IsInf(op1) && IsInf(op2) && (op1 != op2)) {
4504
    // inf + -inf returns the default NaN.
4505
    FPProcessException();
4506
    return FPDefaultNaN<T>();
4507
  } else {
4508
    // Other cases should be handled by standard arithmetic.
4509
    return op1 + op2;
4510
  }
4511
}
4512

4513

4514
template <typename T>
4515
T Simulator::FPSub(T op1, T op2) {
4516
  // NaNs should be handled elsewhere.
4517
  VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4518

4519
  if (IsInf(op1) && IsInf(op2) && (op1 == op2)) {
4520
    // inf - inf returns the default NaN.
4521
    FPProcessException();
4522
    return FPDefaultNaN<T>();
4523
  } else {
4524
    // Other cases should be handled by standard arithmetic.
4525
    return op1 - op2;
4526
  }
4527
}
4528

4529
template <typename T>
4530
T Simulator::FPMulNaNs(T op1, T op2) {
4531
  T result = FPProcessNaNs(op1, op2);
4532
  return IsNaN(result) ? result : FPMul(op1, op2);
4533
}
4534

4535
template <typename T>
4536
T Simulator::FPMul(T op1, T op2) {
4537
  // NaNs should be handled elsewhere.
4538
  VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4539

4540
  if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
4541
    // inf * 0.0 returns the default NaN.
4542
    FPProcessException();
4543
    return FPDefaultNaN<T>();
4544
  } else {
4545
    // Other cases should be handled by standard arithmetic.
4546
    return op1 * op2;
4547
  }
4548
}
4549

4550

4551
template <typename T>
4552
T Simulator::FPMulx(T op1, T op2) {
4553
  if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
4554
    // inf * 0.0 returns +/-2.0.
4555
    T two = 2.0;
4556
    return copysign(1.0, op1) * copysign(1.0, op2) * two;
4557
  }
4558
  return FPMul(op1, op2);
4559
}
4560

4561

4562
template <typename T>
4563
T Simulator::FPMulAdd(T a, T op1, T op2) {
4564
  T result = FPProcessNaNs3(a, op1, op2);
4565

4566
  T sign_a = copysign(1.0, a);
4567
  T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
4568
  bool isinf_prod = IsInf(op1) || IsInf(op2);
4569
  bool operation_generates_nan =
4570
      (IsInf(op1) && (op2 == 0.0)) ||                     // inf * 0.0
4571
      (IsInf(op2) && (op1 == 0.0)) ||                     // 0.0 * inf
4572
      (IsInf(a) && isinf_prod && (sign_a != sign_prod));  // inf - inf
4573

4574
  if (IsNaN(result)) {
4575
    // Generated NaNs override quiet NaNs propagated from a.
4576
    if (operation_generates_nan && IsQuietNaN(a)) {
4577
      FPProcessException();
4578
      return FPDefaultNaN<T>();
4579
    } else {
4580
      return result;
4581
    }
4582
  }
4583

4584
  // If the operation would produce a NaN, return the default NaN.
4585
  if (operation_generates_nan) {
4586
    FPProcessException();
4587
    return FPDefaultNaN<T>();
4588
  }
4589

4590
  // Work around broken fma implementations for exact zero results: The sign of
4591
  // exact 0.0 results is positive unless both a and op1 * op2 are negative.
4592
  if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
4593
    return ((sign_a < T(0.0)) && (sign_prod < T(0.0))) ? -0.0 : 0.0;
4594
  }
4595

4596
  result = FusedMultiplyAdd(op1, op2, a);
4597
  VIXL_ASSERT(!IsNaN(result));
4598

4599
  // Work around broken fma implementations for rounded zero results: If a is
4600
  // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
4601
  if ((a == 0.0) && (result == 0.0)) {
4602
    return copysign(0.0, sign_prod);
4603
  }
4604

4605
  return result;
4606
}
4607

4608

4609
template <typename T>
4610
T Simulator::FPDiv(T op1, T op2) {
4611
  // NaNs should be handled elsewhere.
4612
  VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4613

4614
  if ((IsInf(op1) && IsInf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
4615
    // inf / inf and 0.0 / 0.0 return the default NaN.
4616
    FPProcessException();
4617
    return FPDefaultNaN<T>();
4618
  } else {
4619
    if (op2 == 0.0) {
4620
      FPProcessException();
4621
      if (!IsNaN(op1)) {
4622
        double op1_sign = copysign(1.0, op1);
4623
        double op2_sign = copysign(1.0, op2);
4624
        return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
4625
      }
4626
    }
4627

4628
    // Other cases should be handled by standard arithmetic.
4629
    return op1 / op2;
4630
  }
4631
}
4632

4633

4634
template <typename T>
4635
T Simulator::FPSqrt(T op) {
4636
  if (IsNaN(op)) {
4637
    return FPProcessNaN(op);
4638
  } else if (op < T(0.0)) {
4639
    FPProcessException();
4640
    return FPDefaultNaN<T>();
4641
  } else {
4642
    return sqrt(op);
4643
  }
4644
}
4645

4646

4647
template <typename T>
4648
T Simulator::FPMax(T a, T b) {
4649
  T result = FPProcessNaNs(a, b);
4650
  if (IsNaN(result)) return result;
4651

4652
  if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
4653
    // a and b are zero, and the sign differs: return +0.0.
4654
    return 0.0;
4655
  } else {
4656
    return (a > b) ? a : b;
4657
  }
4658
}
4659

4660

4661
template <typename T>
4662
T Simulator::FPMaxNM(T a, T b) {
4663
  if (IsQuietNaN(a) && !IsQuietNaN(b)) {
4664
    a = kFP64NegativeInfinity;
4665
  } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
4666
    b = kFP64NegativeInfinity;
4667
  }
4668

4669
  T result = FPProcessNaNs(a, b);
4670
  return IsNaN(result) ? result : FPMax(a, b);
4671
}
4672

4673

4674
template <typename T>
4675
T Simulator::FPMin(T a, T b) {
4676
  T result = FPProcessNaNs(a, b);
4677
  if (IsNaN(result)) return result;
4678

4679
  if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
4680
    // a and b are zero, and the sign differs: return -0.0.
4681
    return -0.0;
4682
  } else {
4683
    return (a < b) ? a : b;
4684
  }
4685
}
4686

4687

4688
template <typename T>
4689
T Simulator::FPMinNM(T a, T b) {
4690
  if (IsQuietNaN(a) && !IsQuietNaN(b)) {
4691
    a = kFP64PositiveInfinity;
4692
  } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
4693
    b = kFP64PositiveInfinity;
4694
  }
4695

4696
  T result = FPProcessNaNs(a, b);
4697
  return IsNaN(result) ? result : FPMin(a, b);
4698
}
4699

4700

4701
template <typename T>
4702
T Simulator::FPRecipStepFused(T op1, T op2) {
4703
  const T two = 2.0;
4704
  if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
4705
    return two;
4706
  } else if (IsInf(op1) || IsInf(op2)) {
4707
    // Return +inf if signs match, otherwise -inf.
4708
    return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
4709
                                          : kFP64NegativeInfinity;
4710
  } else {
4711
    return FusedMultiplyAdd(op1, op2, two);
4712
  }
4713
}
4714

4715
// True when `value` is a normal floating-point number, i.e. not zero,
// subnormal, infinite or NaN.
template <typename T>
bool IsNormal(T value) {
  return std::fpclassify(value) == FP_NORMAL;
}
4719

4720
template <>
4721
bool IsNormal(SimFloat16 value) {
4722
  uint16_t rawbits = Float16ToRawbits(value);
4723
  uint16_t exp_mask = 0x7c00;
4724
  // Check that the exponent is neither all zeroes or all ones.
4725
  return ((rawbits & exp_mask) != 0) && ((~rawbits & exp_mask) != 0);
4726
}
4727

4728

4729
template <typename T>
4730
T Simulator::FPRSqrtStepFused(T op1, T op2) {
4731
  const T one_point_five = 1.5;
4732
  const T two = 2.0;
4733

4734
  if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
4735
    return one_point_five;
4736
  } else if (IsInf(op1) || IsInf(op2)) {
4737
    // Return +inf if signs match, otherwise -inf.
4738
    return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
4739
                                          : kFP64NegativeInfinity;
4740
  } else {
4741
    // The multiply-add-halve operation must be fully fused, so avoid interim
4742
    // rounding by checking which operand can be losslessly divided by two
4743
    // before doing the multiply-add.
4744
    if (IsNormal(op1 / two)) {
4745
      return FusedMultiplyAdd(op1 / two, op2, one_point_five);
4746
    } else if (IsNormal(op2 / two)) {
4747
      return FusedMultiplyAdd(op1, op2 / two, one_point_five);
4748
    } else {
4749
      // Neither operand is normal after halving: the result is dominated by
4750
      // the addition term, so just return that.
4751
      return one_point_five;
4752
    }
4753
  }
4754
}
4755

4756
int32_t Simulator::FPToFixedJS(double value) {
4757
  // The Z-flag is set when the conversion from double precision floating-point
4758
  // to 32-bit integer is exact. If the source value is +/-Infinity, -0.0, NaN,
4759
  // outside the bounds of a 32-bit integer, or isn't an exact integer then the
4760
  // Z-flag is unset.
4761
  int Z = 1;
4762
  int32_t result;
4763

4764
  if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
4765
      (value == kFP64NegativeInfinity)) {
4766
    // +/- zero and infinity all return zero, however -0 and +/- Infinity also
4767
    // unset the Z-flag.
4768
    result = 0.0;
4769
    if ((value != 0.0) || std::signbit(value)) {
4770
      Z = 0;
4771
    }
4772
  } else if (std::isnan(value)) {
4773
    // NaN values unset the Z-flag and set the result to 0.
4774
    FPProcessNaN(value);
4775
    result = 0;
4776
    Z = 0;
4777
  } else {
4778
    // All other values are converted to an integer representation, rounded
4779
    // toward zero.
4780
    double int_result = std::floor(value);
4781
    double error = value - int_result;
4782

4783
    if ((error != 0.0) && (int_result < 0.0)) {
4784
      int_result++;
4785
    }
4786

4787
    // Constrain the value into the range [INT32_MIN, INT32_MAX]. We can almost
4788
    // write a one-liner with std::round, but the behaviour on ties is incorrect
4789
    // for our purposes.
4790
    double mod_const = static_cast<double>(UINT64_C(1) << 32);
4791
    double mod_error =
4792
        (int_result / mod_const) - std::floor(int_result / mod_const);
4793
    double constrained;
4794
    if (mod_error == 0.5) {
4795
      constrained = INT32_MIN;
4796
    } else {
4797
      constrained = int_result - mod_const * round(int_result / mod_const);
4798
    }
4799

4800
    VIXL_ASSERT(std::floor(constrained) == constrained);
4801
    VIXL_ASSERT(constrained >= INT32_MIN);
4802
    VIXL_ASSERT(constrained <= INT32_MAX);
4803

4804
    // Take the bottom 32 bits of the result as a 32-bit integer.
4805
    result = static_cast<int32_t>(constrained);
4806

4807
    if ((int_result < INT32_MIN) || (int_result > INT32_MAX) ||
4808
        (error != 0.0)) {
4809
      // If the integer result is out of range or the conversion isn't exact,
4810
      // take exception and unset the Z-flag.
4811
      FPProcessException();
4812
      Z = 0;
4813
    }
4814
  }
4815

4816
  ReadNzcv().SetN(0);
4817
  ReadNzcv().SetZ(Z);
4818
  ReadNzcv().SetC(0);
4819
  ReadNzcv().SetV(0);
4820

4821
  return result;
4822
}
4823

4824
double Simulator::FPRoundIntCommon(double value, FPRounding round_mode) {
4825
  VIXL_ASSERT((value != kFP64PositiveInfinity) &&
4826
              (value != kFP64NegativeInfinity));
4827
  VIXL_ASSERT(!IsNaN(value));
4828

4829
  double int_result = std::floor(value);
4830
  double error = value - int_result;
4831
  switch (round_mode) {
4832
    case FPTieAway: {
4833
      // Take care of correctly handling the range ]-0.5, -0.0], which must
4834
      // yield -0.0.
4835
      if ((-0.5 < value) && (value < 0.0)) {
4836
        int_result = -0.0;
4837

4838
      } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
4839
        // If the error is greater than 0.5, or is equal to 0.5 and the integer
4840
        // result is positive, round up.
4841
        int_result++;
4842
      }
4843
      break;
4844
    }
4845
    case FPTieEven: {
4846
      // Take care of correctly handling the range [-0.5, -0.0], which must
4847
      // yield -0.0.
4848
      if ((-0.5 <= value) && (value < 0.0)) {
4849
        int_result = -0.0;
4850

4851
        // If the error is greater than 0.5, or is equal to 0.5 and the integer
4852
        // result is odd, round up.
4853
      } else if ((error > 0.5) ||
4854
                 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
4855
        int_result++;
4856
      }
4857
      break;
4858
    }
4859
    case FPZero: {
4860
      // If value>0 then we take floor(value)
4861
      // otherwise, ceil(value).
4862
      if (value < 0) {
4863
        int_result = ceil(value);
4864
      }
4865
      break;
4866
    }
4867
    case FPNegativeInfinity: {
4868
      // We always use floor(value).
4869
      break;
4870
    }
4871
    case FPPositiveInfinity: {
4872
      // Take care of correctly handling the range ]-1.0, -0.0], which must
4873
      // yield -0.0.
4874
      if ((-1.0 < value) && (value < 0.0)) {
4875
        int_result = -0.0;
4876

4877
        // If the error is non-zero, round up.
4878
      } else if (error > 0.0) {
4879
        int_result++;
4880
      }
4881
      break;
4882
    }
4883
    default:
4884
      VIXL_UNIMPLEMENTED();
4885
  }
4886
  return int_result;
4887
}
4888

4889
double Simulator::FPRoundInt(double value, FPRounding round_mode) {
4890
  if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
4891
      (value == kFP64NegativeInfinity)) {
4892
    return value;
4893
  } else if (IsNaN(value)) {
4894
    return FPProcessNaN(value);
4895
  }
4896
  return FPRoundIntCommon(value, round_mode);
4897
}
4898

4899
double Simulator::FPRoundInt(double value,
4900
                             FPRounding round_mode,
4901
                             FrintMode frint_mode) {
4902
  if (frint_mode == kFrintToInteger) {
4903
    return FPRoundInt(value, round_mode);
4904
  }
4905

4906
  VIXL_ASSERT((frint_mode == kFrintToInt32) || (frint_mode == kFrintToInt64));
4907

4908
  if (value == 0.0) {
4909
    return value;
4910
  }
4911

4912
  if ((value == kFP64PositiveInfinity) || (value == kFP64NegativeInfinity) ||
4913
      IsNaN(value)) {
4914
    if (frint_mode == kFrintToInt32) {
4915
      return INT32_MIN;
4916
    } else {
4917
      return INT64_MIN;
4918
    }
4919
  }
4920

4921
  double result = FPRoundIntCommon(value, round_mode);
4922

4923
  // We want to compare `result > INT64_MAX` below, but INT64_MAX isn't exactly
4924
  // representable as a double, and is rounded to (INT64_MAX + 1) when
4925
  // converted. To avoid this, we compare `result >= int64_max_plus_one`
4926
  // instead; this is safe because `result` is known to be integral, and
4927
  // `int64_max_plus_one` is exactly representable as a double.
4928
  constexpr uint64_t int64_max_plus_one = static_cast<uint64_t>(INT64_MAX) + 1;
4929
  VIXL_STATIC_ASSERT(static_cast<uint64_t>(static_cast<double>(
4930
                         int64_max_plus_one)) == int64_max_plus_one);
4931

4932
  if (frint_mode == kFrintToInt32) {
4933
    if ((result > INT32_MAX) || (result < INT32_MIN)) {
4934
      return INT32_MIN;
4935
    }
4936
  } else if ((result >= int64_max_plus_one) || (result < INT64_MIN)) {
4937
    return INT64_MIN;
4938
  }
4939

4940
  return result;
4941
}
4942

4943
int16_t Simulator::FPToInt16(double value, FPRounding rmode) {
4944
  value = FPRoundInt(value, rmode);
4945
  if (value >= kHMaxInt) {
4946
    return kHMaxInt;
4947
  } else if (value < kHMinInt) {
4948
    return kHMinInt;
4949
  }
4950
  return IsNaN(value) ? 0 : static_cast<int16_t>(value);
4951
}
4952

4953

4954
int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
4955
  value = FPRoundInt(value, rmode);
4956
  if (value >= kWMaxInt) {
4957
    return kWMaxInt;
4958
  } else if (value < kWMinInt) {
4959
    return kWMinInt;
4960
  }
4961
  return IsNaN(value) ? 0 : static_cast<int32_t>(value);
4962
}
4963

4964

4965
int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
4966
  value = FPRoundInt(value, rmode);
4967
  // This is equivalent to "if (value >= kXMaxInt)" but avoids rounding issues
4968
  // as a result of kMaxInt not being representable as a double.
4969
  if (value >= 9223372036854775808.) {
4970
    return kXMaxInt;
4971
  } else if (value < kXMinInt) {
4972
    return kXMinInt;
4973
  }
4974
  return IsNaN(value) ? 0 : static_cast<int64_t>(value);
4975
}
4976

4977

4978
uint16_t Simulator::FPToUInt16(double value, FPRounding rmode) {
4979
  value = FPRoundInt(value, rmode);
4980
  if (value >= kHMaxUInt) {
4981
    return kHMaxUInt;
4982
  } else if (value < 0.0) {
4983
    return 0;
4984
  }
4985
  return IsNaN(value) ? 0 : static_cast<uint16_t>(value);
4986
}
4987

4988

4989
uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
4990
  value = FPRoundInt(value, rmode);
4991
  if (value >= kWMaxUInt) {
4992
    return kWMaxUInt;
4993
  } else if (value < 0.0) {
4994
    return 0;
4995
  }
4996
  return IsNaN(value) ? 0 : static_cast<uint32_t>(value);
4997
}
4998

4999

5000
uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
5001
  value = FPRoundInt(value, rmode);
5002
  // This is equivalent to "if (value >= kXMaxUInt)" but avoids rounding issues
5003
  // as a result of kMaxUInt not being representable as a double.
5004
  if (value >= 18446744073709551616.) {
5005
    return kXMaxUInt;
5006
  } else if (value < 0.0) {
5007
    return 0;
5008
  }
5009
  return IsNaN(value) ? 0 : static_cast<uint64_t>(value);
5010
}
5011

5012

5013
#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN)                \
5014
  template <typename T>                                          \
5015
  LogicVRegister Simulator::FN(VectorFormat vform,               \
5016
                               LogicVRegister dst,               \
5017
                               const LogicVRegister& src1,       \
5018
                               const LogicVRegister& src2) {     \
5019
    dst.ClearForWrite(vform);                                    \
5020
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {       \
5021
      T op1 = src1.Float<T>(i);                                  \
5022
      T op2 = src2.Float<T>(i);                                  \
5023
      T result;                                                  \
5024
      if (PROCNAN) {                                             \
5025
        result = FPProcessNaNs(op1, op2);                        \
5026
        if (!IsNaN(result)) {                                    \
5027
          result = OP(op1, op2);                                 \
5028
        }                                                        \
5029
      } else {                                                   \
5030
        result = OP(op1, op2);                                   \
5031
      }                                                          \
5032
      dst.SetFloat(vform, i, result);                            \
5033
    }                                                            \
5034
    return dst;                                                  \
5035
  }                                                              \
5036
                                                                 \
5037
  LogicVRegister Simulator::FN(VectorFormat vform,               \
5038
                               LogicVRegister dst,               \
5039
                               const LogicVRegister& src1,       \
5040
                               const LogicVRegister& src2) {     \
5041
    if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {          \
5042
      FN<SimFloat16>(vform, dst, src1, src2);                    \
5043
    } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {   \
5044
      FN<float>(vform, dst, src1, src2);                         \
5045
    } else {                                                     \
5046
      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); \
5047
      FN<double>(vform, dst, src1, src2);                        \
5048
    }                                                            \
5049
    return dst;                                                  \
5050
  }
5051
NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
5052
#undef DEFINE_NEON_FP_VECTOR_OP
5053

5054

5055
// Multiplies src1 by src2 with fmul into a scratch register, then writes the
// fneg of that product to dst.
LogicVRegister Simulator::fnmul(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister scratch;
  return fneg(vform, dst, fmul(vform, scratch, src1, src2));
}
5063

5064

5065
template <typename T>
5066
LogicVRegister Simulator::frecps(VectorFormat vform,
5067
                                 LogicVRegister dst,
5068
                                 const LogicVRegister& src1,
5069
                                 const LogicVRegister& src2) {
5070
  dst.ClearForWrite(vform);
5071
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5072
    T op1 = -src1.Float<T>(i);
5073
    T op2 = src2.Float<T>(i);
5074
    T result = FPProcessNaNs(op1, op2);
5075
    dst.SetFloat(vform, i, IsNaN(result) ? result : FPRecipStepFused(op1, op2));
5076
  }
5077
  return dst;
5078
}
5079

5080

5081
// Selects the frecps<T> instantiation (SimFloat16, float or double) that
// matches the lane size of `vform`.
LogicVRegister Simulator::frecps(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  const auto lane_bits = LaneSizeInBitsFromFormat(vform);
  if (lane_bits == kHRegSize) {
    frecps<SimFloat16>(vform, dst, src1, src2);
  } else if (lane_bits == kSRegSize) {
    frecps<float>(vform, dst, src1, src2);
  } else {
    VIXL_ASSERT(lane_bits == kDRegSize);
    frecps<double>(vform, dst, src1, src2);
  }
  return dst;
}
5095

5096

5097
template <typename T>
5098
LogicVRegister Simulator::frsqrts(VectorFormat vform,
5099
                                  LogicVRegister dst,
5100
                                  const LogicVRegister& src1,
5101
                                  const LogicVRegister& src2) {
5102
  dst.ClearForWrite(vform);
5103
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5104
    T op1 = -src1.Float<T>(i);
5105
    T op2 = src2.Float<T>(i);
5106
    T result = FPProcessNaNs(op1, op2);
5107
    dst.SetFloat(vform, i, IsNaN(result) ? result : FPRSqrtStepFused(op1, op2));
5108
  }
5109
  return dst;
5110
}
5111

5112

5113
// Selects the lane-typed frsqrts<T> from the lane size of vform:
// H lanes use SimFloat16, S lanes use float and D lanes use double.
LogicVRegister Simulator::frsqrts(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2) {
  switch (LaneSizeInBitsFromFormat(vform)) {
    case kHRegSize:
      frsqrts<SimFloat16>(vform, dst, src1, src2);
      break;
    case kSRegSize:
      frsqrts<float>(vform, dst, src1, src2);
      break;
    default:
      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
      frsqrts<double>(vform, dst, src1, src2);
      break;
  }
  return dst;
}
5127

5128

5129
template <typename T>
5130
LogicVRegister Simulator::fcmp(VectorFormat vform,
5131
                               LogicVRegister dst,
5132
                               const LogicVRegister& src1,
5133
                               const LogicVRegister& src2,
5134
                               Condition cond) {
5135
  dst.ClearForWrite(vform);
5136
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5137
    bool result = false;
5138
    T op1 = src1.Float<T>(i);
5139
    T op2 = src2.Float<T>(i);
5140
    bool unordered = IsNaN(FPProcessNaNs(op1, op2));
5141

5142
    switch (cond) {
5143
      case eq:
5144
        result = (op1 == op2);
5145
        break;
5146
      case ge:
5147
        result = (op1 >= op2);
5148
        break;
5149
      case gt:
5150
        result = (op1 > op2);
5151
        break;
5152
      case le:
5153
        result = (op1 <= op2);
5154
        break;
5155
      case lt:
5156
        result = (op1 < op2);
5157
        break;
5158
      case ne:
5159
        result = (op1 != op2);
5160
        break;
5161
      case uo:
5162
        result = unordered;
5163
        break;
5164
      default:
5165
        // Other conditions are defined in terms of those above.
5166
        VIXL_UNREACHABLE();
5167
        break;
5168
    }
5169

5170
    if (result && unordered) {
5171
      // Only `uo` and `ne` can be true for unordered comparisons.
5172
      VIXL_ASSERT((cond == uo) || (cond == ne));
5173
    }
5174

5175
    dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
5176
  }
5177
  return dst;
5178
}
5179

5180

5181
// Selects the lane-typed fcmp<T> from the lane size of vform:
// H lanes use SimFloat16, S lanes use float and D lanes use double.
LogicVRegister Simulator::fcmp(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2,
                               Condition cond) {
  switch (LaneSizeInBitsFromFormat(vform)) {
    case kHRegSize:
      fcmp<SimFloat16>(vform, dst, src1, src2, cond);
      break;
    case kSRegSize:
      fcmp<float>(vform, dst, src1, src2, cond);
      break;
    default:
      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
      fcmp<double>(vform, dst, src1, src2, cond);
      break;
  }
  return dst;
}
5196

5197

5198
// Compares every lane of src against zero using `cond`. A scratch register is
// filled with the raw bits of 0.0 (in the lane's floating-point type) by
// dup_immediate and then used as the second operand of fcmp<T>.
LogicVRegister Simulator::fcmp_zero(VectorFormat vform,
                                    LogicVRegister dst,
                                    const LogicVRegister& src,
                                    Condition cond) {
  SimVRegister zero_storage;
  switch (LaneSizeInBitsFromFormat(vform)) {
    case kHRegSize: {
      LogicVRegister zeros =
          dup_immediate(vform,
                        zero_storage,
                        Float16ToRawbits(SimFloat16(0.0)));
      fcmp<SimFloat16>(vform, dst, src, zeros, cond);
      break;
    }
    case kSRegSize: {
      LogicVRegister zeros =
          dup_immediate(vform, zero_storage, FloatToRawbits(0.0));
      fcmp<float>(vform, dst, src, zeros, cond);
      break;
    }
    default: {
      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
      LogicVRegister zeros =
          dup_immediate(vform, zero_storage, DoubleToRawbits(0.0));
      fcmp<double>(vform, dst, src, zeros, cond);
      break;
    }
  }
  return dst;
}
5217

5218

5219
// Absolute compare: takes the absolute value of both sources into scratch
// registers with fabs_<T>, then compares those with fcmp<T> using `cond`.
LogicVRegister Simulator::fabscmp(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2,
                                  Condition cond) {
  SimVRegister scratch1, scratch2;
  switch (LaneSizeInBitsFromFormat(vform)) {
    case kHRegSize: {
      LogicVRegister magnitude1 = fabs_<SimFloat16>(vform, scratch1, src1);
      LogicVRegister magnitude2 = fabs_<SimFloat16>(vform, scratch2, src2);
      fcmp<SimFloat16>(vform, dst, magnitude1, magnitude2, cond);
      break;
    }
    case kSRegSize: {
      LogicVRegister magnitude1 = fabs_<float>(vform, scratch1, src1);
      LogicVRegister magnitude2 = fabs_<float>(vform, scratch2, src2);
      fcmp<float>(vform, dst, magnitude1, magnitude2, cond);
      break;
    }
    default: {
      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
      LogicVRegister magnitude1 = fabs_<double>(vform, scratch1, src1);
      LogicVRegister magnitude2 = fabs_<double>(vform, scratch2, src2);
      fcmp<double>(vform, dst, magnitude1, magnitude2, cond);
      break;
    }
  }
  return dst;
}
5241

5242

5243
template <typename T>
5244
LogicVRegister Simulator::fmla(VectorFormat vform,
5245
                               LogicVRegister dst,
5246
                               const LogicVRegister& srca,
5247
                               const LogicVRegister& src1,
5248
                               const LogicVRegister& src2) {
5249
  dst.ClearForWrite(vform);
5250
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5251
    T op1 = src1.Float<T>(i);
5252
    T op2 = src2.Float<T>(i);
5253
    T acc = srca.Float<T>(i);
5254
    T result = FPMulAdd(acc, op1, op2);
5255
    dst.SetFloat(vform, i, result);
5256
  }
5257
  return dst;
5258
}
5259

5260

5261
// Selects the lane-typed fmla<T> from the lane size of vform:
// H lanes use SimFloat16, S lanes use float and D lanes use double.
LogicVRegister Simulator::fmla(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& srca,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  switch (LaneSizeInBitsFromFormat(vform)) {
    case kHRegSize:
      fmla<SimFloat16>(vform, dst, srca, src1, src2);
      break;
    case kSRegSize:
      fmla<float>(vform, dst, srca, src1, src2);
      break;
    default:
      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
      fmla<double>(vform, dst, srca, src1, src2);
      break;
  }
  return dst;
}
5276

5277

5278
template <typename T>
5279
LogicVRegister Simulator::fmls(VectorFormat vform,
5280
                               LogicVRegister dst,
5281
                               const LogicVRegister& srca,
5282
                               const LogicVRegister& src1,
5283
                               const LogicVRegister& src2) {
5284
  dst.ClearForWrite(vform);
5285
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5286
    T op1 = -src1.Float<T>(i);
5287
    T op2 = src2.Float<T>(i);
5288
    T acc = srca.Float<T>(i);
5289
    T result = FPMulAdd(acc, op1, op2);
5290
    dst.SetFloat(i, result);
5291
  }
5292
  return dst;
5293
}
5294

5295

5296
// Selects the lane-typed fmls<T> from the lane size of vform:
// H lanes use SimFloat16, S lanes use float and D lanes use double.
LogicVRegister Simulator::fmls(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& srca,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  switch (LaneSizeInBitsFromFormat(vform)) {
    case kHRegSize:
      fmls<SimFloat16>(vform, dst, srca, src1, src2);
      break;
    case kSRegSize:
      fmls<float>(vform, dst, srca, src1, src2);
      break;
    default:
      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
      fmls<double>(vform, dst, srca, src1, src2);
      break;
  }
  return dst;
}
5311

5312

5313
// FMLAL (vector form): for every single-precision lane i of dst, converts the
// half-precision lanes i of src1 and src2 to float (FPToFloat with
// kIgnoreDefaultNaN) and accumulates their product into dst[i] with FPMulAdd.
// vform must have S-sized lanes.
//
// dst may be the same register as src1 or src2. Storing 32-bit lane i
// overwrites the 16-bit lanes 2i and 2i+1 of that register, so the lanes are
// processed from the highest index downwards: every source lane that is still
// to be read (index < i) has then not been touched by an earlier store.
LogicVRegister Simulator::fmlal(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
  dst.ClearForWrite(vform);
  for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
    float op1 = FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
    float op2 = FPToFloat(src2.Float<SimFloat16>(i), kIgnoreDefaultNaN);
    float acc = dst.Float<float>(i);
    float result = FPMulAdd(acc, op1, op2);
    dst.SetFloat(i, result);
  }
  return dst;
}
5328

5329

5330
// FMLAL2 (vector form): like fmlal, but the half-precision operands are taken
// from the lanes starting at index LaneCountFromFormat(vform), i.e. lane i of
// dst accumulates the product of source lanes (lane_count + i).
// vform must have S-sized lanes.
LogicVRegister Simulator::fmlal2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const int upper = lane_count + lane;
    const float lhs =
        FPToFloat(src1.Float<SimFloat16>(upper), kIgnoreDefaultNaN);
    const float rhs =
        FPToFloat(src2.Float<SimFloat16>(upper), kIgnoreDefaultNaN);
    const float addend = dst.Float<float>(lane);
    const float fused = FPMulAdd(addend, lhs, rhs);
    dst.SetFloat(lane, fused);
  }
  return dst;
}
5346

5347

5348
// FMLSL (vector form): for every single-precision lane i of dst, converts the
// half-precision lanes i of src1 and src2 to float (FPToFloat with
// kIgnoreDefaultNaN), negates the src1 value and accumulates the product into
// dst[i] with FPMulAdd. vform must have S-sized lanes.
//
// dst may be the same register as src1 or src2. Storing 32-bit lane i
// overwrites the 16-bit lanes 2i and 2i+1 of that register, so the lanes are
// processed from the highest index downwards: every source lane that is still
// to be read (index < i) has then not been touched by an earlier store.
LogicVRegister Simulator::fmlsl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
  dst.ClearForWrite(vform);
  for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
    float op1 = -FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
    float op2 = FPToFloat(src2.Float<SimFloat16>(i), kIgnoreDefaultNaN);
    float acc = dst.Float<float>(i);
    float result = FPMulAdd(acc, op1, op2);
    dst.SetFloat(i, result);
  }
  return dst;
}
5363

5364

5365
// FMLSL2 (vector form): like fmlsl, but the half-precision operands are taken
// from the lanes starting at index LaneCountFromFormat(vform), i.e. lane i of
// dst accumulates the product of the negated src1 lane and the src2 lane at
// index (lane_count + i). vform must have S-sized lanes.
LogicVRegister Simulator::fmlsl2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
  dst.ClearForWrite(vform);
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const int upper = lane_count + lane;
    const float negated_lhs =
        -FPToFloat(src1.Float<SimFloat16>(upper), kIgnoreDefaultNaN);
    const float rhs =
        FPToFloat(src2.Float<SimFloat16>(upper), kIgnoreDefaultNaN);
    const float addend = dst.Float<float>(lane);
    const float fused = FPMulAdd(addend, negated_lhs, rhs);
    dst.SetFloat(lane, fused);
  }
  return dst;
}
5381

5382

5383
// FMLAL (by element): for every single-precision lane i of dst, converts the
// half-precision lane i of src1 and lane `index` of src2 to float (FPToFloat
// with kIgnoreDefaultNaN) and accumulates their product into dst[i] with
// FPMulAdd. vform must have S-sized lanes.
//
// dst may be the same register as either source, which is handled as follows:
//  - The indexed src2 element is read before dst.ClearForWrite(vform), since
//    `index` may select a lane that lies outside the part of the register
//    covered by vform.
//    NOTE(review): assumes ClearForWrite zeroes the bytes of dst above vform's
//    register size - confirm against LogicVRegister::ClearForWrite.
//  - Storing 32-bit lane i overwrites 16-bit lanes 2i and 2i+1, so lanes are
//    processed from the highest index downwards; the src1 lanes still to be
//    read (index < i) are then untouched by earlier stores.
LogicVRegister Simulator::fmlal(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2,
                                int index) {
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
  float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
  dst.ClearForWrite(vform);
  for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
    float op1 = FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
    float acc = dst.Float<float>(i);
    float result = FPMulAdd(acc, op1, op2);
    dst.SetFloat(i, result);
  }
  return dst;
}
5399

5400

5401
// FMLAL2 (by element): for every single-precision lane i of dst, converts the
// half-precision lane (lane_count + i) of src1 and lane `index` of src2 to
// float (FPToFloat with kIgnoreDefaultNaN) and accumulates their product into
// dst[i] with FPMulAdd. vform must have S-sized lanes.
//
// The indexed src2 element is read before dst.ClearForWrite(vform): dst may
// be the same register as src2, and `index` may select a lane that lies
// outside the part of the register covered by vform.
// NOTE(review): assumes ClearForWrite zeroes the bytes of dst above vform's
// register size - confirm against LogicVRegister::ClearForWrite.
LogicVRegister Simulator::fmlal2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2,
                                 int index) {
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
  float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    int src = i + LaneCountFromFormat(vform);
    float op1 = FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
    float acc = dst.Float<float>(i);
    float result = FPMulAdd(acc, op1, op2);
    dst.SetFloat(i, result);
  }
  return dst;
}
5418

5419

5420
// FMLSL (by element): for every single-precision lane i of dst, converts the
// half-precision lane i of src1 and lane `index` of src2 to float (FPToFloat
// with kIgnoreDefaultNaN), negates the src1 value and accumulates the product
// into dst[i] with FPMulAdd. vform must have S-sized lanes.
//
// dst may be the same register as either source, which is handled as follows:
//  - The indexed src2 element is read before dst.ClearForWrite(vform), since
//    `index` may select a lane that lies outside the part of the register
//    covered by vform.
//    NOTE(review): assumes ClearForWrite zeroes the bytes of dst above vform's
//    register size - confirm against LogicVRegister::ClearForWrite.
//  - Storing 32-bit lane i overwrites 16-bit lanes 2i and 2i+1, so lanes are
//    processed from the highest index downwards; the src1 lanes still to be
//    read (index < i) are then untouched by earlier stores.
LogicVRegister Simulator::fmlsl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2,
                                int index) {
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
  float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
  dst.ClearForWrite(vform);
  for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
    float op1 = -FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
    float acc = dst.Float<float>(i);
    float result = FPMulAdd(acc, op1, op2);
    dst.SetFloat(i, result);
  }
  return dst;
}
5436

5437

5438
// FMLSL2 (by element): for every single-precision lane i of dst, converts the
// half-precision lane (lane_count + i) of src1 and lane `index` of src2 to
// float (FPToFloat with kIgnoreDefaultNaN), negates the src1 value and
// accumulates the product into dst[i] with FPMulAdd. vform must have S-sized
// lanes.
//
// The indexed src2 element is read before dst.ClearForWrite(vform): dst may
// be the same register as src2, and `index` may select a lane that lies
// outside the part of the register covered by vform.
// NOTE(review): assumes ClearForWrite zeroes the bytes of dst above vform's
// register size - confirm against LogicVRegister::ClearForWrite.
LogicVRegister Simulator::fmlsl2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2,
                                 int index) {
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
  float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    int src = i + LaneCountFromFormat(vform);
    float op1 = -FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
    float acc = dst.Float<float>(i);
    float result = FPMulAdd(acc, op1, op2);
    dst.SetFloat(i, result);
  }
  return dst;
}
5455

5456

5457
template <typename T>
5458
LogicVRegister Simulator::fneg(VectorFormat vform,
5459
                               LogicVRegister dst,
5460
                               const LogicVRegister& src) {
5461
  dst.ClearForWrite(vform);
5462
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5463
    T op = src.Float<T>(i);
5464
    op = -op;
5465
    dst.SetFloat(i, op);
5466
  }
5467
  return dst;
5468
}
5469

5470

5471
// Selects the lane-typed fneg<T> from the lane size of vform:
// H lanes use SimFloat16, S lanes use float and D lanes use double.
LogicVRegister Simulator::fneg(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src) {
  switch (LaneSizeInBitsFromFormat(vform)) {
    case kHRegSize:
      fneg<SimFloat16>(vform, dst, src);
      break;
    case kSRegSize:
      fneg<float>(vform, dst, src);
      break;
    default:
      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
      fneg<double>(vform, dst, src);
      break;
  }
  return dst;
}
5484

5485

5486
template <typename T>
5487
LogicVRegister Simulator::fabs_(VectorFormat vform,
5488
                                LogicVRegister dst,
5489
                                const LogicVRegister& src) {
5490
  dst.ClearForWrite(vform);
5491
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5492
    T op = src.Float<T>(i);
5493
    if (copysign(1.0, op) < 0.0) {
5494
      op = -op;
5495
    }
5496
    dst.SetFloat(i, op);
5497
  }
5498
  return dst;
5499
}
5500

5501

5502
// Selects the lane-typed fabs_<T> from the lane size of vform:
// H lanes use SimFloat16, S lanes use float and D lanes use double.
LogicVRegister Simulator::fabs_(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  switch (LaneSizeInBitsFromFormat(vform)) {
    case kHRegSize:
      fabs_<SimFloat16>(vform, dst, src);
      break;
    case kSRegSize:
      fabs_<float>(vform, dst, src);
      break;
    default:
      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
      fabs_<double>(vform, dst, src);
      break;
  }
  return dst;
}
5515

5516

5517
// Absolute difference: fsub writes (src1 - src2) to a scratch register and
// fabs_ then writes its absolute value to dst.
LogicVRegister Simulator::fabd(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  SimVRegister scratch;
  LogicVRegister difference = fsub(vform, scratch, src1, src2);
  fabs_(vform, dst, difference);
  return dst;
}
5526

5527

5528
// Writes FPSqrt(src[i]) to every lane of dst, reading and writing the lanes
// as SimFloat16, float or double according to the lane size of vform.
LogicVRegister Simulator::fsqrt(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  switch (LaneSizeInBitsFromFormat(vform)) {
    case kHRegSize:
      for (int lane = 0; lane < LaneCountFromFormat(vform); ++lane) {
        SimFloat16 root = FPSqrt(src.Float<SimFloat16>(lane));
        dst.SetFloat(lane, root);
      }
      break;
    case kSRegSize:
      for (int lane = 0; lane < LaneCountFromFormat(vform); ++lane) {
        float root = FPSqrt(src.Float<float>(lane));
        dst.SetFloat(lane, root);
      }
      break;
    default:
      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
      for (int lane = 0; lane < LaneCountFromFormat(vform); ++lane) {
        double root = FPSqrt(src.Float<double>(lane));
        dst.SetFloat(lane, root);
      }
      break;
  }
  return dst;
}
5551

5552

5553
#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP)                                    \
5554
  LogicVRegister Simulator::FNP(VectorFormat vform,                            \
5555
                                LogicVRegister dst,                            \
5556
                                const LogicVRegister& src1,                    \
5557
                                const LogicVRegister& src2) {                  \
5558
    SimVRegister temp1, temp2;                                                 \
5559
    uzp1(vform, temp1, src1, src2);                                            \
5560
    uzp2(vform, temp2, src1, src2);                                            \
5561
    FN(vform, dst, temp1, temp2);                                              \
5562
    if (IsSVEFormat(vform)) {                                                  \
5563
      interleave_top_bottom(vform, dst, dst);                                  \
5564
    }                                                                          \
5565
    return dst;                                                                \
5566
  }                                                                            \
5567
                                                                               \
5568
  LogicVRegister Simulator::FNP(VectorFormat vform,                            \
5569
                                LogicVRegister dst,                            \
5570
                                const LogicVRegister& src) {                   \
5571
    if (vform == kFormatH) {                                                   \
5572
      SimFloat16 result(OP(SimFloat16(RawbitsToFloat16(src.Uint(vform, 0))),   \
5573
                           SimFloat16(RawbitsToFloat16(src.Uint(vform, 1))))); \
5574
      dst.SetUint(vform, 0, Float16ToRawbits(result));                         \
5575
    } else if (vform == kFormatS) {                                            \
5576
      float result = OP(src.Float<float>(0), src.Float<float>(1));             \
5577
      dst.SetFloat(0, result);                                                 \
5578
    } else {                                                                   \
5579
      VIXL_ASSERT(vform == kFormatD);                                          \
5580
      double result = OP(src.Float<double>(0), src.Float<double>(1));          \
5581
      dst.SetFloat(0, result);                                                 \
5582
    }                                                                          \
5583
    dst.ClearForWrite(vform);                                                  \
5584
    return dst;                                                                \
5585
  }
5586
NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
5587
#undef DEFINE_NEON_FP_PAIR_OP
5588

5589
template <typename T>
5590
LogicVRegister Simulator::FPPairedAcrossHelper(VectorFormat vform,
5591
                                               LogicVRegister dst,
5592
                                               const LogicVRegister& src,
5593
                                               typename TFPPairOp<T>::type fn,
5594
                                               uint64_t inactive_value) {
5595
  int lane_count = LaneCountFromFormat(vform);
5596
  T result[kZRegMaxSizeInBytes / sizeof(T)];
5597
  // Copy the source vector into a working array. Initialise the unused elements
5598
  // at the end of the array to the same value that a false predicate would set.
5599
  for (int i = 0; i < static_cast<int>(ArrayLength(result)); i++) {
5600
    result[i] = (i < lane_count)
5601
                    ? src.Float<T>(i)
5602
                    : RawbitsWithSizeToFP<T>(sizeof(T) * 8, inactive_value);
5603
  }
5604

5605
  // Pairwise reduce the elements to a single value, using the pair op function
5606
  // argument.
5607
  for (int step = 1; step < lane_count; step *= 2) {
5608
    for (int i = 0; i < lane_count; i += step * 2) {
5609
      result[i] = (this->*fn)(result[i], result[i + step]);
5610
    }
5611
  }
5612
  dst.ClearForWrite(ScalarFormatFromFormat(vform));
5613
  dst.SetFloat<T>(0, result[0]);
5614
  return dst;
5615
}
5616

5617
// Selects the lane-typed FPPairedAcrossHelper<T> from the lane size of vform,
// passing along the pair operation that matches the lane type: fn16 for H
// lanes, fn32 for S lanes and fn64 for D lanes.
LogicVRegister Simulator::FPPairedAcrossHelper(
    VectorFormat vform,
    LogicVRegister dst,
    const LogicVRegister& src,
    typename TFPPairOp<SimFloat16>::type fn16,
    typename TFPPairOp<float>::type fn32,
    typename TFPPairOp<double>::type fn64,
    uint64_t inactive_value) {
  if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
    return FPPairedAcrossHelper<SimFloat16>(vform,
                                            dst,
                                            src,
                                            fn16,
                                            inactive_value);
  }
  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
    return FPPairedAcrossHelper<float>(vform, dst, src, fn32, inactive_value);
  }
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
  return FPPairedAcrossHelper<double>(vform, dst, src, fn64, inactive_value);
}
5643

5644
// Across-lanes add: reduces src with FPAdd via FPPairedAcrossHelper. Elements
// beyond the lane count are padded with raw bits 0.
LogicVRegister Simulator::faddv(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  const uint64_t inactive_value = 0;
  return FPPairedAcrossHelper(vform,
                              dst,
                              src,
                              &Simulator::FPAdd<SimFloat16>,
                              &Simulator::FPAdd<float>,
                              &Simulator::FPAdd<double>,
                              inactive_value);
}
5655

5656
// Across-lanes maximum: reduces src with FPMax via FPPairedAcrossHelper.
// Elements beyond the lane count are padded with negative infinity, encoded
// for the lane size of vform.
LogicVRegister Simulator::fmaxv(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  const int lane_bits = LaneSizeInBitsFromFormat(vform);
  const uint64_t padding =
      FPToRawbitsWithSize(lane_bits, kFP64NegativeInfinity);
  return FPPairedAcrossHelper(vform,
                              dst,
                              src,
                              &Simulator::FPMax<SimFloat16>,
                              &Simulator::FPMax<float>,
                              &Simulator::FPMax<double>,
                              padding);
}
5670

5671

5672
// Across-lanes minimum: reduces src with FPMin via FPPairedAcrossHelper.
// Elements beyond the lane count are padded with positive infinity, encoded
// for the lane size of vform.
LogicVRegister Simulator::fminv(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  const int lane_bits = LaneSizeInBitsFromFormat(vform);
  const uint64_t padding =
      FPToRawbitsWithSize(lane_bits, kFP64PositiveInfinity);
  return FPPairedAcrossHelper(vform,
                              dst,
                              src,
                              &Simulator::FPMin<SimFloat16>,
                              &Simulator::FPMin<float>,
                              &Simulator::FPMin<double>,
                              padding);
}
5686

5687

5688
// Across-lanes maximum number: reduces src with FPMaxNM via
// FPPairedAcrossHelper. Elements beyond the lane count are padded with the
// default NaN, encoded for the lane size of vform.
LogicVRegister Simulator::fmaxnmv(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src) {
  const int lane_bits = LaneSizeInBitsFromFormat(vform);
  const uint64_t padding = FPToRawbitsWithSize(lane_bits, kFP64DefaultNaN);
  return FPPairedAcrossHelper(vform,
                              dst,
                              src,
                              &Simulator::FPMaxNM<SimFloat16>,
                              &Simulator::FPMaxNM<float>,
                              &Simulator::FPMaxNM<double>,
                              padding);
}
5701

5702

5703
// Across-lanes minimum number: reduces src with FPMinNM via
// FPPairedAcrossHelper. Elements beyond the lane count are padded with the
// default NaN, encoded for the lane size of vform.
LogicVRegister Simulator::fminnmv(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src) {
  const int lane_bits = LaneSizeInBitsFromFormat(vform);
  const uint64_t padding = FPToRawbitsWithSize(lane_bits, kFP64DefaultNaN);
  return FPPairedAcrossHelper(vform,
                              dst,
                              src,
                              &Simulator::FPMinNM<SimFloat16>,
                              &Simulator::FPMinNM<float>,
                              &Simulator::FPMinNM<double>,
                              padding);
}
5716

5717

5718
// FMUL (by element): multiplies the lanes of src1 by lane `index` of src2.
// The selected element is first broadcast into a scratch register with
// dup_element (reading src2 as a full 8H/4S/2D vector), and the lane-typed
// fmul<T> is then applied to src1 and the broadcast register.
//
// The broadcast is performed before dst.ClearForWrite(vform): dst may be the
// same register as src2, and `index` may select a lane that lies outside the
// part of the register covered by vform, where clearing dst first would
// destroy the element.
// NOTE(review): assumes ClearForWrite zeroes the bytes of dst above vform's
// register size - confirm against LogicVRegister::ClearForWrite.
LogicVRegister Simulator::fmul(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2,
                               int index) {
  SimVRegister temp;
  if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
    LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
    dst.ClearForWrite(vform);
    fmul<SimFloat16>(vform, dst, src1, index_reg);
  } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
    dst.ClearForWrite(vform);
    fmul<float>(vform, dst, src1, index_reg);
  } else {
    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
    dst.ClearForWrite(vform);
    fmul<double>(vform, dst, src1, index_reg);
  }
  return dst;
}
5738

5739

5740
// FMLA (by element): accumulates into dst the product of the lanes of src1
// and lane `index` of src2. The selected element is first broadcast into a
// scratch register with dup_element (reading src2 as a full 8H/4S/2D vector),
// and the lane-typed fmla<T> is then applied with dst as the accumulator.
//
// The broadcast is performed before dst.ClearForWrite(vform): dst may be the
// same register as src2, and `index` may select a lane that lies outside the
// part of the register covered by vform, where clearing dst first would
// destroy the element.
// NOTE(review): assumes ClearForWrite zeroes the bytes of dst above vform's
// register size - confirm against LogicVRegister::ClearForWrite.
LogicVRegister Simulator::fmla(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2,
                               int index) {
  SimVRegister temp;
  if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
    LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
    dst.ClearForWrite(vform);
    fmla<SimFloat16>(vform, dst, dst, src1, index_reg);
  } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
    dst.ClearForWrite(vform);
    fmla<float>(vform, dst, dst, src1, index_reg);
  } else {
    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
    dst.ClearForWrite(vform);
    fmla<double>(vform, dst, dst, src1, index_reg);
  }
  return dst;
}
5760

5761

5762
// FMLS (by element): subtracts from dst the product of the lanes of src1 and
// lane `index` of src2. The selected element is first broadcast into a
// scratch register with dup_element (reading src2 as a full 8H/4S/2D vector),
// and the lane-typed fmls<T> is then applied with dst as the accumulator.
//
// The broadcast is performed before dst.ClearForWrite(vform): dst may be the
// same register as src2, and `index` may select a lane that lies outside the
// part of the register covered by vform, where clearing dst first would
// destroy the element.
// NOTE(review): assumes ClearForWrite zeroes the bytes of dst above vform's
// register size - confirm against LogicVRegister::ClearForWrite.
LogicVRegister Simulator::fmls(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2,
                               int index) {
  SimVRegister temp;
  if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
    LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
    dst.ClearForWrite(vform);
    fmls<SimFloat16>(vform, dst, dst, src1, index_reg);
  } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
    dst.ClearForWrite(vform);
    fmls<float>(vform, dst, dst, src1, index_reg);
  } else {
    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
    dst.ClearForWrite(vform);
    fmls<double>(vform, dst, dst, src1, index_reg);
  }
  return dst;
}
5782

5783

5784
// FMULX (by element): applies the lane-typed fmulx<T> to the lanes of src1
// and lane `index` of src2. The selected element is first broadcast into a
// scratch register with dup_element (reading src2 as a full 8H/4S/2D vector).
//
// The broadcast is performed before dst.ClearForWrite(vform): dst may be the
// same register as src2, and `index` may select a lane that lies outside the
// part of the register covered by vform, where clearing dst first would
// destroy the element.
// NOTE(review): assumes ClearForWrite zeroes the bytes of dst above vform's
// register size - confirm against LogicVRegister::ClearForWrite.
LogicVRegister Simulator::fmulx(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2,
                                int index) {
  SimVRegister temp;
  if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
    LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
    dst.ClearForWrite(vform);
    fmulx<SimFloat16>(vform, dst, src1, index_reg);
  } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
    dst.ClearForWrite(vform);
    fmulx<float>(vform, dst, src1, index_reg);
  } else {
    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
    dst.ClearForWrite(vform);
    fmulx<double>(vform, dst, src1, index_reg);
  }
  return dst;
}
5804

5805

5806
// Rounds every lane of src to an integral floating-point value with
// FPRoundInt, using `rounding_mode`. When `inexact_exception` is set,
// FPProcessException is called for each non-NaN lane whose rounded value
// differs from its input.
//
// `frint_mode` is forwarded to FPRoundInt for S and D lanes; for H lanes only
// kFrintToInteger is accepted (asserted).
LogicVRegister Simulator::frint(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                FPRounding rounding_mode,
                                bool inexact_exception,
                                FrintMode frint_mode) {
  dst.ClearForWrite(vform);
  switch (LaneSizeInBitsFromFormat(vform)) {
    case kHRegSize:
      VIXL_ASSERT(frint_mode == kFrintToInteger);
      for (int lane = 0; lane < LaneCountFromFormat(vform); ++lane) {
        SimFloat16 value = src.Float<SimFloat16>(lane);
        SimFloat16 integral = FPRoundInt(value, rounding_mode);
        if (inexact_exception && !IsNaN(value) && (value != integral)) {
          FPProcessException();
        }
        dst.SetFloat<SimFloat16>(lane, integral);
      }
      break;
    case kSRegSize:
      for (int lane = 0; lane < LaneCountFromFormat(vform); ++lane) {
        float value = src.Float<float>(lane);
        float integral = FPRoundInt(value, rounding_mode, frint_mode);
        if (inexact_exception && !IsNaN(value) && (value != integral)) {
          FPProcessException();
        }
        dst.SetFloat<float>(lane, integral);
      }
      break;
    default:
      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
      for (int lane = 0; lane < LaneCountFromFormat(vform); ++lane) {
        double value = src.Float<double>(lane);
        double integral = FPRoundInt(value, rounding_mode, frint_mode);
        if (inexact_exception && !IsNaN(value) && (value != integral)) {
          FPProcessException();
        }
        dst.SetFloat<double>(lane, integral);
      }
      break;
  }
  return dst;
}
5846

5847
// Predicated floating-point precision conversion.
//
// Lanes are addressed with the SVE format whose lane size is the larger of
// the source and destination sizes. For each lane active in pg, the low
// src-size bits of the source lane are interpreted as a floating-point value
// (via double), re-encoded at the destination size and stored to dst.
// Inactive lanes of dst are not written.
LogicVRegister Simulator::fcvt(VectorFormat dst_vform,
                               VectorFormat src_vform,
                               LogicVRegister dst,
                               const LogicPRegister& pg,
                               const LogicVRegister& src) {
  const unsigned dst_bits = LaneSizeInBitsFromFormat(dst_vform);
  const unsigned src_bits = LaneSizeInBitsFromFormat(src_vform);
  const VectorFormat vform =
      SVEFormatFromLaneSizeInBits(std::max(dst_bits, src_bits));

  for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
    if (pg.IsActive(vform, lane)) {
      // Keep only the bits that belong to the source encoding.
      const uint64_t encoded_in =
          ExtractUnsignedBitfield64(src_bits - 1, 0, src.Uint(vform, lane));
      const double value = RawbitsWithSizeToFP<double>(src_bits, encoded_in);
      const uint64_t encoded_out = FPToRawbitsWithSize(dst_bits, value);
      dst.SetUint(vform, lane, encoded_out);
    }
  }

  return dst;
}
5874

5875
// Predicated floating-point to signed integer conversion.
//
// For each lane active in pg, the low src_data_size_in_bits bits of the source
// lane are decoded as a floating-point value, multiplied by 2^fbits, and
// converted with the FPToInt16/32/64 helper that matches
// dst_data_size_in_bits. Both sizes must fit in a lane of vform (asserted).
// Any other destination size hits VIXL_UNIMPLEMENTED().
LogicVRegister Simulator::fcvts(VectorFormat vform,
                                unsigned dst_data_size_in_bits,
                                unsigned src_data_size_in_bits,
                                LogicVRegister dst,
                                const LogicPRegister& pg,
                                const LogicVRegister& src,
                                FPRounding round,
                                int fbits) {
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);

  // The fixed-point scale factor is the same for every lane.
  const double scale = std::pow(2.0, fbits);

  for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
    if (pg.IsActive(vform, lane)) {
      const uint64_t encoded =
          ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
                                    0,
                                    src.Uint(vform, lane));
      const double scaled =
          RawbitsWithSizeToFP<double>(src_data_size_in_bits, encoded) * scale;

      switch (dst_data_size_in_bits) {
        case kHRegSize:
          dst.SetInt(vform, lane, FPToInt16(scaled, round));
          break;
        case kSRegSize:
          dst.SetInt(vform, lane, FPToInt32(scaled, round));
          break;
        case kDRegSize:
          dst.SetInt(vform, lane, FPToInt64(scaled, round));
          break;
        default:
          VIXL_UNIMPLEMENTED();
          break;
      }
    }
  }

  return dst;
}
5913

5914
// Unpredicated floating-point to signed integer conversion. Clears dst for
// writing, then forwards to the predicated overload with the source and
// destination sizes both equal to the lane size of vform and the predicate
// returned by GetPTrue().
LogicVRegister Simulator::fcvts(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                FPRounding round,
                                int fbits) {
  dst.ClearForWrite(vform);
  const unsigned lane_size = LaneSizeInBitsFromFormat(vform);
  return fcvts(vform, lane_size, lane_size, dst, GetPTrue(), src, round, fbits);
}
5929

5930
// Predicated floating-point to unsigned integer conversion.
//
// For each lane active in pg, the low src_data_size_in_bits bits of the source
// lane are decoded as a floating-point value, multiplied by 2^fbits, and
// converted with the FPToUInt16/32/64 helper that matches
// dst_data_size_in_bits. Both sizes must fit in a lane of vform (asserted).
// Any other destination size hits VIXL_UNIMPLEMENTED().
LogicVRegister Simulator::fcvtu(VectorFormat vform,
                                unsigned dst_data_size_in_bits,
                                unsigned src_data_size_in_bits,
                                LogicVRegister dst,
                                const LogicPRegister& pg,
                                const LogicVRegister& src,
                                FPRounding round,
                                int fbits) {
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);

  // The fixed-point scale factor is the same for every lane.
  const double scale = std::pow(2.0, fbits);

  for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
    if (pg.IsActive(vform, lane)) {
      const uint64_t encoded =
          ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
                                    0,
                                    src.Uint(vform, lane));
      const double scaled =
          RawbitsWithSizeToFP<double>(src_data_size_in_bits, encoded) * scale;

      switch (dst_data_size_in_bits) {
        case kHRegSize:
          dst.SetUint(vform, lane, FPToUInt16(scaled, round));
          break;
        case kSRegSize:
          dst.SetUint(vform, lane, FPToUInt32(scaled, round));
          break;
        case kDRegSize:
          dst.SetUint(vform, lane, FPToUInt64(scaled, round));
          break;
        default:
          VIXL_UNIMPLEMENTED();
          break;
      }
    }
  }

  return dst;
}
5968

5969
// Unpredicated floating-point to unsigned integer conversion. Clears dst for
// writing, then forwards to the predicated overload with the source and
// destination sizes both equal to the lane size of vform and the predicate
// returned by GetPTrue().
LogicVRegister Simulator::fcvtu(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                FPRounding round,
                                int fbits) {
  dst.ClearForWrite(vform);
  const unsigned lane_size = LaneSizeInBitsFromFormat(vform);
  return fcvtu(vform, lane_size, lane_size, dst, GetPTrue(), src, round, fbits);
}
5984

5985
// Widening conversion of the low source lanes: half to single when vform has
// S-sized lanes, otherwise single to double (D-sized lanes, asserted).
//
// Lanes are processed from the highest index down to zero.
// NOTE(review): presumably so that a destination aliasing the source does not
// overwrite narrow lanes before they are read - confirm.
LogicVRegister Simulator::fcvtl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  const int top_lane = LaneCountFromFormat(vform) - 1;
  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
    for (int lane = top_lane; lane >= 0; lane--) {
      // TODO: Full support for SimFloat16 in SimRegister(s).
      const uint16_t half_bits = src.Float<uint16_t>(lane);
      dst.SetFloat(lane, FPToFloat(RawbitsToFloat16(half_bits), ReadDN()));
    }
  } else {
    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
    for (int lane = top_lane; lane >= 0; lane--) {
      const float single = src.Float<float>(lane);
      dst.SetFloat(lane, FPToDouble(single, ReadDN()));
    }
  }
  return dst;
}
6004

6005

6006
// Widening conversion of the upper source lanes: destination lane i is
// produced from source lane (i + lane count of vform). Half to single when
// vform has S-sized lanes, otherwise single to double (D-sized lanes,
// asserted).
LogicVRegister Simulator::fcvtl2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  const int lanes = LaneCountFromFormat(vform);
  if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
    for (int lane = 0; lane < lanes; lane++) {
      // TODO: Full support for SimFloat16 in SimRegister(s).
      const uint16_t half_bits = src.Float<uint16_t>(lane + lanes);
      dst.SetFloat(lane, FPToFloat(RawbitsToFloat16(half_bits), ReadDN()));
    }
  } else {
    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
    for (int lane = 0; lane < lanes; lane++) {
      const float single = src.Float<float>(lane + lanes);
      dst.SetFloat(lane, FPToDouble(single, ReadDN()));
    }
  }
  return dst;
}
6027

6028

6029
// Narrowing conversion into the low destination lanes, rounding with
// FPTieEven: single to half when vform has H-sized lanes, otherwise double to
// single (S-sized lanes, asserted).
//
// The source is copied to a temporary before dst is cleared, so the lanes are
// read from the copy rather than from src.
LogicVRegister Simulator::fcvtn(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  SimVRegister src_copy;
  LogicVRegister wide = mov(kFormat2D, src_copy, src);
  dst.ClearForWrite(vform);

  const int lanes = LaneCountFromFormat(vform);
  if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
    for (int lane = 0; lane < lanes; lane++) {
      const float single = wide.Float<float>(lane);
      dst.SetFloat(lane,
                   Float16ToRawbits(FPToFloat16(single, FPTieEven, ReadDN())));
    }
  } else {
    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
    for (int lane = 0; lane < lanes; lane++) {
      const double dbl = wide.Float<double>(lane);
      dst.SetFloat(lane, FPToFloat(dbl, FPTieEven, ReadDN()));
    }
  }
  return dst;
}
6050

6051

6052
// Narrowing conversion into the upper half of the destination lanes, rounding
// with FPTieEven: source lane i is written to destination lane
// (i + half the lane count of vform). Single to half when vform has H-sized
// lanes, otherwise double to single (S-sized lanes, asserted).
//
// Lanes are processed from the highest index down to zero.
LogicVRegister Simulator::fcvtn2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  const int half_lanes = LaneCountFromFormat(vform) / 2;
  if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
    for (int lane = half_lanes - 1; lane >= 0; lane--) {
      const float single = src.Float<float>(lane);
      dst.SetFloat(lane + half_lanes,
                   Float16ToRawbits(FPToFloat16(single, FPTieEven, ReadDN())));
    }
  } else {
    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
    for (int lane = half_lanes - 1; lane >= 0; lane--) {
      const double dbl = src.Float<double>(lane);
      dst.SetFloat(lane + half_lanes, FPToFloat(dbl, FPTieEven, ReadDN()));
    }
  }
  return dst;
}
6072

6073

6074
// Double to single narrowing conversion using FPRoundOdd, written to the low
// destination lanes. vform must have S-sized lanes (asserted).
//
// The source is copied to a temporary before dst is cleared. For SVE formats
// the copy is repeated with kFormatVnB and only half of vform's lane count is
// converted.
LogicVRegister Simulator::fcvtxn(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src) {
  SimVRegister src_copy;
  LogicVRegister wide = mov(kFormat2D, src_copy, src);
  int lanes_to_convert = LaneCountFromFormat(vform);
  if (IsSVEFormat(vform)) {
    mov(kFormatVnB, src_copy, src);
    lanes_to_convert /= 2;
  }

  dst.ClearForWrite(vform);
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);

  for (int lane = 0; lane < lanes_to_convert; lane++) {
    const double dbl = wide.Float<double>(lane);
    dst.SetFloat(lane, FPToFloat(dbl, FPRoundOdd, ReadDN()));
  }
  return dst;
}
6093

6094

6095
// Double to single narrowing conversion using FPRoundOdd, written to the upper
// half of the destination lanes: source lane i goes to destination lane
// (i + half the lane count of vform). vform must have S-sized lanes
// (asserted). Lanes are processed from the highest index down to zero.
LogicVRegister Simulator::fcvtxn2(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src) {
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
  dst.ClearForWrite(vform);
  const int half_lanes = LaneCountFromFormat(vform) / 2;
  for (int lane = half_lanes - 1; lane >= 0; lane--) {
    const double dbl = src.Float<double>(lane);
    dst.SetFloat(lane + half_lanes, FPToFloat(dbl, FPRoundOdd, ReadDN()));
  }
  return dst;
}
6107

6108

6109
// Based on reference C function recip_sqrt_estimate from ARM ARM.
6110
double Simulator::recip_sqrt_estimate(double a) {
6111
  int quot0, quot1, s;
6112
  double r;
6113
  if (a < 0.5) {
6114
    quot0 = static_cast<int>(a * 512.0);
6115
    r = 1.0 / sqrt((static_cast<double>(quot0) + 0.5) / 512.0);
6116
  } else {
6117
    quot1 = static_cast<int>(a * 256.0);
6118
    r = 1.0 / sqrt((static_cast<double>(quot1) + 0.5) / 256.0);
6119
  }
6120
  s = static_cast<int>(256.0 * r + 0.5);
6121
  return static_cast<double>(s) / 256.0;
6122
}
6123

6124

6125
static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
6126
  return ExtractUnsignedBitfield64(start_bit, end_bit, val);
6127
}
6128

6129

6130
template <typename T>
6131
T Simulator::FPRecipSqrtEstimate(T op) {
6132
  if (IsNaN(op)) {
6133
    return FPProcessNaN(op);
6134
  } else if (op == 0.0) {
6135
    if (copysign(1.0, op) < 0.0) {
6136
      return kFP64NegativeInfinity;
6137
    } else {
6138
      return kFP64PositiveInfinity;
6139
    }
6140
  } else if (copysign(1.0, op) < 0.0) {
6141
    FPProcessException();
6142
    return FPDefaultNaN<T>();
6143
  } else if (IsInf(op)) {
6144
    return 0.0;
6145
  } else {
6146
    uint64_t fraction;
6147
    int exp, result_exp;
6148

6149
    if (IsFloat16<T>()) {
6150
      exp = Float16Exp(op);
6151
      fraction = Float16Mantissa(op);
6152
      fraction <<= 42;
6153
    } else if (IsFloat32<T>()) {
6154
      exp = FloatExp(op);
6155
      fraction = FloatMantissa(op);
6156
      fraction <<= 29;
6157
    } else {
6158
      VIXL_ASSERT(IsFloat64<T>());
6159
      exp = DoubleExp(op);
6160
      fraction = DoubleMantissa(op);
6161
    }
6162

6163
    if (exp == 0) {
6164
      while (Bits(fraction, 51, 51) == 0) {
6165
        fraction = Bits(fraction, 50, 0) << 1;
6166
        exp -= 1;
6167
      }
6168
      fraction = Bits(fraction, 50, 0) << 1;
6169
    }
6170

6171
    double scaled;
6172
    if (Bits(exp, 0, 0) == 0) {
6173
      scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
6174
    } else {
6175
      scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44);
6176
    }
6177

6178
    if (IsFloat16<T>()) {
6179
      result_exp = (44 - exp) / 2;
6180
    } else if (IsFloat32<T>()) {
6181
      result_exp = (380 - exp) / 2;
6182
    } else {
6183
      VIXL_ASSERT(IsFloat64<T>());
6184
      result_exp = (3068 - exp) / 2;
6185
    }
6186

6187
    uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled));
6188

6189
    if (IsFloat16<T>()) {
6190
      uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
6191
      uint16_t est_bits = static_cast<uint16_t>(Bits(estimate, 51, 42));
6192
      return Float16Pack(0, exp_bits, est_bits);
6193
    } else if (IsFloat32<T>()) {
6194
      uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
6195
      uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
6196
      return FloatPack(0, exp_bits, est_bits);
6197
    } else {
6198
      VIXL_ASSERT(IsFloat64<T>());
6199
      return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
6200
    }
6201
  }
6202
}
6203

6204

6205
// Per-lane floating-point reciprocal square root estimate: applies
// FPRecipSqrtEstimate to every lane of src, using the element type that
// matches the lane size of vform, and writes the results to dst.
LogicVRegister Simulator::frsqrte(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  const unsigned lane_size = LaneSizeInBitsFromFormat(vform);
  switch (lane_size) {
    case kHRegSize:
      for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
        SimFloat16 value = src.Float<SimFloat16>(lane);
        dst.SetFloat(vform, lane, FPRecipSqrtEstimate<SimFloat16>(value));
      }
      break;
    case kSRegSize:
      for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
        float value = src.Float<float>(lane);
        dst.SetFloat(vform, lane, FPRecipSqrtEstimate<float>(value));
      }
      break;
    default:
      VIXL_ASSERT(lane_size == kDRegSize);
      for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
        double value = src.Float<double>(lane);
        dst.SetFloat(vform, lane, FPRecipSqrtEstimate<double>(value));
      }
      break;
  }
  return dst;
}
6228

6229
template <typename T>
6230
T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
6231
  uint32_t sign;
6232

6233
  if (IsFloat16<T>()) {
6234
    sign = Float16Sign(op);
6235
  } else if (IsFloat32<T>()) {
6236
    sign = FloatSign(op);
6237
  } else {
6238
    VIXL_ASSERT(IsFloat64<T>());
6239
    sign = DoubleSign(op);
6240
  }
6241

6242
  if (IsNaN(op)) {
6243
    return FPProcessNaN(op);
6244
  } else if (IsInf(op)) {
6245
    return (sign == 1) ? -0.0 : 0.0;
6246
  } else if (op == 0.0) {
6247
    FPProcessException();  // FPExc_DivideByZero exception.
6248
    return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
6249
  } else if ((IsFloat16<T>() && (std::fabs(op) < std::pow(2.0, -16.0))) ||
6250
             (IsFloat32<T>() && (std::fabs(op) < std::pow(2.0, -128.0))) ||
6251
             (IsFloat64<T>() && (std::fabs(op) < std::pow(2.0, -1024.0)))) {
6252
    bool overflow_to_inf = false;
6253
    switch (rounding) {
6254
      case FPTieEven:
6255
        overflow_to_inf = true;
6256
        break;
6257
      case FPPositiveInfinity:
6258
        overflow_to_inf = (sign == 0);
6259
        break;
6260
      case FPNegativeInfinity:
6261
        overflow_to_inf = (sign == 1);
6262
        break;
6263
      case FPZero:
6264
        overflow_to_inf = false;
6265
        break;
6266
      default:
6267
        break;
6268
    }
6269
    FPProcessException();  // FPExc_Overflow and FPExc_Inexact.
6270
    if (overflow_to_inf) {
6271
      return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
6272
    } else {
6273
      // Return FPMaxNormal(sign).
6274
      if (IsFloat16<T>()) {
6275
        return Float16Pack(sign, 0x1f, 0x3ff);
6276
      } else if (IsFloat32<T>()) {
6277
        return FloatPack(sign, 0xfe, 0x07fffff);
6278
      } else {
6279
        VIXL_ASSERT(IsFloat64<T>());
6280
        return DoublePack(sign, 0x7fe, 0x0fffffffffffffl);
6281
      }
6282
    }
6283
  } else {
6284
    uint64_t fraction;
6285
    int exp, result_exp;
6286

6287
    if (IsFloat16<T>()) {
6288
      sign = Float16Sign(op);
6289
      exp = Float16Exp(op);
6290
      fraction = Float16Mantissa(op);
6291
      fraction <<= 42;
6292
    } else if (IsFloat32<T>()) {
6293
      sign = FloatSign(op);
6294
      exp = FloatExp(op);
6295
      fraction = FloatMantissa(op);
6296
      fraction <<= 29;
6297
    } else {
6298
      VIXL_ASSERT(IsFloat64<T>());
6299
      sign = DoubleSign(op);
6300
      exp = DoubleExp(op);
6301
      fraction = DoubleMantissa(op);
6302
    }
6303

6304
    if (exp == 0) {
6305
      if (Bits(fraction, 51, 51) == 0) {
6306
        exp -= 1;
6307
        fraction = Bits(fraction, 49, 0) << 2;
6308
      } else {
6309
        fraction = Bits(fraction, 50, 0) << 1;
6310
      }
6311
    }
6312

6313
    double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
6314

6315
    if (IsFloat16<T>()) {
6316
      result_exp = (29 - exp);  // In range 29-30 = -1 to 29+1 = 30.
6317
    } else if (IsFloat32<T>()) {
6318
      result_exp = (253 - exp);  // In range 253-254 = -1 to 253+1 = 254.
6319
    } else {
6320
      VIXL_ASSERT(IsFloat64<T>());
6321
      result_exp = (2045 - exp);  // In range 2045-2046 = -1 to 2045+1 = 2046.
6322
    }
6323

6324
    double estimate = recip_estimate(scaled);
6325

6326
    fraction = DoubleMantissa(estimate);
6327
    if (result_exp == 0) {
6328
      fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
6329
    } else if (result_exp == -1) {
6330
      fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
6331
      result_exp = 0;
6332
    }
6333
    if (IsFloat16<T>()) {
6334
      uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
6335
      uint16_t frac_bits = static_cast<uint16_t>(Bits(fraction, 51, 42));
6336
      return Float16Pack(sign, exp_bits, frac_bits);
6337
    } else if (IsFloat32<T>()) {
6338
      uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
6339
      uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
6340
      return FloatPack(sign, exp_bits, frac_bits);
6341
    } else {
6342
      VIXL_ASSERT(IsFloat64<T>());
6343
      return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
6344
    }
6345
  }
6346
}
6347

6348

6349
// Per-lane floating-point reciprocal estimate: applies FPRecipEstimate, with
// the given rounding mode, to every lane of src using the element type that
// matches the lane size of vform, and writes the results to dst.
LogicVRegister Simulator::frecpe(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src,
                                 FPRounding round) {
  dst.ClearForWrite(vform);
  const unsigned lane_size = LaneSizeInBitsFromFormat(vform);
  switch (lane_size) {
    case kHRegSize:
      for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
        SimFloat16 value = src.Float<SimFloat16>(lane);
        dst.SetFloat(vform, lane, FPRecipEstimate<SimFloat16>(value, round));
      }
      break;
    case kSRegSize:
      for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
        float value = src.Float<float>(lane);
        dst.SetFloat(vform, lane, FPRecipEstimate<float>(value, round));
      }
      break;
    default:
      VIXL_ASSERT(lane_size == kDRegSize);
      for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
        double value = src.Float<double>(lane);
        dst.SetFloat(vform, lane, FPRecipEstimate<double>(value, round));
      }
      break;
  }
  return dst;
}
6373

6374

6375
// Per-lane unsigned reciprocal square root estimate.
//
// Lanes whose value is at most 0x3FFFFFFF produce 0xFFFFFFFF. Otherwise the
// lane is scaled by 2^-32, passed through recip_sqrt_estimate, scaled by 2^31
// and truncated to 32 bits.
LogicVRegister Simulator::ursqrte(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
    const uint64_t operand = src.Uint(vform, lane);
    uint32_t estimate = 0xFFFFFFFF;
    if (operand > 0x3FFFFFFF) {
      const double dp_operand = operand * std::pow(2.0, -32);
      const double dp_estimate =
          recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
      estimate = static_cast<uint32_t>(dp_estimate);
    }
    dst.SetUint(vform, lane, estimate);
  }
  return dst;
}
6395

6396

6397
// Based on reference C function recip_estimate from ARM ARM.
6398
double Simulator::recip_estimate(double a) {
6399
  int q, s;
6400
  double r;
6401
  q = static_cast<int>(a * 512.0);
6402
  r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
6403
  s = static_cast<int>(256.0 * r + 0.5);
6404
  return static_cast<double>(s) / 256.0;
6405
}
6406

6407

6408
// Per-lane unsigned reciprocal estimate.
//
// Lanes whose value is at most 0x7FFFFFFF produce 0xFFFFFFFF. Otherwise the
// lane is scaled by 2^-32, passed through recip_estimate, scaled by 2^31 and
// truncated to 32 bits.
LogicVRegister Simulator::urecpe(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
    const uint64_t operand = src.Uint(vform, lane);
    uint32_t estimate = 0xFFFFFFFF;
    if (operand > 0x7FFFFFFF) {
      const double dp_operand = operand * std::pow(2.0, -32);
      const double dp_estimate = recip_estimate(dp_operand) * std::pow(2.0, 31);
      estimate = static_cast<uint32_t>(dp_estimate);
    }
    dst.SetUint(vform, lane, estimate);
  }
  return dst;
}
6428

6429
// Clear the predicate register dst and return it.
LogicPRegister Simulator::pfalse(LogicPRegister dst) {
  dst.Clear();
  return dst;
}
6433

6434
// Copy src to dst, then additionally set the byte-lane element of dst that
// corresponds to the first active element of pg. When GetFirstActive reports
// a negative index, dst is just a copy of src.
LogicPRegister Simulator::pfirst(LogicPRegister dst,
                                 const LogicPRegister& pg,
                                 const LogicPRegister& src) {
  // Query pg before dst is written.
  const int first_active = GetFirstActive(kFormatVnB, pg);
  VIXL_ASSERT(first_active < LaneCountFromFormat(kFormatVnB));
  mov(dst, src);
  if (first_active >= 0) {
    dst.SetActive(kFormatVnB, first_active, true);
  }
  return dst;
}
6443

6444
// Initialise dst from a predicate constraint pattern: the first
// GetPredicateConstraintLaneCount(vform, pattern) lanes are made active and
// all remaining lanes inactive.
LogicPRegister Simulator::ptrue(VectorFormat vform,
                                LogicPRegister dst,
                                int pattern) {
  const int active_lanes = GetPredicateConstraintLaneCount(vform, pattern);
  for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
    const bool is_active = lane < active_lanes;
    dst.SetActive(vform, lane, is_active);
  }
  return dst;
}
6453

6454
// Find the first lane after the last active lane of src that is active in pg,
// and set dst so that only that lane is active. If there is no such lane,
// every lane of dst is made inactive.
LogicPRegister Simulator::pnext(VectorFormat vform,
                                LogicPRegister dst,
                                const LogicPRegister& pg,
                                const LogicPRegister& src) {
  // Start just past the last active lane of src and skip lanes that are
  // inactive in pg.
  int candidate = GetLastActive(vform, src) + 1;
  while ((candidate < LaneCountFromFormat(vform)) &&
         !pg.IsActive(vform, candidate)) {
    candidate++;
  }

  for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
    dst.SetActive(vform, lane, (lane == candidate));
  }
  return dst;
}
6469

6470
template <typename T>
6471
LogicVRegister Simulator::frecpx(VectorFormat vform,
6472
                                 LogicVRegister dst,
6473
                                 const LogicVRegister& src) {
6474
  dst.ClearForWrite(vform);
6475
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6476
    T op = src.Float<T>(i);
6477
    T result;
6478
    if (IsNaN(op)) {
6479
      result = FPProcessNaN(op);
6480
    } else {
6481
      int exp;
6482
      uint32_t sign;
6483
      if (IsFloat16<T>()) {
6484
        sign = Float16Sign(op);
6485
        exp = Float16Exp(op);
6486
        exp = (exp == 0) ? (0x1F - 1) : static_cast<int>(Bits(~exp, 4, 0));
6487
        result = Float16Pack(sign, exp, 0);
6488
      } else if (IsFloat32<T>()) {
6489
        sign = FloatSign(op);
6490
        exp = FloatExp(op);
6491
        exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
6492
        result = FloatPack(sign, exp, 0);
6493
      } else {
6494
        VIXL_ASSERT(IsFloat64<T>());
6495
        sign = DoubleSign(op);
6496
        exp = DoubleExp(op);
6497
        exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
6498
        result = DoublePack(sign, exp, 0);
6499
      }
6500
    }
6501
    dst.SetFloat(i, result);
6502
  }
6503
  return dst;
6504
}
6505

6506

6507
// Dispatch frecpx to the templated implementation whose element type matches
// the lane size of vform (half, single or double precision).
LogicVRegister Simulator::frecpx(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src) {
  const unsigned lane_size = LaneSizeInBitsFromFormat(vform);
  switch (lane_size) {
    case kHRegSize:
      frecpx<SimFloat16>(vform, dst, src);
      break;
    case kSRegSize:
      frecpx<float>(vform, dst, src);
      break;
    default:
      VIXL_ASSERT(lane_size == kDRegSize);
      frecpx<double>(vform, dst, src);
      break;
  }
  return dst;
}
6520

6521
// Per-lane base-2 exponent of a floating-point value, written as a signed
// integer. vform must be kFormatVnH, kFormatVnS or kFormatVnD.
//
// Each lane is first widened to double and then classified:
//   infinity    -> MaxIntFromFormat(vform)
//   NaN or zero -> MinIntFromFormat(vform)
//   subnormal   -> -1023 minus the number of leading zeros in the mantissa
//   normal      -> exponent field minus the bias (1023)
LogicVRegister Simulator::flogb(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
    double op = 0.0;
    if (vform == kFormatVnH) {
      op = FPToDouble(src.Float<SimFloat16>(lane), kIgnoreDefaultNaN);
    } else if (vform == kFormatVnS) {
      op = src.Float<float>(lane);
    } else if (vform == kFormatVnD) {
      op = src.Float<double>(lane);
    } else {
      VIXL_UNREACHABLE();
    }

    const int fp_class = std::fpclassify(op);
    if (fp_class == FP_INFINITE) {
      dst.SetInt(vform, lane, MaxIntFromFormat(vform));
    } else if ((fp_class == FP_NAN) || (fp_class == FP_ZERO)) {
      dst.SetInt(vform, lane, MinIntFromFormat(vform));
    } else if (fp_class == FP_SUBNORMAL) {
      // DoubleMantissa returns the mantissa of its input, leaving 12 zero
      // bits where the sign and exponent would be. We subtract 12 to
      // find the number of leading zero bits in the mantissa itself.
      const int64_t mant_zero_count =
          CountLeadingZeros(DoubleMantissa(op)) - 12;
      // Log2 of a subnormal is the lowest exponent a normal number can
      // represent, together with the zeros in the mantissa.
      dst.SetInt(vform, lane, -1023 - mant_zero_count);
    } else if (fp_class == FP_NORMAL) {
      // Log2 of a normal number is the exponent minus the bias.
      dst.SetInt(vform, lane, static_cast<int64_t>(DoubleExp(op)) - 1023);
    }
  }
  return dst;
}
6566

6567
// Multiplies src1 by itself, with the sign of the second operand optionally
// flipped according to bit 0 of the corresponding src2 lane, and writes the
// product to dst.
LogicVRegister Simulator::ftsmul(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister sign_adjusted_src1;

  // Move bit 0 of each src2 lane into the sign bit position, then XOR it with
  // src1. This yields either src1 or src1 with its sign bit inverted.
  const int sign_bit_position = LaneSizeInBitsFromFormat(vform) - 1;
  shl(vform, sign_adjusted_src1, src2, sign_bit_position);
  eor(vform, sign_adjusted_src1, sign_adjusted_src1, src1);

  // In the case of NaNs, NaN * -NaN will return the first NaN intact, so the
  // unmodified src1 must be the first source argument of the multiplication.
  fmul(vform, dst, src1, sign_adjusted_src1);

  return dst;
}
6585

6586
// Per-lane select driven by the two low bits of src2:
//   bit 0 set -> the result starts as the encoding of 1.0, otherwise as the
//                src1 lane;
//   bit 1 set -> the sign bit of the result is then inverted.
// The operation works on raw lane bits only; no floating-point arithmetic is
// performed.
LogicVRegister Simulator::ftssel(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  const unsigned lane_bits = LaneSizeInBitsFromFormat(vform);
  const uint64_t sign_bit = UINT64_C(1) << (lane_bits - 1);

  // Raw encoding of 1.0 at the lane's precision.
  uint64_t one;
  switch (lane_bits) {
    case kHRegSize:
      one = Float16ToRawbits(Float16(1.0));
      break;
    case kSRegSize:
      one = FloatToRawbits(1.0);
      break;
    default:
      VIXL_ASSERT(lane_bits == kDRegSize);
      one = DoubleToRawbits(1.0);
      break;
  }

  for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
    // Integer accessors are used since this is pure data manipulation.
    uint64_t value = src1.Uint(vform, lane);

    // Only the bottom two bits of the src2 lane are significant.
    const uint64_t quadrant = src2.Uint(vform, lane);
    const bool select_one = (quadrant & 1) == 1;
    const bool flip_sign = (quadrant & 2) == 2;
    if (select_one) value = one;
    if (flip_sign) value ^= sign_bit;

    dst.SetUint(vform, lane, value);
  }

  return dst;
}
6620

6621
template <typename T>
6622
LogicVRegister Simulator::FTMaddHelper(VectorFormat vform,
6623
                                       LogicVRegister dst,
6624
                                       const LogicVRegister& src1,
6625
                                       const LogicVRegister& src2,
6626
                                       uint64_t coeff_pos,
6627
                                       uint64_t coeff_neg) {
6628
  SimVRegister zero;
6629
  dup_immediate(kFormatVnB, zero, 0);
6630

6631
  SimVRegister cf;
6632
  SimVRegister cfn;
6633
  dup_immediate(vform, cf, coeff_pos);
6634
  dup_immediate(vform, cfn, coeff_neg);
6635

6636
  // The specification requires testing the top bit of the raw value, rather
6637
  // than the sign of the floating point number, so use an integer comparison
6638
  // here.
6639
  SimPRegister is_neg;
6640
  SVEIntCompareVectorsHelper(lt,
6641
                             vform,
6642
                             is_neg,
6643
                             GetPTrue(),
6644
                             src2,
6645
                             zero,
6646
                             false,
6647
                             LeaveFlags);
6648
  mov_merging(vform, cf, is_neg, cfn);
6649

6650
  SimVRegister temp;
6651
  fabs_<T>(vform, temp, src2);
6652
  fmla<T>(vform, cf, cf, src1, temp);
6653
  mov(vform, dst, cf);
6654
  return dst;
6655
}
6656

6657

6658
// Picks a pair of coefficients by `index` and the lane size of `vform`, then
// defers to FTMaddHelper<T> for the arithmetic.
//
// Each table has sixteen raw floating-point bit patterns. Entry [index] is
// passed as the helper's coeff_pos and entry [index + 8] as its coeff_neg, so
// `index` must be in the range [0, 7].
LogicVRegister Simulator::ftmad(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2,
                                unsigned index) {
  // Half-precision coefficients.
  static const uint64_t coeffs_h[] = {
      // Entries [0, 7].
      0x3c00, 0xb155, 0x2030, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
      // Entries [8, 15].
      0x3c00, 0xb800, 0x293a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000};

  // Single-precision coefficients.
  static const uint64_t coeffs_s[] = {
      // Entries [0, 7].
      0x3f800000, 0xbe2aaaab, 0x3c088886, 0xb95008b9,
      0x36369d6d, 0x00000000, 0x00000000, 0x00000000,
      // Entries [8, 15].
      0x3f800000, 0xbf000000, 0x3d2aaaa6, 0xbab60705,
      0x37cd37cc, 0x00000000, 0x00000000, 0x00000000};

  // Double-precision coefficients.
  static const uint64_t coeffs_d[] = {
      // Entries [0, 7].
      0x3ff0000000000000, 0xbfc5555555555543,
      0x3f8111111110f30c, 0xbf2a01a019b92fc6,
      0x3ec71de351f3d22b, 0xbe5ae5e2b60f7b91,
      0x3de5d8408868552f, 0x0000000000000000,
      // Entries [8, 15].
      0x3ff0000000000000, 0xbfe0000000000000,
      0x3fa5555555555536, 0xbf56c16c16c13a0b,
      0x3efa01a019b1e8d8, 0xbe927e4f7282f468,
      0x3e21ee96d2641b13, 0xbda8f76380fbb401};

  // All three tables share one layout, so a single bounds check covers them.
  VIXL_ASSERT((index + 8) < ArrayLength(coeffs_d));
  VIXL_ASSERT(ArrayLength(coeffs_h) == ArrayLength(coeffs_d));
  VIXL_ASSERT(ArrayLength(coeffs_s) == ArrayLength(coeffs_d));

  const unsigned lane_size = LaneSizeInBitsFromFormat(vform);
  const unsigned pos_index = index;
  const unsigned neg_index = index + 8;

  switch (lane_size) {
    case kHRegSize:
      FTMaddHelper<SimFloat16>(vform,
                               dst,
                               src1,
                               src2,
                               coeffs_h[pos_index],
                               coeffs_h[neg_index]);
      break;
    case kSRegSize:
      FTMaddHelper<float>(vform,
                          dst,
                          src1,
                          src2,
                          coeffs_s[pos_index],
                          coeffs_s[neg_index]);
      break;
    default:
      VIXL_ASSERT(lane_size == kDRegSize);
      FTMaddHelper<double>(vform,
                           dst,
                           src1,
                           src2,
                           coeffs_d[pos_index],
                           coeffs_d[neg_index]);
      break;
  }
  return dst;
}
6742

6743
// Builds each dst lane from two bitfields of the matching src lane:
//   - the low field (5 bits for H lanes, 6 bits for S and D lanes) indexes a
//     table whose entry supplies the low bits of the result;
//   - the field directly above it is copied into the result, shifted left by
//     10 (H), 23 (S) or 52 (D) bits.
// Src bits above the second field are ignored. The operation is pure bit
// manipulation; no floating-point arithmetic is performed.
LogicVRegister Simulator::fexpa(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  // 32 entries, indexed by src<4:0> for H-sized lanes.
  static const uint64_t table_h[] = {
      0x0000, 0x0016, 0x002d, 0x0045, 0x005d, 0x0075, 0x008e, 0x00a8,
      0x00c2, 0x00dc, 0x00f8, 0x0114, 0x0130, 0x014d, 0x016b, 0x0189,
      0x01a8, 0x01c8, 0x01e8, 0x0209, 0x022b, 0x024e, 0x0271, 0x0295,
      0x02ba, 0x02e0, 0x0306, 0x032e, 0x0356, 0x037f, 0x03a9, 0x03d4};

  // 64 entries, indexed by src<5:0> for S-sized lanes.
  static const uint64_t table_s[] = {
      0x000000, 0x0164d2, 0x02cd87, 0x043a29,
      0x05aac3, 0x071f62, 0x08980f, 0x0a14d5,
      0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc,
      0x11c3d3, 0x135a2b, 0x14f4f0, 0x16942d,
      0x1837f0, 0x19e046, 0x1b8d3a, 0x1d3eda,
      0x1ef532, 0x20b051, 0x227043, 0x243516,
      0x25fed7, 0x27cd94, 0x29a15b, 0x2b7a3a,
      0x2d583f, 0x2f3b79, 0x3123f6, 0x3311c4,
      0x3504f3, 0x36fd92, 0x38fbaf, 0x3aff5b,
      0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd,
      0x45672a, 0x478d75, 0x49b9be, 0x4bec15,
      0x4e248c, 0x506334, 0x52a81e, 0x54f35b,
      0x5744fd, 0x599d16, 0x5bfbb8, 0x5e60f5,
      0x60ccdf, 0x633f89, 0x65b907, 0x68396a,
      0x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177,
      0x75257d, 0x77d0df, 0x7a83b3, 0x7d3e0c};

  // 64 entries, indexed by src<5:0> for D-sized lanes.
  static const uint64_t table_d[] =
      {0X0000000000000, 0X02c9a3e778061, 0X059b0d3158574, 0X0874518759bc8,
       0X0b5586cf9890f, 0X0e3ec32d3d1a2, 0X11301d0125b51, 0X1429aaea92de0,
       0X172b83c7d517b, 0X1a35beb6fcb75, 0X1d4873168b9aa, 0X2063b88628cd6,
       0X2387a6e756238, 0X26b4565e27cdd, 0X29e9df51fdee1, 0X2d285a6e4030b,
       0X306fe0a31b715, 0X33c08b26416ff, 0X371a7373aa9cb, 0X3a7db34e59ff7,
       0X3dea64c123422, 0X4160a21f72e2a, 0X44e086061892d, 0X486a2b5c13cd0,
       0X4bfdad5362a27, 0X4f9b2769d2ca7, 0X5342b569d4f82, 0X56f4736b527da,
       0X5ab07dd485429, 0X5e76f15ad2148, 0X6247eb03a5585, 0X6623882552225,
       0X6a09e667f3bcd, 0X6dfb23c651a2f, 0X71f75e8ec5f74, 0X75feb564267c9,
       0X7a11473eb0187, 0X7e2f336cf4e62, 0X82589994cce13, 0X868d99b4492ed,
       0X8ace5422aa0db, 0X8f1ae99157736, 0X93737b0cdc5e5, 0X97d829fde4e50,
       0X9c49182a3f090, 0Xa0c667b5de565, 0Xa5503b23e255d, 0Xa9e6b5579fdbf,
       0Xae89f995ad3ad, 0Xb33a2b84f15fb, 0Xb7f76f2fb5e47, 0Xbcc1e904bc1d2,
       0Xc199bdd85529c, 0Xc67f12e57d14b, 0Xcb720dcef9069, 0Xd072d4a07897c,
       0Xd5818dcfba487, 0Xda9e603db3285, 0Xdfc97337b9b5f, 0Xe502ee78b3ff6,
       0Xea4afa2a490da, 0Xefa1bee615a27, 0Xf50765b6e4540, 0Xfa7c1819e90d8};

  const unsigned lane_size = LaneSizeInBitsFromFormat(vform);

  const uint64_t* table;
  int index_msb = 5;  // Top bit of the table-index field in src.
  int upper_msb;      // Top bit of the field copied from src into the result.
  int upper_shift;    // Position of that field's bottom bit in the result.

  if (lane_size == kHRegSize) {
    index_msb = 4;
    VIXL_ASSERT(ArrayLength(table_h) == (1U << (index_msb + 1)));
    table = table_h;
    upper_msb = 9;
    upper_shift = 10;
  } else if (lane_size == kSRegSize) {
    VIXL_ASSERT(ArrayLength(table_s) == (1U << (index_msb + 1)));
    table = table_s;
    upper_msb = 13;
    upper_shift = 23;
  } else {
    VIXL_ASSERT(lane_size == kDRegSize);
    VIXL_ASSERT(ArrayLength(table_d) == (1U << (index_msb + 1)));
    table = table_d;
    upper_msb = 16;
    upper_shift = 52;
  }

  const int upper_lsb = index_msb + 1;
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t op = src.Uint(vform, lane);
    const uint64_t low_part = table[Bits(op, index_msb, 0)];
    const uint64_t high_part = Bits(op, upper_msb, upper_lsb) << upper_shift;
    dst.SetUint(vform, lane, low_part | high_part);
  }
  return dst;
}
6817

6818
template <typename T>
6819
LogicVRegister Simulator::fscale(VectorFormat vform,
6820
                                 LogicVRegister dst,
6821
                                 const LogicVRegister& src1,
6822
                                 const LogicVRegister& src2) {
6823
  T two = T(2.0);
6824
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6825
    T src1_val = src1.Float<T>(i);
6826
    if (!IsNaN(src1_val)) {
6827
      int64_t scale = src2.Int(vform, i);
6828
      // TODO: this is a low-performance implementation, but it's simple and
6829
      // less likely to be buggy. Consider replacing it with something faster.
6830

6831
      // Scales outside of these bounds become infinity or zero, so there's no
6832
      // point iterating further.
6833
      scale = std::min<int64_t>(std::max<int64_t>(scale, -2048), 2048);
6834

6835
      // Compute src1_val * 2 ^ scale. If scale is positive, multiply by two and
6836
      // decrement scale until it's zero.
6837
      while (scale-- > 0) {
6838
        src1_val = FPMul(src1_val, two);
6839
      }
6840

6841
      // If scale is negative, divide by two and increment scale until it's
6842
      // zero. Initially, scale is (src2 - 1), so we pre-increment.
6843
      while (++scale < 0) {
6844
        src1_val = FPDiv(src1_val, two);
6845
      }
6846
    }
6847
    dst.SetFloat<T>(i, src1_val);
6848
  }
6849
  return dst;
6850
}
6851

6852
// Runs the fscale<T> implementation whose T matches the lane size of vform:
// SimFloat16 for H lanes, float for S lanes and double for D lanes.
LogicVRegister Simulator::fscale(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  const unsigned lane_size = LaneSizeInBitsFromFormat(vform);
  switch (lane_size) {
    case kHRegSize:
      fscale<SimFloat16>(vform, dst, src1, src2);
      break;
    case kSRegSize:
      fscale<float>(vform, dst, src1, src2);
      break;
    default:
      VIXL_ASSERT(lane_size == kDRegSize);
      fscale<double>(vform, dst, src1, src2);
      break;
  }
  return dst;
}
6866

6867
// Predicated signed fixed-point to floating-point conversion.
//
// For each lane that is active in pg, the low src_data_size_in_bits bits of
// the src lane are sign-extended, converted (with `fbits` fractional bits and
// rounding mode `round`) to a floating-point value of dst_data_size_in_bits
// bits, and the raw result is written to the dst lane. Lanes that are inactive
// in pg are not written by the loop. Both data sizes must fit in a lane of
// vform.
LogicVRegister Simulator::scvtf(VectorFormat vform,
                                unsigned dst_data_size_in_bits,
                                unsigned src_data_size_in_bits,
                                LogicVRegister dst,
                                const LogicPRegister& pg,
                                const LogicVRegister& src,
                                FPRounding round,
                                int fbits) {
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
  dst.ClearForWrite(vform);

  const int lane_count = LaneCountFromFormat(vform);
  const int src_msb = src_data_size_in_bits - 1;

  for (int lane = 0; lane < lane_count; ++lane) {
    if (pg.IsActive(vform, lane)) {
      const int64_t fixed =
          ExtractSignedBitfield64(src_msb, 0, src.Uint(vform, lane));

      if (dst_data_size_in_bits == kHRegSize) {
        const SimFloat16 converted = FixedToFloat16(fixed, fbits, round);
        dst.SetUint(vform, lane, Float16ToRawbits(converted));
      } else if (dst_data_size_in_bits == kSRegSize) {
        const float converted = FixedToFloat(fixed, fbits, round);
        dst.SetUint(vform, lane, FloatToRawbits(converted));
      } else if (dst_data_size_in_bits == kDRegSize) {
        const double converted = FixedToDouble(fixed, fbits, round);
        dst.SetUint(vform, lane, DoubleToRawbits(converted));
      } else {
        // Any other destination size is unsupported; the lane is left as is.
        VIXL_UNIMPLEMENTED();
      }
    }
  }

  return dst;
}
6910

6911
// Unpredicated signed conversion: every lane is converted, and the source and
// destination data sizes are both the lane size of vform.
LogicVRegister Simulator::scvtf(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                int fbits,
                                FPRounding round) {
  const unsigned lane_size = LaneSizeInBitsFromFormat(vform);
  return scvtf(vform, lane_size, lane_size, dst, GetPTrue(), src, round, fbits);
}
6925

6926
// Predicated unsigned fixed-point to floating-point conversion.
//
// For each lane that is active in pg, the low src_data_size_in_bits bits of
// the src lane are taken as an unsigned value, converted (with `fbits`
// fractional bits and rounding mode `round`) to a floating-point value of
// dst_data_size_in_bits bits, and the raw result is written to the dst lane.
// Lanes that are inactive in pg are not written by the loop. Both data sizes
// must fit in a lane of vform.
LogicVRegister Simulator::ucvtf(VectorFormat vform,
                                unsigned dst_data_size_in_bits,
                                unsigned src_data_size_in_bits,
                                LogicVRegister dst,
                                const LogicPRegister& pg,
                                const LogicVRegister& src,
                                FPRounding round,
                                int fbits) {
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
  dst.ClearForWrite(vform);

  const int lane_count = LaneCountFromFormat(vform);
  const int src_msb = src_data_size_in_bits - 1;

  for (int lane = 0; lane < lane_count; ++lane) {
    if (pg.IsActive(vform, lane)) {
      const uint64_t fixed =
          ExtractUnsignedBitfield64(src_msb, 0, src.Uint(vform, lane));

      if (dst_data_size_in_bits == kHRegSize) {
        const SimFloat16 converted = UFixedToFloat16(fixed, fbits, round);
        dst.SetUint(vform, lane, Float16ToRawbits(converted));
      } else if (dst_data_size_in_bits == kSRegSize) {
        const float converted = UFixedToFloat(fixed, fbits, round);
        dst.SetUint(vform, lane, FloatToRawbits(converted));
      } else if (dst_data_size_in_bits == kDRegSize) {
        const double converted = UFixedToDouble(fixed, fbits, round);
        dst.SetUint(vform, lane, DoubleToRawbits(converted));
      } else {
        // Any other destination size is unsupported; the lane is left as is.
        VIXL_UNIMPLEMENTED();
      }
    }
  }

  return dst;
}
6969

6970
// Unpredicated unsigned conversion: every lane is converted, and the source
// and destination data sizes are both the lane size of vform.
LogicVRegister Simulator::ucvtf(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                int fbits,
                                FPRounding round) {
  const unsigned lane_size = LaneSizeInBitsFromFormat(vform);
  return ucvtf(vform, lane_size, lane_size, dst, GetPTrue(), src, round, fbits);
}
6984

6985
// Widens half-width elements of src into the full-width lanes of dst.
//
// vform describes the destination lanes. The source is read with the
// half-width format, starting at lane 0 for kLoHalf and at lane
// LaneCountFromFormat(vform) otherwise. Elements are sign-extended for
// kSignedExtend and zero-extended for kUnsignedExtend.
LogicVRegister Simulator::unpk(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src,
                               UnpackType unpack_type,
                               ExtendType extend_type) {
  const VectorFormat half_vform = VectorFormatHalfWidth(vform);
  const int lane_count = LaneCountFromFormat(vform);
  const int first_src_lane = (unpack_type == kLoHalf) ? 0 : lane_count;

  // All source elements are buffered before any destination lane is written,
  // so the result is correct even when dst and src are the same register.
  if (extend_type == kSignedExtend) {
    int64_t widened[kZRegMaxSizeInBytes];
    for (int lane = 0; lane < lane_count; ++lane) {
      widened[lane] = src.Int(half_vform, first_src_lane + lane);
    }
    for (int lane = 0; lane < lane_count; ++lane) {
      dst.SetInt(vform, lane, widened[lane]);
    }
  } else if (extend_type == kUnsignedExtend) {
    uint64_t widened[kZRegMaxSizeInBytes];
    for (int lane = 0; lane < lane_count; ++lane) {
      widened[lane] = src.Uint(half_vform, first_src_lane + lane);
    }
    for (int lane = 0; lane < lane_count; ++lane) {
      dst.SetUint(vform, lane, widened[lane]);
    }
  } else {
    VIXL_UNREACHABLE();
  }
  return dst;
}
7020

7021
// Lane-wise integer comparison producing a predicate.
//
// For each lane of vform that is active in `mask`, the dst predicate lane is
// set to the outcome of `src1 <cond> src2`; lanes inactive in `mask` are set
// to false. eq, ne, ge, gt, le and lt compare the lanes as signed integers;
// hs, hi, ls and lo compare them as unsigned integers. When is_wide_elements
// is true, the second operand is the D-sized lane of src2 that overlaps the
// current lane, rather than the same-numbered lane. If flags is SetFlags,
// PredTest(vform, mask, dst) is called once all lanes have been written.
LogicPRegister Simulator::SVEIntCompareVectorsHelper(Condition cond,
                                                     VectorFormat vform,
                                                     LogicPRegister dst,
                                                     const LogicPRegister& mask,
                                                     const LogicVRegister& src1,
                                                     const LogicVRegister& src2,
                                                     bool is_wide_elements,
                                                     FlagsUpdate flags) {
  const int lane_count = LaneCountFromFormat(vform);
  const unsigned lane_size = LaneSizeInBitsFromFormat(vform);

  for (int lane = 0; lane < lane_count; ++lane) {
    bool holds = false;

    if (mask.IsActive(vform, lane)) {
      // Index of the D-sized lane covering this lane, for wide comparisons.
      const int wide_lane = (lane * lane_size) / kDRegSize;

      switch (cond) {
        case eq:
        case ne:
        case ge:
        case gt:
        case le:
        case lt: {
          const int64_t lhs = src1.Int(vform, lane);
          const int64_t rhs = is_wide_elements
                                  ? src2.Int(kFormatVnD, wide_lane)
                                  : src2.Int(vform, lane);
          if (cond == eq) {
            holds = (lhs == rhs);
          } else if (cond == ne) {
            holds = (lhs != rhs);
          } else if (cond == ge) {
            holds = (lhs >= rhs);
          } else if (cond == gt) {
            holds = (lhs > rhs);
          } else if (cond == le) {
            holds = (lhs <= rhs);
          } else {
            holds = (lhs < rhs);
          }
          break;
        }
        case hs:
        case hi:
        case ls:
        case lo: {
          const uint64_t lhs = src1.Uint(vform, lane);
          const uint64_t rhs = is_wide_elements
                                   ? src2.Uint(kFormatVnD, wide_lane)
                                   : src2.Uint(vform, lane);
          if (cond == hs) {
            holds = (lhs >= rhs);
          } else if (cond == hi) {
            holds = (lhs > rhs);
          } else if (cond == ls) {
            holds = (lhs <= rhs);
          } else {
            holds = (lhs < rhs);
          }
          break;
        }
        default:
          VIXL_UNREACHABLE();
      }
    }

    dst.SetActive(vform, lane, holds);
  }

  if (flags == SetFlags) PredTest(vform, mask, dst);

  return dst;
}
7100

7101
// Shifts each lane of src1 by a per-lane amount read from src2, using the
// shift kind given by shift_op, and writes the result to dst.
//
// When is_wide_elements is true, the shift amount comes from the D-sized lane
// of src2 that overlaps the lane being shifted; otherwise it comes from the
// same-numbered lane. Amounts larger than the lane size are treated as the
// lane size.
LogicVRegister Simulator::SVEBitwiseShiftHelper(Shift shift_op,
                                                VectorFormat vform,
                                                LogicVRegister dst,
                                                const LogicVRegister& src1,
                                                const LogicVRegister& src2,
                                                bool is_wide_elements) {
  const unsigned lane_size = LaneSizeInBitsFromFormat(vform);
  const VectorFormat amount_vform = is_wide_elements ? kFormatVnD : vform;
  const int lane_count = LaneCountFromFormat(vform);

  for (int lane = 0; lane < lane_count; ++lane) {
    // For wide shifts, pick the D-sized lane that occupies the same bits as
    // the lane being shifted.
    const int amount_lane =
        is_wide_elements ? ((lane * lane_size) / kDRegSize) : lane;

    uint64_t amount = src2.Uint(amount_vform, amount_lane);
    // Saturate the amount to the size of the lane that will be shifted.
    if (amount > lane_size) amount = lane_size;

    const uint64_t operand = src1.Uint(vform, lane);
    const int64_t shifted = ShiftOperand(lane_size,
                                         operand,
                                         shift_op,
                                         static_cast<unsigned>(amount));
    dst.SetUint(vform, lane, shifted);
  }

  return dst;
}
7132

7133
// Arithmetic shift right of each signed lane of src1 by `shift`, with negative
// inputs biased by (2^shift - 1) beforehand so that the result rounds towards
// zero. `shift` must be in the range [1, lane size]; a shift of 64 always
// yields zero.
LogicVRegister Simulator::asrd(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               int shift) {
  VIXL_ASSERT((shift > 0) && (static_cast<unsigned>(shift) <=
                              LaneSizeInBitsFromFormat(vform)));

  const bool shift_fits_in_64_bits = (shift <= 63);
  const int lane_count = LaneCountFromFormat(vform);

  for (int lane = 0; lane < lane_count; ++lane) {
    int64_t operand = src1.Int(vform, lane);
    int64_t quotient = 0;

    if (shift_fits_in_64_bits) {
      if (operand < 0) {
        // The largest possible mask is 0x7fff'ffff'ffff'ffff, which can be
        // safely cast to int64_t and cannot cause signed overflow when added
        // to a negative operand.
        operand = operand + GetUintMask(shift);
      }
      quotient = ShiftOperand(kDRegSize, operand, ASR, shift);
    }

    dst.SetInt(vform, lane, quotient);
  }
  return dst;
}
7156

7157
// Applies a bitwise operation to every lane of zn and zm and stores the result
// in zd. Supported operations are AND, BIC (zn & ~zm), EOR and ORR; any other
// operation is reported as unimplemented and yields zero lanes.
LogicVRegister Simulator::SVEBitwiseLogicalUnpredicatedHelper(
    LogicalOp logical_op,
    VectorFormat vform,
    LogicVRegister zd,
    const LogicVRegister& zn,
    const LogicVRegister& zm) {
  VIXL_ASSERT(IsSVEFormat(vform));

  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t lhs = zn.Uint(vform, lane);
    const uint64_t rhs = zm.Uint(vform, lane);

    uint64_t combined = 0;
    if (logical_op == AND) {
      combined = lhs & rhs;
    } else if (logical_op == BIC) {
      combined = lhs & ~rhs;
    } else if (logical_op == EOR) {
      combined = lhs ^ rhs;
    } else if (logical_op == ORR) {
      combined = lhs | rhs;
    } else {
      VIXL_UNIMPLEMENTED();
    }

    zd.SetUint(vform, lane, combined);
  }

  return zd;
}
7189

7190
// Combines predicates pn and pm chunk by chunk with the bitwise operation
// selected by `op`, writing each result chunk to pd. The flag-setting opcode
// of each pair (for example ANDS and AND) produces the same predicate value
// here; this helper does not update flags itself. An unrecognised `op` is
// reported as unimplemented and yields zero chunks.
LogicPRegister Simulator::SVEPredicateLogicalHelper(SVEPredicateLogicalOp op,
                                                    LogicPRegister pd,
                                                    const LogicPRegister& pn,
                                                    const LogicPRegister& pm) {
  typedef LogicPRegister::ChunkType Chunk;

  const int chunk_count = pn.GetChunkCount();
  for (int chunk = 0; chunk < chunk_count; ++chunk) {
    const Chunk lhs = pn.GetChunk(chunk);
    const Chunk rhs = pm.GetChunk(chunk);

    Chunk combined = 0;
    switch (op) {
      case AND_p_p_pp_z:
      case ANDS_p_p_pp_z:
        combined = lhs & rhs;
        break;
      case BIC_p_p_pp_z:
      case BICS_p_p_pp_z:
        combined = lhs & ~rhs;
        break;
      case EOR_p_p_pp_z:
      case EORS_p_p_pp_z:
        combined = lhs ^ rhs;
        break;
      case NAND_p_p_pp_z:
      case NANDS_p_p_pp_z:
        combined = ~(lhs & rhs);
        break;
      case NOR_p_p_pp_z:
      case NORS_p_p_pp_z:
        combined = ~(lhs | rhs);
        break;
      case ORN_p_p_pp_z:
      case ORNS_p_p_pp_z:
        combined = lhs | ~rhs;
        break;
      case ORR_p_p_pp_z:
      case ORRS_p_p_pp_z:
        combined = lhs | rhs;
        break;
      default:
        VIXL_UNIMPLEMENTED();
    }

    pd.SetChunk(chunk, combined);
  }
  return pd;
}
7234

7235
// Combines every lane of zd, in place, with the immediate `imm` using the
// bitwise operation selected by `op` (AND, EOR or ORR). Any other operation is
// reported as unimplemented and yields zero lanes.
LogicVRegister Simulator::SVEBitwiseImmHelper(
    SVEBitwiseLogicalWithImm_UnpredicatedOp op,
    VectorFormat vform,
    LogicVRegister zd,
    uint64_t imm) {
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    const uint64_t current = zd.Uint(vform, lane);

    uint64_t updated = 0;
    if (op == AND_z_zi) {
      updated = current & imm;
    } else if (op == EOR_z_zi) {
      updated = current ^ imm;
    } else if (op == ORR_z_zi) {
      updated = current | imm;
    } else {
      VIXL_UNIMPLEMENTED();
    }

    zd.SetUint(vform, lane, updated);
  }

  return zd;
}
7261

7262
void Simulator::SVEStructuredStoreHelper(VectorFormat vform,
7263
                                         const LogicPRegister& pg,
7264
                                         unsigned zt_code,
7265
                                         const LogicSVEAddressVector& addr) {
7266
  VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7267

7268
  int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
7269
  int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();
7270
  int msize_in_bytes = addr.GetMsizeInBytes();
7271
  int reg_count = addr.GetRegCount();
7272

7273
  VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
7274
  VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));
7275

7276
  unsigned zt_codes[4] = {zt_code,
7277
                          (zt_code + 1) % kNumberOfZRegisters,
7278
                          (zt_code + 2) % kNumberOfZRegisters,
7279
                          (zt_code + 3) % kNumberOfZRegisters};
7280

7281
  LogicVRegister zt[4] = {
7282
      ReadVRegister(zt_codes[0]),
7283
      ReadVRegister(zt_codes[1]),
7284
      ReadVRegister(zt_codes[2]),
7285
      ReadVRegister(zt_codes[3]),
7286
  };
7287

7288
  // For unpacked forms (e.g. `st1b { z0.h }, ...`, the upper parts of the lanes
7289
  // are ignored, so read the source register using the VectorFormat that
7290
  // corresponds with the storage format, and multiply the index accordingly.
7291
  VectorFormat unpack_vform =
7292
      SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2);
7293
  int unpack_shift = esize_in_bytes_log2 - msize_in_bytes_log2;
7294

7295
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7296
    if (!pg.IsActive(vform, i)) continue;
7297

7298
    for (int r = 0; r < reg_count; r++) {
7299
      uint64_t element_address = addr.GetElementAddress(i, r);
7300
      if (!StoreLane(zt[r], unpack_vform, i << unpack_shift, element_address)) {
7301
        return;
7302
      }
7303
    }
7304
  }
7305

7306
  if (ShouldTraceWrites()) {
7307
    PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7308
    if (esize_in_bytes_log2 == msize_in_bytes_log2) {
7309
      // Use an FP format where it's likely that we're accessing FP data.
7310
      format = GetPrintRegisterFormatTryFP(format);
7311
    }
7312
    // Stores don't represent a change to the source register's value, so only
7313
    // print the relevant part of the value.
7314
    format = GetPrintRegPartial(format);
7315

7316
    PrintZStructAccess(zt_code,
7317
                       reg_count,
7318
                       pg,
7319
                       format,
7320
                       msize_in_bytes,
7321
                       "->",
7322
                       addr);
7323
  }
7324
}
7325

7326
bool Simulator::SVEStructuredLoadHelper(VectorFormat vform,
7327
                                        const LogicPRegister& pg,
7328
                                        unsigned zt_code,
7329
                                        const LogicSVEAddressVector& addr,
7330
                                        bool is_signed) {
7331
  int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
7332
  int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();
7333
  int msize_in_bytes = addr.GetMsizeInBytes();
7334
  int reg_count = addr.GetRegCount();
7335

7336
  VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7337
  VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
7338
  VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));
7339

7340
  unsigned zt_codes[4] = {zt_code,
7341
                          (zt_code + 1) % kNumberOfZRegisters,
7342
                          (zt_code + 2) % kNumberOfZRegisters,
7343
                          (zt_code + 3) % kNumberOfZRegisters};
7344
  LogicVRegister zt[4] = {
7345
      ReadVRegister(zt_codes[0]),
7346
      ReadVRegister(zt_codes[1]),
7347
      ReadVRegister(zt_codes[2]),
7348
      ReadVRegister(zt_codes[3]),
7349
  };
7350

7351
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7352
    for (int r = 0; r < reg_count; r++) {
7353
      uint64_t element_address = addr.GetElementAddress(i, r);
7354

7355
      if (!pg.IsActive(vform, i)) {
7356
        zt[r].SetUint(vform, i, 0);
7357
        continue;
7358
      }
7359

7360
      if (is_signed) {
7361
        if (!LoadIntToLane(zt[r], vform, msize_in_bytes, i, element_address)) {
7362
          return false;
7363
        }
7364
      } else {
7365
        if (!LoadUintToLane(zt[r], vform, msize_in_bytes, i, element_address)) {
7366
          return false;
7367
        }
7368
      }
7369
    }
7370
  }
7371

7372
  if (ShouldTraceVRegs()) {
7373
    PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7374
    if ((esize_in_bytes_log2 == msize_in_bytes_log2) && !is_signed) {
7375
      // Use an FP format where it's likely that we're accessing FP data.
7376
      format = GetPrintRegisterFormatTryFP(format);
7377
    }
7378
    PrintZStructAccess(zt_code,
7379
                       reg_count,
7380
                       pg,
7381
                       format,
7382
                       msize_in_bytes,
7383
                       "<-",
7384
                       addr);
7385
  }
7386
  return true;
7387
}
7388

7389
// Break after the first true condition. Walking the byte lanes in ascending
// order, every lane that is active in `pg` is set in `pd` until (and including)
// the first such lane that is also set in `pn`; later active lanes are cleared.
// Lanes that are inactive in `pg` are not written.
LogicPRegister Simulator::brka(LogicPRegister pd,
                               const LogicPRegister& pg,
                               const LogicPRegister& pn) {
  const int lane_count = LaneCountFromFormat(kFormatVnB);
  bool seen_break = false;
  for (int lane = 0; lane < lane_count; lane++) {
    if (!pg.IsActive(kFormatVnB, lane)) continue;

    // Write the result before sampling pn, so that the lane holding the break
    // condition itself is still set in pd.
    pd.SetActive(kFormatVnB, lane, !seen_break);
    if (pn.IsActive(kFormatVnB, lane)) {
      seen_break = true;
    }
  }
  return pd;
}
7402

7403
// Break before the first true condition. Walking the byte lanes in ascending
// order, every lane that is active in `pg` is set in `pd` until (but not
// including) the first such lane that is also set in `pn`; that lane and all
// later active lanes are cleared. Lanes that are inactive in `pg` are not
// written.
LogicPRegister Simulator::brkb(LogicPRegister pd,
                               const LogicPRegister& pg,
                               const LogicPRegister& pn) {
  const int lane_count = LaneCountFromFormat(kFormatVnB);
  bool seen_break = false;
  for (int lane = 0; lane < lane_count; lane++) {
    if (!pg.IsActive(kFormatVnB, lane)) continue;

    // Sample pn before writing the result, so that the lane holding the break
    // condition is already cleared in pd.
    if (pn.IsActive(kFormatVnB, lane)) {
      seen_break = true;
    }
    pd.SetActive(kFormatVnB, lane, !seen_break);
  }
  return pd;
}
7416

7417
// Propagate a break to the next partition: `pdm` is returned unchanged when
// IsLastActive(kFormatVnB, pg, pn) holds, and is cleared with pfalse otherwise.
LogicPRegister Simulator::brkn(LogicPRegister pdm,
                               const LogicPRegister& pg,
                               const LogicPRegister& pn) {
  const bool keep_pdm = IsLastActive(kFormatVnB, pg, pn);
  if (keep_pdm) return pdm;

  pfalse(pdm);
  return pdm;
}
7425

7426
// Break after the first true condition, propagating from the previous
// partition. The running state starts as IsLastActive(kFormatVnB, pg, pn). Each
// lane that is active in `pg` receives the state as it was *before* looking at
// `pm`, and the state is then dropped if that `pm` lane is set. Lanes that are
// inactive in `pg` are cleared in `pd`.
LogicPRegister Simulator::brkpa(LogicPRegister pd,
                                const LogicPRegister& pg,
                                const LogicPRegister& pn,
                                const LogicPRegister& pm) {
  bool propagate = IsLastActive(kFormatVnB, pg, pn);

  const int lane_count = LaneCountFromFormat(kFormatVnB);
  for (int lane = 0; lane < lane_count; lane++) {
    // The value written to this lane is captured before pm is consulted.
    const bool result = pg.IsActive(kFormatVnB, lane) && propagate;
    if (result && pm.IsActive(kFormatVnB, lane)) {
      propagate = false;
    }
    pd.SetActive(kFormatVnB, lane, result);
  }

  return pd;
}
7443

7444
// Break before the first true condition, propagating from the previous
// partition. The running state starts as IsLastActive(kFormatVnB, pg, pn). For
// each lane that is active in `pg`, the state is first dropped if the `pm` lane
// is set, and the lane then receives the *updated* state. Lanes that are
// inactive in `pg` are cleared in `pd`.
LogicPRegister Simulator::brkpb(LogicPRegister pd,
                                const LogicPRegister& pg,
                                const LogicPRegister& pn,
                                const LogicPRegister& pm) {
  bool propagate = IsLastActive(kFormatVnB, pg, pn);

  const int lane_count = LaneCountFromFormat(kFormatVnB);
  for (int lane = 0; lane < lane_count; lane++) {
    bool result = false;
    if (pg.IsActive(kFormatVnB, lane)) {
      // pm is consulted first, so the breaking lane itself is cleared.
      if (propagate && pm.IsActive(kFormatVnB, lane)) {
        propagate = false;
      }
      result = propagate;
    }
    pd.SetActive(kFormatVnB, lane, result);
  }

  return pd;
}
7461

7462
void Simulator::SVEFaultTolerantLoadHelper(VectorFormat vform,
7463
                                           const LogicPRegister& pg,
7464
                                           unsigned zt_code,
7465
                                           const LogicSVEAddressVector& addr,
7466
                                           SVEFaultTolerantLoadType type,
7467
                                           bool is_signed) {
7468
  int esize_in_bytes = LaneSizeInBytesFromFormat(vform);
7469
  int msize_in_bits = addr.GetMsizeInBits();
7470
  int msize_in_bytes = addr.GetMsizeInBytes();
7471

7472
  VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7473
  VIXL_ASSERT(esize_in_bytes >= msize_in_bytes);
7474
  VIXL_ASSERT(addr.GetRegCount() == 1);
7475

7476
  LogicVRegister zt = ReadVRegister(zt_code);
7477
  LogicPRegister ffr = ReadFFR();
7478

7479
  // Non-faulting loads are allowed to fail arbitrarily. To stress user
7480
  // code, fail a random element in roughly one in eight full-vector loads.
7481
  uint32_t rnd = static_cast<uint32_t>(jrand48(rand_state_));
7482
  int fake_fault_at_lane = rnd % (LaneCountFromFormat(vform) * 8);
7483

7484
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7485
    uint64_t value = 0;
7486

7487
    if (pg.IsActive(vform, i)) {
7488
      uint64_t element_address = addr.GetElementAddress(i, 0);
7489

7490
      if (type == kSVEFirstFaultLoad) {
7491
        // First-faulting loads always load the first active element, regardless
7492
        // of FFR. The result will be discarded if its FFR lane is inactive, but
7493
        // it could still generate a fault.
7494
        VIXL_DEFINE_OR_RETURN(mem_result,
7495
                              MemReadUint(msize_in_bytes, element_address));
7496
        value = mem_result;
7497
        // All subsequent elements have non-fault semantics.
7498
        type = kSVENonFaultLoad;
7499

7500
      } else if (ffr.IsActive(vform, i)) {
7501
        // Simulation of fault-tolerant loads relies on system calls, and is
7502
        // likely to be relatively slow, so we only actually perform the load if
7503
        // its FFR lane is active.
7504

7505
        bool can_read = (i < fake_fault_at_lane) &&
7506
                        CanReadMemory(element_address, msize_in_bytes);
7507
        if (can_read) {
7508
          VIXL_DEFINE_OR_RETURN(mem_result,
7509
                                MemReadUint(msize_in_bytes, element_address));
7510
          value = mem_result;
7511
        } else {
7512
          // Propagate the fault to the end of FFR.
7513
          for (int j = i; j < LaneCountFromFormat(vform); j++) {
7514
            ffr.SetActive(vform, j, false);
7515
          }
7516
        }
7517
      }
7518
    }
7519

7520
    // The architecture permits a few possible results for inactive FFR lanes
7521
    // (including those caused by a fault in this instruction). We choose to
7522
    // leave the register value unchanged (like merging predication) because
7523
    // no other input to this instruction can have the same behaviour.
7524
    //
7525
    // Note that this behaviour takes precedence over pg's zeroing predication.
7526

7527
    if (ffr.IsActive(vform, i)) {
7528
      int msb = msize_in_bits - 1;
7529
      if (is_signed) {
7530
        zt.SetInt(vform, i, ExtractSignedBitfield64(msb, 0, value));
7531
      } else {
7532
        zt.SetUint(vform, i, ExtractUnsignedBitfield64(msb, 0, value));
7533
      }
7534
    }
7535
  }
7536

7537
  if (ShouldTraceVRegs()) {
7538
    PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7539
    if ((esize_in_bytes == msize_in_bytes) && !is_signed) {
7540
      // Use an FP format where it's likely that we're accessing FP data.
7541
      format = GetPrintRegisterFormatTryFP(format);
7542
    }
7543
    // Log accessed lanes that are active in both pg and ffr. PrintZStructAccess
7544
    // expects a single mask, so combine the two predicates.
7545
    SimPRegister mask;
7546
    SVEPredicateLogicalHelper(AND_p_p_pp_z, mask, pg, ffr);
7547
    PrintZStructAccess(zt_code, 1, mask, format, msize_in_bytes, "<-", addr);
7548
  }
7549
}
7550

7551
void Simulator::SVEGatherLoadScalarPlusVectorHelper(const Instruction* instr,
7552
                                                    VectorFormat vform,
7553
                                                    SVEOffsetModifier mod) {
7554
  bool is_signed = instr->ExtractBit(14) == 0;
7555
  bool is_ff = instr->ExtractBit(13) == 1;
7556
  // Note that these instructions don't use the Dtype encoding.
7557
  int msize_in_bytes_log2 = instr->ExtractBits(24, 23);
7558
  int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
7559
  uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
7560
  LogicSVEAddressVector addr(base,
7561
                             &ReadVRegister(instr->GetRm()),
7562
                             vform,
7563
                             mod,
7564
                             scale);
7565
  addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
7566
  if (is_ff) {
7567
    SVEFaultTolerantLoadHelper(vform,
7568
                               ReadPRegister(instr->GetPgLow8()),
7569
                               instr->GetRt(),
7570
                               addr,
7571
                               kSVEFirstFaultLoad,
7572
                               is_signed);
7573
  } else {
7574
    SVEStructuredLoadHelper(vform,
7575
                            ReadPRegister(instr->GetPgLow8()),
7576
                            instr->GetRt(),
7577
                            addr,
7578
                            is_signed);
7579
  }
7580
}
7581

7582
int Simulator::GetFirstActive(VectorFormat vform,
7583
                              const LogicPRegister& pg) const {
7584
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7585
    if (pg.IsActive(vform, i)) return i;
7586
  }
7587
  return -1;
7588
}
7589

7590
int Simulator::GetLastActive(VectorFormat vform,
7591
                             const LogicPRegister& pg) const {
7592
  for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
7593
    if (pg.IsActive(vform, i)) return i;
7594
  }
7595
  return -1;
7596
}
7597

7598
int Simulator::CountActiveLanes(VectorFormat vform,
7599
                                const LogicPRegister& pg) const {
7600
  int count = 0;
7601
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7602
    count += pg.IsActive(vform, i) ? 1 : 0;
7603
  }
7604
  return count;
7605
}
7606

7607
int Simulator::CountActiveAndTrueLanes(VectorFormat vform,
7608
                                       const LogicPRegister& pg,
7609
                                       const LogicPRegister& pn) const {
7610
  int count = 0;
7611
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7612
    count += (pg.IsActive(vform, i) && pn.IsActive(vform, i)) ? 1 : 0;
7613
  }
7614
  return count;
7615
}
7616

7617
int Simulator::GetPredicateConstraintLaneCount(VectorFormat vform,
7618
                                               int pattern) const {
7619
  VIXL_ASSERT(IsSVEFormat(vform));
7620
  int all = LaneCountFromFormat(vform);
7621
  VIXL_ASSERT(all > 0);
7622

7623
  switch (pattern) {
7624
    case SVE_VL1:
7625
    case SVE_VL2:
7626
    case SVE_VL3:
7627
    case SVE_VL4:
7628
    case SVE_VL5:
7629
    case SVE_VL6:
7630
    case SVE_VL7:
7631
    case SVE_VL8:
7632
      // VL1-VL8 are encoded directly.
7633
      VIXL_STATIC_ASSERT(SVE_VL1 == 1);
7634
      VIXL_STATIC_ASSERT(SVE_VL8 == 8);
7635
      return (pattern <= all) ? pattern : 0;
7636
    case SVE_VL16:
7637
    case SVE_VL32:
7638
    case SVE_VL64:
7639
    case SVE_VL128:
7640
    case SVE_VL256: {
7641
      // VL16-VL256 are encoded as log2(N) + c.
7642
      int min = 16 << (pattern - SVE_VL16);
7643
      return (min <= all) ? min : 0;
7644
    }
7645
    // Special cases.
7646
    case SVE_POW2:
7647
      return 1 << HighestSetBitPosition(all);
7648
    case SVE_MUL4:
7649
      return all - (all % 4);
7650
    case SVE_MUL3:
7651
      return all - (all % 3);
7652
    case SVE_ALL:
7653
      return all;
7654
  }
7655
  // Unnamed cases architecturally return 0.
7656
  return 0;
7657
}
7658

7659
// Compute a match predicate into `dst`. `dst` is cleared, then for each element
// index within a Q-register-sized segment, a temporary vector is built from
// `needles` with dup_elements_to_segments, compared for equality against
// `haystack` under an all-true governing predicate (flags left untouched), and
// the comparison result is ORed into `dst`. If `negate_match` is set, the
// accumulated predicate is finally inverted by EOR with ptrue(vform, SVE_ALL).
LogicPRegister Simulator::match(VectorFormat vform,
                                LogicPRegister dst,
                                const LogicVRegister& haystack,
                                const LogicVRegister& needles,
                                bool negate_match) {
  SimVRegister candidate;
  SimPRegister scratch;

  pfalse(dst);

  const int lanes_per_segment = kQRegSize / LaneSizeInBitsFromFormat(vform);
  for (int element = 0; element < lanes_per_segment; element++) {
    dup_elements_to_segments(vform, candidate, needles, element);
    SVEIntCompareVectorsHelper(eq,
                               vform,
                               scratch,
                               GetPTrue(),
                               haystack,
                               candidate,
                               false,
                               LeaveFlags);
    SVEPredicateLogicalHelper(ORR_p_p_pp_z, dst, dst, scratch);
  }

  if (!negate_match) return dst;

  // Invert the result by exclusive-ORing it with an all-true predicate.
  ptrue(vform, scratch, SVE_ALL);
  SVEPredicateLogicalHelper(EOR_p_p_pp_z, dst, dst, scratch);
  return dst;
}
7687

7688
uint64_t LogicSVEAddressVector::GetStructAddress(int lane) const {
7689
  if (IsContiguous()) {
7690
    return base_ + (lane * GetRegCount()) * GetMsizeInBytes();
7691
  }
7692

7693
  VIXL_ASSERT(IsScatterGather());
7694
  VIXL_ASSERT(vector_ != NULL);
7695

7696
  // For scatter-gather accesses, we need to extract the offset from vector_,
7697
  // and apply modifiers.
7698

7699
  uint64_t offset = 0;
7700
  switch (vector_form_) {
7701
    case kFormatVnS:
7702
      offset = vector_->GetLane<uint32_t>(lane);
7703
      break;
7704
    case kFormatVnD:
7705
      offset = vector_->GetLane<uint64_t>(lane);
7706
      break;
7707
    default:
7708
      VIXL_UNIMPLEMENTED();
7709
      break;
7710
  }
7711

7712
  switch (vector_mod_) {
7713
    case SVE_MUL_VL:
7714
      VIXL_UNIMPLEMENTED();
7715
      break;
7716
    case SVE_LSL:
7717
      // We apply the shift below. There's nothing to do here.
7718
      break;
7719
    case NO_SVE_OFFSET_MODIFIER:
7720
      VIXL_ASSERT(vector_shift_ == 0);
7721
      break;
7722
    case SVE_UXTW:
7723
      offset = ExtractUnsignedBitfield64(kWRegSize - 1, 0, offset);
7724
      break;
7725
    case SVE_SXTW:
7726
      offset = ExtractSignedBitfield64(kWRegSize - 1, 0, offset);
7727
      break;
7728
  }
7729

7730
  return base_ + (offset << vector_shift_);
7731
}
7732

7733
// Pack elements of `src` into `dst` by applying uzp2 to `src` and an all-zero
// second source register.
LogicVRegister Simulator::pack_odd_elements(VectorFormat vform,
                                            LogicVRegister dst,
                                            const LogicVRegister& src) {
  SimVRegister zeroes;
  zeroes.Clear();

  return uzp2(vform, dst, src, zeroes);
}
7740

7741
// Pack elements of `src` into `dst` by applying uzp1 to `src` and an all-zero
// second source register.
LogicVRegister Simulator::pack_even_elements(VectorFormat vform,
                                             LogicVRegister dst,
                                             const LogicVRegister& src) {
  SimVRegister zeroes;
  zeroes.Clear();

  return uzp1(vform, dst, src, zeroes);
}
7748

7749
// Add with carry, operating on pairs of adjacent lanes (S- or D-sized).
//
// For each even/odd lane pair, the even lane of `dst` is added to a lane of
// `src1` (the odd lane of the pair if `top` is set, the even lane otherwise),
// with the carry-in taken from bit 0 of the odd lane of `src2`. The sum is
// written to the even lane of `dst` and the carry-out to the odd lane.
LogicVRegister Simulator::adcl(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2,
                               bool top) {
  const unsigned lane_bits = LaneSizeInBitsFromFormat(vform);
  VIXL_ASSERT((lane_bits == kSRegSize) || (lane_bits == kDRegSize));

  const int lane_count = LaneCountFromFormat(vform);
  for (int even = 0; even < lane_count; even += 2) {
    const int odd = even + 1;

    // Read every input for this pair before writing anything back to dst.
    const uint64_t addend = src1.Uint(vform, top ? odd : even);
    const uint64_t accumulator = dst.Uint(vform, even);
    const unsigned carry_in = src2.Uint(vform, odd) & 1;

    const std::pair<uint64_t, uint8_t> sum_and_flags =
        AddWithCarry(lane_bits, addend, accumulator, carry_in);

    // Set even lanes to the result of the addition.
    dst.SetUint(vform, even, sum_and_flags.first);

    // Set odd lanes to the carry flag from the addition (bit 1 of the flags).
    const uint64_t carry_out = (sum_and_flags.second >> 1) & 1;
    dst.SetUint(vform, odd, carry_out);
  }
  return dst;
}
7773

7774
// Multiply the 2x8 8-bit matrix in src1 by the 8x2 8-bit matrix in src2, add
7775
// the 2x2 32-bit result to the matrix in srcdst, and write back to srcdst.
7776
//
7777
// Matrices of the form:
7778
//
7779
//  src1 = ( a b c d e f g h )  src2 = ( A B )
7780
//         ( i j k l m n o p )         ( C D )
7781
//                                     ( E F )
7782
//                                     ( G H )
7783
//                                     ( I J )
7784
//                                     ( K L )
7785
//                                     ( M N )
7786
//                                     ( O P )
7787
//
7788
// Are stored in the input vector registers as:
7789
//
7790
//           15  14  13  12  11  10  9   8   7   6   5   4   3   2   1   0
7791
//  src1 = [ p | o | n | m | l | k | j | i | h | g | f | e | d | c | b | a ]
7792
//  src2 = [ P | N | L | J | H | F | D | B | O | M | K | I | G | E | C | A ]
7793
//
7794
LogicVRegister Simulator::matmul(VectorFormat vform_dst,
7795
                                 LogicVRegister srcdst,
7796
                                 const LogicVRegister& src1,
7797
                                 const LogicVRegister& src2,
7798
                                 bool src1_signed,
7799
                                 bool src2_signed) {
7800
  // Two destination forms are supported: Q register containing four S-sized
7801
  // elements (4S) and Z register containing n S-sized elements (VnS).
7802
  VIXL_ASSERT((vform_dst == kFormat4S) || (vform_dst == kFormatVnS));
7803
  VectorFormat vform_src = kFormatVnB;
7804
  int b_per_segment = kQRegSize / kBRegSize;
7805
  int s_per_segment = kQRegSize / kSRegSize;
7806
  int64_t result[kZRegMaxSizeInBytes / kSRegSizeInBytes] = {};
7807
  int segment_count = LaneCountFromFormat(vform_dst) / 4;
7808
  for (int seg = 0; seg < segment_count; seg++) {
7809
    for (int i = 0; i < 2; i++) {
7810
      for (int j = 0; j < 2; j++) {
7811
        int dstidx = (2 * i) + j + (seg * s_per_segment);
7812
        int64_t sum = srcdst.Int(vform_dst, dstidx);
7813
        for (int k = 0; k < 8; k++) {
7814
          int idx1 = (8 * i) + k + (seg * b_per_segment);
7815
          int idx2 = (8 * j) + k + (seg * b_per_segment);
7816
          int64_t e1 = src1_signed ? src1.Int(vform_src, idx1)
7817
                                   : src1.Uint(vform_src, idx1);
7818
          int64_t e2 = src2_signed ? src2.Int(vform_src, idx2)
7819
                                   : src2.Uint(vform_src, idx2);
7820
          sum += e1 * e2;
7821
        }
7822
        result[dstidx] = sum;
7823
      }
7824
    }
7825
  }
7826
  srcdst.SetIntArray(vform_dst, result);
7827
  return srcdst;
7828
}
7829

7830
// Multiply the 2x2 FP matrix in src1 by the 2x2 FP matrix in src2, add the 2x2
7831
// result to the matrix in srcdst, and write back to srcdst.
7832
//
7833
// Matrices of the form:
7834
//
7835
//  src1 = ( a b )  src2 = ( A B )
7836
//         ( c d )         ( C D )
7837
//
7838
// Are stored in the input vector registers as:
7839
//
7840
//           3   2   1   0
7841
//  src1 = [ d | c | b | a ]
7842
//  src2 = [ D | B | C | A ]
7843
//
7844
template <typename T>
7845
LogicVRegister Simulator::fmatmul(VectorFormat vform,
7846
                                  LogicVRegister srcdst,
7847
                                  const LogicVRegister& src1,
7848
                                  const LogicVRegister& src2) {
7849
  T result[kZRegMaxSizeInBytes / sizeof(T)];
7850
  int T_per_segment = 4;
7851
  int segment_count = GetVectorLengthInBytes() / (T_per_segment * sizeof(T));
7852
  for (int seg = 0; seg < segment_count; seg++) {
7853
    int segoff = seg * T_per_segment;
7854
    for (int i = 0; i < 2; i++) {
7855
      for (int j = 0; j < 2; j++) {
7856
        T prod0 = FPMulNaNs(src1.Float<T>(2 * i + 0 + segoff),
7857
                            src2.Float<T>(2 * j + 0 + segoff));
7858
        T prod1 = FPMulNaNs(src1.Float<T>(2 * i + 1 + segoff),
7859
                            src2.Float<T>(2 * j + 1 + segoff));
7860
        T sum = FPAdd(srcdst.Float<T>(2 * i + j + segoff), prod0);
7861
        result[2 * i + j + segoff] = FPAdd(sum, prod1);
7862
      }
7863
    }
7864
  }
7865
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7866
    // Elements outside a multiple of 4T are set to zero. This happens only
7867
    // for double precision operations, when the VL is a multiple of 128 bits,
7868
    // but not a multiple of 256 bits.
7869
    T value = (i < (T_per_segment * segment_count)) ? result[i] : 0;
7870
    srcdst.SetFloat<T>(vform, i, value);
7871
  }
7872
  return srcdst;
7873
}
7874

7875
// Dispatch a floating-point matrix multiply-accumulate on the lane size of
// `vform`: S-sized lanes use the `float` implementation, and anything else is
// asserted to be D-sized and uses the `double` implementation.
LogicVRegister Simulator::fmatmul(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2) {
  const auto lane_bits = LaneSizeInBitsFromFormat(vform);

  if (lane_bits != kSRegSize) {
    VIXL_ASSERT(lane_bits == kDRegSize);
    fmatmul<double>(vform, dst, src1, src2);
    return dst;
  }

  fmatmul<float>(vform, dst, src1, src2);
  return dst;
}
7887

7888
}  // namespace aarch64
7889
}  // namespace vixl
7890

7891
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
7892

7893
Product

Resources

Company