GitHub Repository: stenzek/duckstation
Path: blob/master/dep/vixl/src/aarch64/cpu-aarch64.cc
// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

#if defined(__aarch64__) && (defined(__ANDROID__) || defined(__linux__))
#include <sys/auxv.h>
#define VIXL_USE_LINUX_HWCAP 1
#endif

#include "../utils-vixl.h"

#include "cpu-aarch64.h"

namespace vixl {
namespace aarch64 {

const IDRegister::Field AA64PFR0::kFP(16, Field::kSigned);
const IDRegister::Field AA64PFR0::kAdvSIMD(20, Field::kSigned);
const IDRegister::Field AA64PFR0::kRAS(28);
const IDRegister::Field AA64PFR0::kSVE(32);
const IDRegister::Field AA64PFR0::kDIT(48);
const IDRegister::Field AA64PFR0::kCSV2(56);
const IDRegister::Field AA64PFR0::kCSV3(60);

const IDRegister::Field AA64PFR1::kBT(0);
const IDRegister::Field AA64PFR1::kSSBS(4);
const IDRegister::Field AA64PFR1::kMTE(8);
const IDRegister::Field AA64PFR1::kSME(24);

const IDRegister::Field AA64ISAR0::kAES(4);
const IDRegister::Field AA64ISAR0::kSHA1(8);
const IDRegister::Field AA64ISAR0::kSHA2(12);
const IDRegister::Field AA64ISAR0::kCRC32(16);
const IDRegister::Field AA64ISAR0::kAtomic(20);
const IDRegister::Field AA64ISAR0::kRDM(28);
const IDRegister::Field AA64ISAR0::kSHA3(32);
const IDRegister::Field AA64ISAR0::kSM3(36);
const IDRegister::Field AA64ISAR0::kSM4(40);
const IDRegister::Field AA64ISAR0::kDP(44);
const IDRegister::Field AA64ISAR0::kFHM(48);
const IDRegister::Field AA64ISAR0::kTS(52);
const IDRegister::Field AA64ISAR0::kRNDR(60);

const IDRegister::Field AA64ISAR1::kDPB(0);
const IDRegister::Field AA64ISAR1::kAPA(4);
const IDRegister::Field AA64ISAR1::kAPI(8);
const IDRegister::Field AA64ISAR1::kJSCVT(12);
const IDRegister::Field AA64ISAR1::kFCMA(16);
const IDRegister::Field AA64ISAR1::kLRCPC(20);
const IDRegister::Field AA64ISAR1::kGPA(24);
const IDRegister::Field AA64ISAR1::kGPI(28);
const IDRegister::Field AA64ISAR1::kFRINTTS(32);
const IDRegister::Field AA64ISAR1::kSB(36);
const IDRegister::Field AA64ISAR1::kSPECRES(40);
const IDRegister::Field AA64ISAR1::kBF16(44);
const IDRegister::Field AA64ISAR1::kDGH(48);
const IDRegister::Field AA64ISAR1::kI8MM(52);

const IDRegister::Field AA64ISAR2::kWFXT(0);
const IDRegister::Field AA64ISAR2::kRPRES(4);
const IDRegister::Field AA64ISAR2::kMOPS(16);
const IDRegister::Field AA64ISAR2::kCSSC(52);

const IDRegister::Field AA64MMFR0::kECV(60);

const IDRegister::Field AA64MMFR1::kLO(16);
const IDRegister::Field AA64MMFR1::kAFP(44);

const IDRegister::Field AA64MMFR2::kAT(32);

const IDRegister::Field AA64ZFR0::kSVEver(0);
const IDRegister::Field AA64ZFR0::kAES(4);
const IDRegister::Field AA64ZFR0::kBitPerm(16);
const IDRegister::Field AA64ZFR0::kBF16(20);
const IDRegister::Field AA64ZFR0::kSHA3(32);
const IDRegister::Field AA64ZFR0::kSM4(40);
const IDRegister::Field AA64ZFR0::kI8MM(44);
const IDRegister::Field AA64ZFR0::kF32MM(52);
const IDRegister::Field AA64ZFR0::kF64MM(56);

const IDRegister::Field AA64SMFR0::kSMEf32f32(32, 1);
const IDRegister::Field AA64SMFR0::kSMEb16f32(34, 1);
const IDRegister::Field AA64SMFR0::kSMEf16f32(35, 1);
const IDRegister::Field AA64SMFR0::kSMEi8i32(36);
const IDRegister::Field AA64SMFR0::kSMEf64f64(48, 1);
const IDRegister::Field AA64SMFR0::kSMEi16i64(52);
const IDRegister::Field AA64SMFR0::kSMEfa64(63, 1);
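
// Note on the definitions above (an inferred reading of the constructor
// arguments here, not from separate documentation): each Field names the
// least-significant bit of a field in the corresponding ID register. Fields
// are four bits wide and unsigned by default; Field::kSigned marks the signed
// fields (kFP and kAdvSIMD), and the AA64SMFR0 entries constructed with a
// second argument of 1 appear to be one-bit flags, matching the single-bit
// feature fields of ID_AA64SMFR0_EL1.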

CPUFeatures AA64PFR0::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kFP) >= 0) f.Combine(CPUFeatures::kFP);
  if (Get(kFP) >= 1) f.Combine(CPUFeatures::kFPHalf);
  if (Get(kAdvSIMD) >= 0) f.Combine(CPUFeatures::kNEON);
  if (Get(kAdvSIMD) >= 1) f.Combine(CPUFeatures::kNEONHalf);
  if (Get(kRAS) >= 1) f.Combine(CPUFeatures::kRAS);
  if (Get(kSVE) >= 1) f.Combine(CPUFeatures::kSVE);
  if (Get(kDIT) >= 1) f.Combine(CPUFeatures::kDIT);
  if (Get(kCSV2) >= 1) f.Combine(CPUFeatures::kCSV2);
  if (Get(kCSV2) >= 2) f.Combine(CPUFeatures::kSCXTNUM);
  if (Get(kCSV3) >= 1) f.Combine(CPUFeatures::kCSV3);
  return f;
}
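
// Note that the ">= 0" tests above are not redundant: kFP and kAdvSIMD are
// signed fields, and the architecture encodes "not implemented" as the
// all-ones value 0b1111, which sign-extends to -1. Get(kFP) >= 0 is therefore
// true only when some level of FP support is actually present.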

CPUFeatures AA64PFR1::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kBT) >= 1) f.Combine(CPUFeatures::kBTI);
  if (Get(kSSBS) >= 1) f.Combine(CPUFeatures::kSSBS);
  if (Get(kSSBS) >= 2) f.Combine(CPUFeatures::kSSBSControl);
  if (Get(kMTE) >= 1) f.Combine(CPUFeatures::kMTEInstructions);
  if (Get(kMTE) >= 2) f.Combine(CPUFeatures::kMTE);
  if (Get(kMTE) >= 3) f.Combine(CPUFeatures::kMTE3);
  if (Get(kSME) >= 1) f.Combine(CPUFeatures::kSME);
  return f;
}

CPUFeatures AA64ISAR0::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kAES) >= 1) f.Combine(CPUFeatures::kAES);
  if (Get(kAES) >= 2) f.Combine(CPUFeatures::kPmull1Q);
  if (Get(kSHA1) >= 1) f.Combine(CPUFeatures::kSHA1);
  if (Get(kSHA2) >= 1) f.Combine(CPUFeatures::kSHA2);
  if (Get(kSHA2) >= 2) f.Combine(CPUFeatures::kSHA512);
  if (Get(kCRC32) >= 1) f.Combine(CPUFeatures::kCRC32);
  if (Get(kAtomic) >= 1) f.Combine(CPUFeatures::kAtomics);
  if (Get(kRDM) >= 1) f.Combine(CPUFeatures::kRDM);
  if (Get(kSHA3) >= 1) f.Combine(CPUFeatures::kSHA3);
  if (Get(kSM3) >= 1) f.Combine(CPUFeatures::kSM3);
  if (Get(kSM4) >= 1) f.Combine(CPUFeatures::kSM4);
  if (Get(kDP) >= 1) f.Combine(CPUFeatures::kDotProduct);
  if (Get(kFHM) >= 1) f.Combine(CPUFeatures::kFHM);
  if (Get(kTS) >= 1) f.Combine(CPUFeatures::kFlagM);
  if (Get(kTS) >= 2) f.Combine(CPUFeatures::kAXFlag);
  if (Get(kRNDR) >= 1) f.Combine(CPUFeatures::kRNG);
  return f;
}

CPUFeatures AA64ISAR1::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kDPB) >= 1) f.Combine(CPUFeatures::kDCPoP);
  if (Get(kDPB) >= 2) f.Combine(CPUFeatures::kDCCVADP);
  if (Get(kJSCVT) >= 1) f.Combine(CPUFeatures::kJSCVT);
  if (Get(kFCMA) >= 1) f.Combine(CPUFeatures::kFcma);
  if (Get(kLRCPC) >= 1) f.Combine(CPUFeatures::kRCpc);
  if (Get(kLRCPC) >= 2) f.Combine(CPUFeatures::kRCpcImm);
  if (Get(kFRINTTS) >= 1) f.Combine(CPUFeatures::kFrintToFixedSizedInt);
  if (Get(kSB) >= 1) f.Combine(CPUFeatures::kSB);
  if (Get(kSPECRES) >= 1) f.Combine(CPUFeatures::kSPECRES);
  if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kBF16);
  if (Get(kBF16) >= 2) f.Combine(CPUFeatures::kEBF16);
  if (Get(kDGH) >= 1) f.Combine(CPUFeatures::kDGH);
  if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kI8MM);

  // Only one of these fields should be non-zero, but they have the same
  // encodings, so merge the logic.
  int apx = std::max(Get(kAPI), Get(kAPA));
  if (apx >= 1) {
    f.Combine(CPUFeatures::kPAuth);
    // APA (rather than API) indicates QARMA.
    if (Get(kAPA) >= 1) f.Combine(CPUFeatures::kPAuthQARMA);
    if (apx == 0b0010) f.Combine(CPUFeatures::kPAuthEnhancedPAC);
    if (apx >= 0b0011) f.Combine(CPUFeatures::kPAuthEnhancedPAC2);
    if (apx >= 0b0100) f.Combine(CPUFeatures::kPAuthFPAC);
    if (apx >= 0b0101) f.Combine(CPUFeatures::kPAuthFPACCombined);
  }

  if (Get(kGPI) >= 1) f.Combine(CPUFeatures::kPAuthGeneric);
  if (Get(kGPA) >= 1) {
    f.Combine(CPUFeatures::kPAuthGeneric, CPUFeatures::kPAuthGenericQARMA);
  }
  return f;
}
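
// For reference, the APA/API levels tested above line up with the
// architectural feature encodings: 0b0001 (FEAT_PAuth), 0b0010 (FEAT_EPAC),
// 0b0011 (FEAT_PAuth2), 0b0100 (FEAT_FPAC) and 0b0101 (FEAT_FPACCOMBINE).
// FEAT_EPAC is matched exactly (== 0b0010) rather than as a lower bound
// because implementations reporting FEAT_PAuth2 or later do not also provide
// the EPAC behaviour.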

CPUFeatures AA64ISAR2::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kWFXT) >= 2) f.Combine(CPUFeatures::kWFXT);
  if (Get(kRPRES) >= 1) f.Combine(CPUFeatures::kRPRES);
  if (Get(kMOPS) >= 1) f.Combine(CPUFeatures::kMOPS);
  if (Get(kCSSC) >= 1) f.Combine(CPUFeatures::kCSSC);
  return f;
}

CPUFeatures AA64MMFR0::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kECV) >= 1) f.Combine(CPUFeatures::kECV);
  return f;
}

CPUFeatures AA64MMFR1::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kLO) >= 1) f.Combine(CPUFeatures::kLORegions);
  if (Get(kAFP) >= 1) f.Combine(CPUFeatures::kAFP);
  return f;
}

CPUFeatures AA64MMFR2::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kAT) >= 1) f.Combine(CPUFeatures::kUSCAT);
  return f;
}

CPUFeatures AA64ZFR0::GetCPUFeatures() const {
  // This register is only available with SVE, but reads-as-zero in its
  // absence, so it's always safe to read it.
  CPUFeatures f;
  if (Get(kF64MM) >= 1) f.Combine(CPUFeatures::kSVEF64MM);
  if (Get(kF32MM) >= 1) f.Combine(CPUFeatures::kSVEF32MM);
  if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kSVEI8MM);
  if (Get(kSM4) >= 1) f.Combine(CPUFeatures::kSVESM4);
  if (Get(kSHA3) >= 1) f.Combine(CPUFeatures::kSVESHA3);
  if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kSVEBF16);
  if (Get(kBF16) >= 2) f.Combine(CPUFeatures::kSVE_EBF16);
  if (Get(kBitPerm) >= 1) f.Combine(CPUFeatures::kSVEBitPerm);
  if (Get(kAES) >= 1) f.Combine(CPUFeatures::kSVEAES);
  if (Get(kAES) >= 2) f.Combine(CPUFeatures::kSVEPmull128);
  if (Get(kSVEver) >= 1) f.Combine(CPUFeatures::kSVE2);
  return f;
}

CPUFeatures AA64SMFR0::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kSMEf32f32) >= 1) f.Combine(CPUFeatures::kSMEf32f32);
  if (Get(kSMEb16f32) >= 1) f.Combine(CPUFeatures::kSMEb16f32);
  if (Get(kSMEf16f32) >= 1) f.Combine(CPUFeatures::kSMEf16f32);
  if (Get(kSMEi8i32) >= 15) f.Combine(CPUFeatures::kSMEi8i32);
  if (Get(kSMEf64f64) >= 1) f.Combine(CPUFeatures::kSMEf64f64);
  if (Get(kSMEi16i64) >= 15) f.Combine(CPUFeatures::kSMEi16i64);
  if (Get(kSMEfa64) >= 1) f.Combine(CPUFeatures::kSMEfa64);
  return f;
}
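
// The ">= 15" comparisons above are intentional rather than off-by-one
// errors: I8I32 and I16I64 are four-bit fields for which the architecture
// defines only 0b0000 (not implemented) and 0b1111 (implemented), so the
// feature is present exactly when the field reads as 15.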

int IDRegister::Get(IDRegister::Field field) const {
  int msb = field.GetMsb();
  int lsb = field.GetLsb();
  VIXL_STATIC_ASSERT(static_cast<size_t>(Field::kMaxWidthInBits) <
                     (sizeof(int) * kBitsPerByte));
  switch (field.GetType()) {
    case Field::kSigned:
      return static_cast<int>(ExtractSignedBitfield64(msb, lsb, value_));
    case Field::kUnsigned:
      return static_cast<int>(ExtractUnsignedBitfield64(msb, lsb, value_));
  }
  VIXL_UNREACHABLE();
  return 0;
}
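
// A worked example (values are illustrative): AA64ISAR0::kCRC32 has its lsb
// at bit 16 and the default four-bit width, so Get() extracts bits [19:16] of
// the raw register value. If ID_AA64ISAR0_EL1 reads as 0x0000000000010000,
// the CRC32 field is (0x10000 >> 16) & 0xf == 1, and GetCPUFeatures() above
// reports CPUFeatures::kCRC32.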

CPUFeatures CPU::InferCPUFeaturesFromIDRegisters() {
  CPUFeatures f;
#define VIXL_COMBINE_ID_REG(NAME, MRS_ARG) \
  f.Combine(Read##NAME().GetCPUFeatures());
  VIXL_AARCH64_ID_REG_LIST(VIXL_COMBINE_ID_REG)
#undef VIXL_COMBINE_ID_REG
  return f;
}

CPUFeatures CPU::InferCPUFeaturesFromOS(
    CPUFeatures::QueryIDRegistersOption option) {
  CPUFeatures features;

#ifdef VIXL_USE_LINUX_HWCAP
  // Map each set bit onto a feature. Ideally, we'd use HWCAP_* macros rather
  // than explicit bits, but explicit bits allow us to identify features that
  // the toolchain doesn't know about.
  static const CPUFeatures::Feature kFeatureBitsLow[] =
      {// Bits 0-7
       CPUFeatures::kFP,
       CPUFeatures::kNEON,
       CPUFeatures::kNone,  // "EVTSTRM", which VIXL doesn't track.
       CPUFeatures::kAES,
       CPUFeatures::kPmull1Q,
       CPUFeatures::kSHA1,
       CPUFeatures::kSHA2,
       CPUFeatures::kCRC32,
       // Bits 8-15
       CPUFeatures::kAtomics,
       CPUFeatures::kFPHalf,
       CPUFeatures::kNEONHalf,
       CPUFeatures::kIDRegisterEmulation,
       CPUFeatures::kRDM,
       CPUFeatures::kJSCVT,
       CPUFeatures::kFcma,
       CPUFeatures::kRCpc,
       // Bits 16-23
       CPUFeatures::kDCPoP,
       CPUFeatures::kSHA3,
       CPUFeatures::kSM3,
       CPUFeatures::kSM4,
       CPUFeatures::kDotProduct,
       CPUFeatures::kSHA512,
       CPUFeatures::kSVE,
       CPUFeatures::kFHM,
       // Bits 24-31
       CPUFeatures::kDIT,
       CPUFeatures::kUSCAT,
       CPUFeatures::kRCpcImm,
       CPUFeatures::kFlagM,
       CPUFeatures::kSSBSControl,
       CPUFeatures::kSB,
       CPUFeatures::kPAuth,
       CPUFeatures::kPAuthGeneric};
  VIXL_STATIC_ASSERT(ArrayLength(kFeatureBitsLow) < 64);

  static const CPUFeatures::Feature kFeatureBitsHigh[] =
      {// Bits 0-7
       CPUFeatures::kDCCVADP,
       CPUFeatures::kSVE2,
       CPUFeatures::kSVEAES,
       CPUFeatures::kSVEPmull128,
       CPUFeatures::kSVEBitPerm,
       CPUFeatures::kSVESHA3,
       CPUFeatures::kSVESM4,
       CPUFeatures::kAXFlag,
       // Bits 8-15
       CPUFeatures::kFrintToFixedSizedInt,
       CPUFeatures::kSVEI8MM,
       CPUFeatures::kSVEF32MM,
       CPUFeatures::kSVEF64MM,
       CPUFeatures::kSVEBF16,
       CPUFeatures::kI8MM,
       CPUFeatures::kBF16,
       CPUFeatures::kDGH,
       // Bits 16-23
       CPUFeatures::kRNG,
       CPUFeatures::kBTI,
       CPUFeatures::kMTE,
       CPUFeatures::kECV,
       CPUFeatures::kAFP,
       CPUFeatures::kRPRES,
       CPUFeatures::kMTE3,
       CPUFeatures::kSME,
       // Bits 24-31
       CPUFeatures::kSMEi16i64,
       CPUFeatures::kSMEf64f64,
       CPUFeatures::kSMEi8i32,
       CPUFeatures::kSMEf16f32,
       CPUFeatures::kSMEb16f32,
       CPUFeatures::kSMEf32f32,
       CPUFeatures::kSMEfa64,
       CPUFeatures::kWFXT,
       // Bits 32-39
       CPUFeatures::kEBF16,
       CPUFeatures::kSVE_EBF16};
  VIXL_STATIC_ASSERT(ArrayLength(kFeatureBitsHigh) < 64);

  auto combine_features = [&features](uint64_t hwcap,
                                      const CPUFeatures::Feature* feature_array,
                                      size_t features_size) {
    for (size_t i = 0; i < features_size; i++) {
      if (hwcap & (UINT64_C(1) << i)) features.Combine(feature_array[i]);
    }
  };

  uint64_t hwcap_low = getauxval(AT_HWCAP);
  uint64_t hwcap_high = getauxval(AT_HWCAP2);

  combine_features(hwcap_low, kFeatureBitsLow, ArrayLength(kFeatureBitsLow));
  combine_features(hwcap_high, kFeatureBitsHigh, ArrayLength(kFeatureBitsHigh));

  // MTE support in HWCAP2 implies that both FEAT_MTE1 and FEAT_MTE2 are
  // supported.
  if (features.Has(CPUFeatures::kMTE)) {
    features.Combine(CPUFeatures::kMTEInstructions);
  }
#endif  // VIXL_USE_LINUX_HWCAP

  if ((option == CPUFeatures::kQueryIDRegistersIfAvailable) &&
      (features.Has(CPUFeatures::kIDRegisterEmulation))) {
    features.Combine(InferCPUFeaturesFromIDRegisters());
  }
  return features;
}
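
// A minimal usage sketch (illustrative, not part of this file): callers
// typically query the OS once at start-up, then test for the features they
// need before emitting instructions that depend on them.
//
//   CPUFeatures features = CPU::InferCPUFeaturesFromOS();
//   if (features.Has(CPUFeatures::kCRC32, CPUFeatures::kAES)) {
//     // Safe to generate CRC32* and AES* instructions.
//   }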


#ifdef __aarch64__
#define VIXL_READ_ID_REG(NAME, MRS_ARG)        \
  NAME CPU::Read##NAME() {                     \
    uint64_t value = 0;                        \
    __asm__("mrs %0, " MRS_ARG : "=r"(value)); \
    return NAME(value);                        \
  }
#else  // __aarch64__
#define VIXL_READ_ID_REG(NAME, MRS_ARG) \
  NAME CPU::Read##NAME() {              \
    VIXL_UNREACHABLE();                 \
    return NAME(0);                     \
  }
#endif  // __aarch64__

VIXL_AARCH64_ID_REG_LIST(VIXL_READ_ID_REG)

#undef VIXL_READ_ID_REG


// Initialise to smallest possible cache size.
unsigned CPU::dcache_line_size_ = 1;
unsigned CPU::icache_line_size_ = 1;


// Currently computes I and D cache line size.
void CPU::SetUp() {
  uint32_t cache_type_register = GetCacheType();

  // The cache type register holds information about the caches, including the
  // I and D cache line sizes.
  static const int kDCacheLineSizeShift = 16;
  static const int kICacheLineSizeShift = 0;
  static const uint32_t kDCacheLineSizeMask = 0xf << kDCacheLineSizeShift;
  static const uint32_t kICacheLineSizeMask = 0xf << kICacheLineSizeShift;

  // The cache type register holds the I and D cache line sizes, in words, as
  // a power of two.
  uint32_t dcache_line_size_power_of_two =
      (cache_type_register & kDCacheLineSizeMask) >> kDCacheLineSizeShift;
  uint32_t icache_line_size_power_of_two =
      (cache_type_register & kICacheLineSizeMask) >> kICacheLineSizeShift;

  dcache_line_size_ = 4 << dcache_line_size_power_of_two;
  icache_line_size_ = 4 << icache_line_size_power_of_two;
}
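
// A worked example (illustrative): on a core where CTR_EL0.DminLine and
// CTR_EL0.IminLine both read as 4, each cache line is 2^4 = 16 words, i.e.
// 4 << 4 = 64 bytes, so dcache_line_size_ and icache_line_size_ both become
// 64.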


uint32_t CPU::GetCacheType() {
#ifdef __aarch64__
  uint64_t cache_type_register;
  // Copy the content of the cache type register to a core register.
  __asm__ __volatile__("mrs %[ctr], ctr_el0"  // NOLINT(runtime/references)
                       : [ctr] "=r"(cache_type_register));
  VIXL_ASSERT(IsUint32(cache_type_register));
  return static_cast<uint32_t>(cache_type_register);
#else
  // This will lead to a cache with 1 byte long lines, which is fine since
  // neither EnsureIAndDCacheCoherency nor the simulator will need this
  // information.
  return 0;
#endif
}


// Query the SVE vector length. This requires CPUFeatures::kSVE.
int CPU::ReadSVEVectorLengthInBits() {
#ifdef __aarch64__
  uint64_t vl;
  // To support compilers that don't understand `rdvl`, encode the value
  // directly and move it manually.
  __asm__(
      " .word 0x04bf5100\n"  // rdvl x0, #8
      " mov %[vl], x0\n"
      : [vl] "=r"(vl)
      :
      : "x0");
  VIXL_ASSERT(vl <= INT_MAX);
  return static_cast<int>(vl);
#else
  VIXL_UNREACHABLE();
  return 0;
#endif
}
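
// Why the multiplier is 8 (illustrative arithmetic): `rdvl xd, #imm` returns
// imm times the vector length in bytes, so `rdvl x0, #8` yields the length in
// bits directly. A 256-bit SVE implementation, for example, has a 32-byte
// vector length, and rdvl x0, #8 returns 8 * 32 = 256.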


void CPU::EnsureIAndDCacheCoherency(void* address, size_t length) {
#ifdef __aarch64__
  // Implement the cache synchronisation for all targets where AArch64 is the
  // host, even if we're building the simulator for an AArch64 host. This
  // allows for cases where the user wants to simulate code as well as run it
  // natively.

  if (length == 0) {
    return;
  }

  // The code below assumes user space cache operations are allowed.

  // Work out the line sizes for each cache, and use them to determine the
  // start addresses.
  uintptr_t start = reinterpret_cast<uintptr_t>(address);
  uintptr_t dsize = static_cast<uintptr_t>(dcache_line_size_);
  uintptr_t isize = static_cast<uintptr_t>(icache_line_size_);
  uintptr_t dline = start & ~(dsize - 1);
  uintptr_t iline = start & ~(isize - 1);

  // Cache line sizes are always a power of 2.
  VIXL_ASSERT(IsPowerOf2(dsize));
  VIXL_ASSERT(IsPowerOf2(isize));
  uintptr_t end = start + length;

  do {
    __asm__ __volatile__(
        // Clean each line of the D cache containing the target data.
        //
        // dc   : Data Cache maintenance
        //  c   : Clean
        //  va  : by (Virtual) Address
        //  u   : to the point of Unification
        // The point of unification for a processor is the point by which the
        // instruction and data caches are guaranteed to see the same copy of
        // a memory location. See ARM DDI 0406B page B2-12 for more
        // information.
        " dc cvau, %[dline]\n"
        :
        : [dline] "r"(dline)
        // This code does not write to memory, but the "memory" dependency
        // prevents GCC from reordering the code.
        : "memory");
    dline += dsize;
  } while (dline < end);

  __asm__ __volatile__(
      // Make sure that the data cache operations (above) complete before the
      // instruction cache operations (below).
      //
      // dsb  : Data Synchronisation Barrier
      //  ish : Inner SHareable domain
      //
      // The point of unification for an Inner Shareable shareability domain
      // is the point by which the instruction and data caches of all the
      // processors in that Inner Shareable shareability domain are guaranteed
      // to see the same copy of a memory location. See ARM DDI 0406B page
      // B2-12 for more information.
      " dsb ish\n"
      :
      :
      : "memory");

  do {
    __asm__ __volatile__(
        // Invalidate each line of the I cache containing the target data.
        //
        // ic   : Instruction Cache maintenance
        //  i   : Invalidate
        //  va  : by Address
        //  u   : to the point of Unification
        " ic ivau, %[iline]\n"
        :
        : [iline] "r"(iline)
        : "memory");
    iline += isize;
  } while (iline < end);

  __asm__ __volatile__(
      // Make sure that the instruction cache operations (above) take effect
      // before the isb (below).
      " dsb ish\n"

      // Ensure that any instructions already in the pipeline are discarded
      // and reloaded from the new data.
      // isb : Instruction Synchronisation Barrier
      " isb\n"
      :
      :
      : "memory");
#else
  // If the host isn't AArch64, we must be using the simulator, so this
  // function doesn't have to do anything.
  USE(address, length);
#endif
}
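
// A minimal usage sketch (illustrative; the function and buffer names are
// hypothetical): after writing freshly generated machine code into an
// executable buffer, flush it before branching to it.
//
//   void FinalizeCode(void* code_buffer, size_t code_size) {
//     CPU::EnsureIAndDCacheCoherency(code_buffer, code_size);
//     // It is now architecturally safe to execute from code_buffer.
//   }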


}  // namespace aarch64
}  // namespace vixl