Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/tools/testing/selftests/arm64/fp/za-test.S
26295 views
1
// SPDX-License-Identifier: GPL-2.0-only
2
// Copyright (C) 2021 ARM Limited.
3
// Original author: Mark Brown <broonie@kernel.org>
4
//
5
// Scalable Matrix Extension ZA context switch test
6
// Repeatedly writes unique test patterns into each ZA tile
7
// and reads them back to verify integrity.
8
//
9
// for x in `seq 1 NR_CPUS`; do za-test & pids=$pids\ $! ; done
10
// (leave it running for as long as you want...)
11
// kill $pids
12
13
#include <asm/unistd.h>
14
#include "assembler.h"
15
#include "asm-offsets.h"
16
#include "sme-inst.h"
17
18
.arch_extension sve

// Largest vector length this test reserves storage for: MAXVL is used as a
// size in bits, MAXVL_B is the same size in bytes.
#define MAXVL		2048
#define MAXVL_B		(MAXVL / 8)

// Backing storage, emitted into .data:
//   zaref   - in-memory shadow of the ZA contents, MAXVL_B * MAXVL_B bytes
//   scratch - staging buffer for a single vector, MAXVL_B bytes
// The section in effect before this point is restored by .popsection.
.pushsection .text
.data
.p2align 4
zaref:
	.space	MAXVL_B * MAXVL_B
scratch:
	.space	MAXVL_B
.popsection
33
34
// memcpy: minimal byte-at-a-time copy.
//   x0: destination address
//   x1: source address
//   x2: number of bytes to copy (zero is allowed and copies nothing)
// Clobbers x0-x3 and the condition flags. When bytes are copied, x0 and x1
// are left pointing just past the last byte written/read and x2 is zero.
function memcpy
	cmp	x2, #0
	b.eq	.Lmemcpy_done		// empty copy
.Lmemcpy_byte:
	ldrb	w3, [x1], #1		// w3 = *src++
	strb	w3, [x0], #1		// *dst++ = w3
	subs	x2, x2, #1		// one byte fewer to go
	b.ne	.Lmemcpy_byte
.Lmemcpy_done:
	ret
endfunction
45
46
// pattern: fill the scratch buffer with the test pattern for one ZA row.
//   x0: pid
//   x1: row in ZA
//   x2: generation
//
// The inputs are packed into a 32-bit word. MAXVL_B / 4 words are then
// stored to consecutive locations in scratch, adding one to the word after
// each store, so the low byte acts as a lane counter:
//   bits 31:28	generation number (increments once per test_loop)
//   bits 27:16	pid
//   bits 15: 8	row number
//   bits  7: 0	32-bit lane index
//
// Clobbers x0, x1, x3 and the condition flags.
function pattern
	mov	w3, wzr
	bfi	w3, w2, #28, #4		// Generation
	bfi	w3, w0, #16, #12	// PID
	bfi	w3, w1, #8, #8		// Row

	ldr	x0, =scratch		// x0 = store cursor
	mov	w1, #MAXVL_B / 4	// w1 = words still to store

.Lpattern_word:
	str	w3, [x0], #4
	add	w3, w3, #1		// Lane
	subs	w1, w1, #1
	b.ne	.Lpattern_word

	ret
endfunction
74
75
// _adrza: get the address of the shadow data for a ZA horizontal vector.
//   xd:    register that receives the address:
//          zaref + x<nrtmp> * xn
//   xn:    register holding the vector (row) index
//   nrtmp: bare register NUMBER (no "x" prefix). x<nrtmp> is overwritten by
//          "rdsvl <nrtmp>, 1" and still holds that value afterwards, so
//          callers can reuse it as the length of one row.
.macro _adrza xd, xn, nrtmp
	ldr	\xd, =zaref
	rdsvl	\nrtmp, 1
	madd	\xd, x\nrtmp, \xn, \xd	// xd = x<nrtmp> * xn + zaref
.endm
81
82
// setup_za: write a fresh test pattern into one ZA horizontal vector and
// into the matching row of the zaref shadow buffer.
//   x0: pid
//   x1: row number
//   x2: generation
// The return address is parked in x4 across the nested calls, so this
// relies on pattern and memcpy leaving x4, x5 and x12 alone.
// Clobbers x0-x5, x12 and x30.
function setup_za
	mov	x4, x30			// keep the return address
	mov	x12, x1			// Use x12 for vector select

	bl	pattern			// Get pattern in scratch buffer

	_adrza	x0, x12, 2		// x0 = shadow row, x2 = length for memcpy
	mov	x5, x0			// memcpy advances x0, so keep a copy in x5
	ldr	x1, =scratch
	bl	memcpy			// shadow row <- scratch

	_ldr_za 12, 5			// load vector w12 from pointer x5

	ret	x4
endfunction
100
101
// memcmp: byte-wise comparison that only returns on a full match.
//   x0: first buffer
//   x1: second buffer
//   x2: number of bytes to compare (zero returns immediately)
// On the first differing byte control transfers to barf with x0, x1 and x2
// restored to their values on entry; that path does not return here.
// x0-x2 are preserved on return; x3-x5 and the condition flags are clobbered.
// Uses 0x20 bytes of stack while comparing.
function memcmp
	cbz	x2, .Lmemcmp_ret

	stp	x0, x1, [sp, #-0x20]!	// save the arguments for barf
	str	x2, [sp, #0x10]

	mov	x5, xzr			// x5 = byte offset
.Lmemcmp_byte:
	ldrb	w3, [x0, x5]
	ldrb	w4, [x1, x5]
	add	x5, x5, #1
	cmp	w3, w4
	b.ne	.Lmemcmp_restore	// mismatch: leave with flags == ne
	subs	x2, x2, #1
	b.ne	.Lmemcmp_byte		// falls through with flags == eq when done

.Lmemcmp_restore:
	ldr	x2, [sp, #0x10]		// loads do not disturb the flags
	ldp	x0, x1, [sp], #0x20
	b.ne	barf

.Lmemcmp_ret:
	ret
endfunction
126
127
// check_za: verify that a ZA vector matches its shadow in memory, else abort.
//   x0: row number
// The row is stored from ZA into the scratch buffer (which is first
// overwritten by memfill_ae) and then compared against the zaref shadow row
// by tail-calling memcmp, so a mismatch ends up in barf.
// The return address is held in x3 across the call to memfill_ae.
// NOTE(review): this assumes memfill_ae leaves x3 and x5-x7 untouched; it is
// defined outside this file - confirm.
// Clobbers x0-x7 and x12.
function check_za
	mov	x3, x30			// return address, restored before the tail call

	mov	x12, x0			// x12 selects the vector
	_adrza	x5, x0, 6		// pointer to expected value in x5, length in x6
	mov	x4, x0
	ldr	x7, =scratch		// x7 is scratch

	mov	x0, x7			// Poison scratch
	mov	x1, x6
	bl	memfill_ae

	_str_za 12, 7			// save vector w12 to pointer x7

	mov	x0, x5			// expected
	mov	x1, x7			// actual
	mov	x2, x6			// length
	mov	x30, x3
	b	memcmp			// tail call: memcmp returns to our caller
endfunction
150
151
// irritator_handler: signal handler that disturbs the live SME register
// state; signal return will undo our changes.
//   x2: base pointer used to reach the saved registers of the interrupted
//       context (via ucontext_regs)
// Clobbers x0.
function irritator_handler
	// Bump the saved copy of x23, the irritation signal count:
	ldr	x0, [x2, #ucontext_regs + 8 * 23]
	add	x0, x0, #1
	str	x0, [x2, #ucontext_regs + 8 * 23]

	// This will reset ZA to all bits 0
	smstop
	smstart_za

	ret
endfunction
164
165
// tickle_handler: signal handler that only counts the signal.
//   x2: base pointer used to reach the saved registers of the interrupted
//       context (via ucontext_regs)
// Clobbers x0.
function tickle_handler
	// Bump the saved copy of x23, the signal count:
	ldr	x0, [x2, #ucontext_regs + 8 * 23]
	add	x0, x0, #1
	str	x0, [x2, #ucontext_regs + 8 * 23]

	ret
endfunction
173
174
// terminate_handler: signal handler that prints a summary and exits.
//   w0: signal number
//   x2: base pointer used to reach the saved registers of the interrupted
//       context (via ucontext_regs)
// Prints the signal number, then the interrupted context's x22 (iterations)
// and x23 (signals), and finally calls exit(0). Does not return.
function terminate_handler
	mov	w21, w0			// signal number, kept across the print calls
	mov	x20, x2			// saved-context pointer, likewise

	puts	"Terminated by signal "
	mov	w0, w21
	bl	putdec

	puts	", no error, iterations="
	ldr	x0, [x20, #ucontext_regs + 8 * 22]
	bl	putdec

	puts	", signals="
	ldr	x0, [x20, #ucontext_regs + 8 * 23]
	bl	putdecn

	mov	x0, #0			// exit status 0
	mov	x8, #__NR_exit
	svc	#0
endfunction
192
193
// setsignal: install a signal handler with rt_sigaction.
//   w0: signal number
//   x1: sa_action
//   w2: sa_flags
// A zeroed sigaction structure of sa_sz bytes is built on the stack, 16
// bytes above the saved x30. If the syscall returns non-zero in w0,
// "sigaction failure" is printed and control goes to .Labort.
// NOTE(review): relies on memclr (defined elsewhere) preserving x4-x6.
// Clobbers x0-x6,x8

// Stack frame: 16 bytes holding x30 plus sa_sz rounded up to 16 bytes.
#define SETSIGNAL_FRAME	((sa_sz + 15) / 16 * 16 + 16)

function setsignal
	str	x30, [sp, #-SETSIGNAL_FRAME]!

	// Move the arguments out of the way of the memclr call
	mov	w4, w0			// signal number
	mov	x5, x1			// handler
	mov	w6, w2			// flags

	add	x0, sp, #16		// zero the on-stack structure
	mov	x1, #sa_sz
	bl	memclr

	mov	w0, w4			// arg 0: signal number
	add	x1, sp, #16		// arg 1: new action
	str	w6, [x1, #sa_flags]
	str	x5, [x1, #sa_handler]
	mov	x2, #0			// arg 2: no old action wanted
	mov	x3, #sa_mask_sz		// arg 3: size of the signal mask
	mov	x8, #__NR_rt_sigaction
	svc	#0

	cbz	w0, .Lsetsignal_ok

	puts	"sigaction failure\n"
	b	.Labort

.Lsetsignal_ok:
	ldr	x30, [sp], #SETSIGNAL_FRAME
	ret
endfunction
225
226
// Main program entry point
//
// Registers that stay live for the whole run:
//   x19: value of "rdsvl 19, 8" taken at startup (reported as bits)
//   x20: PID
//   x21: down-counter for the row loops
//   x22: generation number (reported as "iterations" on termination)
//   x23: signal count, incremented by the signal handlers
//   x24: value of "rdsvl 24, 1", used to turn x21 into a row number
.globl _start
function _start
	enable_gcs

	mov	x23, #0			// signal count

	// SIGINT and SIGTERM print a summary and exit
	mov	w0, #SIGINT
	adr	x1, terminate_handler
	mov	w2, #SA_SIGINFO
	bl	setsignal

	mov	w0, #SIGTERM
	adr	x1, terminate_handler
	mov	w2, #SA_SIGINFO
	bl	setsignal

	// SIGUSR1 disturbs the live ZA state, SIGUSR2 is only counted
	mov	w0, #SIGUSR1
	adr	x1, irritator_handler
	mov	w2, #SA_SIGINFO
	orr	w2, w2, #SA_NODEFER
	bl	setsignal

	mov	w0, #SIGUSR2
	adr	x1, tickle_handler
	mov	w2, #SA_SIGINFO
	orr	w2, w2, #SA_NODEFER
	bl	setsignal

	puts	"Streaming mode "
	smstart_za

	// Sanity-check and report the vector length:
	// it must lie in [128, 2048] and be a multiple of 8

	rdsvl	19, 8
	cmp	x19, #128
	b.lo	.Lstart_bad_vl
	cmp	x19, #2048
	b.hi	.Lstart_bad_vl
	tst	x19, #(8 - 1)
	b.eq	.Lstart_vl_ok

.Lstart_bad_vl:
	puts	"bad vector length: "
	mov	x0, x19
	bl	putdecn
	b	.Labort

.Lstart_vl_ok:
	puts	"vector length:\t"
	mov	x0, x19
	bl	putdec
	puts	" bits\n"

	// Obtain our PID, to ensure test pattern uniqueness between processes
	mov	x8, #__NR_getpid
	svc	#0
	mov	x20, x0

	puts	"PID:\t"
	mov	x0, x20
	bl	putdecn

	mov	x22, #0			// generation number, increments per iteration
.Ltest_loop:
	rdsvl	0, 8			// the vector length must not have changed
	cmp	x0, x19
	b.ne	vl_barf

	rdsvl	21, 1			// Set up ZA & shadow with test pattern
.Lstart_setup_row:			// rows are written from x21 - 1 down to 0
	mov	x0, x20
	sub	x1, x21, #1
	mov	x2, x22
	bl	setup_za
	subs	x21, x21, #1
	b.ne	.Lstart_setup_row

	mov	x8, #__NR_sched_yield	// encourage preemption
	svc	#0

	mrs	x0, S3_3_C4_C2_2	// SVCR should have ZA=1,SM=0
	and	x1, x0, #3
	cmp	x1, #2
	b.ne	svcr_barf

	rdsvl	21, 1			// Verify that the data made it through
	rdsvl	24, 1			// x24 - x21 is the row being checked
.Lstart_check_row:			// rows are checked from 0 upwards
	sub	x0, x24, x21
	bl	check_za
	subs	x21, x21, #1
	b.ne	.Lstart_check_row

	add	x22, x22, #1		// Everything still working
	b	.Ltest_loop

	// Failure exit, also branched to from setsignal: kill(0, SIGABRT)
.Labort:
	mov	x0, #0
	mov	x1, #SIGABRT
	mov	x8, #__NR_kill
	svc	#0
endfunction
326
327
// barf: report a ZA/shadow mismatch and abort the process.
// Reached from memcmp when a byte differs.
//   x0: expected data
//   x1: actual data
//   x2: data size in bytes
// Also reads state left by the verification loop in _start:
//   x20: PID
//   x21: that loop's down-counter
//   x22: iteration (generation) number
//   x24: the loop's starting count; the row under test is x24 - x21
// Prints the PID, iteration, row, both buffers and the SVCR value read on
// entry, then sends SIGABRT to the pid returned by getpid.
function barf
// fpsimd.c activity log dump hack
//	ldr	w0, =0xdeadc0de
//	mov	w8, #__NR_exit
//	svc	#0
// end hack

	mrs	x13, S3_3_C4_C2_2	// read SVCR before smstop is executed

	smstop
	mov	x10, x0			// expected data
	mov	x11, x1			// actual data
	mov	x12, x2			// data size

	puts	"Mismatch: PID="
	mov	x0, x20
	bl	putdec
	puts	", iteration="
	mov	x0, x22
	bl	putdec
	puts	", row="
	// x21 alone is the loop's down-counter; the row handed to check_za
	// was x24 - x21, so report that.
	sub	x0, x24, x21
	bl	putdecn
	puts	"\tExpected ["
	mov	x0, x10
	mov	x1, x12
	bl	dumphex
	puts	"]\n\tGot      ["
	mov	x0, x11
	mov	x1, x12
	bl	dumphex
	puts	"]\n"
	puts	"\tSVCR: "
	mov	x0, x13
	bl	putdecn

	mov	x8, #__NR_getpid	// x0 = our pid, first argument to kill
	svc	#0
// fpsimd.c activity log dump hack
//	ldr	w0, =0xdeadc0de
//	mov	w8, #__NR_exit
//	svc	#0
// ^ end of hack
	mov	x1, #SIGABRT
	mov	x8, #__NR_kill
	svc	#0
//	mov	x8, #__NR_exit
//	mov	x0, #1
//	svc	#0
endfunction
377
378
// vl_barf: report an unexpected active vector length and exit with status 1.
//   x0: the vector length value that failed the check
// Does not return.
function vl_barf
	mov	x10, x0			// keep the value across the puts call

	puts	"Bad active VL: "
	mov	x0, x10
	bl	putdecn

	mov	x0, #1			// exit status is the first syscall argument
	mov	x8, #__NR_exit
	svc	#0
endfunction
389
390
// svcr_barf: report an unexpected SVCR value and exit with status 1.
//   x0: the SVCR value that failed the check
// Does not return.
function svcr_barf
	mov	x10, x0			// keep the value across the puts call

	puts	"Bad SVCR: "
	mov	x0, x10
	bl	putdecn

	mov	x0, #1			// exit status is the first syscall argument
	mov	x8, #__NR_exit
	svc	#0
endfunction
401
402