GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/arm-optimized-routines/string/aarch64/memset.S

/*
 * memset - fill memory with a constant byte
 *
 * Copyright (c) 2012-2024, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
 *
 */

#include "asmdefs.h"

#define dstin	x0
#define val	x1
#define valw	w1
#define count	x2
#define dst	x3
#define dstend	x4
#define zva_val	x5
#define off	x3
#define dstend2	x5
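
/* off and dstend2 reuse the dst and zva_val registers (x3/x5).  They are
   only live in the short-length paths below, which return before the long
   path needs dst and zva_val.  */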

ENTRY (__memset_aarch64)
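	/* Size dispatch: 0..15 bytes in set_small, 16..63 bytes inline below,
	   64..128 bytes in set_128, anything larger in set_long.  */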
	dup	v0.16B, valw
	cmp	count, 16
	b.lo	L(set_small)

	add	dstend, dstin, count
	cmp	count, 64
	b.hs	L(set_128)

	/* Set 16..63 bytes.  */
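	/* off is 16 when count >= 32 and 0 otherwise, so the four overlapping
	   16-byte stores below cover any length in 16..63 without a branch.  */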
	mov	off, 16
	and	off, off, count, lsr 1
	sub	dstend2, dstend, off
	str	q0, [dstin]
	str	q0, [dstin, off]
	str	q0, [dstend2, -16]
	str	q0, [dstend, -16]
	ret

	.p2align 4
	/* Set 0..15 bytes.  */
L(set_small):
	add	dstend, dstin, count
	cmp	count, 4
	b.lo	2f
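	/* Set 4..15 bytes: off is 1 when count >= 8 and 0 otherwise, so the
	   four overlapping 4-byte stores below cover the whole range.  */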
	lsr	off, count, 3
	sub	dstend2, dstend, off, lsl 2
	str	s0, [dstin]
	str	s0, [dstin, off, lsl 2]
	str	s0, [dstend2, -4]
	str	s0, [dstend, -4]
	ret

	/* Set 0..3 bytes.  */
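	/* One byte is stored at offsets 0, count/2 and count - 1, which covers
	   lengths 1..3; a zero count branches straight to the return.  */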
2:	cbz	count, 3f
	lsr	off, count, 1
	strb	valw, [dstin]
	strb	valw, [dstin, off]
	strb	valw, [dstend, -1]
3:	ret

	.p2align 4
L(set_128):
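	/* Set 64..128 bytes: two 32-byte store pairs from the start and two
	   from the end overlap in the middle.  dst is rounded down to 16-byte
	   alignment here; the set_long path reached from here relies on it.  */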
	bic	dst, dstin, 15
	cmp	count, 128
	b.hi	L(set_long)
	stp	q0, q0, [dstin]
	stp	q0, q0, [dstin, 32]
	stp	q0, q0, [dstend, -64]
	stp	q0, q0, [dstend, -32]
	ret

	.p2align 4
L(set_long):
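	/* More than 128 bytes.  If the fill byte is zero and DC ZVA is usable
	   with a 64-byte block size, clear the bulk with dc zva; otherwise
	   fall through to the store-pair loop at no_zva.  */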
	str	q0, [dstin]
	str	q0, [dst, 16]
	tst	valw, 255
	b.ne	L(no_zva)
#ifndef SKIP_ZVA_CHECK
	mrs	zva_val, dczid_el0
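	/* DCZID_EL0[3:0] is log2 of the ZVA block size in words, so the value
	   4 means 64 bytes; masking with 31 keeps bit 4 (DZP), so the compare
	   also fails when DC ZVA is prohibited.  */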
	and	zva_val, zva_val, 31
	cmp	zva_val, 4		/* ZVA size is 64 bytes.  */
	b.ne	L(no_zva)
#endif
	stp	q0, q0, [dst, 32]
	bic	dst, dstin, 63
	sub	count, dstend, dst	/* Count is now 64 too large.  */
	sub	count, count, 64 + 64	/* Adjust count and bias for loop.  */

	/* Write last bytes before ZVA loop.  */
	stp	q0, q0, [dstend, -64]
	stp	q0, q0, [dstend, -32]

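	/* Zero 64 bytes per iteration starting at dst + 64; the bytes from
	   dstin up to dst + 64 and the final 64 bytes were already written
	   above.  */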
	.p2align 4
L(zva64_loop):
	add	dst, dst, 64
	dc	zva, dst
	subs	count, count, 64
	b.hi	L(zva64_loop)
	ret

	.p2align 3
L(no_zva):
	sub	count, dstend, dst	/* Count is 32 too large.  */
	sub	count, count, 64 + 32	/* Adjust count and bias for loop.  */
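	/* Store 64 bytes per iteration starting at dst + 32; the final 64
	   bytes are written with the (possibly overlapping) stores after the
	   loop.  */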
L(no_zva_loop):
	stp	q0, q0, [dst, 32]
	stp	q0, q0, [dst, 64]
	add	dst, dst, 64
	subs	count, count, 64
	b.hi	L(no_zva_loop)
	stp	q0, q0, [dstend, -64]
	stp	q0, q0, [dstend, -32]
	ret

END (__memset_aarch64)