Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/arm64/lib/strcmp.S
26424 views
1
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2012-2022, Arm Limited.
 *
 * Adapted from the original at:
 * https://github.com/ARM-software/optimized-routines/blob/189dfefe37d54c5b/string/aarch64/strcmp.S
 */
8
9
#include <linux/linkage.h>
10
#include <asm/assembler.h>
11
12
/* Assumptions:
 *
 * ARMv8-a, AArch64.
 * MTE compatible.
 */
17
18
/* L(name) expands to .Lname, i.e. an assembler-local label.  */
#define L(label) .L ## label

/* 0x01 and 0x7f replicated into each byte of a 64-bit word.  Both are
   operands of the word-at-a-time NUL test in the code below.  */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f

/* Incoming string pointers and the return value.
   result shares x0 with src1.  */
#define src1		x0
#define src2		x1
#define result		x0

/* Working registers.  Two pairs of names share one register each:
   diff/off1 are both x5 and syndrome/tmp are both x6.  The *w names
   are the 32-bit views of data1/data2.  */
#define data1		x2
#define data1w		w2
#define data2		x3
#define data2w		w3
#define has_nul		x4
#define diff		x5
#define off1		x5
#define syndrome	x6
#define tmp		x6
#define data3		x7
#define zeroones	x8
#define shift		x9
#define off2		x10
40
41
/* On big-endian early bytes are at MSB and on little-endian LSB.
   LS_FW means shifting towards early bytes, so it is a left shift on
   big-endian (__AARCH64EB__ defined) and a right shift otherwise.  */
#ifdef __AARCH64EB__
# define LS_FW lsl
#else
# define LS_FW lsr
#endif
48
49
/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
   can be done in parallel across the entire word.
   Since carry propagation makes 0x1 bytes before a NUL byte appear
   NUL too in big-endian, byte-reverse the data before the NUL check.  */
54
55
56
/*
 * __pi_strcmp - compare two NUL-terminated strings.
 * Also reachable as strcmp through the weak alias at the end.
 *
 * In:    x0 = src1, x1 = src2
 * Out:   x0 = result: the first pair of bytes that differ (or the
 *        terminating NUL) is zero-extended and subtracted, src1 byte
 *        minus src2 byte; 0 when the strings are equal.
 * Uses:  x2-x10 as scratch plus the condition flags.  No stack access
 *        and no calls.
 *
 * Paths:
 *   - src1 and src2 congruent modulo 8: compare 8 bytes per iteration
 *     in L(loop_aligned); L(mutual_align) handles a start that is not
 *     on an 8-byte boundary.
 *   - otherwise: compare byte-wise until src1 is 8-byte aligned, then
 *     run L(loop_unaligned), which also loads the 8-byte-aligned src2
 *     word (data3) and checks it for NUL.
 * Every path except the byte-wise one leaves through L(end) with the
 * last words in data1/data2 and a syndrome that marks the first
 * difference or NUL.
 */
SYM_FUNC_START(__pi_strcmp)
	sub	off2, src2, src1	/* src2 data is addressed as [src1, off2].  */
	mov	zeroones, REP8_01
	and	tmp, src1, 7		/* Offset of src1 within its 8-byte word.  */
	tst	off2, 7
	b.ne	L(misaligned8)		/* Pointers differ modulo 8.  */
	cbnz	tmp, L(mutual_align)	/* Same alignment, not on a boundary.  */

	.p2align 4

L(loop_aligned):
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
L(start_realigned):
	/* has_nul = (X - 1) & ~(X | 0x7f) with X = data1, byte-reversed
	   first on big-endian.  */
#ifdef __AARCH64EB__
	rev	tmp, data1
	sub	has_nul, tmp, zeroones
	orr	tmp, tmp, REP8_7f
#else
	sub	has_nul, data1, zeroones
	orr	tmp, data1, REP8_7f
#endif
	bics	has_nul, has_nul, tmp	/* Non-zero if NUL terminator.  */
	/* Only when no NUL was seen (eq) compare the words; otherwise the
	   flags are forced to NZCV = 0, which reads as "ne".  */
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_aligned)
#ifdef __AARCH64EB__
	rev	has_nul, has_nul	/* Back to the byte order of data1.  */
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
L(end):
	/* On little-endian, byte-reverse so that earlier string bytes sit
	   in more significant positions before counting leading zeros.  */
#ifndef __AARCH64EB__
	rev	syndrome, syndrome
	rev	data1, data1
	rev	data2, data2
#endif
	clz	shift, syndrome
	/* The most-significant-non-zero bit of the syndrome marks either the
	   first bit that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	lsl	data1, data1, shift
	lsl	data2, data2, shift
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, 56
	sub	result, data1, data2, lsr 56
	ret

	.p2align 4

L(mutual_align):
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.  */
	bic	src1, src1, 7
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
	neg	shift, src2, lsl 3	/* Bits to alignment -64.  */
	mov	tmp, -1
	LS_FW	tmp, tmp, shift
	/* The same mask is ORed into both words, so the masked bytes are
	   identical in data1 and data2.  */
	orr	data1, data1, tmp
	orr	data2, data2, tmp
	b	L(start_realigned)

L(misaligned8):
	/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
	   checking to make sure that we don't access beyond the end of SRC2.  */
	cbz	tmp, L(src1_aligned)
L(do_misaligned):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	/* Leave the loop when the src1 byte is NUL or the bytes differ.  */
	cmp	data1w, 0
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.ne	L(done)
	tst	src1, 7
	b.ne	L(do_misaligned)

L(src1_aligned):
	/* Load the aligned word that contains the current src2 position.  */
	neg	shift, src2, lsl 3
	bic	src2, src2, 7
	ldr	data3, [src2], 8
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	/* OR 0x01 into the bytes selected by the shifted zeroones pattern
	   before testing data3 for NUL.  */
	lsr	tmp, zeroones, shift
	orr	data3, data3, tmp
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	bics	has_nul, has_nul, tmp
	b.ne	L(tail)			/* NUL inside this first src2 word.  */

	sub	off1, src2, src1	/* [src1, off1] is the aligned src2 word.  */

	.p2align 4

L(loop_unaligned):
	ldr	data3, [src1, off1]	/* Aligned src2 word, NUL-checked below.  */
	ldr	data2, [src1, off2]	/* src2 bytes matching data1.  */
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	ldr	data1, [src1], 8
	bics	has_nul, has_nul, tmp
	/* Continue only while data3 holds no NUL and data1 == data2.  */
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_unaligned)

	/* Shift the NUL markers found in data3 before merging them with
	   the data1/data2 difference.  */
	lsl	tmp, has_nul, shift
#ifdef __AARCH64EB__
	rev	tmp, tmp
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, tmp
	cbnz	syndrome, L(end)
L(tail):
	/* data2 and has_nul are derived from the already-loaded data3 by
	   shifting, rather than by another load from src2.  */
	ldr	data1, [src1]
	neg	shift, shift
	lsr	data2, data3, shift
	lsr	has_nul, has_nul, shift
#ifdef __AARCH64EB__
	rev	data2, data2
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
	b	L(end)

L(done):
	/* Byte-wise exit: data1/data2 hold the two bytes just loaded with
	   ldrb.  */
	sub	result, data1, data2
	ret
SYM_FUNC_END(__pi_strcmp)
SYM_FUNC_ALIAS_WEAK(strcmp, __pi_strcmp)
EXPORT_SYMBOL_NOKASAN(strcmp)
191
192