/* Source: GitHub repository freebsd/freebsd-src, branch main,
   path contrib/arm-optimized-routines/string/aarch64/strcmp.S.  */
/*
 * strcmp - compare two strings
 *
 * Copyright (c) 2012-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64.
 * MTE compatible.
 */

#include "asmdefs.h"

/* Byte-replicated constants used by the word-at-a-time NUL test:
   0x01 in every byte and 0x7f in every byte.  */
#define REP8_01	0x0101010101010101
#define REP8_7f	0x7f7f7f7f7f7f7f7f

/* Register aliases used by the routine below.  Several names share a
   register on purpose: result reuses x0 (src1), off1 reuses x5 (diff)
   and tmp reuses x6 (syndrome).  Keep that in mind before extending the
   live range of any of them.  */

/* Arguments and return value.  */
#define src1		x0
#define src2		x1
#define result		x0

/* Working registers.  The ...w names are the 32-bit views of the same
   registers, used for the byte loads.  */
#define data1		x2
#define data1w		w2
#define data2		x3
#define data2w		w3
#define has_nul		x4
#define diff		x5
#define off1		x5
#define syndrome	x6
#define tmp		x6
#define data3		x7
#define zeroones	x8
#define shift		x9
#define off2		x10

/* On big-endian early bytes are at MSB and on little-endian LSB.
   LS_FW means shifting towards early bytes, so it expands to the shift
   mnemonic that moves bits towards the earlier string bytes for the
   endianness being built: lsl on big-endian, lsr on little-endian.  */
#ifdef __AARCH64EB__
# define LS_FW lsl
#else
# define LS_FW lsr
#endif

/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
   can be done in parallel across the entire word.
   Since carry propagation makes 0x1 bytes before a NUL byte appear
   NUL too in big-endian, byte-reverse the data before the NUL check.  */

/* __strcmp_aarch64 - compare two NUL-terminated strings.

   In:       x0 = src1, x1 = src2.
   Out:      x0 = result: zero if the strings compare equal, otherwise a
	     value whose sign gives the order of the first differing
	     bytes, compared as unsigned chars.
   Clobbers: x1-x10 and the condition flags.  The stack is not used.

   Three cases are distinguished by the low three address bits:
     - both pointers 8-byte aligned: L(loop_aligned) compares one word
       per iteration;
     - same alignment but not on a boundary: L(mutual_align) rounds
       down, neutralises the leading bytes and joins the aligned loop;
     - different alignment: L(misaligned8) compares bytes until src1 is
       aligned, then L(loop_unaligned) compares words while using
       aligned loads of src2 to look for the terminator.  */
ENTRY (__strcmp_aarch64)
	/* off2 = src2 - src1, so src2's data is addressed as src1 + off2.  */
	sub	off2, src2, src1
	mov	zeroones, REP8_01
	/* tmp = offset of src1 within its 8-byte word.  */
	and	tmp, src1, 7
	/* Low bits of off2 non-zero: the pointers differ modulo 8.  */
	tst	off2, 7
	b.ne	L(misaligned8)
	/* Same alignment, but not on an 8-byte boundary.  */
	cbnz	tmp, L(mutual_align)

	.p2align 4

	/* Both strings 8-byte aligned: one word of each per iteration.  */
L(loop_aligned):
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
L(start_realigned):
#ifdef __AARCH64EB__
	rev	tmp, data1
	sub	has_nul, tmp, zeroones
	orr	tmp, tmp, REP8_7f
#else
	sub	has_nul, data1, zeroones
	orr	tmp, data1, REP8_7f
#endif
	bics	has_nul, has_nul, tmp	/* Non-zero if NUL terminator.  */
	/* If no NUL was seen (Z set by bics) compare the two words,
	   otherwise force NZCV to 0 so the b.eq below falls through.  */
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_aligned)
#ifdef __AARCH64EB__
	/* The NUL test ran on byte-reversed data; put the markers back
	   into data order.  */
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul

	/* Common exit.  Expects data1 and data2 in memory byte order and
	   syndrome with bits set where they differ or where a NUL is.  */
L(end):
#ifndef __AARCH64EB__
	/* Little-endian keeps early bytes at the LSB; reverse so that the
	   earliest byte is the most significant, as clz needs.  */
	rev	syndrome, syndrome
	rev	data1, data1
	rev	data2, data2
#endif
	clz	shift, syndrome
	/* The most-significant-non-zero bit of the syndrome marks either the
	   first bit that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	lsl	data1, data1, shift
	lsl	data2, data2, shift
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, 56
	sub	result, data1, data2, lsr 56
	ret

	.p2align 4

L(mutual_align):
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.  */
	bic	src1, src1, 7
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
	neg	shift, src2, lsl 3	/* Bits to alignment -64.  */
	/* All-ones shifted towards the early bytes leaves ones only in the
	   bytes before the start point.  ORing that into both words makes
	   those bytes equal and non-zero, so they can neither differ nor
	   be taken for the terminator.  */
	mov	tmp, -1
	LS_FW	tmp, tmp, shift
	orr	data1, data1, tmp
	orr	data2, data2, tmp
	b	L(start_realigned)

L(misaligned8):
	/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
	   checking to make sure that we don't access beyond the end of SRC2.  */
	/* tmp still holds src1 & 7 from the entry sequence.  */
	cbz	tmp, L(src1_aligned)
L(do_misaligned):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	cmp	data1w, 0
	/* If the src1 byte is non-zero compare it with the src2 byte,
	   otherwise force "ne" so that the terminator also exits.  */
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.ne	L(done)
	/* Keep going byte by byte until src1 reaches an 8-byte boundary.  */
	tst	src1, 7
	b.ne	L(do_misaligned)

L(src1_aligned):
	/* src1 is 8-byte aligned here and src2 is not.  Load the aligned
	   word that contains src2 and check it for a NUL, after setting the
	   low bit of every byte that precedes src2 in that word so those
	   bytes cannot be taken for the terminator.  */
	neg	shift, src2, lsl 3
	bic	src2, src2, 7
	ldr	data3, [src2], 8
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	lsr	tmp, zeroones, shift
	orr	data3, data3, tmp
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	bics	has_nul, has_nul, tmp
	/* Terminator already inside the first src2 word.  */
	b.ne	L(tail)

	/* src2 now points at its next aligned word; off1 addresses that
	   word relative to src1.  */
	sub	off1, src2, src1

	.p2align 4

	/* Per iteration:
	     data1 - next aligned word of src1;
	     data2 - the matching 8 bytes of src2 (unaligned load);
	     data3 - the aligned src2 word covering the end of data2 and the
		     bytes that follow it; this is the word that is
		     NUL-checked.
	   Loop while data3 holds no NUL and data1 equals data2.  */
L(loop_unaligned):
	ldr	data3, [src1, off1]
	ldr	data2, [src1, off2]
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	ldr	data1, [src1], 8
	bics	has_nul, has_nul, tmp
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_unaligned)

	/* Move the NUL markers for the bytes data3 shares with data2 into
	   data2's byte positions; markers for later bytes are shifted out.  */
	lsl	tmp, has_nul, shift
#ifdef __AARCH64EB__
	rev	tmp, tmp
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, tmp
	/* A difference or a NUL within the words just compared.  */
	cbnz	syndrome, L(end)

	/* A NUL was found in data3 and nothing has been decided yet.  The
	   src2 bytes to compare are produced by shifting data3 rather than
	   by another load from src2, so src2 is not read past the word
	   that holds the terminator.  */
L(tail):
	ldr	data1, [src1]
	neg	shift, shift
	lsr	data2, data3, shift
	lsr	has_nul, has_nul, shift
#ifdef __AARCH64EB__
	rev	data2, data2
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
	b	L(end)

	/* Exit from the byte loop: data1 and data2 hold zero-extended
	   bytes, so their difference is the result.  */
L(done):
	sub	result, data1, data2
	ret

END (__strcmp_aarch64)