Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/arm-optimized-routines/string/arm/memchr.S
39556 views
1
/*
2
* memchr - scan memory for a character
3
*
4
* Copyright (c) 2010-2022, Arm Limited.
5
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6
*/
7
8
/*
9
Written by Dave Gilbert <[email protected]>
10
11
This __memchr_arm routine is optimised on a Cortex-A9 and should work on
12
all ARMv7 processors. It has a fast past for short sizes, and has
13
an optimised path for large data sets; the worst case is finding the
14
match early in a large data set.
15
16
*/
17
18
@ 2011-02-07 david.gilbert@linaro.org
19
@ Extracted from local git a5b438d861
20
@ 2011-07-14 david.gilbert@linaro.org
21
@ Import endianness fix from local git ea786f1b
22
@ 2011-12-07 david.gilbert@linaro.org
23
@ Removed unneeded cbz from align loop
24
25
.syntax unified
26
#if __ARM_ARCH >= 8 && __ARM_ARCH_PROFILE == 'M'
27
/* keep config inherited from -march= */
28
#else
29
.arch armv7-a
30
#endif
31
32
@ this lets us check a flag in a 00/ff byte easily in either endianness
33
#ifdef __ARMEB__
34
#define CHARTSTMASK(c) 1<<(31-(c*8))
35
#else
36
#define CHARTSTMASK(c) 1<<(c*8)
37
#endif
38
.thumb
39
#include "asmdefs.h"
40
41
42
@ ---------------------------------------------------------------------------
43
.thumb_func
44
.align 2
45
.p2align 4,,15
46
.global __memchr_arm
47
.type __memchr_arm,%function
48
.fnstart
49
.cfi_startproc
50
__memchr_arm:
51
@ r0 = start of memory to scan
52
@ r1 = character to look for
53
@ r2 = length
54
@ returns r0 = pointer to character or NULL if not found
55
prologue
56
and r1,r1,#0xff @ Don't think we can trust the caller to actually pass a char
57
58
cmp r2,#16 @ If it's short don't bother with anything clever
59
blt 20f
60
61
tst r0, #7 @ If it's already aligned skip the next bit
62
beq 10f
63
64
@ Work up to an aligned point
65
5:
66
ldrb r3, [r0],#1
67
subs r2, r2, #1
68
cmp r3, r1
69
beq 50f @ If it matches exit found
70
tst r0, #7
71
bne 5b @ If not aligned yet then do next byte
72
73
10:
74
@ At this point, we are aligned, we know we have at least 8 bytes to work with
75
push {r4,r5,r6,r7}
76
.cfi_adjust_cfa_offset 16
77
.cfi_rel_offset 4, 0
78
.cfi_rel_offset 5, 4
79
.cfi_rel_offset 6, 8
80
.cfi_rel_offset 7, 12
81
orr r1, r1, r1, lsl #8 @ expand the match word across to all bytes
82
orr r1, r1, r1, lsl #16
83
bic r4, r2, #7 @ Number of double words to work with
84
mvns r7, #0 @ all F's
85
movs r3, #0
86
87
15:
88
ldmia r0!,{r5,r6}
89
subs r4, r4, #8
90
eor r5,r5, r1 @ Get it so that r5,r6 have 00's where the bytes match the target
91
eor r6,r6, r1
92
uadd8 r5, r5, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0
93
sel r5, r3, r7 @ bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION
94
uadd8 r6, r6, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0
95
sel r6, r5, r7 @ chained....bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION
96
cbnz r6, 60f
97
bne 15b @ (Flags from the subs above) If not run out of bytes then go around again
98
99
pop {r4,r5,r6,r7}
100
.cfi_restore 7
101
.cfi_restore 6
102
.cfi_restore 5
103
.cfi_restore 4
104
.cfi_adjust_cfa_offset -16
105
and r1,r1,#0xff @ Get r1 back to a single character from the expansion above
106
and r2,r2,#7 @ Leave the count remaining as the number after the double words have been done
107
108
20:
109
cbz r2, 40f @ 0 length or hit the end already then not found
110
111
21: @ Post aligned section, or just a short call
112
ldrb r3,[r0],#1
113
subs r2,r2,#1
114
eor r3,r3,r1 @ r3 = 0 if match - doesn't break flags from sub
115
cbz r3, 50f
116
bne 21b @ on r2 flags
117
118
40:
119
.cfi_remember_state
120
movs r0,#0 @ not found
121
epilogue
122
123
50:
124
.cfi_restore_state
125
.cfi_remember_state
126
subs r0,r0,#1 @ found
127
epilogue
128
129
60: @ We're here because the fast path found a hit - now we have to track down exactly which word it was
130
@ r0 points to the start of the double word after the one that was tested
131
@ r5 has the 00/ff pattern for the first word, r6 has the chained value
132
.cfi_restore_state @ Standard post-prologue state
133
.cfi_adjust_cfa_offset 16
134
.cfi_rel_offset 4, 0
135
.cfi_rel_offset 5, 4
136
.cfi_rel_offset 6, 8
137
.cfi_rel_offset 7, 12
138
cmp r5, #0
139
itte eq
140
moveq r5, r6 @ the end is in the 2nd word
141
subeq r0,r0,#3 @ Points to 2nd byte of 2nd word
142
subne r0,r0,#7 @ or 2nd byte of 1st word
143
144
@ r0 currently points to the 3rd byte of the word containing the hit
145
tst r5, # CHARTSTMASK(0) @ 1st character
146
bne 61f
147
adds r0,r0,#1
148
tst r5, # CHARTSTMASK(1) @ 2nd character
149
ittt eq
150
addeq r0,r0,#1
151
tsteq r5, # (3<<15) @ 2nd & 3rd character
152
@ If not the 3rd must be the last one
153
addeq r0,r0,#1
154
155
61:
156
pop {r4,r5,r6,r7}
157
.cfi_restore 7
158
.cfi_restore 6
159
.cfi_restore 5
160
.cfi_restore 4
161
.cfi_adjust_cfa_offset -16
162
subs r0,r0,#1
163
epilogue
164
.cfi_endproc
165
.cantunwind
166
.fnend
167
168
.size __memchr_arm, . - __memchr_arm
169
170