/* Origin: GitHub repository freebsd/freebsd-src, path lib/libc/amd64/string/strrchr.S (branch main) */
/*-
2
* Copyright (c) 2023 The FreeBSD Foundation
3
*
4
* This software was developed by Robert Clausecker <[email protected]>
5
* under sponsorship from the FreeBSD Foundation.
6
*
7
* Redistribution and use in source and binary forms, with or without
8
* modification, are permitted provided that the following conditions
9
* are met:
10
* 1. Redistributions of source code must retain the above copyright
11
* notice, this list of conditions and the following disclaimer.
12
* 2. Redistributions in binary form must reproduce the above copyright
13
* notice, this list of conditions and the following disclaimer in the
14
* documentation and/or other materials provided with the distribution.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26
* SUCH DAMAGE
27
*/
28
29
#include <machine/asm.h>
30
31
#include "amd64_archlevel.h"
32
33
#define ALIGN_TEXT .p2align 4,0x90 # 16-byte alignment, nop-filled
34
35
/* Export rindex as a weak symbol that resolves to strrchr. */
	.weak	rindex
	.set	rindex, strrchr

/*
 * List the strrchr implementations defined in this file (scalar and
 * baseline).  The ARCHFUNCS/ARCHFUNC/ENDARCHFUNCS macros come from
 * amd64_archlevel.h, which is not visible here; presumably they build
 * the table used to pick one implementation -- confirm in that header.
 */
ARCHFUNCS(strrchr)
	ARCHFUNC(strrchr, scalar)
	ARCHFUNC(strrchr, baseline)
ENDARCHFUNCS(strrchr)
42
43
/*
 * char *strrchr(const char *s, int c) -- scalar implementation that
 * examines the string one aligned 8-byte word at a time.
 *
 * In:	%rdi = s, %esi = c (only the low byte is used)
 * Out:	%rax = address of the last byte equal to (char)c in s, the
 *	terminating NUL being part of the string, or 0 if there is none.
 *
 * Register roles after the setup code:
 *	%rdi	address of the word being examined (or the one after it)
 *	%rsi	c replicated into all eight bytes
 *	%r8	-0x0101010101010101, so that lea performs the subtraction
 *	%r9	0x8080808080808080
 *	%r10	address of the word holding the latest match
 *	%r11	match mask of that word in reversed byte order, 0 if none
 *	%rax	current word, then its NUL mask
 *	%rcx	str ^ c in reversed byte order, then its match mask
 *	%rdx	scratch
 *
 * Zero bytes are located with (x - 0x01..01) & ~x & 0x80..80.  Only the
 * lowest set bit of that expression is guaranteed to denote a real zero
 * byte; higher bits may be spurious because of borrows.  The match
 * words are therefore byte swapped: the lowest set bit then belongs to
 * the last match in string order, which is the one we are after.
 *
 * The string is read in whole aligned words, so bytes preceding s and
 * bytes following the terminator within the same word are loaded too.
 */
ARCHENTRY(strrchr, scalar)
	mov	%edi, %ecx
	and	$~7, %rdi		# align to 8 bytes
	movzbl	%sil, %esi		# clear stray high bits
	movabs	$0x0101010101010101, %r8
	mov	(%rdi), %rax		# load first word
	imul	%r8, %rsi		# replicate char 8 times

	/*
	 * Unaligned input: align to 8 bytes.  Then proceed the same
	 * way as with aligned input, but prevent matches before the
	 * beginning of the string.  This is achieved by oring 0x01
	 * into each byte of the buffer before the string.
	 */
	shl	$3, %ecx		# misalignment in bits (shl uses it mod 64)
	mov	%r8, %r10
	shl	%cl, %r10		# 0x01 where the string is
	xor	%r8, %r10		# 0x01 where it is not
	neg	%r8			# negate 01..01 so we can use lea
	movabs	$0x8080808080808080, %r9

	mov	%rsi, %rcx
	xor	%rax, %rcx		# str ^ c
	or	%r10, %rax		# ensure str != 0 before string
	or	%r10, %rcx		# ensure str ^ c != 0 before string
	bswap	%rcx			# in reverse order, to find last match
	mov	%rdi, %r10		# location of initial match (if any)
	xor	%r11, %r11		# initial match (none)
	add	$8, %rdi		# advance to next iteration
	lea	(%rax, %r8, 1), %rdx	# str - 0x01..01
	not	%rax			# ~str
	and	%rdx, %rax		# (str - 0x01..01) & ~str
	and	%r9, %rax		# not including junk bits
	jnz	1f			# end of string?

	lea	(%rcx, %r8, 1), %rdx	# (str ^ c) - 0x01..01
	not	%rcx			# ~(str ^ c)
	and	%rdx, %rcx		# ((str ^ c) - 0x01..01) & ~(str ^ c)
	and	%r9, %rcx		# not including junk bits
	mov	%rcx, %r11		# remember match in head
	jmp	0f

	/* main loop unrolled twice */
	ALIGN_TEXT
3:	lea	(%rcx, %r8, 1), %rdx	# (str ^ c) - 0x01..01
	not	%rcx			# ~(str ^ c)
	and	%rdx, %rcx		# ((str ^ c) - 0x01..01) & ~(str ^ c)
	and	%r9, %rcx		# not including junk bits
	lea	-8(%rdi), %rdx		# word just examined (lea keeps the flags)
	cmovnz	%rdx, %r10		# remember location of current match
	cmovnz	%rcx, %r11

0:	mov	(%rdi), %rax		# str
	mov	%rsi, %rcx
	xor	%rax, %rcx		# str ^ c
	bswap	%rcx			# in reverse order, to find last match
	lea	(%rax, %r8, 1), %rdx	# str - 0x01..01
	not	%rax			# ~str
	and	%rdx, %rax		# (str - 0x01..01) & ~str
	and	%r9, %rax		# not including junk bits
	jnz	2f			# end of string?

	lea	(%rcx, %r8, 1), %rdx	# (str ^ c) - 0x01..01
	not	%rcx			# ~(str ^ c)
	and	%rdx, %rcx		# ((str ^ c) - 0x01..01) & ~(str ^ c)
	and	%r9, %rcx		# not including junk bits
	cmovnz	%rdi, %r10		# remember location of current match
	cmovnz	%rcx, %r11

	mov	8(%rdi), %rax		# str
	add	$16, %rdi
	mov	%rsi, %rcx
	xor	%rax, %rcx		# str ^ c
	bswap	%rcx
	lea	(%rax, %r8, 1), %rdx	# str - 0x01..01
	not	%rax			# ~str
	and	%rdx, %rax		# (str - 0x01..01) & ~str
	and	%r9, %rax		# not including junk bits
	jz	3b			# no NUL yet?  keep going

	/*
	 * NUL found.  %rax holds the NUL mask of the final word and %rcx
	 * holds str ^ c of that word in reversed byte order.  Bytes past
	 * the terminator are not part of the string: overwrite them with
	 * 0xff in str ^ c before testing for zero bytes.  That way they
	 * can neither match nor cause a borrow that makes an in-string
	 * byte look like a match, while the terminator itself remains
	 * eligible (needed for c == 0).
	 */
1:	sub	$8, %rdi		# undo advance past buffer
2:	lea	-1(%rax), %rdx
	xor	%rdx, %rax		# all bits up to and including the NUL byte
	not	%rax			# 0xff in each byte past the NUL
	bswap	%rax			# same byte order as str ^ c
	or	%rax, %rcx		# bytes past the NUL never match
	lea	(%rcx, %r8, 1), %rdx	# (str ^ c) - 0x01..01
	not	%rcx			# ~(str ^ c)
	and	%rdx, %rcx		# ((str ^ c) - 0x01..01) & ~(str ^ c)
	and	%r9, %rcx		# c found in the tail?
	cmovnz	%rdi, %r10
	cmovnz	%rcx, %r11
	bswap	%r11			# unreverse byte order
	bsr	%r11, %rcx		# last location of c in (R10)
	shr	$3, %rcx		# as byte offset
	lea	(%r10, %rcx, 1), %rax	# pointer to match
	test	%r11, %r11		# was there actually a match?
	cmovz	%r11, %rax		# if not, return null pointer
	ret
ARCHEND(strrchr, scalar)
143
144
/*
 * char *strrchr(const char *s, int c) -- baseline implementation that
 * examines the string in aligned 16-byte chunks using SSE2.
 *
 * In:	%rdi = s, %esi = c (only the low byte is used)
 * Out:	%rax = address of the last byte equal to (char)c in s, the
 *	terminating NUL being part of the string, or 0 if there is none.
 *
 * Register roles:
 *	%rdi	address of the chunk being examined (or the one after it)
 *	%xmm0	c broadcast to all 16 bytes
 *	%xmm1	current chunk, then its byte-wise comparison with c
 *	%xmm2	byte-wise comparison of the current chunk with zero
 *	%eax	NUL mask of the current chunk (one bit per byte)
 *	%ecx	match mask of the current chunk (one bit per byte)
 *	%edx	mask of the bytes of the first chunk that belong to s
 *	%r9	address of the chunk a not yet recorded %ecx belongs to
 *	%r8	address of the chunk holding the latest match, 0 if none
 *	%esi	match mask of that chunk, 1 if none
 *
 * When nothing matches, %r8 = 0 and %esi = 1 make the final bsr/lea
 * pair produce 0 without a separate test.
 *
 * The string is read in whole aligned chunks, so bytes preceding s and
 * bytes following the terminator within the same chunk are loaded too.
 */
ARCHENTRY(strrchr, baseline)
	mov	%edi, %ecx
	and	$~0xf, %rdi		# round s down to a 16-byte boundary
	movdqa	(%rdi), %xmm1		# first chunk
	movd	%esi, %xmm0
	and	$0xf, %ecx		# distance of s from that boundary
	pxor	%xmm2, %xmm2
	mov	$-1, %edx
	punpcklbw %xmm0, %xmm0		# c -> cc
	shl	%cl, %edx		# drop the bits of bytes preceding s
	punpcklwd %xmm0, %xmm0		# cc -> cccc
	xor	%r8, %r8		# no match recorded so far
	mov	$1, %esi		# placeholder mask, gives bsr == 0
	mov	%rdi, %r9		# chunk the pending %ecx refers to
	add	$16, %rdi		# step to the second chunk

	/* first chunk: ignore everything in front of the string */
	pcmpeqb	%xmm1, %xmm2		# 0xff where the chunk holds NUL
	pshufd	$0, %xmm0, %xmm0	# cccc -> cccccccccccccccc
	pcmpeqb	%xmm0, %xmm1		# 0xff where the chunk holds c
	pmovmskb %xmm2, %eax
	pmovmskb %xmm1, %ecx
	and	%edx, %ecx		# matches inside the string only
	and	%edx, %eax		# terminators inside the string only
	jnz	.Lbaseline_nul_prev	# string ends in the first chunk

	/* main loop, two chunks per iteration */
	ALIGN_TEXT
.Lbaseline_loop:
	movdqa	(%rdi), %xmm1
	test	%ecx, %ecx		# did the chunk at (R9) contain c?
	cmovnz	%r9, %r8		# then it is the latest match
	cmovnz	%ecx, %esi
	pxor	%xmm2, %xmm2
	pcmpeqb	%xmm1, %xmm2		# 0xff where the chunk holds NUL
	pcmpeqb	%xmm0, %xmm1		# 0xff where the chunk holds c
	pmovmskb %xmm2, %eax
	pmovmskb %xmm1, %ecx
	test	%eax, %eax		# terminator in the first chunk?
	jnz	.Lbaseline_nul_here

	movdqa	16(%rdi), %xmm1
	test	%ecx, %ecx		# did the chunk at (RDI) contain c?
	cmovnz	%rdi, %r8		# then it is the latest match
	cmovnz	%ecx, %esi
	pxor	%xmm2, %xmm2
	pcmpeqb	%xmm1, %xmm2		# 0xff where the chunk holds NUL
	pcmpeqb	%xmm0, %xmm1		# 0xff where the chunk holds c
	pmovmskb %xmm2, %eax
	pmovmskb %xmm1, %ecx
	lea	16(%rdi), %r9		# chunk the pending %ecx refers to
	add	$32, %rdi
	test	%eax, %eax		# terminator in the second chunk?
	jz	.Lbaseline_loop

	/*
	 * Terminator found.  On entry to .Lbaseline_nul_prev the final
	 * chunk is the one in front of (RDI); on entry to
	 * .Lbaseline_nul_here it is the one at (RDI).
	 */
	ALIGN_TEXT
.Lbaseline_nul_prev:
	sub	$16, %rdi		# back up to the final chunk
.Lbaseline_nul_here:
	lea	-1(%rax), %edx
	xor	%edx, %eax		# bits up to and including the first NUL
	and	%eax, %ecx		# matches no later than the terminator
	cmovnz	%rdi, %r8		# a match here supersedes earlier ones
	cmovnz	%ecx, %esi
	bsr	%esi, %esi		# offset of the last match within (R8)
	lea	(%r8, %rsi, 1), %rax	# address of the match, or 0
	ret
ARCHEND(strrchr, baseline)
209
/*
 * Emit an empty .note.GNU-stack section.  By GNU toolchain convention
 * this marks the object as not needing an executable stack.
 */
.section .note.GNU-stack,"",%progbits
210
211