Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/lib/libc/amd64/string/memrchr.S
39486 views
1
/*-
2
* SPDX-License-Identifier: BSD-2-Clause
3
*
4
* Copyright (c) 2023, 2025 Robert Clausecker <[email protected]>
5
*/
6
7
#include <machine/asm.h>
8
9
#include "amd64_archlevel.h"
10
11
/* Align the next instruction to 16 bytes (2^4), padding with 0x90 (nop) bytes. */
#define ALIGN_TEXT .p2align 4, 0x90
12
13
/*
 * List of the memrchr implementations provided by this file.  The
 * ARCHFUNCS/ARCHFUNC/ENDARCHFUNCS macros are not defined here; they are
 * presumably supplied by amd64_archlevel.h -- see that header for how an
 * entry is chosen.  Each entry names a variant that has a matching
 * ARCHENTRY(memrchr, <variant>) body in this file.
 */
ARCHFUNCS(memrchr)
	ARCHFUNC(memrchr, scalar)
	ARCHFUNC(memrchr, baseline)
ENDARCHFUNCS(memrchr)
17
18
/*
 * memrchr, scalar variant
 * C interface: void *memrchr(const void *buf, int c, size_t len)
 *
 * In:   %rdi = buf, %sil = byte to look for, %rdx = len
 * Out:  %rax = address of the last byte in buf equal to %sil,
 *              or 0 if there is no such byte (including len == 0)
 * Uses: %rax, %rdx and the flags; no stack accesses, no calls
 *
 * The buffer is walked backwards from its last byte, four bytes per
 * loop iteration.  %rdx is kept biased by -4 so that the borrow of the
 * subtraction doubles as the loop condition.  When .Ltail is reached,
 * %rdx therefore holds (bytes still unchecked) - 4, i.e. one of
 * -4, -3, -2 or -1, and %rax points to the last unchecked byte.
 */
ARCHENTRY(memrchr, scalar)
	lea	-1(%rdi, %rdx, 1), %rax	# point to last char in buffer
	sub	$4, %rdx		# 4 bytes left to process?
	jb	.Ltail			# no: only 0..3 bytes in total

	ALIGN_TEXT
0:	cmp	%sil, (%rax)		# match at last entry?
	je	1f

	cmp	%sil, -1(%rax)		# match at second to last entry?
	je	2f

	cmp	%sil, -2(%rax)		# match at third to last entry?
	je	3f

	cmp	%sil, -3(%rax)		# match at fourth to last entry?
	je	4f

	sub	$4, %rax		# step back to the previous group of four
	sub	$4, %rdx
	jae	0b			# another full group of four remains

	/* 0..3 bytes remain; %edx is -4 (none), -3 (one), -2 (two) or -1 (three) */
.Ltail:	cmp	$-3, %edx		# at least one character left to process?
	jb	.Lnotfound

	cmp	%sil, (%rax)
	je	1f

	cmp	$-2, %edx		# at least two characters left to process?
	jb	.Lnotfound

	cmp	%sil, -1(%rax)
	je	2f

	cmp	$-1, %edx		# at least three characters left to process?
	jb	.Lnotfound

	cmp	%sil, -2(%rax)
	je	3f

.Lnotfound:
	xor	%eax, %eax		# return null pointer
	ret

	/*
	 * match found -- adjust rax to point to matching byte:
	 * entering at label k falls through k-1 decrements
	 */
4:	dec	%rax
3:	dec	%rax
2:	dec	%rax
1:	ret
ARCHEND(memrchr, scalar)
68
69
/*
 * memrchr, baseline variant (uses SSE2 instructions)
 * C interface: void *memrchr(const void *buf, int c, size_t len)
 *
 * In:   %rdi = buf, %esi = c (only its low byte is used), %rdx = len
 * Out:  %rax = address of the last byte in buf equal to c,
 *              or 0 if there is no such byte (including len == 0)
 * Uses: %rax, %rcx, %rdx, %rsi, %rdi, %r8, %r9, %xmm0-%xmm2, flags;
 *       no stack accesses, no calls
 *
 * The buffer is examined in 32-byte aligned chunks, beginning with the
 * chunk that holds the last byte and moving towards lower addresses.
 * Chunks are always loaded whole with aligned loads, so bytes outside
 * of [buf, buf + len) may be read from the first and the last chunk.
 * Two bit masks remove such bytes from the match bitmaps:
 *
 *   %r8d  bit i is set iff byte i of the last chunk lies before buf + len
 *   %r9d  bit i is set iff byte i of the first chunk lies at or after buf
 *
 * Both masks are built with 32-bit shifts, which use only the low five
 * bits of %cl; this is why the low 32 bits of the pointers suffice as
 * shift counts.  In a match bitmap, bit i corresponds to the byte at
 * (%rdx + i), so the highest set bit (bsr) is the last match.
 */
ARCHENTRY(memrchr, baseline)
	test		%rdx, %rdx		# empty input?
	je		.Lnomatchb

	lea		(%rdi, %rdx, 1), %ecx	# pointer to end of buffer (low 32 bits)
	lea		-1(%rdi, %rdx, 1), %rdx	# pointer to last char in buffer
	movd		%esi, %xmm2
	and		$~0x1f, %rdx		# pointer to final 32 buffer bytes
	movdqa		(%rdx), %xmm0		# load last 32 bytes
	movdqa		16(%rdx), %xmm1

	punpcklbw	%xmm2, %xmm2		# c -> cc

	mov		$-1, %r8d
	neg		%ecx			# shift count: -(end of buffer) mod 32
	mov		%r8d, %r9d
	shr		%cl, %r8d		# mask with zeroes after the string

	punpcklwd	%xmm2, %xmm2		# cc -> cccc

	mov		%edi, %ecx		# shift count: buf mod 32
	mov		%r9d, %eax		# all ones, used if buf begins in an earlier chunk
	shl		%cl, %r9d		# mask with zeroes before the string

	pshufd		$0, %xmm2, %xmm2	# cccc -> cccccccccccccccc

	cmp		%rdx, %rdi		# tail is beginning of buffer?
	cmovae		%r9d, %eax		# if yes, do combined head/tail processing
	and		%r8d, %eax		# mask of bytes in tail part of string

	/* process tail */
	pcmpeqb		%xmm2, %xmm1
	pcmpeqb		%xmm2, %xmm0
	pmovmskb	%xmm1, %esi		# c is no longer needed in %esi
	pmovmskb	%xmm0, %ecx
	shl		$16, %esi
	or		%esi, %ecx		# locations of matches
	and		%ecx, %eax		# any match inside buffer?
	jnz		.Lprecisematchb

	cmp		%rdx, %rdi		# did the buffer begin here?
	jae		.Lnomatchb		# if yes, we are done

	/*
	 * main loop: the chunk is loaded before the test for the first
	 * chunk as both the loop body and .Ltailb need its contents
	 */
	ALIGN_TEXT
0:	movdqa		-32(%rdx), %xmm0	# load previous string chunk
	movdqa		-16(%rdx), %xmm1
	sub		$32, %rdx		# beginning of string reached?
	cmp		%rdx, %rdi
	jae		.Ltailb

	pcmpeqb		%xmm2, %xmm0
	pcmpeqb		%xmm2, %xmm1
	por		%xmm1, %xmm0		# match in either half?
	pmovmskb	%xmm0, %eax
	test		%eax, %eax
	jz		0b

	/*
	 * A chunk lying entirely inside the buffer has a match.  %xmm0 was
	 * overwritten by the por above, so the low half is compared again
	 * straight from memory; this destroys %xmm2, which is not needed
	 * any more.  %xmm1 still holds the result for the high half.
	 */
.Lmatchb:
	pcmpeqb		(%rdx), %xmm2		# redo comparison of first 16 bytes
	pmovmskb	%xmm1, %ecx
	pmovmskb	%xmm2, %eax
	shl		$16, %ecx
	or		%ecx, %eax		# location of matches

	/* in: %eax = nonzero bitmap of valid matches in the chunk at %rdx */
.Lprecisematchb:
	bsr		%eax, %eax		# find location of match
	add		%rdx, %rax		# point to matching byte
	ret

	/*
	 * First chunk of a buffer spanning more than one chunk.  bsr sets
	 * ZF when its source is zero and lea does not modify the flags, so
	 * cmovnz acts on the result of bsr.  Without a match, %rax keeps
	 * the zero written by the and (a 32-bit write clears the upper
	 * half of %rax) and %edi/%rdx are not used.
	 */
.Ltailb:
	pcmpeqb		%xmm2, %xmm1
	pcmpeqb		%xmm2, %xmm0
	pmovmskb	%xmm1, %ecx
	pmovmskb	%xmm0, %eax
	shl		$16, %ecx
	or		%ecx, %eax		# location of matches
	and		%r9d, %eax		# mask out matches before buffer
	bsr		%eax, %edi		# location of match
	lea		(%rdx, %rdi, 1), %rdx	# pointer to match (if any)
	cmovnz		%rdx, %rax		# point to match if present,
	ret					# else null pointer

.Lnomatchb:
	xor		%eax, %eax		# return null pointer
	ret
ARCHEND(memrchr, baseline)
157
158
/* An empty .note.GNU-stack section tells GNU toolchains that this object does not need an executable stack. */
.section .note.GNU-stack, "", %progbits
159
160