/* From freebsd/freebsd-src: lib/libc/amd64/string/strchrnul.S */
/*-
2
* Copyright (c) 2023 The FreeBSD Foundation
3
*
4
* This software was developed by Robert Clausecker <fuz@FreeBSD.org>
5
* under sponsorship from the FreeBSD Foundation.
6
*
7
* Redistribution and use in source and binary forms, with or without
8
* modification, are permitted provided that the following conditions
9
* are met:
10
* 1. Redistributions of source code must retain the above copyright
11
* notice, this list of conditions and the following disclaimer.
12
* 2. Redistributions in binary form must reproduce the above copyright
13
* notice, this list of conditions and the following disclaimer in the
14
* documentation and/or other materials provided with the distribution.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26
* SUCH DAMAGE
27
*/
#include <machine/asm.h>

#include "amd64_archlevel.h"

/*
 * Align the next instruction to 16 bytes, padding with 0x90 (nop) bytes.
 * The comment is kept outside the macro body so that the expansion is
 * only the directive itself.
 */
#define ALIGN_TEXT	.p2align 4,0x90

/* Export strchrnul as a weak alias of __strchrnul. */
	.weak	strchrnul
	.set	strchrnul, __strchrnul

/*
 * Implementation table for __strchrnul, listing the scalar and baseline
 * variants defined in this file.
 * NOTE(review): how one of them gets selected is decided by the ARCHFUNCS
 * macros from amd64_archlevel.h, which is not visible here -- confirm there.
 */
ARCHFUNCS(__strchrnul)
	ARCHFUNC(__strchrnul, scalar)
	ARCHFUNC(__strchrnul, baseline)
ENDARCHFUNCS(__strchrnul)

/*
 * strchrnul(str, c)
 *
 * Return a pointer to the first byte of str that equals (char)c, or to
 * the terminating NUL byte if c does not occur.  This is implemented
 * like strlen(str), but we check for the presence of both NUL and c in
 * each iteration.
 *
 * The string is read one aligned 8-byte word at a time, so the first
 * load may include bytes located before str.  Within a word w, a byte
 * is flagged when (w - 0x01..01) & ~w has bit 7 of that byte set.
 * Borrows can produce spurious flags, but only in bytes above a byte
 * that genuinely matches, and only the lowest flag is used.
 *
 * In:		%rdi = str, %sil = c
 * Out:		%rax = pointer to the first c or NUL byte
 * Clobbers:	%rcx, %rdx, %rsi, %rdi, %r8-%r11, flags
 *
 * Register roles once set up:
 *	%rsi	c replicated into all eight bytes
 *	%r8	-0x0101010101010101 (added with lea to subtract 0x01..01)
 *	%r9	0x8080808080808080 (bit 7 of every byte)
 *	%r10	0x01 in every byte of the first word that precedes str
 *	%rdi	address of the next word to load
 */
ARCHENTRY(__strchrnul, scalar)
	mov	%edi, %ecx		# keep low address bits for the head mask
	and	$~7, %rdi		# align to 8 byte
	movzbl	%sil, %esi		# clear stray high bits
	movabs	$0x0101010101010101, %r8
	mov	(%rdi), %rax		# load first word
	imul	%r8, %rsi		# replicate char 8 times

	/*
	 * Unaligned input: align to 8 bytes.  Then proceed the same
	 * way as with aligned input, but prevent matches before the
	 * beginning of the string.  This is achieved by oring 0x01
	 * into each byte of the buffer before the string.
	 */
	shl	$3, %ecx		# byte offset -> bit offset (count is taken mod 64)
	mov	%r8, %r10
	add	$8, %rdi		# advance past the head word
	shl	%cl, %r10		# 0x01 where the string is
	xor	%r8, %r10		# 0x01 where it is not
	neg	%r8			# negate 01..01 so we can use lea
	movabs	$0x8080808080808080, %r9

	mov	%rsi, %rcx
	xor	%rax, %rcx		# str ^ c
	or	%r10, %rax		# str without NUL bytes before it
	or	%r10, %rcx		# (str ^ c) without matches before it
	lea	(%rax, %r8, 1), %rdx	# str - 0x01..01
	lea	(%rcx, %r8, 1), %r11	# (str ^ c) - 0x01..01
	not	%rax			# ~str
	not	%rcx			# ~(str ^ c)
	and	%rdx, %rax		# (str - 0x01..01) & ~str
	and	%r11, %rcx		# ((str ^ c) - 0x01..01) & ~(str ^ c)
	or	%rcx, %rax		# matches for both
	and	%r9, %rax		# keep only bit 7 of each byte
	jnz	1f

	/* main loop unrolled twice */
	ALIGN_TEXT
0:	mov	(%rdi), %rax		# str
	mov	%rsi, %rcx
	xor	%rax, %rcx		# str ^ c
	lea	(%rax, %r8, 1), %rdx	# str - 0x01..01
	lea	(%rcx, %r8, 1), %r11	# (str ^ c) - 0x01..01
	not	%rax			# ~str
	not	%rcx			# ~(str ^ c)
	and	%rdx, %rax		# (str - 0x01..01) & ~str
	and	%r11, %rcx		# ((str ^ c) - 0x01..01) & ~(str ^ c)
	or	%rcx, %rax		# matches for both
	and	%r9, %rax		# keep only bit 7 of each byte
	jnz	2f			# %rdi still points at this word

	mov	8(%rdi), %rax		# str
	add	$16, %rdi		# now 8 past the word just loaded
	mov	%rsi, %rcx
	xor	%rax, %rcx		# str ^ c
	lea	(%rax, %r8, 1), %rdx	# str - 0x01..01
	lea	(%rcx, %r8, 1), %r11	# (str ^ c) - 0x01..01
	not	%rax			# ~str
	not	%rcx			# ~(str ^ c)
	and	%rdx, %rax		# (str - 0x01..01) & ~str
	and	%r11, %rcx		# ((str ^ c) - 0x01..01) & ~(str ^ c)
	or	%rcx, %rax		# matches for both
	and	%r9, %rax		# keep only bit 7 of each byte
	jz	0b

	/*
	 * NUL or c found.  On entry at 1, %rdi is 8 past the matching
	 * word (head and second loop half); on entry at 2 it points at
	 * the matching word.  %rax is nonzero on every path into here,
	 * so tzcnt yields the same result as bsf on CPUs that decode it
	 * as rep bsf.
	 */
1:	sub	$8, %rdi		# undo advance past buffer
2:	tzcnt	%rax, %rax		# first NUL or c byte match
	shr	$3, %eax		# scale from bit to byte index
	add	%rdi, %rax		# pointer to found c or NUL
	ret
ARCHEND(__strchrnul, scalar)

/*
 * strchrnul(str, c) using 16-byte vector compares.
 *
 * Return a pointer to the first byte of str that equals (char)c, or to
 * the terminating NUL byte if c does not occur.  Each step loads one
 * aligned 16-byte chunk, compares it bytewise against zero and against
 * c, and extracts one mask bit per byte with pmovmskb.  The first load
 * may include bytes located before str; their mask bits are cleared
 * with %edx.
 *
 * In:		%rdi = str, %esi = c (only the low byte is replicated)
 * Out:		%rax = pointer to the first c or NUL byte
 * Clobbers:	%rcx, %rdx, %rdi, %xmm0-%xmm2, flags
 *
 * Register roles once set up:
 *	%xmm0	c replicated into all sixteen bytes
 *	%edx	mask with a bit set for each head byte at or after str
 *	%rdi	address of the next chunk to load
 */
ARCHENTRY(__strchrnul, baseline)
	mov	%edi, %ecx
	and	$~0xf, %rdi		# align to 16 byte
	movdqa	(%rdi), %xmm1
	movd	%esi, %xmm0
	and	$0xf, %ecx		# distance from (%rdi) to start of string
	pxor	%xmm2, %xmm2
	mov	$-1, %edx
	punpcklbw %xmm0, %xmm0		# c -> cc
	shl	%cl, %edx		# bits corresponding to bytes in the string
	punpcklwd %xmm0, %xmm0		# cc -> cccc
	add	$16, %rdi		# advance past the head chunk

	/* check for match in head */
	pcmpeqb	%xmm1, %xmm2		# NUL bytes present?
	pshufd	$0, %xmm0, %xmm0	# cccc -> cccccccccccccccc
	pcmpeqb	%xmm0, %xmm1		# c present?
	por	%xmm2, %xmm1		# either present?
	pmovmskb %xmm1, %eax
	and	%edx, %eax		# match in the string?
	jnz	1f

	/* main loop unrolled twice */
	ALIGN_TEXT
0:	movdqa	(%rdi), %xmm1
	pxor	%xmm2, %xmm2		# re-zero: overwritten by the compare below
	pcmpeqb	%xmm1, %xmm2		# NUL bytes present?
	pcmpeqb	%xmm0, %xmm1		# c present?
	por	%xmm2, %xmm1		# either present?
	pmovmskb %xmm1, %eax
	test	%eax, %eax		# match in the string?
	jnz	2f			# %rdi still points at this chunk

	movdqa	16(%rdi), %xmm1
	add	$32, %rdi		# now 16 past the chunk just loaded
	pxor	%xmm2, %xmm2		# re-zero: overwritten by the compare below
	pcmpeqb	%xmm1, %xmm2		# NUL bytes present?
	pcmpeqb	%xmm0, %xmm1		# c present?
	por	%xmm2, %xmm1		# either present?
	pmovmskb %xmm1, %eax
	test	%eax, %eax		# match in the string?
	jz	0b

	/*
	 * NUL or c found.  On entry at 1, %rdi is 16 past the matching
	 * chunk (head and second loop half); on entry at 2 it points at
	 * the matching chunk.  %eax is nonzero on every path into here.
	 */
1:	sub	$16, %rdi		# undo advance past buffer
2:	tzcnt	%eax, %eax		# where is the match?
	add	%rdi, %rax		# pointer to found c or NUL
	ret
ARCHEND(__strchrnul, baseline)

/* Empty .note.GNU-stack section: this object does not need an executable stack. */
	.section .note.GNU-stack,"",%progbits