Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/lib/libc/amd64/string/strlen.S
39491 views
1
/*-
2
* Written by Mateusz Guzik <[email protected]>
3
* Copyright (c) 2023 The FreeBSD Foundation
4
*
5
* Portions of this software were developed by Robert Clausecker
6
* <[email protected]> under sponsorship from the FreeBSD Foundation.
7
*
8
* Public domain.
9
*/
10
11
#include <machine/asm.h>
12
#include "amd64_archlevel.h"
13
14
/*
15
* Note: the scalar routine was written with kernel use in mind (read: no simd),
16
* it is only present in userspace as a temporary measure until something
17
* better gets imported.
18
*/
19
20
/* Placed immediately ahead of the loop-head labels of both routines in this file. */
#define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */
21
22
/*
 * Lists the two strlen variants implemented in this file: scalar and
 * baseline.
 * NOTE(review): ARCHFUNCS/ARCHFUNC/ENDARCHFUNCS are not defined here;
 * presumably they come from amd64_archlevel.h and build the table used
 * to pick one variant -- confirm against that header, including whether
 * the order of the entries is significant.
 */
ARCHFUNCS(strlen)
23
ARCHFUNC(strlen, scalar)
24
ARCHFUNC(strlen, baseline)
25
ENDARCHFUNCS(strlen)
26
27
/*
28
* strlen(string)
29
* %rdi
30
*
31
* Uses the ((x - 0x01....01) & ~x & 0x80....80) trick.
32
*
33
* 0x01....01 is replaced with 0x0 - 0x01....01 so that it can be added
34
* with leaq.
35
*
36
* For a description see either:
37
* - "Hacker's Delight" by Henry S. Warren, Jr.
38
* - "Optimizing subroutines in assembly language: An optimization guide for x86 platforms"
39
* by Agner Fog
40
*
41
* The latter contains a 32-bit variant of the same algorithm coded in assembly for i386.
42
*/
43
ARCHENTRY(strlen, scalar)
	/*
	 * Register roles:
	 *   %rdi  in: string pointer; afterwards the 8-byte aligned
	 *         address of the quadword being examined
	 *   %r10  copy of the original string pointer
	 *   %r8   0xfefefefefefefeff (0 - 0x0101010101010101)
	 *   %r9   0x8080808080808080
	 *   %r11  quadword under test (destroyed by the test)
	 *   %rcx  NUL-detection mask for that quadword
	 *   %rdx  scratch, misaligned path only
	 *   %rax  out: string length
	 */
	movabsq	$0xfefefefefefefeff, %r8
	movabsq	$0x8080808080808080, %r9

	movq	%rdi, %r10
	movq	%rdi, %rcx
	testb	$7, %dil		/* pointer already 8-byte aligned? */
	jz	.Lscalar_test

	/*
	 * Misaligned start: round %rdi down to an 8-byte boundary and
	 * load the whole quadword.  Its low (pointer & 7) bytes lie
	 * before the string, so force them to 0xff to keep them from
	 * being taken for the terminator.
	 */
	andq	$-8, %rdi
	movq	(%rdi), %r11
	shlq	$3, %rcx		/* bytes -> bits; the shift below only uses the low 6 bits of %cl */
	movq	$-1, %rdx
	shlq	%cl, %rdx
	notq	%rdx			/* ones in the bytes preceding the string */
	orq	%rdx, %r11

	leaq	(%r11,%r8), %rcx	/* x - 0x01....01 */
	notq	%r11
	andq	%r11, %rcx		/* ... & ~x */
	andq	%r9, %rcx		/* ... & 0x80....80 */
	jnz	.Lscalar_found

	/*
	 * Aligned loop, one quadword per iteration.  Aligned callers
	 * enter at .Lscalar_test so their first quadword is not skipped.
	 */
	ALIGN_TEXT
.Lscalar_next:
	leaq	8(%rdi), %rdi
.Lscalar_test:
	movq	(%rdi), %r11
	leaq	(%r11,%r8), %rcx
	notq	%r11
	andq	%r11, %rcx
	andq	%r9, %rcx
	jz	.Lscalar_next

	/* %rcx is non-zero: its lowest set bit marks the first NUL byte. */
.Lscalar_found:
	bsfq	%rcx, %rcx		/* bit index of the lowest marker */
	shrq	$3, %rcx		/* bit index -> byte index */
	leaq	(%rcx,%rdi), %rax	/* address of the NUL byte */
	subq	%r10, %rax		/* minus start of string = length */
	ret
ARCHEND(strlen, scalar)
90
91
ARCHENTRY(strlen, baseline)
	/*
	 * Variant using 128-bit SSE2 compares on %xmm1.
	 *
	 *   %rdi  in: string pointer; afterwards a 16-byte aligned cursor
	 *   %rsi  copy of the original string pointer
	 *   %ecx  offset of the string within its first 16-byte chunk
	 *   %eax  per-byte NUL bitmask of the current chunk
	 *   %rax  out: string length
	 */
	movq	%rdi, %rcx
	pxor	%xmm1, %xmm1
	andq	$-16, %rdi		/* round down to a 16-byte boundary */
	pcmpeqb	(%rdi), %xmm1		/* head chunk, including bytes before the string */
	movq	%rcx, %rsi		/* original pointer, needed for the final subtraction */
	andl	$15, %ecx		/* count of bytes preceding the string in the head chunk */
	pmovmskb %xmm1, %eax		/* one mask bit per byte */
	addq	$32, %rdi		/* the loop addresses its chunks as -16(%rdi) and (%rdi) */
	shrl	%cl, %eax		/* drop the bits belonging to bytes before the string */
	testl	%eax, %eax		/* explicit test: SHR leaves the flags alone when %cl is 0 */
	jnz	.Lbaseline_head

	/* Main loop, unrolled twice: 32 bytes per iteration. */
	ALIGN_TEXT
.Lbaseline_loop:
	pxor	%xmm1, %xmm1
	pcmpeqb	-16(%rdi), %xmm1	/* first chunk of the pair */
	pmovmskb %xmm1, %eax
	testl	%eax, %eax		/* NUL in this chunk? */
	jnz	.Lbaseline_tail

	pxor	%xmm1, %xmm1
	pcmpeqb	(%rdi), %xmm1		/* second chunk of the pair */
	pmovmskb %xmm1, %eax
	addq	$32, %rdi		/* step to the next pair */
	testl	%eax, %eax
	jz	.Lbaseline_loop

	/*
	 * NUL found in the second chunk, which now sits at -32(%rdi).
	 * Back up so that it sits at -16(%rdi), as the tail expects.
	 */
	subq	$16, %rdi

	/* The chunk containing the NUL starts at -16(%rdi). */
.Lbaseline_tail:
	tzcnt	%eax, %eax		/* index of the first NUL within the chunk */
	subq	%rsi, %rdi		/* distance from the string start to %rdi */
	leaq	-16(%rdi,%rax), %rax	/* chunk offset + index within chunk = length */
	ret

	/*
	 * NUL found in the head chunk.  After the shift above, bit i
	 * stands for byte i of the string, so the bit index is the length.
	 */
.Lbaseline_head:
	tzcnt	%eax, %eax
	ret
ARCHEND(strlen, baseline)
130
131
/* Empty .note.GNU-stack section: the GNU toolchain convention for marking that this object does not need an executable stack. */
.section .note.GNU-stack,"",%progbits
132
133