Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/arm64/lib/memcmp.S
26424 views
1
/* SPDX-License-Identifier: GPL-2.0-only */
2
/*
3
* Copyright (c) 2013-2021, Arm Limited.
4
*
5
* Adapted from the original at:
6
* https://github.com/ARM-software/optimized-routines/blob/e823e3abf5f89ecb/string/aarch64/memcmp.S
7
*/
8
9
#include <linux/linkage.h>
10
#include <asm/assembler.h>
11
12
/* Assumptions:
13
*
14
* ARMv8-a, AArch64, unaligned accesses.
15
*/
16
17
/* L(name) pastes the ".L" prefix onto NAME, so every branch target written as
   L(name) in this file becomes the assembler label .Lname (the .L prefix
   keeps such labels local rather than exported). */
#define L(label) .L ## label
18
19
/* Parameters and result. */
/* src1, src2 and limit name the x0, x1 and x2 registers. result is w0, the
   32-bit view of the same register as src1, so writing result overwrites
   src1. */
20
#define src1 x0
21
#define src2 x1
22
#define limit x2
23
#define result w0
24
25
/* Internal variables. */
/* data1/data2 hold the words currently being compared from src1/src2.
   data1w/data2w are the 32-bit views (w3/w5) of data1/data2 (x3/x5).
   data1h/data2h hold the second 8 bytes of a 16-byte ldp pair.
   tmp1 is scratch; tmp2 is defined but not referenced by the routine as
   shown in this file. */
26
#define data1 x3
27
#define data1w w3
28
#define data1h x4
29
#define data2 x5
30
#define data2w w5
31
#define data2h x6
32
#define tmp1 x7
33
#define tmp2 x8
34
35
/*
 * __pi_memcmp: compare limit bytes at src1 against limit bytes at src2.
 *
 * In:   x0 = src1, x1 = src2, x2 = limit (byte count, call it n).
 * Out:  w0 = 0 when all compared bytes match. Otherwise the sign of w0
 *       follows the first differing byte pair, compared as unsigned values:
 *       the word paths return exactly -1 or 1, the byte loop returns the
 *       difference of the two bytes.
 * Uses: x0-x2 are modified; x3-x7 (data1, data1h, data2, data2h, tmp1) are
 *       scratch; NZCV is clobbered.
 *
 * Strategy by length:
 *   n < 8        optional 4-byte word compare, then a byte loop.
 *   8 <= n <= 16 first 8 bytes, then the last 8 bytes (the two may overlap).
 *   17 <= n <= 32 first 16 bytes, then the last 16 bytes (may overlap).
 *   n > 32       first 16 bytes, a 16-byte loop, then the last 16 bytes;
 *                when n > 128, src1 is first rounded down to a multiple of 16.
 */
SYM_FUNC_START(__pi_memcmp)
36
/* limit = n - 8. A borrow (lo) means n < 8: take the short path. */
subs limit, limit, 8
37
b.lo L(less8)
38
39
/* Compare bytes 0-7; both pointers advance by 8 (post-index). */
ldr data1, [src1], 8
40
ldr data2, [src2], 8
41
cmp data1, data2
42
b.ne L(return)
43
44
/* limit = n - 16. Positive (signed) means more than 16 bytes in total. */
subs limit, limit, 8
45
b.gt L(more16)
46
47
/* 8 <= n <= 16: limit is in [-8..0] and the pointers are 8 bytes in, so
   pointer + limit addresses the final 8 bytes of each buffer. The flags are
   not needed here because L(return) performs its own compare. */
ldr data1, [src1, limit]
48
ldr data2, [src2, limit]
49
b L(return)
50
51
L(more16):
52
/* Compare bytes 8-15; both pointers are now 16 bytes in. */
ldr data1, [src1], 8
53
ldr data2, [src2], 8
54
cmp data1, data2
55
bne L(return)
56
57
/* Jump directly to comparing the last 16 bytes for 32 byte (or less)
58
strings. */
59
/* limit = n - 32; ls (unsigned lower-or-same) holds when n - 16 <= 16. */
subs limit, limit, 16
60
b.ls L(last_bytes)
61
62
/* We overlap loads between 0-32 bytes at either side of SRC1 when we
63
try to align, so limit it only to strings larger than 128 bytes. */
64
/* limit is n - 32 here, so the threshold of 96 corresponds to n = 128. */
cmp limit, 96
65
b.ls L(loop16)
66
67
/* Align src1 and adjust src2 with bytes not yet done. */
68
/* tmp1 = src1 mod 16. Both pointers step back by tmp1 and limit grows by
   the same amount, so up to 15 already-compared bytes are compared again. */
and tmp1, src1, 15
69
add limit, limit, tmp1
70
sub src1, src1, tmp1
71
sub src2, src2, tmp1
72
73
/* Loop performing 16 bytes per iteration using aligned src1.
74
Limit is pre-decremented by 16 and must be larger than zero.
75
Exit if <= 16 bytes left to do or if the data is not equal. */
76
/* Place the loop entry on a 16-byte boundary. */
.p2align 4
77
L(loop16):
78
ldp data1, data1h, [src1], 16
79
ldp data2, data2h, [src2], 16
80
/* hi after this subs means limit was above 16 before the subtraction, i.e.
   more than 16 bytes are still left once this pair has been handled. */
subs limit, limit, 16
81
/* If hi: flags = compare of the low words. Otherwise NZCV = 0, which reads
   as "not equal" and ends the loop. */
ccmp data1, data2, 0, hi
82
/* If the low words matched: flags = compare of the high words. Otherwise
   NZCV = 0 ("not equal"). */
ccmp data1h, data2h, 0, eq
83
/* Continue only when more data remains and all 16 bytes matched. */
b.eq L(loop16)
84
85
/* The loop ended either on a mismatch or because <= 16 bytes remain, so
   re-test both halves. L(return) only looks at data1/data2, hence the high
   words are moved into them before the second test. */
cmp data1, data2
86
bne L(return)
87
mov data1, data1h
88
mov data2, data2h
89
cmp data1, data2
90
bne L(return)
91
92
/* Compare last 1-16 bytes using unaligned access. */
93
L(last_bytes):
94
/* limit is zero or negative here, so pointer + limit steps back to the
   final 16 bytes of each buffer; bytes already compared may be re-read. */
add src1, src1, limit
95
add src2, src2, limit
96
ldp data1, data1h, [src1]
97
ldp data2, data2h, [src2]
98
cmp data1, data2
99
bne L(return)
100
mov data1, data1h
101
mov data2, data2h
102
/* Execution falls through into L(return), which compares data1/data2 again
   after the optional byte reversal. */
cmp data1, data2
103
104
/* Compare data bytes and set return value to 0, -1 or 1. */
105
L(return):
106
/* Unless __AARCH64EB__ is defined, reverse the bytes of both words so that
   the byte at the lowest address becomes the most significant; the unsigned
   compare below then orders the words by their first differing byte. */
#ifndef __AARCH64EB__
107
rev data1, data1
108
rev data2, data2
109
#endif
110
cmp data1, data2
111
L(ret_eq):
112
/* result = 1 if the words differ, else 0; negate it when data1 is lower
   (unsigned). Entering at L(ret_eq) with Z set yields 0. */
cset result, ne
113
cneg result, result, lo
114
ret
115
116
.p2align 4
117
/* Compare up to 8 bytes. Limit is [-8..-1]. */
118
L(less8):
119
/* limit = n - 4. No carry (lo) means n < 4: skip the 4-byte compare. */
adds limit, limit, 4
120
b.lo L(less4)
121
/* Compare bytes 0-3 as one 32-bit word. On a mismatch L(return) works on
   the full 64-bit registers, which these loads have zero-extended. */
ldr data1w, [src1], 4
122
ldr data2w, [src2], 4
123
cmp data1w, data2w
124
b.ne L(return)
125
/* Undo the bias so the adds at L(less4) leaves n - 4 bytes to compare. */
sub limit, limit, 4
126
L(less4):
127
/* limit = number of bytes still to compare (0..3). Zero means everything
   matched: L(ret_eq) then returns 0 because Z is set. */
adds limit, limit, 4
128
beq L(ret_eq)
129
L(byte_loop):
130
ldrb data1w, [src1], 1
131
ldrb data2w, [src2], 1
132
subs limit, limit, 1
133
/* While bytes remain (ne), flags = compare of the two bytes; on the last
   byte the flags are forced to 0 so the loop exits. */
ccmp data1w, data2w, 0, ne /* NZCV = 0b0000. */
134
b.eq L(byte_loop)
135
/* Return the difference of the last byte pair loaded: zero when every byte
   matched, otherwise the difference at the first mismatch. */
sub result, data1w, data2w
136
ret
137
SYM_FUNC_END(__pi_memcmp)
138
/* NOTE(review): both macros below are defined outside this file. Presumably
   the first makes memcmp a weak alias of __pi_memcmp and the second exports
   memcmp except in KASAN builds - confirm against their definitions. */
SYM_FUNC_ALIAS_WEAK(memcmp, __pi_memcmp)
138
EXPORT_SYMBOL_NOKASAN(memcmp)
140
141