GitHub Repository: torvalds/linux
Path: blob/master/arch/arc/lib/strcmp.S
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 */

/* This is optimized primarily for the ARC700.
   It would be possible to speed up the loops by one cycle per word
   (respectively one cycle per byte) by forcing doubleword alignment of
   source 1, unrolling by a factor of two, and speculatively loading the
   second word / byte of source 1; however, that would increase the
   overhead for loop setup / finish, and strcmp might often terminate
   early.  */

#include <linux/linkage.h>

ENTRY_CFI(strcmp)
	or	r2,r0,r1
	bmsk_s	r2,r2,1		; keep only the two low address bits
	brne	r2,0,.Lcharloop	; unaligned input: byte-at-a-time loop
	mov_s	r12,0x01010101
	ror	r5,r12		; r5 = 0x80808080
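
/* Hedged C sketch (not part of the kernel source) of the dispatch and
   constants set up above; s1/s2 are illustrative names for the strings
   passed in r0/r1:

	if (((unsigned long)s1 | (unsigned long)s2) & 3)
		goto byte_loop;			// .Lcharloop
	// r12 = 0x01010101; r5 = ror(r12) = 0x80808080
*/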
.Lwordloop:
	ld.ab	r2,[r0,4]
	ld.ab	r3,[r1,4]
	nop_s
	sub	r4,r2,r12
	bic	r4,r4,r2
	and	r4,r4,r5	; 0x80 bits mark (candidate) zero bytes of r2
	brne	r4,0,.Lfound0	; r2 contains a NUL byte
	breq	r2,r3,.Lwordloop
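
/* Hedged C sketch (not the kernel's code) of the zero-byte test used in
   the loop above; w stands for the word just loaded into r2:

	unsigned long z = (w - 0x01010101UL) & ~w & 0x80808080UL;
	// z is nonzero exactly when w contains a zero byte; the set bits
	// mark candidate byte positions, with possible 0x01 false
	// positives (see the big-endian note below).
*/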
#ifdef __LITTLE_ENDIAN__
	xor	r0,r2,r3	; mask for difference
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
#endif /* LITTLE ENDIAN */
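
/* Hedged C sketch of the byte isolation above; w1/w2 are illustrative
   names for r2/r3, and 0x80808080 is the value held in r5:

	unsigned long diff = w1 ^ w2;
	unsigned long bit  = diff & ~(diff - 1);	// lowest set bit
	unsigned long mask = 0x80808080UL ^ (0x80808080UL - bit);
	w1 &= mask;	// mask covers that bit up through bit 7 of its
	w2 &= mask;	// byte: the first differing char in little-endian
*/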
	cmp_s	r2,r3
	mov_s	r0,1
	j_s.d	[blink]
	bset.lo	r0,r0,31
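
/* Hedged note on the return convention: on this path the words are
   known to differ, so r0 is 1 when r2 was the unsigned-larger word,
   and bset.lo turns it into 0x80000001 (a negative int) otherwise;
   the .Lfound0 paths additionally yield 0 for equal strings. Roughly:

	return w1 > w2 ? 1 : (int)0x80000001;	// strcmp > 0 / < 0
*/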

	.balign	4
#ifdef __LITTLE_ENDIAN__
.Lfound0:
	xor	r0,r2,r3	; mask for difference
	or	r0,r0,r4	; or in zero indicator
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
	sub.f	r0,r2,r3
	mov.hi	r0,1
	j_s.d	[blink]
	bset.lo	r0,r0,31
#else /* BIG ENDIAN */
	/* The zero-detection above can mis-detect 0x01 bytes as zeroes
	   because of carry-propagation from a less significant zero byte.
	   We can compensate for this by checking that bit 0 is zero.
	   This compensation is not necessary in the step where we
	   get a low estimate for r2, because in any affected bytes
	   we already have 0x00 or 0x01, which will remain unchanged
	   when bit 7 is cleared.  */
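
	/* Hedged worked example of that mis-detection: for the big-endian
	   word w = 0x0100FFFF, (w - 0x01010101) & ~w & 0x80808080 yields
	   0x80800000, flagging the leading 0x01 byte (bit 31) as well as
	   the real NUL byte (bit 23), because the NUL's borrow propagates
	   upward through the subtraction. */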
	.balign	4
.Lfound0:
	lsr	r0,r4,8
	lsr_s	r1,r2
	bic_s	r2,r2,r0	; get low estimate for r2 and get ...
	bic_s	r0,r0,r1	; <this is the adjusted mask for zeros>
	or_s	r3,r3,r0	; ... high estimate r3 so that r2 > r3 will ...
	cmp_s	r3,r2		; ... be independent of trailing garbage
	or_s	r2,r2,r0	; likewise for r3 > r2
	bic_s	r3,r3,r0
	rlc	r0,0		; r0 := r2 > r3 ? 1 : 0
	cmp_s	r2,r3
	j_s.d	[blink]
	bset.lo	r0,r0,31
#endif /* ENDIAN */
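
/* The byte-at-a-time fallback below is, as a hedged C sketch:

	unsigned char c1, c2;
	do {
		c1 = *s1++;
		c2 = *s2++;
	} while (c1 != 0 && c1 == c2);
	return c1 - c2;
*/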

	.balign	4
.Lcharloop:
	ldb.ab	r2,[r0,1]
	ldb.ab	r3,[r1,1]
	nop_s
	breq	r2,0,.Lcmpend
	breq	r2,r3,.Lcharloop
.Lcmpend:
	j_s.d	[blink]
	sub	r0,r2,r3
END_CFI(strcmp)