GitHub Repository: torvalds/linux
Path: blob/master/arch/arc/lib/memset-archs.S
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
 */

#include <linux/linkage.h>
#include <asm/cache.h>

/*
 * The memset implementation below is optimized to use the prefetchw and
 * prealloc instructions on CPUs with a 64B L1 data cache line
 * (L1_CACHE_SHIFT == 6). To implement an optimized memset for the other
 * possible L1 data cache line lengths (32B and 128B), rewrite the code
 * carefully, checking that no prefetchw/prealloc instruction is issued
 * for L1 cache lines that do not belong to the memset area.
 */

#if L1_CACHE_SHIFT == 6

.macro PREALLOC_INSTR	reg, off
	prealloc	[\reg, \off]
.endm

.macro PREFETCHW_INSTR	reg, off
	prefetchw	[\reg, \off]
.endm

#else

.macro PREALLOC_INSTR	reg, off
.endm

.macro PREFETCHW_INSTR	reg, off
.endm

#endif

ENTRY_CFI(memset)
	mov.f	0, r2
;;; if size is zero
	jz.d	[blink]
	mov	r3, r0		; don't clobber ret val

	PREFETCHW_INSTR	r0, 0	; Prefetch the first write location

;;; if length < 8
	brls.d.nt	r2, 8, .Lsmallchunk
	mov.f	lp_count, r2

	and.f	r4, r0, 0x03
	rsub	lp_count, r4, 4
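	;; lpnz sets up an ARC zero-overhead hardware loop: when the Z flag
	;; from the preceding flag-setting instruction is clear, the body up
	;; to the end label repeats lp_count times with no branch overhead;
	;; when it is set, the whole loop is skipped.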
	lpnz	@.Laligndestination
	;; LOOP BEGIN
	stb.ab	r1, [r3, 1]
	sub	r2, r2, 1
.Laligndestination:

;;; Destination is aligned
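	;; Replicate the fill byte into all four bytes of r4 (and r5, so the
	;; r4:r5 pair holds eight copies for the 64-bit std.ab stores below).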
	and	r1, r1, 0xFF
	asl	r4, r1, 8
	or	r4, r4, r1
	asl	r5, r4, 16
	or	r5, r5, r4
	mov	r4, r5

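	;; Carve the length into 64-byte chunks: sub3/add3 shift their last
	;; operand left by 3, so lp_count becomes len - 64. For len > 64, r2
	;; is trimmed to the leftover (at most 127 bytes) for the 32-byte loop
	;; below; otherwise lp_count is cleared and the whole length stays in r2.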
	sub3	lp_count, r2, 8
	cmp	r2, 64
	bmsk.hi	r2, r2, 5
	mov.ls	lp_count, 0
	add3.hi	r2, r2, 8

;;; Convert len to Dwords, unfold x8
	lsr.f	lp_count, lp_count, 6

	lpnz	@.Lset64bytes
	;; LOOP START
	PREALLOC_INSTR	r3, 64	; alloc next line w/o fetching

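	;; With CONFIG_ARC_HAS_LL64 the core has 64-bit stores: each std.ab
	;; writes the r4:r5 pair (8 bytes) and post-increments r3 by 8;
	;; without it, twice as many 32-bit st.ab stores cover the same chunk.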
#ifdef CONFIG_ARC_HAS_LL64
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
#else
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
#endif
.Lset64bytes:

	lsr.f	lp_count, r2, 5		; Last remaining max 124 bytes
	lpnz	.Lset32bytes
	;; LOOP START
#ifdef CONFIG_ARC_HAS_LL64
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
#else
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
#endif
.Lset32bytes:

	and.f	lp_count, r2, 0x1F	; Last remaining 31 bytes
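	;; Short lengths branch straight here from the entry check, with
	;; lp_count already holding the whole length (set in the branch
	;; delay slot).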
.Lsmallchunk:
	lpnz	.Lcopy3bytes
	;; LOOP START
	stb.ab	r1, [r3, 1]
.Lcopy3bytes:

	j	[blink]

END_CFI(memset)

ENTRY_CFI(memzero)
	; adjust bzero args to memset args
	mov	r2, r1
	b.d	memset		; tail call, so no need to tinker with blink
	mov	r1, 0
END_CFI(memzero)