Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
awilliam
GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/sh/lib64/memset.S
10817 views
/* Cloned and hacked for uClibc by Paul Mundt, December 2003 */
/* Modified by SuperH, Inc. September 2003 */
!
! Fast SH memset
!
! by Toshiyasu Morita (tm@netcom.com)
!
! SH5 code by J"orn Rennecke ([email protected])
! Copyright 2002 SuperH Ltd.
!

/*
 * SHHI / SHLO: 64-bit shift mnemonics chosen by target byte order.
 *
 *   little-endian:  SHHI = shlld, SHLO = shlrd
 *   big-endian:     SHHI = shlrd, SHLO = shlld
 *
 * memset uses SHHI to build the byte mask for short fills.  SHLO is
 * defined for symmetry; nothing visible in this file references it.
 *
 * Byte-order detection:
 *   - If __BYTE_ORDER or __LITTLE_ENDIAN is defined (for example through
 *     <endian.h>), the original comparison is used unchanged.
 *   - If neither is defined, the preprocessor would evaluate both
 *     identifiers as 0 and "0 == 0" would select the little-endian
 *     shifts on every target.  In that case fall back to the
 *     compiler-predefined __BIG_ENDIAN__ (NOTE(review): this relies on
 *     the SH toolchain predefining __BIG_ENDIAN__ for big-endian builds;
 *     confirm for the compiler in use).  Without that macro the result
 *     is little-endian, exactly as before.
 */
#if defined(__BYTE_ORDER) || defined(__LITTLE_ENDIAN)
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define SHHI shlld
#define SHLO shlrd
#else
#define SHHI shlrd
#define SHLO shlld
#endif
#elif defined(__BIG_ENDIAN__)
#define SHHI shlrd
#define SHLO shlld
#else
#define SHHI shlld
#define SHLO shlrd
#endif

.section .text..SHmedia32,"ax"
.globl memset
.type memset, @function

.align 5

/*
 * memset - fill a byte range with one value (SH-5 SHmedia).
 *
 * Register roles as used by the code below.  The argument assignment
 * matches how the registers are consumed here; NOTE(review): confirm
 * against the SH-5 ABI document.
 *
 *   In:   r2   start address of the region
 *         r3   fill value (low byte is replicated to all 8 bytes)
 *         r4   number of bytes to fill
 *         r18  return address
 *   Out:  r2   is never written, so it still holds the start address
 *
 *   Work: r5   end address            (r2 + r4)
 *         r20  end address & -8       (last quadword boundary)
 *         r22  r2 & 7                 (offset inside first quadword)
 *         r23  r4 + r22
 *         r24  r23 >> 3               (later halved on the mid path)
 *         r25  r2 - r22               (start rounded down to 8)
 *         r7   old memory contents    (short path only)
 *         r8   byte mask (short path) / loop limit r20 - 56 (long path)
 *         r9   constant 8
 *         tr0, tr1, tr2  branch targets; tr2 is the return target
 *
 *   Clobbers: r3, r4, r5, r7, r8, r9, r20, r22, r23, r24, r25,
 *             tr0, tr1, tr2
 *
 * Three paths:
 *   - r23 <= 8: the region lies inside one aligned quadword; do a masked
 *     read-modify-write (or return at once when r4 == 0).
 *   - r23 >> 3 in 1..7: unaligned head and tail stores plus up to three
 *     pairs of aligned stores working inwards from both ends.
 *   - r23 >> 3 >= 8: unrolled loop of four aligned stores per pass,
 *     then a fixed tail.
 */
memset:
	pta/l	multiquad, tr0
	andi	r2, 7, r22		// r22 = misalignment of the start address
	ptabs	r18, tr2		// tr2 = return target
	mshflo.b r3,r3,r3
	add	r4, r22, r23		// r23 = misalignment + length
	mperm.w	r3, r63, r3	// Fill pattern now in every byte of r3

	movi	8, r9
	bgtu/u	r23, r9, tr0 // multiquad

	/* Short path: everything fits in the quadword containing r2. */
	beqi/u	r4, 0, tr2       // Return with size 0 - ensures no mem accesses
	ldlo.q	r2, 0, r7		// r7 = current memory contents
	shlli	r4, 2, r4		// r4 = 4 * length (half the bit count)
	movi	-1, r8
	/*
	 * Shift the all-ones mask by 4*length bits twice, i.e. by 8*length
	 * bits in total.  NOTE(review): presumably split in two so that a
	 * length of 8 (64 bits) yields an empty mask instead of a shift
	 * count that wraps - confirm against the shlld/shlrd definition.
	 */
	SHHI	r8, r4, r8
	SHHI	r8, r4, r8
	mcmv	r7, r8, r3		// merge old contents into r3 under mask r8
	stlo.q	r2, 0, r3
	blink	tr2, r63

multiquad:
	pta/l	lastquad, tr0
	stlo.q	r2, 0, r3		// head: from r2 up to the next boundary
	shlri	r23, 3, r24		// r24 = (misalignment + length) / 8
	add	r2, r4, r5		// r5 = end address
	beqi/u	r24, 1, tr0 // lastquad
	pta/l	loop, tr1
	sub	r2, r22, r25		// r25 = start rounded down to 8
	andi	r5, -8, r20   // calculate end address and
	addi	r20, -7*8, r8 // loop end address; This might overflow, so we need
	                      // to use a different test before we start the loop
	bge/u	r24, r9, tr1 // loop
	/*
	 * Here r24 is 2..7.  Store aligned quadwords inwards from both ends;
	 * the two sequences may overlap in the middle.
	 */
	st.q	r25, 8, r3
	st.q	r20, -8, r3
	shlri	r24, 1, r24		// r24 is now 1, 2 or 3
	beqi/u	r24, 1, tr0 // lastquad
	st.q	r25, 16, r3
	st.q	r20, -16, r3
	beqi/u	r24, 2, tr0 // lastquad
	st.q	r25, 24, r3
	st.q	r20, -24, r3
lastquad:
	sthi.q	r5, -1, r3		// tail: last boundary up to byte r5 - 1
	blink	tr2,r63

loop:
!!!	alloco	r25, 32	// QQQ comment out for short-term fix to SHUK #3895.
			// QQQ commenting out is logically correct, but sub-optimal
			// QQQ Sean McGoogan - 4th April 2003.
	st.q	r25, 8, r3
	st.q	r25, 16, r3
	st.q	r25, 24, r3
	st.q	r25, 32, r3
	addi	r25, 32, r25
	bgeu/l	r8, r25, tr1 // loop

	/*
	 * The loop leaves at most the five quadwords r20-40 .. r20-8
	 * unwritten; store all of them, then the unaligned tail.
	 */
	st.q	r20, -40, r3
	st.q	r20, -32, r3
	st.q	r20, -24, r3
	st.q	r20, -16, r3
	st.q	r20, -8, r3
	sthi.q	r5, -1, r3
	blink	tr2,r63

.size memset,.-memset