Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
awilliam
GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/sh/lib64/copy_page.S
10817 views
1
/*
2
Copyright 2003 Richard Curnow, SuperH (UK) Ltd.
3
4
This file is subject to the terms and conditions of the GNU General Public
5
License. See the file "COPYING" in the main directory of this archive
6
for more details.
7
8
Tight version of memcpy for the case of just copying a page.
9
Prefetch strategy empirically optimised against RTL simulations
10
of SH5-101 cut2 eval chip with Cayman board DDR memory.
11
12
Parameters:
13
r2 : destination effective address (start of page)
14
r3 : source effective address (start of page)
15
16
Always copies 4096 bytes.
17
18
Points to review.
19
* Currently the prefetch is 4 lines ahead and the alloco is 2 lines ahead.
20
It seems like the prefetch needs to be at least 4 lines ahead to get
21
the data into the cache in time, and the allocos contend with outstanding
22
prefetches for the same cache set, so it's better to have the numbers
23
different.
24
*/
25
26
.section .text..SHmedia32,"ax" /* emit into the section named .text..SHmedia32; flags "ax" = allocatable + executable */
27
.little /* NOTE(review): presumably selects little-endian assembly for this file - confirm against the GAS SH documentation */
28
29
.balign 8 /* align the entry point that follows to an 8-byte boundary */
30
.global copy_page /* make copy_page visible to other objects at link time */
31
copy_page:
/* Entry point.
   In:  r2 = destination address, r3 = source address.
   The destination pointer r2 is advanced in place, 32 bytes per loop
   iteration, and equals the original destination + 4096 on return.
   The source is never walked with its own pointer: it is addressed as
   r2 + (r3 - r2) using the constant offsets held in r60/r61/r62/r23.
   Working registers set up and used below: r6-r8, r22, r23, r36-r39,
   r60-r62 and target registers tr0-tr3. */
32
33
/* Copy 4096 bytes worth of data from r3 to r2.
34
Do prefetches 4 lines ahead.
35
Do alloco 2 lines ahead */
36
37
/* Point tr1/tr2/tr3 at local labels 1/2/3 (the loop head and the two skip
   targets) and tr0 at the address held in r18; tr0 is what the final
   blink branches through. NOTE(review): r18 is presumably the caller's
   return address under the calling convention - confirm. */
pta 1f, tr1
38
pta 2f, tr2
39
pta 3f, tr3
40
ptabs r18, tr0
41
42
/* Compiled out: four loads from the source at offsets 0x00..0x60 with r63
   as destination. NOTE(review): this looks like the initial prefetch of
   the first four source lines, disabled under the TAKum03020 tag; the
   meaning of that tag is not explained in this file. */
#if 0
43
/* TAKum03020 */
44
ld.q r3, 0x00, r63
45
ld.q r3, 0x20, r63
46
ld.q r3, 0x40, r63
47
ld.q r3, 0x60, r63
48
#endif
49
/* Prime the first two destination lines (offsets 0x00 and 0x20) before
   the loop; the loop itself only issues alloco for r2 + 0x40 onwards.
   Each alloco is followed by a synco carrying the TAKum03020 tag. */
alloco r2, 0x00
50
synco ! TAKum03020
51
alloco r2, 0x20
52
synco ! TAKum03020
53
54
/* Loop limits, all relative to the original destination in r2:
     r6 = dst + 3968  (dst + 4096 - 0x80; compared only in the disabled prefetch code)
     r7 = dst + 4032  (dst + 4096 - 0x40; past this point alloco is skipped)
     r8 = dst + 4096  (end of page; loop exit bound) */
movi 3968, r6
55
add r2, r6, r6
56
addi r6, 64, r7
57
addi r7, 64, r8
58
/* Source-minus-destination offsets so that r2 + r6x addresses the source:
     r60 = src - dst, r61 = r60 + 8, r62 = r60 + 16, r23 = r60 + 24.
   r22 = r60 + 0x80 is the prefetch offset; it is referenced only inside
   the "#if 0" section of the loop, so it is unused in the code as built. */
sub r3, r2, r60
59
addi r60, 8, r61
60
addi r61, 8, r62
61
addi r62, 8, r23
62
addi r60, 0x80, r22
63
64
/* Minimal code size. The extra branches inside the loop don't cost much
65
because they overlap with the time spent waiting for prefetches to
66
complete. */
67
/* Loop head: one iteration copies 32 bytes (four 8-byte loads/stores). */
1:
68
/* Compiled out: source prefetch at r2 + r22, skipped once r2 >= r6. */
#if 0
69
/* TAKum03020 */
70
bge/u r2, r6, tr2 ! skip prefetch for last 4 lines
71
ldx.q r2, r22, r63 ! prefetch 4 lines hence
72
#endif
73
2:
74
/* When r2 >= r7 the line at r2 + 0x40 would lie beyond the page end, so
   branch straight to the copy at label 3. */
bge/u r2, r7, tr3 ! skip alloco for last 2 lines
75
alloco r2, 0x40 ! alloc destination line 2 lines ahead
76
synco ! TAKum03020
77
3:
78
/* Load four quadwords from the source (r2 + r60/r61/r62/r23) into r36-r39;
   all four loads are issued before the first store. */
ldx.q r2, r60, r36
79
ldx.q r2, r61, r37
80
ldx.q r2, r62, r38
81
ldx.q r2, r23, r39
82
/* Store them to the destination at r2 + 0, 8, 16, 24. */
st.q r2, 0, r36
83
st.q r2, 8, r37
84
st.q r2, 16, r38
85
st.q r2, 24, r39
86
/* Advance the destination by 32 bytes and loop while r8 > r2, i.e. until
   the whole 4096-byte page has been written. */
addi r2, 32, r2
87
bgt/l r8, r2, tr1
88
89
blink tr0, r63 ! return
90
91