GitHub Repository: torvalds/linux
Path: blob/master/arch/alpha/lib/memcpy.c
// SPDX-License-Identifier: GPL-2.0
/*
 * linux/arch/alpha/lib/memcpy.c
 *
 * Copyright (C) 1995 Linus Torvalds
 */

/*
 * This is a reasonably optimized memcpy() routine.
 */

/*
 * Note that the C code is written to be optimized into good assembly. However,
 * at this point gcc is unable to sanely compile "if (n >= 0)", resulting in an
 * explicit compare against 0 (instead of just using the proper "blt reg, xx" or
 * "bge reg, xx"). I hope alpha-gcc will be fixed to notice this eventually..
 */

#include <linux/types.h>
#include <linux/export.h>
#include <linux/string.h>

/*
 * This should be done in one go with ldq_u*2/mask/stq_u. Do it
 * with a macro so that we can fix it up later..
 */
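/*
 * Both ALIGN_DEST_TO8_* macros byte-copy until the destination is 8-byte
 * aligned.  Note that the "return" inside them returns from the caller
 * (the __memcpy_* function that expands the macro), which is also how
 * very short copies terminate early.
 */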
#define ALIGN_DEST_TO8_UP(d,s,n) \
	while (d & 7) { \
		if (n <= 0) return; \
		n--; \
		*(char *) d = *(char *) s; \
		d++; s++; \
	}
#define ALIGN_DEST_TO8_DN(d,s,n) \
	while (d & 7) { \
		if (n <= 0) return; \
		n--; \
		d--; s--; \
		*(char *) d = *(char *) s; \
	}

/*
 * This should similarly be done with ldq_u*2/mask/stq. The destination
 * is aligned, but we don't fill in a full quad-word
 */
#define DO_REST_UP(d,s,n) \
	while (n > 0) { \
		n--; \
		*(char *) d = *(char *) s; \
		d++; s++; \
	}
#define DO_REST_DN(d,s,n) \
	while (n > 0) { \
		n--; \
		d--; s--; \
		*(char *) d = *(char *) s; \
	}

/*
 * This should be done with ldq/mask/stq. The source and destination are
 * aligned, but we don't fill in a full quad-word
 */
#define DO_REST_ALIGNED_UP(d,s,n) DO_REST_UP(d,s,n)
#define DO_REST_ALIGNED_DN(d,s,n) DO_REST_DN(d,s,n)

/*
 * This does unaligned memory copies. We want to avoid storing to
 * an unaligned address, as that would do a read-modify-write cycle.
 * We also want to avoid reading each unaligned source word twice.
 *
 * Note the ordering to try to avoid load (and address generation) latencies.
 */
static inline void __memcpy_unaligned_up (unsigned long d, unsigned long s,
					  long n)
{
	ALIGN_DEST_TO8_UP(d,s,n);
	n -= 8;			/* to avoid compare against 8 in the loop */
	if (n >= 0) {
		unsigned long low_word, high_word;
		__asm__("ldq_u %0,%1":"=r" (low_word):"m" (*(unsigned long *) s));
		do {
			unsigned long tmp;
			__asm__("ldq_u %0,%1":"=r" (high_word):"m" (*(unsigned long *)(s+8)));
			n -= 8;
			__asm__("extql %1,%2,%0"
				:"=r" (low_word)
				:"r" (low_word), "r" (s));
			__asm__("extqh %1,%2,%0"
				:"=r" (tmp)
				:"r" (high_word), "r" (s));
			s += 8;
			*(unsigned long *) d = low_word | tmp;
			d += 8;
			low_word = high_word;
		} while (n >= 0);
	}
	n += 8;
	DO_REST_UP(d,s,n);
}

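/*
 * The _dn variants copy downwards: both pointers are first advanced to the
 * end of their buffers and then walked back.  That would be the safe
 * direction if the destination overlapped the source at a higher address;
 * note that memcpy() below only ever uses the _up variants.
 */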
static inline void __memcpy_unaligned_dn (unsigned long d, unsigned long s,
					  long n)
{
	/* I don't understand AXP assembler well enough for this. -Tim */
	s += n;
	d += n;
	while (n--)
		* (char *) --d = * (char *) --s;
}

/*
 * Hmm.. Strange. The __asm__ here is there to make gcc use an integer register
 * for the load-store. I don't know why, but using a floating point register
 * for the move seems to slow things down (by a very small margin, though).
 *
 * Note the ordering to try to avoid load (and address generation) latencies.
 */
static inline void __memcpy_aligned_up (unsigned long d, unsigned long s,
					long n)
{
	ALIGN_DEST_TO8_UP(d,s,n);
	n -= 8;
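	/* n is biased by 8 so the loop can test n >= 0 rather than n >= 8;
	   the bias is undone after the loop before copying the tail. */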
	while (n >= 0) {
		unsigned long tmp;
		__asm__("ldq %0,%1":"=r" (tmp):"m" (*(unsigned long *) s));
		n -= 8;
		s += 8;
		*(unsigned long *) d = tmp;
		d += 8;
	}
	n += 8;
	DO_REST_ALIGNED_UP(d,s,n);
}
static inline void __memcpy_aligned_dn (unsigned long d, unsigned long s,
					long n)
{
	s += n;
	d += n;
	ALIGN_DEST_TO8_DN(d,s,n);
	n -= 8;
	while (n >= 0) {
		unsigned long tmp;
		s -= 8;
		__asm__("ldq %0,%1":"=r" (tmp):"m" (*(unsigned long *) s));
		n -= 8;
		d -= 8;
		*(unsigned long *) d = tmp;
	}
	n += 8;
	DO_REST_ALIGNED_DN(d,s,n);
}

#undef memcpy

void * memcpy(void * dest, const void *src, size_t n)
{
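	/*
	 * If dest and src differ by a multiple of 8 (their XOR has the low
	 * three bits clear), both pointers reach 8-byte alignment together,
	 * so the quadword-at-a-time aligned copy can be used.  Otherwise
	 * fall back to the unaligned variant.
	 */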
	if (!(((unsigned long) dest ^ (unsigned long) src) & 7)) {
		__memcpy_aligned_up ((unsigned long) dest, (unsigned long) src,
				     n);
		return dest;
	}
	__memcpy_unaligned_up ((unsigned long) dest, (unsigned long) src, n);
	return dest;
}
EXPORT_SYMBOL(memcpy);