Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
awilliam
GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/tile/lib/memcpy_tile64.c
10817 views
1
/*
2
* Copyright 2010 Tilera Corporation. All Rights Reserved.
3
*
4
* This program is free software; you can redistribute it and/or
5
* modify it under the terms of the GNU General Public License
6
* as published by the Free Software Foundation, version 2.
7
*
8
* This program is distributed in the hope that it will be useful, but
9
* WITHOUT ANY WARRANTY; without even the implied warranty of
10
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11
* NON INFRINGEMENT. See the GNU General Public License for
12
* more details.
13
*/
14
15
#include <linux/string.h>
16
#include <linux/smp.h>
17
#include <linux/module.h>
18
#include <linux/uaccess.h>
19
#include <asm/fixmap.h>
20
#include <asm/kmap_types.h>
21
#include <asm/tlbflush.h>
22
#include <hv/hypervisor.h>
23
#include <arch/chip.h>
24
25
26
#if !CHIP_HAS_COHERENT_LOCAL_CACHE()
27
28
/* Defined in memcpy.S */
29
extern unsigned long __memcpy_asm(void *to, const void *from, unsigned long n);
30
extern unsigned long __copy_to_user_inatomic_asm(
31
void __user *to, const void *from, unsigned long n);
32
extern unsigned long __copy_from_user_inatomic_asm(
33
void *to, const void __user *from, unsigned long n);
34
extern unsigned long __copy_from_user_zeroing_asm(
35
void *to, const void __user *from, unsigned long n);
36
37
typedef unsigned long (*memcpy_t)(void *, const void *, unsigned long);
38
39
/* Size above which to consider TLB games for performance */
40
#define LARGE_COPY_CUTOFF 2048
41
42
/* Communicate to the simulator what we are trying to do. */
43
#define sim_allow_multiple_caching(b) \
44
__insn_mtspr(SPR_SIM_CONTROL, \
45
SIM_CONTROL_ALLOW_MULTIPLE_CACHING | ((b) << _SIM_CONTROL_OPERATOR_BITS))
46
47
/*
48
* Copy memory by briefly enabling incoherent cacheline-at-a-time mode.
49
*
50
* We set up our own source and destination PTEs that we fully control.
51
* This is the only way to guarantee that we don't race with another
52
* thread that is modifying the PTE; we can't afford to try the
53
* copy_{to,from}_user() technique of catching the interrupt, since
54
* we must run with interrupts disabled to avoid the risk of some
55
* other code seeing the incoherent data in our cache. (Recall that
56
* our cache is indexed by PA, so even if the other code doesn't use
57
* our kmap_atomic virtual addresses, they'll still hit in cache using
58
* the normal VAs that aren't supposed to hit in cache.)
59
*/
60
static void memcpy_multicache(void *dest, const void *source,
61
pte_t dst_pte, pte_t src_pte, int len)
62
{
63
int idx;
64
unsigned long flags, newsrc, newdst;
65
pmd_t *pmdp;
66
pte_t *ptep;
67
int type0, type1;
68
int cpu = get_cpu();
69
70
/*
71
* Disable interrupts so that we don't recurse into memcpy()
72
* in an interrupt handler, nor accidentally reference
73
* the PA of the source from an interrupt routine. Also
74
* notify the simulator that we're playing games so we don't
75
* generate spurious coherency warnings.
76
*/
77
local_irq_save(flags);
78
sim_allow_multiple_caching(1);
79
80
/* Set up the new dest mapping */
81
type0 = kmap_atomic_idx_push();
82
idx = FIX_KMAP_BEGIN + (KM_TYPE_NR * cpu) + type0;
83
newdst = __fix_to_virt(idx) + ((unsigned long)dest & (PAGE_SIZE-1));
84
pmdp = pmd_offset(pud_offset(pgd_offset_k(newdst), newdst), newdst);
85
ptep = pte_offset_kernel(pmdp, newdst);
86
if (pte_val(*ptep) != pte_val(dst_pte)) {
87
set_pte(ptep, dst_pte);
88
local_flush_tlb_page(NULL, newdst, PAGE_SIZE);
89
}
90
91
/* Set up the new source mapping */
92
type1 = kmap_atomic_idx_push();
93
idx += (type0 - type1);
94
src_pte = hv_pte_set_nc(src_pte);
95
src_pte = hv_pte_clear_writable(src_pte); /* be paranoid */
96
newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1));
97
pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc);
98
ptep = pte_offset_kernel(pmdp, newsrc);
99
__set_pte(ptep, src_pte); /* set_pte() would be confused by this */
100
local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);
101
102
/* Actually move the data. */
103
__memcpy_asm((void *)newdst, (const void *)newsrc, len);
104
105
/*
106
* Remap the source as locally-cached and not OLOC'ed so that
107
* we can inval without also invaling the remote cpu's cache.
108
* This also avoids known errata with inv'ing cacheable oloc data.
109
*/
110
src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3);
111
src_pte = hv_pte_set_writable(src_pte); /* need write access for inv */
112
__set_pte(ptep, src_pte); /* set_pte() would be confused by this */
113
local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);
114
115
/*
116
* Do the actual invalidation, covering the full L2 cache line
117
* at the end since __memcpy_asm() is somewhat aggressive.
118
*/
119
__inv_buffer((void *)newsrc, len);
120
121
/*
122
* We're done: notify the simulator that all is back to normal,
123
* and re-enable interrupts and pre-emption.
124
*/
125
kmap_atomic_idx_pop();
126
kmap_atomic_idx_pop();
127
sim_allow_multiple_caching(0);
128
local_irq_restore(flags);
129
put_cpu();
130
}
131
132
/*
133
* Identify large copies from remotely-cached memory, and copy them
134
* via memcpy_multicache() if they look good, otherwise fall back
135
* to the particular kind of copying passed as the memcpy_t function.
136
*/
137
static unsigned long fast_copy(void *dest, const void *source, int len,
138
memcpy_t func)
139
{
140
/*
141
* Check if it's big enough to bother with. We may end up doing a
142
* small copy via TLB manipulation if we're near a page boundary,
143
* but presumably we'll make it up when we hit the second page.
144
*/
145
while (len >= LARGE_COPY_CUTOFF) {
146
int copy_size, bytes_left_on_page;
147
pte_t *src_ptep, *dst_ptep;
148
pte_t src_pte, dst_pte;
149
struct page *src_page, *dst_page;
150
151
/* Is the source page oloc'ed to a remote cpu? */
152
retry_source:
153
src_ptep = virt_to_pte(current->mm, (unsigned long)source);
154
if (src_ptep == NULL)
155
break;
156
src_pte = *src_ptep;
157
if (!hv_pte_get_present(src_pte) ||
158
!hv_pte_get_readable(src_pte) ||
159
hv_pte_get_mode(src_pte) != HV_PTE_MODE_CACHE_TILE_L3)
160
break;
161
if (get_remote_cache_cpu(src_pte) == smp_processor_id())
162
break;
163
src_page = pfn_to_page(hv_pte_get_pfn(src_pte));
164
get_page(src_page);
165
if (pte_val(src_pte) != pte_val(*src_ptep)) {
166
put_page(src_page);
167
goto retry_source;
168
}
169
if (pte_huge(src_pte)) {
170
/* Adjust the PTE to correspond to a small page */
171
int pfn = hv_pte_get_pfn(src_pte);
172
pfn += (((unsigned long)source & (HPAGE_SIZE-1))
173
>> PAGE_SHIFT);
174
src_pte = pfn_pte(pfn, src_pte);
175
src_pte = pte_mksmall(src_pte);
176
}
177
178
/* Is the destination page writable? */
179
retry_dest:
180
dst_ptep = virt_to_pte(current->mm, (unsigned long)dest);
181
if (dst_ptep == NULL) {
182
put_page(src_page);
183
break;
184
}
185
dst_pte = *dst_ptep;
186
if (!hv_pte_get_present(dst_pte) ||
187
!hv_pte_get_writable(dst_pte)) {
188
put_page(src_page);
189
break;
190
}
191
dst_page = pfn_to_page(hv_pte_get_pfn(dst_pte));
192
if (dst_page == src_page) {
193
/*
194
* Source and dest are on the same page; this
195
* potentially exposes us to incoherence if any
196
* part of src and dest overlap on a cache line.
197
* Just give up rather than trying to be precise.
198
*/
199
put_page(src_page);
200
break;
201
}
202
get_page(dst_page);
203
if (pte_val(dst_pte) != pte_val(*dst_ptep)) {
204
put_page(dst_page);
205
goto retry_dest;
206
}
207
if (pte_huge(dst_pte)) {
208
/* Adjust the PTE to correspond to a small page */
209
int pfn = hv_pte_get_pfn(dst_pte);
210
pfn += (((unsigned long)dest & (HPAGE_SIZE-1))
211
>> PAGE_SHIFT);
212
dst_pte = pfn_pte(pfn, dst_pte);
213
dst_pte = pte_mksmall(dst_pte);
214
}
215
216
/* All looks good: create a cachable PTE and copy from it */
217
copy_size = len;
218
bytes_left_on_page =
219
PAGE_SIZE - (((int)source) & (PAGE_SIZE-1));
220
if (copy_size > bytes_left_on_page)
221
copy_size = bytes_left_on_page;
222
bytes_left_on_page =
223
PAGE_SIZE - (((int)dest) & (PAGE_SIZE-1));
224
if (copy_size > bytes_left_on_page)
225
copy_size = bytes_left_on_page;
226
memcpy_multicache(dest, source, dst_pte, src_pte, copy_size);
227
228
/* Release the pages */
229
put_page(dst_page);
230
put_page(src_page);
231
232
/* Continue on the next page */
233
dest += copy_size;
234
source += copy_size;
235
len -= copy_size;
236
}
237
238
return func(dest, source, len);
239
}
240
241
/*
 * Kernel memcpy(): copy "n" bytes from "from" to "to" and return "to".
 *
 * Copies below LARGE_COPY_CUTOFF go straight to __memcpy_asm(); larger
 * ones are routed through fast_copy(), which may use the multicache
 * path for part of the range.
 */
void *memcpy(void *to, const void *from, __kernel_size_t n)
{
	/*
	 * NOTE(review): this path relies on __memcpy_asm() returning its
	 * destination argument -- confirm against memcpy.S.
	 */
	if (n < LARGE_COPY_CUTOFF)
		return (void *)__memcpy_asm(to, from, n);

	/*
	 * fast_copy() returns the result of calling its fallback routine
	 * on pointers it has already advanced past the fast-copied part,
	 * so its return value is not usable as memcpy()'s result.  Return
	 * the original destination explicitly.
	 */
	fast_copy(to, from, n, __memcpy_asm);
	return to;
}
248
249
/*
 * Copy "n" bytes from kernel memory at "from" to user memory at "to".
 *
 * Large requests go through fast_copy() with the assembly routine as
 * the fallback; small ones call the assembly routine directly.  The
 * result is whatever that routine reports (presumably the number of
 * bytes left uncopied -- confirm against memcpy.S).
 */
unsigned long __copy_to_user_inatomic(void __user *to, const void *from,
				      unsigned long n)
{
	if (n >= LARGE_COPY_CUTOFF)
		return fast_copy(to, from, n, __copy_to_user_inatomic_asm);

	return __copy_to_user_inatomic_asm(to, from, n);
}
257
258
/*
 * Copy "n" bytes from user memory at "from" to kernel memory at "to".
 *
 * Large requests go through fast_copy() with the assembly routine as
 * the fallback; small ones call the assembly routine directly.  The
 * result is whatever that routine reports (presumably the number of
 * bytes left uncopied -- confirm against memcpy.S).
 */
unsigned long __copy_from_user_inatomic(void *to, const void __user *from,
					unsigned long n)
{
	if (n >= LARGE_COPY_CUTOFF)
		return fast_copy(to, from, n, __copy_from_user_inatomic_asm);

	return __copy_from_user_inatomic_asm(to, from, n);
}
266
267
/*
 * Copy "n" bytes from user memory at "from" to kernel memory at "to",
 * using the "zeroing" flavour of the assembly copy routine.
 *
 * Large requests go through fast_copy() with that routine as the
 * fallback; small ones call it directly.  The result is whatever the
 * routine reports (presumably the number of bytes left uncopied --
 * confirm against memcpy.S).
 */
unsigned long __copy_from_user_zeroing(void *to, const void __user *from,
				       unsigned long n)
{
	if (n >= LARGE_COPY_CUTOFF)
		return fast_copy(to, from, n, __copy_from_user_zeroing_asm);

	return __copy_from_user_zeroing_asm(to, from, n);
}
275
276
#endif /* !CHIP_HAS_COHERENT_LOCAL_CACHE() */
277
278