Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/tools/testing/vma/include/dup.h
121838 views
1
/* SPDX-License-Identifier: GPL-2.0+ */

#pragma once

/* Forward declarations to avoid header cycle. */
struct vm_area_struct;
static inline void vma_start_write(struct vm_area_struct *vma);

/* Stub VMA operations table provided by the test harness. */
extern const struct vm_operations_struct vma_dummy_vm_ops;
extern unsigned long stack_guard_gap;
/* NOTE: a second, identical declaration of vma_dummy_vm_ops was removed. */
extern unsigned long rlimit(unsigned int limit);
struct task_struct *get_current(void);

#define MMF_HAS_MDWE	28
/* Emulate the kernel's 'current' task pointer via the harness helper. */
#define current get_current()
17
18
/*
 * Define the task command name length as enum, then it can be visible to
 * BPF programs.
 *
 * Duplicated from kernel source; must be kept in sync (see note below).
 */
enum {
	TASK_COMM_LEN = 16,
};
25
26
/* PARTIALLY implemented types. */

/* Userland stand-in for the kernel mm_struct: only fields the tests use. */
struct mm_struct {
	struct maple_tree mm_mt;	/* Maple tree holding the VMAs. */
	int map_count;			/* number of VMAs */
	unsigned long total_vm;	   /* Total pages mapped */
	unsigned long locked_vm;   /* Pages that have PG_mlocked set */
	unsigned long data_vm;	   /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
	unsigned long exec_vm;	   /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
	unsigned long stack_vm;	   /* VM_STACK */

	unsigned long def_flags;

	mm_flags_t flags; /* Must use mm_flags_* helpers to access */
};
40
/* Minimal address_space: only the i_mmap side needed for rmap testing. */
struct address_space {
	struct rb_root_cached i_mmap;	/* Interval tree of mapped VMAs. */
	unsigned long flags;
	atomic_t i_mmap_writable;	/* Count of VM_SHARED mappings. */
};

/* Only the mmap-related file hooks are modelled. */
struct file_operations {
	int (*mmap)(struct file *, struct vm_area_struct *);
	int (*mmap_prepare)(struct vm_area_desc *);
};

struct file {
	struct address_space *f_mapping;
	const struct file_operations *f_op;
};
53
/* Link between a VMA and an anon_vma it may be associated with. */
struct anon_vma_chain {
	struct anon_vma *anon_vma;
	struct list_head same_vma;	/* Chained on the VMA's anon_vma_chain list. */
};

/* Minimal task_struct: fields read via the 'current' macro above. */
struct task_struct {
	char comm[TASK_COMM_LEN];
	pid_t pid;
	struct mm_struct *mm;

	/* Used for emulating ABI behavior of previous Linux versions: */
	unsigned int personality;
};
65
66
/* Simple refcount wrapper, mirroring the kernel kref. */
struct kref {
	refcount_t refcount;
};

/* Refcounted, dynamically sized name attached to anonymous VMAs. */
struct anon_vma_name {
	struct kref kref;
	/* The name needs to be at the end because it is dynamically sized. */
	char name[];
};
75
76
/*
 * Contains declarations that are DUPLICATED from kernel source in order to
 * facilitate userland VMA testing.
 *
 * These must be kept in sync with kernel source.
 */

/* Bit set in vm_refcnt while a writer holds the per-VMA lock. */
#define VMA_LOCK_OFFSET	0x40000000

/* Free-pointer slot used by SLAB_TYPESAFE_BY_RCU caches. */
typedef struct { unsigned long v; } freeptr_t;

#define VM_NONE		0x00000000

/* Index of a single VMA flag bit; see the enum of bits below. */
typedef int __bitwise vma_flag_t;

#define ACCESS_PRIVATE(p, member) ((p)->member)

/* Declare a VMA flag bit number, and an alias sharing another bit's number. */
#define DECLARE_VMA_BIT(name, bitnum) \
	VMA_ ## name ## _BIT = ((__force vma_flag_t)bitnum)
#define DECLARE_VMA_BIT_ALIAS(name, aliased) \
	VMA_ ## name ## _BIT = VMA_ ## aliased ## _BIT
97
/* Bit numbers for every VMA flag; VM_* flag values are built from these. */
enum {
	DECLARE_VMA_BIT(READ, 0),
	DECLARE_VMA_BIT(WRITE, 1),
	DECLARE_VMA_BIT(EXEC, 2),
	DECLARE_VMA_BIT(SHARED, 3),
	/* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */
	DECLARE_VMA_BIT(MAYREAD, 4),	/* limits for mprotect() etc. */
	DECLARE_VMA_BIT(MAYWRITE, 5),
	DECLARE_VMA_BIT(MAYEXEC, 6),
	DECLARE_VMA_BIT(MAYSHARE, 7),
	DECLARE_VMA_BIT(GROWSDOWN, 8),	/* general info on the segment */
#ifdef CONFIG_MMU
	DECLARE_VMA_BIT(UFFD_MISSING, 9),/* missing pages tracking */
#else
	/* nommu: R/O MAP_PRIVATE mapping that might overlay a file mapping */
	DECLARE_VMA_BIT(MAYOVERLAY, 9),
#endif	/* CONFIG_MMU */
	/* Page-ranges managed without "struct page", just pure PFN */
	DECLARE_VMA_BIT(PFNMAP, 10),
	DECLARE_VMA_BIT(MAYBE_GUARD, 11),
	DECLARE_VMA_BIT(UFFD_WP, 12),	/* wrprotect pages tracking */
	DECLARE_VMA_BIT(LOCKED, 13),
	DECLARE_VMA_BIT(IO, 14),	/* Memory mapped I/O or similar */
	DECLARE_VMA_BIT(SEQ_READ, 15),	/* App will access data sequentially */
	DECLARE_VMA_BIT(RAND_READ, 16),	/* App will not benefit from clustered reads */
	DECLARE_VMA_BIT(DONTCOPY, 17),	/* Do not copy this vma on fork */
	DECLARE_VMA_BIT(DONTEXPAND, 18),/* Cannot expand with mremap() */
	DECLARE_VMA_BIT(LOCKONFAULT, 19),/* Lock pages covered when faulted in */
	DECLARE_VMA_BIT(ACCOUNT, 20),	/* Is a VM accounted object */
	DECLARE_VMA_BIT(NORESERVE, 21),	/* should the VM suppress accounting */
	DECLARE_VMA_BIT(HUGETLB, 22),	/* Huge TLB Page VM */
	DECLARE_VMA_BIT(SYNC, 23),	/* Synchronous page faults */
	DECLARE_VMA_BIT(ARCH_1, 24),	/* Architecture-specific flag */
	DECLARE_VMA_BIT(WIPEONFORK, 25),/* Wipe VMA contents in child. */
	DECLARE_VMA_BIT(DONTDUMP, 26),	/* Do not include in the core dump */
	DECLARE_VMA_BIT(SOFTDIRTY, 27),	/* NOT soft dirty clean area */
	DECLARE_VMA_BIT(MIXEDMAP, 28),	/* Can contain struct page and pure PFN pages */
	DECLARE_VMA_BIT(HUGEPAGE, 29),	/* MADV_HUGEPAGE marked this vma */
	DECLARE_VMA_BIT(NOHUGEPAGE, 30),/* MADV_NOHUGEPAGE marked this vma */
	DECLARE_VMA_BIT(MERGEABLE, 31),	/* KSM may merge identical pages */
	/* These bits are reused, we define specific uses below. */
	DECLARE_VMA_BIT(HIGH_ARCH_0, 32),
	DECLARE_VMA_BIT(HIGH_ARCH_1, 33),
	DECLARE_VMA_BIT(HIGH_ARCH_2, 34),
	DECLARE_VMA_BIT(HIGH_ARCH_3, 35),
	DECLARE_VMA_BIT(HIGH_ARCH_4, 36),
	DECLARE_VMA_BIT(HIGH_ARCH_5, 37),
	DECLARE_VMA_BIT(HIGH_ARCH_6, 38),
	/*
	 * This flag is used to connect VFIO to arch specific KVM code. It
	 * indicates that the memory under this VMA is safe for use with any
	 * non-cachable memory type inside KVM. Some VFIO devices, on some
	 * platforms, are thought to be unsafe and can cause machine crashes
	 * if KVM does not lock down the memory type.
	 */
	DECLARE_VMA_BIT(ALLOW_ANY_UNCACHED, 39),
#ifdef CONFIG_PPC32
	DECLARE_VMA_BIT_ALIAS(DROPPABLE, ARCH_1),
#else
	DECLARE_VMA_BIT(DROPPABLE, 40),
#endif
	DECLARE_VMA_BIT(UFFD_MINOR, 41),
	DECLARE_VMA_BIT(SEALED, 42),
	/* Flags that reuse flags above. */
	DECLARE_VMA_BIT_ALIAS(PKEY_BIT0, HIGH_ARCH_0),
	DECLARE_VMA_BIT_ALIAS(PKEY_BIT1, HIGH_ARCH_1),
	DECLARE_VMA_BIT_ALIAS(PKEY_BIT2, HIGH_ARCH_2),
	DECLARE_VMA_BIT_ALIAS(PKEY_BIT3, HIGH_ARCH_3),
	DECLARE_VMA_BIT_ALIAS(PKEY_BIT4, HIGH_ARCH_4),
#if defined(CONFIG_X86_USER_SHADOW_STACK)
	/*
	 * VM_SHADOW_STACK should not be set with VM_SHARED because of lack of
	 * support in core mm.
	 *
	 * These VMAs will get a single end guard page. This helps userspace
	 * protect itself from attacks. A single page is enough for current
	 * shadow stack archs (x86). See the comments near alloc_shstk() in
	 * arch/x86/kernel/shstk.c for more details on the guard size.
	 */
	DECLARE_VMA_BIT_ALIAS(SHADOW_STACK, HIGH_ARCH_5),
#elif defined(CONFIG_ARM64_GCS)
	/*
	 * arm64's Guarded Control Stack implements similar functionality and
	 * has similar constraints to shadow stacks.
	 */
	DECLARE_VMA_BIT_ALIAS(SHADOW_STACK, HIGH_ARCH_6),
#endif
	DECLARE_VMA_BIT_ALIAS(SAO, ARCH_1),	/* Strong Access Ordering (powerpc) */
	DECLARE_VMA_BIT_ALIAS(GROWSUP, ARCH_1),	/* parisc */
	DECLARE_VMA_BIT_ALIAS(SPARC_ADI, ARCH_1),	/* sparc64 */
	DECLARE_VMA_BIT_ALIAS(ARM64_BTI, ARCH_1),	/* arm64 */
	DECLARE_VMA_BIT_ALIAS(ARCH_CLEAR, ARCH_1),	/* sparc64, arm64 */
	DECLARE_VMA_BIT_ALIAS(MAPPED_COPY, ARCH_1),	/* !CONFIG_MMU */
	DECLARE_VMA_BIT_ALIAS(MTE, HIGH_ARCH_4),	/* arm64 */
	DECLARE_VMA_BIT_ALIAS(MTE_ALLOWED, HIGH_ARCH_5),/* arm64 */
#ifdef CONFIG_STACK_GROWSUP
	DECLARE_VMA_BIT_ALIAS(STACK, GROWSUP),
	DECLARE_VMA_BIT_ALIAS(STACK_EARLY, GROWSDOWN),
#else
	DECLARE_VMA_BIT_ALIAS(STACK, GROWSDOWN),
#endif
};
199
200
/* Build a VM_* flag value from its bit number declared in the enum above. */
#define INIT_VM_FLAG(name) BIT((__force int) VMA_ ## name ## _BIT)
#define VM_READ		INIT_VM_FLAG(READ)
#define VM_WRITE	INIT_VM_FLAG(WRITE)
#define VM_EXEC		INIT_VM_FLAG(EXEC)
#define VM_SHARED	INIT_VM_FLAG(SHARED)
#define VM_MAYREAD	INIT_VM_FLAG(MAYREAD)
#define VM_MAYWRITE	INIT_VM_FLAG(MAYWRITE)
#define VM_MAYEXEC	INIT_VM_FLAG(MAYEXEC)
#define VM_MAYSHARE	INIT_VM_FLAG(MAYSHARE)
#define VM_GROWSDOWN	INIT_VM_FLAG(GROWSDOWN)
#ifdef CONFIG_MMU
#define VM_UFFD_MISSING	INIT_VM_FLAG(UFFD_MISSING)
#else
#define VM_UFFD_MISSING	VM_NONE
#define VM_MAYOVERLAY	INIT_VM_FLAG(MAYOVERLAY)
#endif
#define VM_PFNMAP	INIT_VM_FLAG(PFNMAP)
#define VM_MAYBE_GUARD	INIT_VM_FLAG(MAYBE_GUARD)
#define VM_UFFD_WP	INIT_VM_FLAG(UFFD_WP)
#define VM_LOCKED	INIT_VM_FLAG(LOCKED)
#define VM_IO		INIT_VM_FLAG(IO)
#define VM_SEQ_READ	INIT_VM_FLAG(SEQ_READ)
#define VM_RAND_READ	INIT_VM_FLAG(RAND_READ)
#define VM_DONTCOPY	INIT_VM_FLAG(DONTCOPY)
#define VM_DONTEXPAND	INIT_VM_FLAG(DONTEXPAND)
#define VM_LOCKONFAULT	INIT_VM_FLAG(LOCKONFAULT)
#define VM_ACCOUNT	INIT_VM_FLAG(ACCOUNT)
#define VM_NORESERVE	INIT_VM_FLAG(NORESERVE)
#define VM_HUGETLB	INIT_VM_FLAG(HUGETLB)
#define VM_SYNC		INIT_VM_FLAG(SYNC)
#define VM_ARCH_1	INIT_VM_FLAG(ARCH_1)
#define VM_WIPEONFORK	INIT_VM_FLAG(WIPEONFORK)
#define VM_DONTDUMP	INIT_VM_FLAG(DONTDUMP)
#ifdef CONFIG_MEM_SOFT_DIRTY
#define VM_SOFTDIRTY	INIT_VM_FLAG(SOFTDIRTY)
#else
#define VM_SOFTDIRTY	VM_NONE
#endif
#define VM_MIXEDMAP	INIT_VM_FLAG(MIXEDMAP)
#define VM_HUGEPAGE	INIT_VM_FLAG(HUGEPAGE)
#define VM_NOHUGEPAGE	INIT_VM_FLAG(NOHUGEPAGE)
#define VM_MERGEABLE	INIT_VM_FLAG(MERGEABLE)
#define VM_STACK	INIT_VM_FLAG(STACK)
/*
 * FIX: this guard previously tested CONFIG_STACK_GROWS_UP, but the
 * VMA_STACK_EARLY_BIT alias above is declared under CONFIG_STACK_GROWSUP
 * (no second underscore), which is the actual kernel config symbol. The
 * misspelled guard made VM_STACK_EARLY collapse to VM_NONE even on
 * grows-up configurations.
 */
#ifdef CONFIG_STACK_GROWSUP
#define VM_STACK_EARLY	INIT_VM_FLAG(STACK_EARLY)
#else
#define VM_STACK_EARLY	VM_NONE
#endif
#ifdef CONFIG_ARCH_HAS_PKEYS
#define VM_PKEY_SHIFT	((__force int)VMA_HIGH_ARCH_0_BIT)
/* Despite the naming, these are FLAGS not bits. */
#define VM_PKEY_BIT0	INIT_VM_FLAG(PKEY_BIT0)
#define VM_PKEY_BIT1	INIT_VM_FLAG(PKEY_BIT1)
#define VM_PKEY_BIT2	INIT_VM_FLAG(PKEY_BIT2)
#if CONFIG_ARCH_PKEY_BITS > 3
#define VM_PKEY_BIT3	INIT_VM_FLAG(PKEY_BIT3)
#else
#define VM_PKEY_BIT3	VM_NONE
#endif /* CONFIG_ARCH_PKEY_BITS > 3 */
#if CONFIG_ARCH_PKEY_BITS > 4
#define VM_PKEY_BIT4	INIT_VM_FLAG(PKEY_BIT4)
#else
#define VM_PKEY_BIT4	VM_NONE
#endif /* CONFIG_ARCH_PKEY_BITS > 4 */
#endif /* CONFIG_ARCH_HAS_PKEYS */
#if defined(CONFIG_X86_USER_SHADOW_STACK) || defined(CONFIG_ARM64_GCS)
#define VM_SHADOW_STACK	INIT_VM_FLAG(SHADOW_STACK)
#else
#define VM_SHADOW_STACK	VM_NONE
#endif
#if defined(CONFIG_PPC64)
#define VM_SAO		INIT_VM_FLAG(SAO)
#elif defined(CONFIG_PARISC)
#define VM_GROWSUP	INIT_VM_FLAG(GROWSUP)
#elif defined(CONFIG_SPARC64)
#define VM_SPARC_ADI	INIT_VM_FLAG(SPARC_ADI)
#define VM_ARCH_CLEAR	INIT_VM_FLAG(ARCH_CLEAR)
#elif defined(CONFIG_ARM64)
#define VM_ARM64_BTI	INIT_VM_FLAG(ARM64_BTI)
#define VM_ARCH_CLEAR	INIT_VM_FLAG(ARCH_CLEAR)
#elif !defined(CONFIG_MMU)
#define VM_MAPPED_COPY	INIT_VM_FLAG(MAPPED_COPY)
#endif
#ifndef VM_GROWSUP
#define VM_GROWSUP	VM_NONE
#endif
#ifdef CONFIG_ARM64_MTE
#define VM_MTE		INIT_VM_FLAG(MTE)
#define VM_MTE_ALLOWED	INIT_VM_FLAG(MTE_ALLOWED)
#else
#define VM_MTE		VM_NONE
#define VM_MTE_ALLOWED	VM_NONE
#endif
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
#define VM_UFFD_MINOR	INIT_VM_FLAG(UFFD_MINOR)
#else
#define VM_UFFD_MINOR	VM_NONE
#endif
#ifdef CONFIG_64BIT
#define VM_ALLOW_ANY_UNCACHED	INIT_VM_FLAG(ALLOW_ANY_UNCACHED)
#define VM_SEALED	INIT_VM_FLAG(SEALED)
#else
#define VM_ALLOW_ANY_UNCACHED	VM_NONE
#define VM_SEALED	VM_NONE
#endif
#if defined(CONFIG_64BIT) || defined(CONFIG_PPC32)
#define VM_DROPPABLE	INIT_VM_FLAG(DROPPABLE)
#else
#define VM_DROPPABLE	VM_NONE
#endif
310
311
/* Bits set in the VMA until the stack is in its final location */
#define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY)

/* VM_EXEC for data mappings iff the task personality requests it. */
#define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0)

/* Common data flag combinations */
#define VM_DATA_FLAGS_TSK_EXEC	(VM_READ | VM_WRITE | TASK_EXEC | \
				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
#define VM_DATA_FLAGS_NON_EXEC	(VM_READ | VM_WRITE | VM_MAYREAD | \
				 VM_MAYWRITE | VM_MAYEXEC)
#define VM_DATA_FLAGS_EXEC	(VM_READ | VM_WRITE | VM_EXEC | \
				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)

#ifndef VM_DATA_DEFAULT_FLAGS		/* arch can override this */
#define VM_DATA_DEFAULT_FLAGS	VM_DATA_FLAGS_EXEC
#endif

#ifndef VM_STACK_DEFAULT_FLAGS		/* arch can override this */
#define VM_STACK_DEFAULT_FLAGS	VM_DATA_DEFAULT_FLAGS
#endif

#define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK)

#define VM_STACK_FLAGS	(VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)

/* VMA basic access permission flags */
#define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC)

/*
 * Special vmas that are non-mergable, non-mlock()able.
 */
#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)

/* 47-bit address-space layout used by the userland tests. */
#define DEFAULT_MAP_WINDOW	((1UL << 47) - PAGE_SIZE)
#define TASK_SIZE_LOW		DEFAULT_MAP_WINDOW
#define TASK_SIZE_MAX		DEFAULT_MAP_WINDOW
#define STACK_TOP		TASK_SIZE_LOW
#define STACK_TOP_MAX		TASK_SIZE_MAX

/* This mask represents all the VMA flag bits used by mlock */
#define VM_LOCKED_MASK	(VM_LOCKED | VM_LOCKONFAULT)

/*
 * NOTE: redundant duplicate definitions of TASK_EXEC and
 * VM_DATA_FLAGS_TSK_EXEC (byte-identical to the ones above) were removed.
 */

#define RLIMIT_STACK		3	/* max stack size */
#define RLIMIT_MEMLOCK		8	/* max locked-in-memory address space */

#define CAP_IPC_LOCK		14

#define VM_STICKY (VM_SOFTDIRTY | VM_MAYBE_GUARD)

#define VM_IGNORE_MERGE VM_STICKY

#define VM_COPY_ON_FORK (VM_PFNMAP | VM_MIXEDMAP | VM_UFFD_WP | VM_MAYBE_GUARD)
368
369
/* Raw access to / construction of a pgprot_t value. */
#define pgprot_val(x)	((x).pgprot)
#define __pgprot(x)	((pgprot_t) { (x) } )

/* Iterate every VMA from the iterator's current position onwards. */
#define for_each_vma(__vmi, __vma)					\
	while (((__vma) = vma_next(&(__vmi))) != NULL)

/* The MM code likes to work with exclusive end addresses */
#define for_each_vma_range(__vmi, __vma, __end)				\
	while (((__vma) = vma_find(&(__vmi), (__end))) != NULL)

#define offset_in_page(p)	((unsigned long)(p) & ~PAGE_MASK)

#define PHYS_PFN(x)	((unsigned long)((x) >> PAGE_SHIFT))

/*
 * Map the atomic bit ops onto the non-atomic __ variants.
 * NOTE(review): presumably safe because the test harness is single-threaded
 * — confirm before reusing in concurrent tests.
 */
#define test_and_set_bit(nr, addr) __test_and_set_bit(nr, addr)
#define test_and_clear_bit(nr, addr) __test_and_clear_bit(nr, addr)

#define AS_MM_ALL_LOCKS 2

/* Exchange the values of a and b (evaluates each operand more than once). */
#define swap(a, b) \
	do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)

/*
 * Flags for bug emulation.
 *
 * These occupy the top three bytes.
 */
enum {
	READ_IMPLIES_EXEC =	0x0400000,
};
399
400
/* Thin wrapper around a maple-tree state for walking a mm's VMAs. */
struct vma_iterator {
	struct ma_state mas;
};

/* Declare and initialise a vma_iterator positioned at __addr in __mm. */
#define VMA_ITERATOR(name, __mm, __addr)	\
	struct vma_iterator name = {		\
		.mas = {			\
			.tree = &(__mm)->mm_mt,	\
			.index = __addr,	\
			.node = NULL,		\
			.status = ma_start,	\
		},				\
	}

/* Userland stub: mutexes carry no state in the test harness. */
#define DEFINE_MUTEX(mutexname) \
	struct mutex mutexname = {}

#define DECLARE_BITMAP(name, bits) \
	unsigned long name[BITS_TO_LONGS(bits)]

/* A vma_flags_t value with every flag clear. */
#define EMPTY_VMA_FLAGS ((vma_flags_t){ })
421
422
/* What action should be taken after an .mmap_prepare call is complete? */
enum mmap_action_type {
	MMAP_NOTHING,		/* Mapping is complete, no further action. */
	MMAP_REMAP_PFN,		/* Remap PFN range. */
	MMAP_IO_REMAP_PFN,	/* I/O remap PFN range. */
};
428
429
/*
 * Describes an action an mmap_prepare hook can instruct to be taken to complete
 * the mapping of a VMA. Specified in vm_area_desc.
 */
struct mmap_action {
	union {
		/* Remap range. */
		struct {
			unsigned long start;
			unsigned long start_pfn;
			unsigned long size;
			pgprot_t pgprot;
		} remap;
	};
	enum mmap_action_type type;	/* Selects which union member is valid. */

	/*
	 * If specified, this hook is invoked after the selected action has been
	 * successfully completed. Note that the VMA write lock is still held.
	 *
	 * The absolute minimum ought to be done here.
	 *
	 * Returns 0 on success, or an error code.
	 */
	int (*success_hook)(const struct vm_area_struct *vma);

	/*
	 * If specified, this hook is invoked when an error occurred when
	 * attempting the selected action.
	 *
	 * The hook can return an error code in order to filter the error, but
	 * it is not valid to clear the error here.
	 */
	int (*error_hook)(int err);

	/*
	 * This should be set in rare instances where the operation required
	 * that the rmap should not be able to access the VMA until
	 * completely set up.
	 */
	bool hide_from_rmap_until_complete :1;
};
471
472
/* Operations which modify VMAs. */
enum vma_operation {
	VMA_OP_SPLIT,
	VMA_OP_MERGE_UNFAULTED,
	VMA_OP_REMAP,
	VMA_OP_FORK,
};
479
480
/*
 * Describes a VMA that is about to be mmap()'ed. Drivers may choose to
 * manipulate mutable fields which will cause those fields to be updated in the
 * resultant VMA.
 *
 * Helper functions are not required for manipulating any field.
 */
struct vm_area_desc {
	/* Immutable state. */
	const struct mm_struct *const mm;
	struct file *const file; /* May vary from vm_file in stacked callers. */
	unsigned long start;
	unsigned long end;

	/* Mutable fields. Populated with initial state. */
	pgoff_t pgoff;
	struct file *vm_file;
	/* Flags viewed either as a word (vm_flags) or a bitmap (vma_flags). */
	union {
		vm_flags_t vm_flags;
		vma_flags_t vma_flags;
	};
	pgprot_t page_prot;

	/* Write-only fields. */
	const struct vm_operations_struct *vm_ops;
	void *private_data;

	/* Take further action? */
	struct mmap_action action;
};
510
511
/* Userland copy of the kernel VMA; layout must track the kernel's. */
struct vm_area_struct {
	/* The first cache line has the info for VMA tree walking. */

	union {
		struct {
			/* VMA covers [vm_start; vm_end) addresses within mm */
			unsigned long vm_start;
			unsigned long vm_end;
		};
		freeptr_t vm_freeptr; /* Pointer used by SLAB_TYPESAFE_BY_RCU */
	};

	struct mm_struct *vm_mm;	/* The address space we belong to. */
	pgprot_t vm_page_prot;		/* Access permissions of this VMA. */

	/*
	 * Flags, see mm.h.
	 * To modify use vm_flags_{init|reset|set|clear|mod} functions.
	 * The union provides a const word view (vm_flags) and a mutable
	 * bitmap view (flags) of the same storage.
	 */
	union {
		const vm_flags_t vm_flags;
		vma_flags_t flags;
	};

#ifdef CONFIG_PER_VMA_LOCK
	/*
	 * Can only be written (using WRITE_ONCE()) while holding both:
	 *  - mmap_lock (in write mode)
	 *  - vm_refcnt bit at VMA_LOCK_OFFSET is set
	 * Can be read reliably while holding one of:
	 *  - mmap_lock (in read or write mode)
	 *  - vm_refcnt bit at VMA_LOCK_OFFSET is set or vm_refcnt > 1
	 * Can be read unreliably (using READ_ONCE()) for pessimistic bailout
	 * while holding nothing (except RCU to keep the VMA struct allocated).
	 *
	 * This sequence counter is explicitly allowed to overflow; sequence
	 * counter reuse can only lead to occasional unnecessary use of the
	 * slowpath.
	 */
	unsigned int vm_lock_seq;
#endif

	/*
	 * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
	 * list, after a COW of one of the file pages.	A MAP_SHARED vma
	 * can only be in the i_mmap tree.  An anonymous MAP_PRIVATE, stack
	 * or brk vma (with NULL file) can only be in an anon_vma list.
	 */
	struct list_head anon_vma_chain; /* Serialized by mmap_lock &
					  * page_table_lock */
	struct anon_vma *anon_vma;	/* Serialized by page_table_lock */

	/* Function pointers to deal with this struct. */
	const struct vm_operations_struct *vm_ops;

	/* Information about our backing store: */
	unsigned long vm_pgoff;		/* Offset (within vm_file) in PAGE_SIZE
					   units */
	struct file * vm_file;		/* File we map to (can be NULL). */
	void * vm_private_data;		/* was vm_pte (shared mem) */

#ifdef CONFIG_SWAP
	atomic_long_t swap_readahead_info;
#endif
#ifndef CONFIG_MMU
	struct vm_region *vm_region;	/* NOMMU mapping region */
#endif
#ifdef CONFIG_NUMA
	struct mempolicy *vm_policy;	/* NUMA policy for the VMA */
#endif
#ifdef CONFIG_NUMA_BALANCING
	struct vma_numab_state *numab_state;	/* NUMA Balancing state */
#endif
#ifdef CONFIG_PER_VMA_LOCK
	/* Unstable RCU readers are allowed to read this. */
	refcount_t vm_refcnt;
#endif
	/*
	 * For areas with an address space and backing store,
	 * linkage into the address_space->i_mmap interval tree.
	 *
	 */
	struct {
		struct rb_node rb;
		unsigned long rb_subtree_last;
	} shared;
#ifdef CONFIG_ANON_VMA_NAME
	/*
	 * For private and shared anonymous mappings, a pointer to a null
	 * terminated string containing the name given to the vma, or NULL if
	 * unnamed. Serialized by mmap_lock. Use anon_vma_name to access.
	 */
	struct anon_vma_name *anon_name;
#endif
	struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
} __randomize_layout;
607
608
/* Per-VMA operation callbacks; duplicated from the kernel definition. */
struct vm_operations_struct {
	void (*open)(struct vm_area_struct * area);
	/**
	 * @close: Called when the VMA is being removed from the MM.
	 * Context: User context.  May sleep.  Caller holds mmap_lock.
	 */
	void (*close)(struct vm_area_struct * area);
	/* Called any time before splitting to check if it's allowed */
	int (*may_split)(struct vm_area_struct *area, unsigned long addr);
	int (*mremap)(struct vm_area_struct *area);
	/*
	 * Called by mprotect() to make driver-specific permission
	 * checks before mprotect() is finalised.   The VMA must not
	 * be modified.  Returns 0 if mprotect() can proceed.
	 */
	int (*mprotect)(struct vm_area_struct *vma, unsigned long start,
			unsigned long end, unsigned long newflags);
	vm_fault_t (*fault)(struct vm_fault *vmf);
	vm_fault_t (*huge_fault)(struct vm_fault *vmf, unsigned int order);
	vm_fault_t (*map_pages)(struct vm_fault *vmf,
			pgoff_t start_pgoff, pgoff_t end_pgoff);
	unsigned long (*pagesize)(struct vm_area_struct * area);

	/* notification that a previously read-only page is about to become
	 * writable, if an error is returned it will cause a SIGBUS */
	vm_fault_t (*page_mkwrite)(struct vm_fault *vmf);

	/* same as page_mkwrite when using VM_PFNMAP|VM_MIXEDMAP */
	vm_fault_t (*pfn_mkwrite)(struct vm_fault *vmf);

	/* called by access_process_vm when get_user_pages() fails, typically
	 * for use by special VMAs. See also generic_access_phys() for a generic
	 * implementation useful for any iomem mapping.
	 */
	int (*access)(struct vm_area_struct *vma, unsigned long addr,
		      void *buf, int len, int write);

	/* Called by the /proc/PID/maps code to ask the vma whether it
	 * has a special name.  Returning non-NULL will also cause this
	 * vma to be dumped unconditionally. */
	const char *(*name)(struct vm_area_struct *vma);

#ifdef CONFIG_NUMA
	/*
	 * set_policy() op must add a reference to any non-NULL @new mempolicy
	 * to hold the policy upon return.  Caller should pass NULL @new to
	 * remove a policy and fall back to surrounding context--i.e. do not
	 * install a MPOL_DEFAULT policy, nor the task or system default
	 * mempolicy.
	 */
	int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new);

	/*
	 * get_policy() op must add reference [mpol_get()] to any policy at
	 * (vma,addr) marked as MPOL_SHARED. The shared policy infrastructure
	 * in mm/mempolicy.c will do this automatically.
	 * get_policy() must NOT add a ref if the policy at (vma,addr) is not
	 * marked as MPOL_SHARED. vma policies are protected by the mmap_lock.
	 * If no [shared/vma] mempolicy exists at the addr, get_policy() op
	 * must return NULL--i.e., do not "fallback" to task or system default
	 * policy.
	 */
	struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
					unsigned long addr, pgoff_t *ilx);
#endif
#ifdef CONFIG_FIND_NORMAL_PAGE
	/*
	 * Called by vm_normal_page() for special PTEs in @vma at @addr. This
	 * allows for returning a "normal" page from vm_normal_page() even
	 * though the PTE indicates that the "struct page" either does not exist
	 * or should not be touched: "special".
	 *
	 * Do not add new users: this really only works when a "normal" page
	 * was mapped, but then the PTE got changed to something weird (+
	 * marked special) that would not make pte_pfn() identify the originally
	 * inserted page.
	 */
	struct page *(*find_normal_page)(struct vm_area_struct *vma,
					 unsigned long addr);
#endif /* CONFIG_FIND_NORMAL_PAGE */
};
689
690
/* Search parameters for finding an unmapped address range. */
struct vm_unmapped_area_info {
#define VM_UNMAPPED_AREA_TOPDOWN 1
	unsigned long flags;		/* VM_UNMAPPED_AREA_* behaviour bits. */
	unsigned long length;		/* Size of the range to find. */
	unsigned long low_limit;	/* Lowest acceptable address. */
	unsigned long high_limit;	/* Highest acceptable end address. */
	unsigned long align_mask;
	unsigned long align_offset;
	unsigned long start_gap;
};
700
701
/* Parameters controlling a page-table move (mremap-style relocation). */
struct pagetable_move_control {
	struct vm_area_struct *old; /* Source VMA. */
	struct vm_area_struct *new; /* Destination VMA. */
	unsigned long old_addr; /* Address from which the move begins. */
	unsigned long old_end; /* Exclusive address at which old range ends. */
	unsigned long new_addr; /* Address to move page tables to. */
	unsigned long len_in; /* Bytes to remap specified by user. */

	bool need_rmap_locks; /* Do rmap locks need to be taken? */
	bool for_stack; /* Is this an early temp stack being moved? */
};

/* Declare and initialise a pagetable_move_control for a [addr, addr+len) move. */
#define PAGETABLE_MOVE(name, old_, new_, old_addr_, new_addr_, len_)	\
	struct pagetable_move_control name = {				\
		.old = old_,						\
		.new = new_,						\
		.old_addr = old_addr_,					\
		.old_end = (old_addr_) + (len_),			\
		.new_addr = new_addr_,					\
		.len_in = len_,						\
	}
722
723
/* Invalidate the iterator's cached position; the next walk re-searches. */
static inline void vma_iter_invalidate(struct vma_iterator *vmi)
{
	mas_pause(&vmi->mas);
}
727
728
/* Combine two protections: the result carries every bit set in either. */
static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
{
	unsigned long merged = pgprot_val(oldprot) | pgprot_val(newprot);

	return __pgprot(merged);
}
732
733
/* Userland stub: map VMA flags 1:1 onto a page protection value. */
static inline pgprot_t vm_get_page_prot(vm_flags_t vm_flags)
{
	pgprot_t prot = __pgprot(vm_flags);

	return prot;
}
737
738
/* Test a single bit in the mm's flag bitmap. */
static inline bool mm_flags_test(int flag, const struct mm_struct *mm)
{
	return test_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags));
}
742
743
/*
 * Copy value to the first system word of VMA flags, non-atomically.
 *
 * IMPORTANT: This does not overwrite bytes past the first system word. The
 * caller must account for this.
 */
static inline void vma_flags_overwrite_word(vma_flags_t *flags, unsigned long value)
{
	*ACCESS_PRIVATE(flags, __vma_flags) = value;
}

/*
 * Copy value to the first system word of VMA flags ONCE, non-atomically.
 *
 * IMPORTANT: This does not overwrite bytes past the first system word. The
 * caller must account for this.
 */
static inline void vma_flags_overwrite_word_once(vma_flags_t *flags, unsigned long value)
{
	unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);

	WRITE_ONCE(*bitmap, value);
}

/* Update the first system word of VMA flags setting bits, non-atomically. */
static inline void vma_flags_set_word(vma_flags_t *flags, unsigned long value)
{
	unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);

	*bitmap |= value;
}

/* Update the first system word of VMA flags clearing bits, non-atomically. */
static inline void vma_flags_clear_word(vma_flags_t *flags, unsigned long value)
{
	unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);

	*bitmap &= ~value;
}

/* Clear every flag bit, including those beyond the first word. */
static inline void vma_flags_clear_all(vma_flags_t *flags)
{
	bitmap_zero(ACCESS_PRIVATE(flags, __vma_flags), NUM_VMA_FLAG_BITS);
}

/* Set a single flag bit, non-atomically. */
static inline void vma_flag_set(vma_flags_t *flags, vma_flag_t bit)
{
	unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);

	__set_bit((__force int)bit, bitmap);
}
794
795
/* Use when VMA is not part of the VMA tree and needs no locking */
static inline void vm_flags_init(struct vm_area_struct *vma,
				 vm_flags_t flags)
{
	/* Zero everything first: the word overwrite only covers word 0. */
	vma_flags_clear_all(&vma->flags);
	vma_flags_overwrite_word(&vma->flags, flags);
}

/*
 * Use when VMA is part of the VMA tree and modifications need coordination
 * Note: vm_flags_reset and vm_flags_reset_once do not lock the vma and
 * it should be locked explicitly beforehand.
 */
static inline void vm_flags_reset(struct vm_area_struct *vma,
				  vm_flags_t flags)
{
	vma_assert_write_locked(vma);
	vm_flags_init(vma, flags);
}

static inline void vm_flags_reset_once(struct vm_area_struct *vma,
				       vm_flags_t flags)
{
	vma_assert_write_locked(vma);
	/*
	 * The user should only be interested in avoiding reordering of
	 * assignment to the first word.
	 */
	vma_flags_clear_all(&vma->flags);
	vma_flags_overwrite_word_once(&vma->flags, flags);
}

/* Set flag bits in word 0; takes the VMA write lock. */
static inline void vm_flags_set(struct vm_area_struct *vma,
				vm_flags_t flags)
{
	vma_start_write(vma);
	vma_flags_set_word(&vma->flags, flags);
}

/* Clear flag bits in word 0; takes the VMA write lock. */
static inline void vm_flags_clear(struct vm_area_struct *vma,
				  vm_flags_t flags)
{
	vma_start_write(vma);
	vma_flags_clear_word(&vma->flags, flags);
}
840
841
/* Build a vma_flags_t bitmap from a list of bit numbers (defined elsewhere). */
static inline vma_flags_t __mk_vma_flags(size_t count, const vma_flag_t *bits);

#define mk_vma_flags(...) __mk_vma_flags(COUNT_ARGS(__VA_ARGS__), \
					 (const vma_flag_t []){__VA_ARGS__})

/* True if ANY bit in to_test is also set in flags. */
static __always_inline bool vma_flags_test_mask(const vma_flags_t *flags,
						vma_flags_t to_test)
{
	const unsigned long *bitmap = flags->__vma_flags;
	const unsigned long *bitmap_to_test = to_test.__vma_flags;

	return bitmap_intersects(bitmap_to_test, bitmap, NUM_VMA_FLAG_BITS);
}

#define vma_flags_test(flags, ...) \
	vma_flags_test_mask(flags, mk_vma_flags(__VA_ARGS__))

/* True if EVERY bit in to_test is set in flags. */
static __always_inline bool vma_flags_test_all_mask(const vma_flags_t *flags,
						    vma_flags_t to_test)
{
	const unsigned long *bitmap = flags->__vma_flags;
	const unsigned long *bitmap_to_test = to_test.__vma_flags;

	return bitmap_subset(bitmap_to_test, bitmap, NUM_VMA_FLAG_BITS);
}

#define vma_flags_test_all(flags, ...) \
	vma_flags_test_all_mask(flags, mk_vma_flags(__VA_ARGS__))

/* OR every bit of to_set into flags. */
static __always_inline void vma_flags_set_mask(vma_flags_t *flags, vma_flags_t to_set)
{
	unsigned long *bitmap = flags->__vma_flags;
	const unsigned long *bitmap_to_set = to_set.__vma_flags;

	bitmap_or(bitmap, bitmap, bitmap_to_set, NUM_VMA_FLAG_BITS);
}

#define vma_flags_set(flags, ...) \
	vma_flags_set_mask(flags, mk_vma_flags(__VA_ARGS__))

/* Clear every bit of to_clear from flags. */
static __always_inline void vma_flags_clear_mask(vma_flags_t *flags, vma_flags_t to_clear)
{
	unsigned long *bitmap = flags->__vma_flags;
	const unsigned long *bitmap_to_clear = to_clear.__vma_flags;

	bitmap_andnot(bitmap, bitmap, bitmap_to_clear, NUM_VMA_FLAG_BITS);
}

#define vma_flags_clear(flags, ...) \
	vma_flags_clear_mask(flags, mk_vma_flags(__VA_ARGS__))
891
892
/* True if the VMA has every flag in the given mask set. */
static inline bool vma_test_all_flags_mask(const struct vm_area_struct *vma,
					   vma_flags_t flags)
{
	return vma_flags_test_all_mask(&vma->flags, flags);
}

#define vma_test_all_flags(vma, ...) \
	vma_test_all_flags_mask(vma, mk_vma_flags(__VA_ARGS__))
900
901
/* True if BOTH VM_SHARED and VM_MAYWRITE are set in vm_flags. */
static inline bool is_shared_maywrite_vm_flags(vm_flags_t vm_flags)
{
	const vm_flags_t mask = VM_SHARED | VM_MAYWRITE;

	return (vm_flags & mask) == mask;
}
906
907
/* Set a mask of flags on a VMA (no locking performed here). */
static inline void vma_set_flags_mask(struct vm_area_struct *vma,
				      vma_flags_t flags)
{
	vma_flags_set_mask(&vma->flags, flags);
}

#define vma_set_flags(vma, ...) \
	vma_set_flags_mask(vma, mk_vma_flags(__VA_ARGS__))

/* True if ANY flag in the mask is set on the mmap descriptor. */
static inline bool vma_desc_test_flags_mask(const struct vm_area_desc *desc,
					    vma_flags_t flags)
{
	return vma_flags_test_mask(&desc->vma_flags, flags);
}

#define vma_desc_test_flags(desc, ...) \
	vma_desc_test_flags_mask(desc, mk_vma_flags(__VA_ARGS__))

/* Set a mask of flags on the mmap descriptor. */
static inline void vma_desc_set_flags_mask(struct vm_area_desc *desc,
					   vma_flags_t flags)
{
	vma_flags_set_mask(&desc->vma_flags, flags);
}

#define vma_desc_set_flags(desc, ...) \
	vma_desc_set_flags_mask(desc, mk_vma_flags(__VA_ARGS__))

/* Clear a mask of flags on the mmap descriptor. */
static inline void vma_desc_clear_flags_mask(struct vm_area_desc *desc,
					     vma_flags_t flags)
{
	vma_flags_clear_mask(&desc->vma_flags, flags);
}

#define vma_desc_clear_flags(desc, ...) \
	vma_desc_clear_flags_mask(desc, mk_vma_flags(__VA_ARGS__))
942
943
/* Are VMA_SHARED_BIT and VMA_MAYWRITE_BIT both set in @flags? */
static inline bool is_shared_maywrite(const vma_flags_t *flags)
{
	return vma_flags_test_all(flags, VMA_SHARED_BIT, VMA_MAYWRITE_BIT);
}

/* As is_shared_maywrite(), but taking the VMA itself. */
static inline bool vma_is_shared_maywrite(struct vm_area_struct *vma)
{
	return is_shared_maywrite(&vma->flags);
}
952
953
/* Return the next VMA from the iterator, or NULL if none remain. */
static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi)
{
	/*
	 * Uses mas_find() to get the first VMA when the iterator starts.
	 * Calling mas_next() could skip the first entry.
	 */
	return mas_find(&vmi->mas, ULONG_MAX);
}
961
962
/*
 * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these
 * assertions should be made either under mmap_write_lock or when the object
 * has been isolated under mmap_write_lock, ensuring no competing writers.
 */
static inline void vma_assert_attached(struct vm_area_struct *vma)
{
	/* An attached VMA holds a non-zero reference count. */
	WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt));
}

static inline void vma_assert_detached(struct vm_area_struct *vma)
{
	/* A detached VMA must have a zero reference count. */
	WARN_ON_ONCE(refcount_read(&vma->vm_refcnt));
}
976
977
static inline void vma_assert_write_locked(struct vm_area_struct *);

/*
 * Attach @vma: transition vm_refcnt from 0 to 1. The caller must hold
 * the VMA write lock and the VMA must currently be detached.
 */
static inline void vma_mark_attached(struct vm_area_struct *vma)
{
	vma_assert_write_locked(vma);
	vma_assert_detached(vma);
	/* refcount_set_release() provides release ordering for the store. */
	refcount_set_release(&vma->vm_refcnt, 1);
}

/*
 * Detach @vma: drop the attach reference. The caller must hold the VMA
 * write lock and the VMA must currently be attached.
 */
static inline void vma_mark_detached(struct vm_area_struct *vma)
{
	vma_assert_write_locked(vma);
	vma_assert_attached(vma);
	/* We are the only writer, so no need to use vma_refcount_put(). */
	if (unlikely(!refcount_dec_and_test(&vma->vm_refcnt))) {
		/*
		 * Reader must have temporarily raised vm_refcnt but it will
		 * drop it without using the vma since vma is write-locked.
		 */
	}
}
997
998
/* Zero-initialize @vma and associate it with @mm. */
static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
{
	memset(vma, 0, sizeof(*vma));
	vma->vm_mm = mm;
	vma->vm_ops = &vma_dummy_vm_ops;
	INIT_LIST_HEAD(&vma->anon_vma_chain);
	/* NOTE(review): UINT_MAX presumably marks "not write-locked" — confirm
	 * against the VMA locking implementation. */
	vma->vm_lock_seq = UINT_MAX;
}
1006
1007
/*
 * These are defined in vma.h, but sadly vm_stat_account() is referenced by
 * kernel/fork.c, so we have to these broadly available there, and temporarily
 * define them here to resolve the dependency cycle.
 */
#define is_exec_mapping(flags) \
	((flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC)

#define is_stack_mapping(flags) \
	(((flags & VM_STACK) == VM_STACK) || (flags & VM_SHADOW_STACK))

#define is_data_mapping(flags) \
	((flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE)

/* Account @npages against @mm's total and per-category VM counters. */
static inline void vm_stat_account(struct mm_struct *mm, vm_flags_t flags,
				   long npages)
{
	/* NOTE(review): READ_ONCE/WRITE_ONCE suggest total_vm has lockless
	 * readers elsewhere — confirm against mm/ users. */
	WRITE_ONCE(mm->total_vm, READ_ONCE(mm->total_vm)+npages);

	if (is_exec_mapping(flags))
		mm->exec_vm += npages;
	else if (is_stack_mapping(flags))
		mm->stack_vm += npages;
	else if (is_data_mapping(flags))
		mm->data_vm += npages;
}

/* The helper macros above are scoped to vm_stat_account() only. */
#undef is_exec_mapping
#undef is_stack_mapping
#undef is_data_mapping
1037
1038
/* Return @pages of accounted memory: a negative charge via vm_acct_memory(). */
static inline void vm_unacct_memory(long pages)
{
	vm_acct_memory(-pages);
}

/* Unconditionally bump the mapping's writable-mapping count. */
static inline void mapping_allow_writable(struct address_space *mapping)
{
	atomic_inc(&mapping->i_mmap_writable);
}

/* Find the first VMA at or after the iterator position, below @max. */
static inline
struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max)
{
	/* @max is exclusive; maple tree bounds are inclusive, hence max - 1. */
	return mas_find(&vmi->mas, max - 1);
}
1053
1054
/*
 * Clear (store NULL over) the range [start, end) in the VMA tree via the
 * iterator, allocating with @gfp.
 *
 * Returns 0 on success, -ENOMEM if the store failed.
 */
static inline int vma_iter_clear_gfp(struct vma_iterator *vmi,
		unsigned long start, unsigned long end, gfp_t gfp)
{
	struct ma_state *mas = &vmi->mas;

	/* The tree uses inclusive bounds, so the last index is end - 1. */
	__mas_set_range(mas, start, end - 1);
	mas_store_gfp(mas, NULL, gfp);

	return unlikely(mas_is_err(mas)) ? -ENOMEM : 0;
}
1064
1065
/* Make @vma anonymous: a NULL vm_ops identifies anonymous VMAs. */
static inline void vma_set_anonymous(struct vm_area_struct *vma)
{
	vma->vm_ops = NULL;
}

/* Declared in vma.h. */
static inline void set_vma_from_desc(struct vm_area_struct *vma,
				     struct vm_area_desc *desc);
1074
/*
 * Invoke @f_op->mmap_prepare() on behalf of a legacy mmap call path:
 * build a vm_area_desc describing @vma, run the prepare hook, then apply
 * the resulting description and action back to the VMA.
 *
 * Returns 0 on success, or the error from the hook or action completion.
 */
static inline int __compat_vma_mmap(const struct file_operations *f_op,
				    struct file *file, struct vm_area_struct *vma)
{
	struct vm_area_desc desc = {
		.mm = vma->vm_mm,
		.file = file,
		.start = vma->vm_start,
		.end = vma->vm_end,

		.pgoff = vma->vm_pgoff,
		.vm_file = vma->vm_file,
		.vm_flags = vma->vm_flags,
		.page_prot = vma->vm_page_prot,

		.action.type = MMAP_NOTHING, /* Default */
	};
	int err;

	err = f_op->mmap_prepare(&desc);
	if (err)
		return err;

	mmap_action_prepare(&desc.action, &desc);
	/* Copy any changes the hook made in the descriptor back to the VMA. */
	set_vma_from_desc(vma, &desc);
	return mmap_action_complete(&desc.action, vma);
}
1100
1101
static inline int compat_vma_mmap(struct file *file,
1102
struct vm_area_struct *vma)
1103
{
1104
return __compat_vma_mmap(file->f_op, file, vma);
1105
}
1106
1107
1108
/* Position the iterator over @mm's VMA tree, starting at @addr. */
static inline void vma_iter_init(struct vma_iterator *vmi,
				 struct mm_struct *mm, unsigned long addr)
{
	mas_init(&vmi->mas, &mm->mm_mt, addr);
}

/* Size of the VMA in pages. */
static inline unsigned long vma_pages(struct vm_area_struct *vma)
{
	return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
}

static inline void mmap_assert_locked(struct mm_struct *);

/*
 * Find the first VMA intersecting [start_addr, end_addr), or NULL.
 * The mmap lock must be held.
 */
static inline struct vm_area_struct *find_vma_intersection(struct mm_struct *mm,
							   unsigned long start_addr,
							   unsigned long end_addr)
{
	unsigned long index = start_addr;

	mmap_assert_locked(mm);
	/* end_addr is exclusive; maple tree bounds are inclusive. */
	return mt_find(&mm->mm_mt, &index, end_addr - 1);
}

/* Look up the VMA covering exactly @addr, or NULL if none. */
static inline
struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr)
{
	return mtree_load(&mm->mm_mt, addr);
}

/* Return the previous VMA from the iterator, or NULL at the start. */
static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi)
{
	return mas_prev(&vmi->mas, 0);
}

/* Reposition the iterator to @addr without modifying the tree. */
static inline void vma_iter_set(struct vma_iterator *vmi, unsigned long addr)
{
	mas_set(&vmi->mas, addr);
}

/* Anonymous VMAs carry no vm_ops (see vma_set_anonymous()). */
static inline bool vma_is_anonymous(struct vm_area_struct *vma)
{
	return !vma->vm_ops;
}
1150
1151
/* Defined in vma.h, so temporarily define here to avoid circular dependency. */
#define vma_iter_load(vmi) \
	mas_walk(&(vmi)->mas)

/*
 * Find the VMA containing @addr (or, failing that, the next VMA after
 * it), also reporting the preceding VMA via @pprev.
 */
static inline struct vm_area_struct *
find_vma_prev(struct mm_struct *mm, unsigned long addr,
	      struct vm_area_struct **pprev)
{
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, addr);

	vma = vma_iter_load(&vmi);
	*pprev = vma_prev(&vmi);
	/* No VMA at @addr itself: fall forward to the next one. */
	if (!vma)
		vma = vma_next(&vmi);
	return vma;
}

/* The stop-gap definition above is scoped to find_vma_prev() only. */
#undef vma_iter_load

/* Release any resources held by the iterator's maple state. */
static inline void vma_iter_free(struct vma_iterator *vmi)
{
	mas_destroy(&vmi->mas);
}

/* Advance the iterator to the next range (mas_next_range() semantics). */
static inline
struct vm_area_struct *vma_iter_next_range(struct vma_iterator *vmi)
{
	return mas_next_range(&vmi->mas, ULONG_MAX);
}
1181
1182
bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);

/* Update vma->vm_page_prot to reflect vma->vm_flags. */
static inline void vma_set_page_prot(struct vm_area_struct *vma)
{
	vm_flags_t vm_flags = vma->vm_flags;
	pgprot_t vm_page_prot;

	/* testing: we inline vm_pgprot_modify() to avoid clash with vma.h. */
	vm_page_prot = pgprot_modify(vma->vm_page_prot, vm_get_page_prot(vm_flags));

	if (vma_wants_writenotify(vma, vm_page_prot)) {
		/* Write notifications wanted: recompute as if not shared. */
		vm_flags &= ~VM_SHARED;
		/* testing: we inline vm_pgprot_modify() to avoid clash with vma.h. */
		vm_page_prot = pgprot_modify(vm_page_prot, vm_get_page_prot(vm_flags));
	}
	/* remove_protection_ptes reads vma->vm_page_prot without mmap_lock */
	WRITE_ONCE(vma->vm_page_prot, vm_page_prot);
}
1201
1202
static inline unsigned long stack_guard_start_gap(struct vm_area_struct *vma)
1203
{
1204
if (vma->vm_flags & VM_GROWSDOWN)
1205
return stack_guard_gap;
1206
1207
/* See reasoning around the VM_SHADOW_STACK definition */
1208
if (vma->vm_flags & VM_SHADOW_STACK)
1209
return PAGE_SIZE;
1210
1211
return 0;
1212
}
1213
1214
/*
 * Lowest address covered by the VMA once its preceding guard gap is
 * included, clamped to 0 on underflow.
 */
static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
{
	unsigned long gap = stack_guard_start_gap(vma);
	unsigned long vm_start = vma->vm_start;

	vm_start -= gap;
	/* Unsigned wrap-around: the gap extends below address zero. */
	if (vm_start > vma->vm_start)
		vm_start = 0;
	return vm_start;
}

/*
 * Highest address covered by the VMA plus any upward guard gap, clamped
 * on overflow.
 */
static inline unsigned long vm_end_gap(struct vm_area_struct *vma)
{
	unsigned long vm_end = vma->vm_end;

	if (vma->vm_flags & VM_GROWSUP) {
		vm_end += stack_guard_gap;
		/* Unsigned wrap-around: clamp just below the address-space top. */
		if (vm_end < vma->vm_end)
			vm_end = -PAGE_SIZE;
	}
	return vm_end;
}
1236
1237
/* Does the VMA have any VM_ACCESS_FLAGS bit set? */
static inline bool vma_is_accessible(struct vm_area_struct *vma)
{
	return vma->vm_flags & VM_ACCESS_FLAGS;
}
1241
1242
/*
 * Would locking @bytes additional bytes into memory stay within the
 * RLIMIT_MEMLOCK limit for @mm? Always allowed when VM_LOCKED is not
 * requested or the caller has CAP_IPC_LOCK.
 */
static inline bool mlock_future_ok(const struct mm_struct *mm,
				   vm_flags_t vm_flags, unsigned long bytes)
{
	unsigned long locked_pages, limit_pages;

	if (!(vm_flags & VM_LOCKED) || capable(CAP_IPC_LOCK))
		return true;

	/* Pages to be locked, on top of what is already locked. */
	locked_pages = bytes >> PAGE_SHIFT;
	locked_pages += mm->locked_vm;

	/* rlimit() is in bytes; convert to pages for comparison. */
	limit_pages = rlimit(RLIMIT_MEMLOCK);
	limit_pages >>= PAGE_SHIFT;

	return locked_pages <= limit_pages;
}
1258
1259
static inline bool map_deny_write_exec(unsigned long old, unsigned long new)
1260
{
1261
/* If MDWE is disabled, we have nothing to deny. */
1262
if (mm_flags_test(MMF_HAS_MDWE, current->mm))
1263
return false;
1264
1265
/* If the new VMA is not executable, we have nothing to deny. */
1266
if (!(new & VM_EXEC))
1267
return false;
1268
1269
/* Under MDWE we do not accept newly writably executable VMAs... */
1270
if (new & VM_WRITE)
1271
return true;
1272
1273
/* ...nor previously non-executable VMAs becoming executable. */
1274
if (!(old & VM_EXEC))
1275
return true;
1276
1277
return false;
1278
}
1279
1280
/*
 * Record a writable mapping of @mapping unless writable mappings are
 * currently blocked (negative count). Returns 0 on success, -EPERM if
 * blocked.
 */
static inline int mapping_map_writable(struct address_space *mapping)
{
	return atomic_inc_unless_negative(&mapping->i_mmap_writable) ?
		0 : -EPERM;
}
1285
1286
/* Did the driver provide valid mmap hook configuration? */
1287
static inline bool can_mmap_file(struct file *file)
1288
{
1289
bool has_mmap = file->f_op->mmap;
1290
bool has_mmap_prepare = file->f_op->mmap_prepare;
1291
1292
/* Hooks are mutually exclusive. */
1293
if (WARN_ON_ONCE(has_mmap && has_mmap_prepare))
1294
return false;
1295
if (!has_mmap && !has_mmap_prepare)
1296
return false;
1297
1298
return true;
1299
}
1300
1301
/* Dispatch an mmap of @file to whichever hook the driver provides. */
static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma)
{
	/* Prefer the ->mmap_prepare() hook, routed through the compat shim. */
	if (file->f_op->mmap_prepare)
		return compat_vma_mmap(file, vma);

	return file->f_op->mmap(file, vma);
}

/* Invoke the file's ->mmap_prepare() hook directly. */
static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc *desc)
{
	return file->f_op->mmap_prepare(desc);
}
1313
1314
/*
 * Replace vma->vm_file with @file, taking a reference on the new file
 * and dropping the reference on the old one.
 */
static inline void vma_set_file(struct vm_area_struct *vma, struct file *file)
{
	/* Changing an anonymous vma with this is illegal */
	get_file(file);
	swap(vma->vm_file, file);
	/* After the swap, @file is the old vm_file — drop its reference. */
	fput(file);
}
1321
1322