GitHub Repository: torvalds/linux
Path: blob/master/tools/testing/vma/vma_internal.h

/* SPDX-License-Identifier: GPL-2.0+ */
/*
 * vma_internal.h
 *
 * Header providing userland wrappers and shims for the functionality provided
 * by mm/vma_internal.h.
 *
 * We make the header guard the same as mm/vma_internal.h, so if this shim
 * header is included, it precludes the inclusion of the kernel one.
 */

#ifndef __MM_VMA_INTERNAL_H
#define __MM_VMA_INTERNAL_H

#define __private
#define __bitwise
#define __randomize_layout

#define CONFIG_MMU
#define CONFIG_PER_VMA_LOCK

#include <stdlib.h>

#include <linux/atomic.h>
#include <linux/list.h>
#include <linux/maple_tree.h>
#include <linux/mm.h>
#include <linux/rbtree.h>
#include <linux/refcount.h>
#include <linux/slab.h>

extern unsigned long stack_guard_gap;
#ifdef CONFIG_MMU
extern unsigned long mmap_min_addr;
extern unsigned long dac_mmap_min_addr;
#else
#define mmap_min_addr 0UL
#define dac_mmap_min_addr 0UL
#endif

#define VM_WARN_ON(_expr) (WARN_ON(_expr))
#define VM_WARN_ON_ONCE(_expr) (WARN_ON_ONCE(_expr))
#define VM_WARN_ON_VMG(_expr, _vmg) (WARN_ON(_expr))
#define VM_BUG_ON(_expr) (BUG_ON(_expr))
#define VM_BUG_ON_VMA(_expr, _vma) (BUG_ON(_expr))

#define MMF_HAS_MDWE 28

/*
 * vm_flags in vm_area_struct, see mm_types.h.
 * When changing, update also include/trace/events/mmflags.h
 */

#define VM_NONE 0x00000000

/**
 * typedef vma_flag_t - specifies an individual VMA flag by bit number.
 *
 * This value is made type safe by sparse to avoid passing invalid flag values
 * around.
 */
typedef int __bitwise vma_flag_t;

#define DECLARE_VMA_BIT(name, bitnum) \
        VMA_ ## name ## _BIT = ((__force vma_flag_t)bitnum)
#define DECLARE_VMA_BIT_ALIAS(name, aliased) \
        VMA_ ## name ## _BIT = VMA_ ## aliased ## _BIT
enum {
        DECLARE_VMA_BIT(READ, 0),
        DECLARE_VMA_BIT(WRITE, 1),
        DECLARE_VMA_BIT(EXEC, 2),
        DECLARE_VMA_BIT(SHARED, 3),
        /* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */
        DECLARE_VMA_BIT(MAYREAD, 4),    /* limits for mprotect() etc. */
        DECLARE_VMA_BIT(MAYWRITE, 5),
        DECLARE_VMA_BIT(MAYEXEC, 6),
        DECLARE_VMA_BIT(MAYSHARE, 7),
        DECLARE_VMA_BIT(GROWSDOWN, 8),  /* general info on the segment */
#ifdef CONFIG_MMU
        DECLARE_VMA_BIT(UFFD_MISSING, 9),/* missing pages tracking */
#else
        /* nommu: R/O MAP_PRIVATE mapping that might overlay a file mapping */
        DECLARE_VMA_BIT(MAYOVERLAY, 9),
#endif /* CONFIG_MMU */
        /* Page-ranges managed without "struct page", just pure PFN */
        DECLARE_VMA_BIT(PFNMAP, 10),
        DECLARE_VMA_BIT(MAYBE_GUARD, 11),
        DECLARE_VMA_BIT(UFFD_WP, 12),   /* wrprotect pages tracking */
        DECLARE_VMA_BIT(LOCKED, 13),
        DECLARE_VMA_BIT(IO, 14),        /* Memory mapped I/O or similar */
        DECLARE_VMA_BIT(SEQ_READ, 15),  /* App will access data sequentially */
        DECLARE_VMA_BIT(RAND_READ, 16), /* App will not benefit from clustered reads */
        DECLARE_VMA_BIT(DONTCOPY, 17),  /* Do not copy this vma on fork */
        DECLARE_VMA_BIT(DONTEXPAND, 18),/* Cannot expand with mremap() */
        DECLARE_VMA_BIT(LOCKONFAULT, 19),/* Lock pages covered when faulted in */
        DECLARE_VMA_BIT(ACCOUNT, 20),   /* Is a VM accounted object */
        DECLARE_VMA_BIT(NORESERVE, 21), /* should the VM suppress accounting */
        DECLARE_VMA_BIT(HUGETLB, 22),   /* Huge TLB Page VM */
        DECLARE_VMA_BIT(SYNC, 23),      /* Synchronous page faults */
        DECLARE_VMA_BIT(ARCH_1, 24),    /* Architecture-specific flag */
        DECLARE_VMA_BIT(WIPEONFORK, 25),/* Wipe VMA contents in child. */
        DECLARE_VMA_BIT(DONTDUMP, 26),  /* Do not include in the core dump */
        DECLARE_VMA_BIT(SOFTDIRTY, 27), /* NOT soft dirty clean area */
        DECLARE_VMA_BIT(MIXEDMAP, 28),  /* Can contain struct page and pure PFN pages */
        DECLARE_VMA_BIT(HUGEPAGE, 29),  /* MADV_HUGEPAGE marked this vma */
        DECLARE_VMA_BIT(NOHUGEPAGE, 30),/* MADV_NOHUGEPAGE marked this vma */
        DECLARE_VMA_BIT(MERGEABLE, 31), /* KSM may merge identical pages */
        /* These bits are reused, we define specific uses below. */
        DECLARE_VMA_BIT(HIGH_ARCH_0, 32),
        DECLARE_VMA_BIT(HIGH_ARCH_1, 33),
        DECLARE_VMA_BIT(HIGH_ARCH_2, 34),
        DECLARE_VMA_BIT(HIGH_ARCH_3, 35),
        DECLARE_VMA_BIT(HIGH_ARCH_4, 36),
        DECLARE_VMA_BIT(HIGH_ARCH_5, 37),
        DECLARE_VMA_BIT(HIGH_ARCH_6, 38),
        /*
         * This flag is used to connect VFIO to arch specific KVM code. It
         * indicates that the memory under this VMA is safe for use with any
         * non-cachable memory type inside KVM. Some VFIO devices, on some
         * platforms, are thought to be unsafe and can cause machine crashes
         * if KVM does not lock down the memory type.
         */
        DECLARE_VMA_BIT(ALLOW_ANY_UNCACHED, 39),
#ifdef CONFIG_PPC32
        DECLARE_VMA_BIT_ALIAS(DROPPABLE, ARCH_1),
#else
        DECLARE_VMA_BIT(DROPPABLE, 40),
#endif
        DECLARE_VMA_BIT(UFFD_MINOR, 41),
        DECLARE_VMA_BIT(SEALED, 42),
        /* Flags that reuse flags above. */
        DECLARE_VMA_BIT_ALIAS(PKEY_BIT0, HIGH_ARCH_0),
        DECLARE_VMA_BIT_ALIAS(PKEY_BIT1, HIGH_ARCH_1),
        DECLARE_VMA_BIT_ALIAS(PKEY_BIT2, HIGH_ARCH_2),
        DECLARE_VMA_BIT_ALIAS(PKEY_BIT3, HIGH_ARCH_3),
        DECLARE_VMA_BIT_ALIAS(PKEY_BIT4, HIGH_ARCH_4),
#if defined(CONFIG_X86_USER_SHADOW_STACK)
        /*
         * VM_SHADOW_STACK should not be set with VM_SHARED because of lack of
         * support in core mm.
         *
         * These VMAs will get a single end guard page. This helps userspace
         * protect itself from attacks. A single page is enough for current
         * shadow stack archs (x86). See the comments near alloc_shstk() in
         * arch/x86/kernel/shstk.c for more details on the guard size.
         */
        DECLARE_VMA_BIT_ALIAS(SHADOW_STACK, HIGH_ARCH_5),
#elif defined(CONFIG_ARM64_GCS)
        /*
         * arm64's Guarded Control Stack implements similar functionality and
         * has similar constraints to shadow stacks.
         */
        DECLARE_VMA_BIT_ALIAS(SHADOW_STACK, HIGH_ARCH_6),
#endif
        DECLARE_VMA_BIT_ALIAS(SAO, ARCH_1),     /* Strong Access Ordering (powerpc) */
        DECLARE_VMA_BIT_ALIAS(GROWSUP, ARCH_1), /* parisc */
        DECLARE_VMA_BIT_ALIAS(SPARC_ADI, ARCH_1),       /* sparc64 */
        DECLARE_VMA_BIT_ALIAS(ARM64_BTI, ARCH_1),       /* arm64 */
        DECLARE_VMA_BIT_ALIAS(ARCH_CLEAR, ARCH_1),      /* sparc64, arm64 */
        DECLARE_VMA_BIT_ALIAS(MAPPED_COPY, ARCH_1),     /* !CONFIG_MMU */
        DECLARE_VMA_BIT_ALIAS(MTE, HIGH_ARCH_4),        /* arm64 */
        DECLARE_VMA_BIT_ALIAS(MTE_ALLOWED, HIGH_ARCH_5),/* arm64 */
#ifdef CONFIG_STACK_GROWSUP
        DECLARE_VMA_BIT_ALIAS(STACK, GROWSUP),
        DECLARE_VMA_BIT_ALIAS(STACK_EARLY, GROWSDOWN),
#else
        DECLARE_VMA_BIT_ALIAS(STACK, GROWSDOWN),
#endif
};

#define INIT_VM_FLAG(name) BIT((__force int) VMA_ ## name ## _BIT)
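
/*
 * Example (illustrative, not compiled; BIT() comes from linux/bits.h): within
 * the enum above, DECLARE_VMA_BIT(WRITE, 1) expands to
 *
 *	VMA_WRITE_BIT = ((__force vma_flag_t)1)
 *
 * and INIT_VM_FLAG(WRITE) then evaluates to BIT(1) == 0x2, which is exactly
 * the VM_WRITE mask defined below.
 */
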
#define VM_READ INIT_VM_FLAG(READ)
#define VM_WRITE INIT_VM_FLAG(WRITE)
#define VM_EXEC INIT_VM_FLAG(EXEC)
#define VM_SHARED INIT_VM_FLAG(SHARED)
#define VM_MAYREAD INIT_VM_FLAG(MAYREAD)
#define VM_MAYWRITE INIT_VM_FLAG(MAYWRITE)
#define VM_MAYEXEC INIT_VM_FLAG(MAYEXEC)
#define VM_MAYSHARE INIT_VM_FLAG(MAYSHARE)
#define VM_GROWSDOWN INIT_VM_FLAG(GROWSDOWN)
#ifdef CONFIG_MMU
#define VM_UFFD_MISSING INIT_VM_FLAG(UFFD_MISSING)
#else
#define VM_UFFD_MISSING VM_NONE
#define VM_MAYOVERLAY INIT_VM_FLAG(MAYOVERLAY)
#endif
#define VM_PFNMAP INIT_VM_FLAG(PFNMAP)
#define VM_MAYBE_GUARD INIT_VM_FLAG(MAYBE_GUARD)
#define VM_UFFD_WP INIT_VM_FLAG(UFFD_WP)
#define VM_LOCKED INIT_VM_FLAG(LOCKED)
#define VM_IO INIT_VM_FLAG(IO)
#define VM_SEQ_READ INIT_VM_FLAG(SEQ_READ)
#define VM_RAND_READ INIT_VM_FLAG(RAND_READ)
#define VM_DONTCOPY INIT_VM_FLAG(DONTCOPY)
#define VM_DONTEXPAND INIT_VM_FLAG(DONTEXPAND)
#define VM_LOCKONFAULT INIT_VM_FLAG(LOCKONFAULT)
#define VM_ACCOUNT INIT_VM_FLAG(ACCOUNT)
#define VM_NORESERVE INIT_VM_FLAG(NORESERVE)
#define VM_HUGETLB INIT_VM_FLAG(HUGETLB)
#define VM_SYNC INIT_VM_FLAG(SYNC)
#define VM_ARCH_1 INIT_VM_FLAG(ARCH_1)
#define VM_WIPEONFORK INIT_VM_FLAG(WIPEONFORK)
#define VM_DONTDUMP INIT_VM_FLAG(DONTDUMP)
#ifdef CONFIG_MEM_SOFT_DIRTY
#define VM_SOFTDIRTY INIT_VM_FLAG(SOFTDIRTY)
#else
#define VM_SOFTDIRTY VM_NONE
#endif
#define VM_MIXEDMAP INIT_VM_FLAG(MIXEDMAP)
#define VM_HUGEPAGE INIT_VM_FLAG(HUGEPAGE)
#define VM_NOHUGEPAGE INIT_VM_FLAG(NOHUGEPAGE)
#define VM_MERGEABLE INIT_VM_FLAG(MERGEABLE)
#define VM_STACK INIT_VM_FLAG(STACK)
#ifdef CONFIG_STACK_GROWSUP
#define VM_STACK_EARLY INIT_VM_FLAG(STACK_EARLY)
#else
#define VM_STACK_EARLY VM_NONE
#endif
#ifdef CONFIG_ARCH_HAS_PKEYS
#define VM_PKEY_SHIFT ((__force int)VMA_HIGH_ARCH_0_BIT)
/* Despite the naming, these are FLAGS not bits. */
#define VM_PKEY_BIT0 INIT_VM_FLAG(PKEY_BIT0)
#define VM_PKEY_BIT1 INIT_VM_FLAG(PKEY_BIT1)
#define VM_PKEY_BIT2 INIT_VM_FLAG(PKEY_BIT2)
#if CONFIG_ARCH_PKEY_BITS > 3
#define VM_PKEY_BIT3 INIT_VM_FLAG(PKEY_BIT3)
#else
#define VM_PKEY_BIT3 VM_NONE
#endif /* CONFIG_ARCH_PKEY_BITS > 3 */
#if CONFIG_ARCH_PKEY_BITS > 4
#define VM_PKEY_BIT4 INIT_VM_FLAG(PKEY_BIT4)
#else
#define VM_PKEY_BIT4 VM_NONE
#endif /* CONFIG_ARCH_PKEY_BITS > 4 */
#endif /* CONFIG_ARCH_HAS_PKEYS */
#if defined(CONFIG_X86_USER_SHADOW_STACK) || defined(CONFIG_ARM64_GCS)
#define VM_SHADOW_STACK INIT_VM_FLAG(SHADOW_STACK)
#else
#define VM_SHADOW_STACK VM_NONE
#endif
#if defined(CONFIG_PPC64)
#define VM_SAO INIT_VM_FLAG(SAO)
#elif defined(CONFIG_PARISC)
#define VM_GROWSUP INIT_VM_FLAG(GROWSUP)
#elif defined(CONFIG_SPARC64)
#define VM_SPARC_ADI INIT_VM_FLAG(SPARC_ADI)
#define VM_ARCH_CLEAR INIT_VM_FLAG(ARCH_CLEAR)
#elif defined(CONFIG_ARM64)
#define VM_ARM64_BTI INIT_VM_FLAG(ARM64_BTI)
#define VM_ARCH_CLEAR INIT_VM_FLAG(ARCH_CLEAR)
#elif !defined(CONFIG_MMU)
#define VM_MAPPED_COPY INIT_VM_FLAG(MAPPED_COPY)
#endif
#ifndef VM_GROWSUP
#define VM_GROWSUP VM_NONE
#endif
#ifdef CONFIG_ARM64_MTE
#define VM_MTE INIT_VM_FLAG(MTE)
#define VM_MTE_ALLOWED INIT_VM_FLAG(MTE_ALLOWED)
#else
#define VM_MTE VM_NONE
#define VM_MTE_ALLOWED VM_NONE
#endif
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
#define VM_UFFD_MINOR INIT_VM_FLAG(UFFD_MINOR)
#else
#define VM_UFFD_MINOR VM_NONE
#endif
#ifdef CONFIG_64BIT
#define VM_ALLOW_ANY_UNCACHED INIT_VM_FLAG(ALLOW_ANY_UNCACHED)
#define VM_SEALED INIT_VM_FLAG(SEALED)
#else
#define VM_ALLOW_ANY_UNCACHED VM_NONE
#define VM_SEALED VM_NONE
#endif
#if defined(CONFIG_64BIT) || defined(CONFIG_PPC32)
#define VM_DROPPABLE INIT_VM_FLAG(DROPPABLE)
#else
#define VM_DROPPABLE VM_NONE
#endif

/* Bits set in the VMA until the stack is in its final location */
#define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY)

#define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0)

/* Common data flag combinations */
#define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \
                                VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
#define VM_DATA_FLAGS_NON_EXEC (VM_READ | VM_WRITE | VM_MAYREAD | \
                                VM_MAYWRITE | VM_MAYEXEC)
#define VM_DATA_FLAGS_EXEC (VM_READ | VM_WRITE | VM_EXEC | \
                            VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)

#ifndef VM_DATA_DEFAULT_FLAGS /* arch can override this */
#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_EXEC
#endif

#ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */
#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
#endif

#define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK)

#define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)

/* VMA basic access permission flags */
#define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC)

/*
 * Special vmas that are non-mergable, non-mlock()able.
 */
#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)

#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE)
#define TASK_SIZE_LOW DEFAULT_MAP_WINDOW
#define TASK_SIZE_MAX DEFAULT_MAP_WINDOW
#define STACK_TOP TASK_SIZE_LOW
#define STACK_TOP_MAX TASK_SIZE_MAX

/* This mask represents all the VMA flag bits used by mlock */
#define VM_LOCKED_MASK (VM_LOCKED | VM_LOCKONFAULT)

#define RLIMIT_STACK 3 /* max stack size */
#define RLIMIT_MEMLOCK 8 /* max locked-in-memory address space */

#define CAP_IPC_LOCK 14

/*
 * Flags which should be 'sticky' on merge - that is, flags which, when one VMA
 * possesses them and the other does not, should nonetheless be applied to the
 * merged VMA:
 *
 * VM_SOFTDIRTY - if a VMA is marked soft-dirty, that is, has not had its
 *                references cleared via /proc/$pid/clear_refs, any merged VMA
 *                should be considered soft-dirty also, as it operates at VMA
 *                granularity.
 */
#define VM_STICKY (VM_SOFTDIRTY | VM_MAYBE_GUARD)

/*
 * VMA flags we ignore for the purposes of merge, i.e. one VMA possessing one
 * of these flags and the other not does not preclude a merge.
 *
 * VM_STICKY - When merging VMAs, VMA flags must match, unless they are
 *             'sticky'. If any sticky flags exist in either VMA, we simply
 *             set all of them on the merged VMA.
 */
#define VM_IGNORE_MERGE VM_STICKY
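
/*
 * Example (illustrative, not compiled): when testing whether two VMAs' flags
 * are compatible for merge, sticky flags are ignored in the comparison and
 * then applied to the merged VMA:
 *
 *	vm_flags_t a = VM_READ | VM_WRITE | VM_SOFTDIRTY;
 *	vm_flags_t b = VM_READ | VM_WRITE;
 *
 *	(a & ~VM_IGNORE_MERGE) == (b & ~VM_IGNORE_MERGE)   // true, may merge
 *	// ...and the merged VMA's flags include VM_SOFTDIRTY (it is sticky).
 */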

/*
 * Flags which should result in page tables being copied on fork. These are
 * flags which indicate that the VMA maps page tables which cannot be
 * reconstituted upon page fault, so necessitate page table copying upon fork:
 *
 * VM_PFNMAP / VM_MIXEDMAP - These contain kernel-mapped data which cannot be
 *                           reasonably reconstructed on page fault.
 *
 * VM_UFFD_WP - Encodes metadata about an installed uffd
 *              write protect handler, which cannot be
 *              reconstructed on page fault.
 *
 *              We always copy pgtables when dst_vma has uffd-wp
 *              enabled even if it's file-backed (e.g. shmem),
 *              because when uffd-wp is enabled the pgtable
 *              contains uffd-wp protection information that we
 *              can't retrieve from the page cache, and skipping
 *              the copy would lose that information.
 *
 * VM_MAYBE_GUARD - Could contain page guard region markers which
 *                  by design are a property of the page tables
 *                  only and thus cannot be reconstructed on page
 *                  fault.
 */
#define VM_COPY_ON_FORK (VM_PFNMAP | VM_MIXEDMAP | VM_UFFD_WP | VM_MAYBE_GUARD)

#define FIRST_USER_ADDRESS 0UL
#define USER_PGTABLES_CEILING 0UL

#define vma_policy(vma) NULL

#define down_write_nest_lock(sem, nest_lock)

#define pgprot_val(x) ((x).pgprot)
#define __pgprot(x) ((pgprot_t) { (x) } )

#define for_each_vma(__vmi, __vma) \
        while (((__vma) = vma_next(&(__vmi))) != NULL)

/* The MM code likes to work with exclusive end addresses */
#define for_each_vma_range(__vmi, __vma, __end) \
        while (((__vma) = vma_find(&(__vmi), (__end))) != NULL)

#define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK)

#define PHYS_PFN(x) ((unsigned long)((x) >> PAGE_SHIFT))

#define test_and_set_bit(nr, addr) __test_and_set_bit(nr, addr)
#define test_and_clear_bit(nr, addr) __test_and_clear_bit(nr, addr)

#define TASK_SIZE ((1ul << 47)-PAGE_SIZE)

#define AS_MM_ALL_LOCKS 2

/* We hardcode this for now. */
#define sysctl_max_map_count 0x1000000UL

#define pgoff_t unsigned long
typedef unsigned long pgprotval_t;
typedef struct pgprot { pgprotval_t pgprot; } pgprot_t;
typedef unsigned long vm_flags_t;
typedef __bitwise unsigned int vm_fault_t;

/*
 * The shared stubs do not implement this; it amounts to an fprintf(stderr, ...)
 * either way :)
 */
#define pr_warn_once pr_err

#define data_race(expr) expr

#define ASSERT_EXCLUSIVE_WRITER(x)

#define pgtable_supports_soft_dirty() 1

/**
 * swap - swap values of @a and @b
 * @a: first value
 * @b: second value
 */
#define swap(a, b) \
        do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)

struct kref {
        refcount_t refcount;
};

/*
 * Define the task command name length as enum, then it can be visible to
 * BPF programs.
 */
enum {
        TASK_COMM_LEN = 16,
};

/*
 * Flags for bug emulation.
 *
 * These occupy the top three bytes.
 */
enum {
        READ_IMPLIES_EXEC = 0x0400000,
};

struct task_struct {
        char comm[TASK_COMM_LEN];
        pid_t pid;
        struct mm_struct *mm;

        /* Used for emulating ABI behavior of previous Linux versions: */
        unsigned int personality;
};

struct task_struct *get_current(void);
#define current get_current()

struct anon_vma {
        struct anon_vma *root;
        struct rb_root_cached rb_root;

        /* Test fields. */
        bool was_cloned;
        bool was_unlinked;
};

struct anon_vma_chain {
        struct anon_vma *anon_vma;
        struct list_head same_vma;
};

struct anon_vma_name {
        struct kref kref;
        /* The name needs to be at the end because it is dynamically sized. */
        char name[];
};

struct vma_iterator {
        struct ma_state mas;
};

#define VMA_ITERATOR(name, __mm, __addr) \
        struct vma_iterator name = { \
                .mas = { \
                        .tree = &(__mm)->mm_mt, \
                        .index = __addr, \
                        .node = NULL, \
                        .status = ma_start, \
                }, \
        }
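
/*
 * Example usage (illustrative, not compiled) - walk every VMA in an mm:
 *
 *	struct vm_area_struct *vma;
 *	VMA_ITERATOR(vmi, mm, 0);
 *
 *	for_each_vma(vmi, vma)
 *		printf("%lx-%lx\n", vma->vm_start, vma->vm_end);
 */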

struct address_space {
        struct rb_root_cached i_mmap;
        unsigned long flags;
        atomic_t i_mmap_writable;
};

struct vm_userfaultfd_ctx {};
struct mempolicy {};
struct mmu_gather {};
struct mutex {};
#define DEFINE_MUTEX(mutexname) \
        struct mutex mutexname = {}

#define DECLARE_BITMAP(name, bits) \
        unsigned long name[BITS_TO_LONGS(bits)]

#define NUM_MM_FLAG_BITS (64)
typedef struct {
        __private DECLARE_BITMAP(__mm_flags, NUM_MM_FLAG_BITS);
} mm_flags_t;

/*
 * Opaque type representing current VMA (vm_area_struct) flag state. Must be
 * accessed via vma_flags_xxx() helper functions.
 */
#define NUM_VMA_FLAG_BITS BITS_PER_LONG
typedef struct {
        DECLARE_BITMAP(__vma_flags, NUM_VMA_FLAG_BITS);
} __private vma_flags_t;

struct mm_struct {
        struct maple_tree mm_mt;
        int map_count;          /* number of VMAs */
        unsigned long total_vm;         /* Total pages mapped */
        unsigned long locked_vm;        /* Pages that have PG_mlocked set */
        unsigned long data_vm;          /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
        unsigned long exec_vm;          /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
        unsigned long stack_vm;         /* VM_STACK */

        unsigned long def_flags;

        mm_flags_t flags;       /* Must use mm_flags_* helpers to access */
};

struct vm_area_struct;

/* What action should be taken after an .mmap_prepare call is complete? */
enum mmap_action_type {
        MMAP_NOTHING,           /* Mapping is complete, no further action. */
        MMAP_REMAP_PFN,         /* Remap PFN range. */
        MMAP_IO_REMAP_PFN,      /* I/O remap PFN range. */
};

/*
 * Describes an action an mmap_prepare hook can request be taken to complete
 * the mapping of a VMA. Specified in vm_area_desc.
 */
struct mmap_action {
        union {
                /* Remap range. */
                struct {
                        unsigned long start;
                        unsigned long start_pfn;
                        unsigned long size;
                        pgprot_t pgprot;
                } remap;
        };
        enum mmap_action_type type;

        /*
         * If specified, this hook is invoked after the selected action has been
         * successfully completed. Note that the VMA write lock is still held.
         *
         * The absolute minimum ought to be done here.
         *
         * Returns 0 on success, or an error code.
         */
        int (*success_hook)(const struct vm_area_struct *vma);

        /*
         * If specified, this hook is invoked when an error occurs while
         * attempting the selected action.
         *
         * The hook can return an error code in order to filter the error, but
         * it is not valid to clear the error here.
         */
        int (*error_hook)(int err);

        /*
         * This should be set in rare instances where the operation requires
         * that rmap not be able to access the VMA until it is completely set
         * up.
         */
        bool hide_from_rmap_until_complete :1;
};

/*
 * Describes a VMA that is about to be mmap()'ed. Drivers may choose to
 * manipulate mutable fields which will cause those fields to be updated in the
 * resultant VMA.
 *
 * Helper functions are not required for manipulating any field.
 */
struct vm_area_desc {
        /* Immutable state. */
        const struct mm_struct *const mm;
        struct file *const file;        /* May vary from vm_file in stacked callers. */
        unsigned long start;
        unsigned long end;

        /* Mutable fields. Populated with initial state. */
        pgoff_t pgoff;
        struct file *vm_file;
        union {
                vm_flags_t vm_flags;
                vma_flags_t vma_flags;
        };
        pgprot_t page_prot;

        /* Write-only fields. */
        const struct vm_operations_struct *vm_ops;
        void *private_data;

        /* Take further action? */
        struct mmap_action action;
};

struct file_operations {
        int (*mmap)(struct file *, struct vm_area_struct *);
        int (*mmap_prepare)(struct vm_area_desc *);
};

struct file {
        struct address_space *f_mapping;
        const struct file_operations *f_op;
};

#define VMA_LOCK_OFFSET 0x40000000

typedef struct { unsigned long v; } freeptr_t;

struct vm_area_struct {
        /* The first cache line has the info for VMA tree walking. */

        union {
                struct {
                        /* VMA covers [vm_start; vm_end) addresses within mm */
                        unsigned long vm_start;
                        unsigned long vm_end;
                };
                freeptr_t vm_freeptr;   /* Pointer used by SLAB_TYPESAFE_BY_RCU */
        };

        struct mm_struct *vm_mm;        /* The address space we belong to. */
        pgprot_t vm_page_prot;          /* Access permissions of this VMA. */

        /*
         * Flags, see mm.h.
         * To modify use vm_flags_{init|reset|set|clear|mod} functions.
         */
        union {
                const vm_flags_t vm_flags;
                vma_flags_t flags;
        };

#ifdef CONFIG_PER_VMA_LOCK
        /*
         * Can only be written (using WRITE_ONCE()) while holding both:
         *  - mmap_lock (in write mode)
         *  - vm_refcnt bit at VMA_LOCK_OFFSET is set
         * Can be read reliably while holding one of:
         *  - mmap_lock (in read or write mode)
         *  - vm_refcnt bit at VMA_LOCK_OFFSET is set or vm_refcnt > 1
         * Can be read unreliably (using READ_ONCE()) for pessimistic bailout
         * while holding nothing (except RCU to keep the VMA struct allocated).
         *
         * This sequence counter is explicitly allowed to overflow; sequence
         * counter reuse can only lead to occasional unnecessary use of the
         * slowpath.
         */
        unsigned int vm_lock_seq;
#endif

        /*
         * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
         * list, after a COW of one of the file pages. A MAP_SHARED vma
         * can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack
         * or brk vma (with NULL file) can only be in an anon_vma list.
         */
        struct list_head anon_vma_chain; /* Serialized by mmap_lock &
                                          * page_table_lock */
        struct anon_vma *anon_vma;      /* Serialized by page_table_lock */

        /* Function pointers to deal with this struct. */
        const struct vm_operations_struct *vm_ops;

        /* Information about our backing store: */
        unsigned long vm_pgoff;         /* Offset (within vm_file) in PAGE_SIZE
                                           units */
        struct file * vm_file;          /* File we map to (can be NULL). */
        void * vm_private_data;         /* was vm_pte (shared mem) */

#ifdef CONFIG_SWAP
        atomic_long_t swap_readahead_info;
#endif
#ifndef CONFIG_MMU
        struct vm_region *vm_region;    /* NOMMU mapping region */
#endif
#ifdef CONFIG_NUMA
        struct mempolicy *vm_policy;    /* NUMA policy for the VMA */
#endif
#ifdef CONFIG_NUMA_BALANCING
        struct vma_numab_state *numab_state;    /* NUMA Balancing state */
#endif
#ifdef CONFIG_PER_VMA_LOCK
        /* Unstable RCU readers are allowed to read this. */
        refcount_t vm_refcnt;
#endif
        /*
         * For areas with an address space and backing store,
         * linkage into the address_space->i_mmap interval tree.
         */
        struct {
                struct rb_node rb;
                unsigned long rb_subtree_last;
        } shared;
#ifdef CONFIG_ANON_VMA_NAME
        /*
         * For private and shared anonymous mappings, a pointer to a null
         * terminated string containing the name given to the vma, or NULL if
         * unnamed. Serialized by mmap_lock. Use anon_vma_name to access.
         */
        struct anon_vma_name *anon_name;
#endif
        struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
} __randomize_layout;

struct vm_fault {};

struct vm_operations_struct {
        void (*open)(struct vm_area_struct * area);
        /**
         * @close: Called when the VMA is being removed from the MM.
         * Context: User context. May sleep. Caller holds mmap_lock.
         */
        void (*close)(struct vm_area_struct * area);
        /* Called any time before splitting to check if it's allowed */
        int (*may_split)(struct vm_area_struct *area, unsigned long addr);
        int (*mremap)(struct vm_area_struct *area);
        /*
         * Called by mprotect() to make driver-specific permission
         * checks before mprotect() is finalised. The VMA must not
         * be modified. Returns 0 if mprotect() can proceed.
         */
        int (*mprotect)(struct vm_area_struct *vma, unsigned long start,
                        unsigned long end, unsigned long newflags);
        vm_fault_t (*fault)(struct vm_fault *vmf);
        vm_fault_t (*huge_fault)(struct vm_fault *vmf, unsigned int order);
        vm_fault_t (*map_pages)(struct vm_fault *vmf,
                        pgoff_t start_pgoff, pgoff_t end_pgoff);
        unsigned long (*pagesize)(struct vm_area_struct * area);

        /* notification that a previously read-only page is about to become
         * writable, if an error is returned it will cause a SIGBUS */
        vm_fault_t (*page_mkwrite)(struct vm_fault *vmf);

        /* same as page_mkwrite when using VM_PFNMAP|VM_MIXEDMAP */
        vm_fault_t (*pfn_mkwrite)(struct vm_fault *vmf);

        /* called by access_process_vm when get_user_pages() fails, typically
         * for use by special VMAs. See also generic_access_phys() for a generic
         * implementation useful for any iomem mapping.
         */
        int (*access)(struct vm_area_struct *vma, unsigned long addr,
                      void *buf, int len, int write);

        /* Called by the /proc/PID/maps code to ask the vma whether it
         * has a special name. Returning non-NULL will also cause this
         * vma to be dumped unconditionally. */
        const char *(*name)(struct vm_area_struct *vma);

#ifdef CONFIG_NUMA
        /*
         * set_policy() op must add a reference to any non-NULL @new mempolicy
         * to hold the policy upon return. Caller should pass NULL @new to
         * remove a policy and fall back to surrounding context--i.e. do not
         * install a MPOL_DEFAULT policy, nor the task or system default
         * mempolicy.
         */
        int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new);

        /*
         * get_policy() op must add reference [mpol_get()] to any policy at
         * (vma,addr) marked as MPOL_SHARED. The shared policy infrastructure
         * in mm/mempolicy.c will do this automatically.
         * get_policy() must NOT add a ref if the policy at (vma,addr) is not
         * marked as MPOL_SHARED. vma policies are protected by the mmap_lock.
         * If no [shared/vma] mempolicy exists at the addr, get_policy() op
         * must return NULL--i.e., do not "fallback" to task or system default
         * policy.
         */
        struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
                                        unsigned long addr, pgoff_t *ilx);
#endif
#ifdef CONFIG_FIND_NORMAL_PAGE
        /*
         * Called by vm_normal_page() for special PTEs in @vma at @addr. This
         * allows for returning a "normal" page from vm_normal_page() even
         * though the PTE indicates that the "struct page" either does not exist
         * or should not be touched: "special".
         *
         * Do not add new users: this really only works when a "normal" page
         * was mapped, but then the PTE got changed to something weird (+
         * marked special) that would not make pte_pfn() identify the originally
         * inserted page.
         */
        struct page *(*find_normal_page)(struct vm_area_struct *vma,
                                         unsigned long addr);
#endif /* CONFIG_FIND_NORMAL_PAGE */
};

struct vm_unmapped_area_info {
#define VM_UNMAPPED_AREA_TOPDOWN 1
        unsigned long flags;
        unsigned long length;
        unsigned long low_limit;
        unsigned long high_limit;
        unsigned long align_mask;
        unsigned long align_offset;
        unsigned long start_gap;
};

struct pagetable_move_control {
        struct vm_area_struct *old;     /* Source VMA. */
        struct vm_area_struct *new;     /* Destination VMA. */
        unsigned long old_addr;         /* Address from which the move begins. */
        unsigned long old_end;          /* Exclusive address at which old range ends. */
        unsigned long new_addr;         /* Address to move page tables to. */
        unsigned long len_in;           /* Bytes to remap specified by user. */

        bool need_rmap_locks;           /* Do rmap locks need to be taken? */
        bool for_stack;                 /* Is this an early temp stack being moved? */
};

#define PAGETABLE_MOVE(name, old_, new_, old_addr_, new_addr_, len_) \
        struct pagetable_move_control name = { \
                .old = old_, \
                .new = new_, \
                .old_addr = old_addr_, \
                .old_end = (old_addr_) + (len_), \
                .new_addr = new_addr_, \
                .len_in = len_, \
        }
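
/*
 * Example usage (illustrative, not compiled) - describe an mremap()-style move
 * of page tables from old_vma to new_vma:
 *
 *	PAGETABLE_MOVE(pmc, old_vma, new_vma, old_vma->vm_start,
 *		       new_vma->vm_start, len);
 *	moved_bytes = move_page_tables(&pmc);
 */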

static inline void vma_iter_invalidate(struct vma_iterator *vmi)
{
        mas_pause(&vmi->mas);
}

static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
{
        return __pgprot(pgprot_val(oldprot) | pgprot_val(newprot));
}

static inline pgprot_t vm_get_page_prot(vm_flags_t vm_flags)
{
        return __pgprot(vm_flags);
}

static inline bool is_shared_maywrite(vm_flags_t vm_flags)
{
        return (vm_flags & (VM_SHARED | VM_MAYWRITE)) ==
                (VM_SHARED | VM_MAYWRITE);
}

static inline bool vma_is_shared_maywrite(struct vm_area_struct *vma)
{
        return is_shared_maywrite(vma->vm_flags);
}

static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi)
{
        /*
         * Uses mas_find() to get the first VMA when the iterator starts.
         * Calling mas_next() could skip the first entry.
         */
        return mas_find(&vmi->mas, ULONG_MAX);
}

/*
 * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these
 * assertions should be made either under mmap_write_lock or when the object
 * has been isolated under mmap_write_lock, ensuring no competing writers.
 */
static inline void vma_assert_attached(struct vm_area_struct *vma)
{
        WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt));
}

static inline void vma_assert_detached(struct vm_area_struct *vma)
{
        WARN_ON_ONCE(refcount_read(&vma->vm_refcnt));
}

static inline void vma_assert_write_locked(struct vm_area_struct *);
static inline void vma_mark_attached(struct vm_area_struct *vma)
{
        vma_assert_write_locked(vma);
        vma_assert_detached(vma);
        refcount_set_release(&vma->vm_refcnt, 1);
}

static inline void vma_mark_detached(struct vm_area_struct *vma)
{
        vma_assert_write_locked(vma);
        vma_assert_attached(vma);
        /* We are the only writer, so no need to use vma_refcount_put(). */
        if (unlikely(!refcount_dec_and_test(&vma->vm_refcnt))) {
                /*
                 * Reader must have temporarily raised vm_refcnt but it will
                 * drop it without using the vma since vma is write-locked.
                 */
        }
}

extern const struct vm_operations_struct vma_dummy_vm_ops;

extern unsigned long rlimit(unsigned int limit);

static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
{
        memset(vma, 0, sizeof(*vma));
        vma->vm_mm = mm;
        vma->vm_ops = &vma_dummy_vm_ops;
        INIT_LIST_HEAD(&vma->anon_vma_chain);
        vma->vm_lock_seq = UINT_MAX;
}

/*
 * These are defined in vma.h, but sadly vm_stat_account() is referenced by
 * kernel/fork.c, so we have to make these broadly available there, and
 * temporarily define them here to resolve the dependency cycle.
 */

#define is_exec_mapping(flags) \
        ((flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC)

#define is_stack_mapping(flags) \
        (((flags & VM_STACK) == VM_STACK) || (flags & VM_SHADOW_STACK))

#define is_data_mapping(flags) \
        ((flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE)

static inline void vm_stat_account(struct mm_struct *mm, vm_flags_t flags,
                                   long npages)
{
        WRITE_ONCE(mm->total_vm, READ_ONCE(mm->total_vm)+npages);

        if (is_exec_mapping(flags))
                mm->exec_vm += npages;
        else if (is_stack_mapping(flags))
                mm->stack_vm += npages;
        else if (is_data_mapping(flags))
                mm->data_vm += npages;
}

#undef is_exec_mapping
#undef is_stack_mapping
#undef is_data_mapping

/* Currently stubbed but we may later wish to un-stub. */
static inline void vm_acct_memory(long pages);
static inline void vm_unacct_memory(long pages)
{
        vm_acct_memory(-pages);
}

static inline void mapping_allow_writable(struct address_space *mapping)
{
        atomic_inc(&mapping->i_mmap_writable);
}

static inline void vma_set_range(struct vm_area_struct *vma,
                                 unsigned long start, unsigned long end,
                                 pgoff_t pgoff)
{
        vma->vm_start = start;
        vma->vm_end = end;
        vma->vm_pgoff = pgoff;
}

static inline
struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max)
{
        return mas_find(&vmi->mas, max - 1);
}

static inline int vma_iter_clear_gfp(struct vma_iterator *vmi,
                        unsigned long start, unsigned long end, gfp_t gfp)
{
        __mas_set_range(&vmi->mas, start, end - 1);
        mas_store_gfp(&vmi->mas, NULL, gfp);
        if (unlikely(mas_is_err(&vmi->mas)))
                return -ENOMEM;

        return 0;
}

static inline void mmap_assert_locked(struct mm_struct *);
static inline struct vm_area_struct *find_vma_intersection(struct mm_struct *mm,
                                                unsigned long start_addr,
                                                unsigned long end_addr)
{
        unsigned long index = start_addr;

        mmap_assert_locked(mm);
        return mt_find(&mm->mm_mt, &index, end_addr - 1);
}

static inline
struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr)
{
        return mtree_load(&mm->mm_mt, addr);
}

static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi)
{
        return mas_prev(&vmi->mas, 0);
}

static inline void vma_iter_set(struct vma_iterator *vmi, unsigned long addr)
{
        mas_set(&vmi->mas, addr);
}

static inline bool vma_is_anonymous(struct vm_area_struct *vma)
{
        return !vma->vm_ops;
}

/* Defined in vma.h, so temporarily define here to avoid circular dependency. */
#define vma_iter_load(vmi) \
        mas_walk(&(vmi)->mas)

static inline struct vm_area_struct *
find_vma_prev(struct mm_struct *mm, unsigned long addr,
                        struct vm_area_struct **pprev)
{
        struct vm_area_struct *vma;
        VMA_ITERATOR(vmi, mm, addr);

        vma = vma_iter_load(&vmi);
        *pprev = vma_prev(&vmi);
        if (!vma)
                vma = vma_next(&vmi);
        return vma;
}

#undef vma_iter_load

static inline void vma_iter_init(struct vma_iterator *vmi,
                struct mm_struct *mm, unsigned long addr)
{
        mas_init(&vmi->mas, &mm->mm_mt, addr);
}

/* Stubbed functions. */

static inline struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma)
{
        return NULL;
}

static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
                                        struct vm_userfaultfd_ctx vm_ctx)
{
        return true;
}

static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1,
                                    struct anon_vma_name *anon_name2)
{
        return true;
}

static inline void might_sleep(void)
{
}

static inline unsigned long vma_pages(struct vm_area_struct *vma)
{
        return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
}

static inline void fput(struct file *file)
{
}

static inline void mpol_put(struct mempolicy *pol)
{
}

static inline void lru_add_drain(void)
{
}

static inline void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm)
{
}

static inline void update_hiwater_rss(struct mm_struct *mm)
{
}

static inline void update_hiwater_vm(struct mm_struct *mm)
{
}

static inline void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
                      struct vm_area_struct *vma, unsigned long start_addr,
                      unsigned long end_addr, unsigned long tree_end)
{
}

static inline void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
                   struct vm_area_struct *vma, unsigned long floor,
                   unsigned long ceiling, bool mm_wr_locked)
{
}

static inline void mapping_unmap_writable(struct address_space *mapping)
{
}

static inline void flush_dcache_mmap_lock(struct address_space *mapping)
{
}

static inline void tlb_finish_mmu(struct mmu_gather *tlb)
{
}

static inline struct file *get_file(struct file *f)
{
        return f;
}

static inline int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst)
{
        return 0;
}

static inline int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
{
        /* For testing purposes. We indicate that an anon_vma has been cloned. */
        if (src->anon_vma != NULL) {
                dst->anon_vma = src->anon_vma;
                dst->anon_vma->was_cloned = true;
        }

        return 0;
}

static inline void vma_start_write(struct vm_area_struct *vma)
{
        /* Used to indicate to tests that a write operation has begun. */
        vma->vm_lock_seq++;
}

static inline __must_check
int vma_start_write_killable(struct vm_area_struct *vma)
{
        /* Used to indicate to tests that a write operation has begun. */
        vma->vm_lock_seq++;
        return 0;
}

static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
                                         unsigned long start,
                                         unsigned long end,
                                         struct vm_area_struct *next)
{
}

static inline void hugetlb_split(struct vm_area_struct *, unsigned long) {}

static inline void vma_iter_free(struct vma_iterator *vmi)
{
        mas_destroy(&vmi->mas);
}

static inline
struct vm_area_struct *vma_iter_next_range(struct vma_iterator *vmi)
{
        return mas_next_range(&vmi->mas, ULONG_MAX);
}

static inline void vm_acct_memory(long pages)
{
}

static inline void vma_interval_tree_insert(struct vm_area_struct *vma,
                                            struct rb_root_cached *rb)
{
}

static inline void vma_interval_tree_remove(struct vm_area_struct *vma,
                                            struct rb_root_cached *rb)
{
}

static inline void flush_dcache_mmap_unlock(struct address_space *mapping)
{
}

static inline void anon_vma_interval_tree_insert(struct anon_vma_chain *avc,
                                                 struct rb_root_cached *rb)
{
}

static inline void anon_vma_interval_tree_remove(struct anon_vma_chain *avc,
                                                 struct rb_root_cached *rb)
{
}

static inline void uprobe_mmap(struct vm_area_struct *vma)
{
}

static inline void uprobe_munmap(struct vm_area_struct *vma,
                                 unsigned long start, unsigned long end)
{
}

static inline void i_mmap_lock_write(struct address_space *mapping)
{
}

static inline void anon_vma_lock_write(struct anon_vma *anon_vma)
{
}

static inline void vma_assert_write_locked(struct vm_area_struct *vma)
{
}

static inline void unlink_anon_vmas(struct vm_area_struct *vma)
{
        /* For testing purposes, indicate that the anon_vma was unlinked. */
        vma->anon_vma->was_unlinked = true;
}

static inline void anon_vma_unlock_write(struct anon_vma *anon_vma)
{
}

static inline void i_mmap_unlock_write(struct address_space *mapping)
{
}

static inline void anon_vma_merge(struct vm_area_struct *vma,
                                  struct vm_area_struct *next)
{
}

static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma,
                                         unsigned long start,
                                         unsigned long end,
                                         struct list_head *unmaps)
{
        return 0;
}

static inline void mmap_write_downgrade(struct mm_struct *mm)
{
}

static inline void mmap_read_unlock(struct mm_struct *mm)
{
}

static inline void mmap_write_unlock(struct mm_struct *mm)
{
}

static inline int mmap_write_lock_killable(struct mm_struct *mm)
{
        return 0;
}

static inline bool can_modify_mm(struct mm_struct *mm,
                                 unsigned long start,
                                 unsigned long end)
{
        return true;
}

static inline void arch_unmap(struct mm_struct *mm,
                              unsigned long start,
                              unsigned long end)
{
}

static inline void mmap_assert_locked(struct mm_struct *mm)
{
}

static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b)
{
        return true;
}

static inline void khugepaged_enter_vma(struct vm_area_struct *vma,
                                        vm_flags_t vm_flags)
{
}

static inline bool mapping_can_writeback(struct address_space *mapping)
{
        return true;
}

static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma)
{
        return false;
}

static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma)
{
        return false;
}

static inline bool userfaultfd_wp(struct vm_area_struct *vma)
{
        return false;
}

static inline void mmap_assert_write_locked(struct mm_struct *mm)
{
}

static inline void mutex_lock(struct mutex *lock)
{
}

static inline void mutex_unlock(struct mutex *lock)
{
}

static inline bool mutex_is_locked(struct mutex *lock)
{
        return true;
}

static inline bool signal_pending(void *p)
{
        return false;
}

static inline bool is_file_hugepages(struct file *file)
{
        return false;
}

static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages)
{
        return 0;
}

static inline bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags,
                                 unsigned long npages)
{
        return true;
}

static inline int shmem_zero_setup(struct vm_area_struct *vma)
{
        return 0;
}

static inline void vma_set_anonymous(struct vm_area_struct *vma)
{
        vma->vm_ops = NULL;
}

static inline void ksm_add_vma(struct vm_area_struct *vma)
{
}

static inline void perf_event_mmap(struct vm_area_struct *vma)
{
}

static inline bool vma_is_dax(struct vm_area_struct *vma)
{
        return false;
}

static inline struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
{
        return NULL;
}

bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);

/* Update vma->vm_page_prot to reflect vma->vm_flags. */
static inline void vma_set_page_prot(struct vm_area_struct *vma)
{
        vm_flags_t vm_flags = vma->vm_flags;
        pgprot_t vm_page_prot;

        /* testing: we inline vm_pgprot_modify() to avoid clash with vma.h. */
        vm_page_prot = pgprot_modify(vma->vm_page_prot, vm_get_page_prot(vm_flags));

        if (vma_wants_writenotify(vma, vm_page_prot)) {
                vm_flags &= ~VM_SHARED;
                /* testing: we inline vm_pgprot_modify() to avoid clash with vma.h. */
                vm_page_prot = pgprot_modify(vm_page_prot, vm_get_page_prot(vm_flags));
        }
        /* remove_protection_ptes reads vma->vm_page_prot without mmap_lock */
        WRITE_ONCE(vma->vm_page_prot, vm_page_prot);
}

static inline bool arch_validate_flags(vm_flags_t flags)
{
        return true;
}

static inline void vma_close(struct vm_area_struct *vma)
{
}

static inline int mmap_file(struct file *file, struct vm_area_struct *vma)
{
        return 0;
}

static inline unsigned long stack_guard_start_gap(struct vm_area_struct *vma)
{
        if (vma->vm_flags & VM_GROWSDOWN)
                return stack_guard_gap;

        /* See reasoning around the VM_SHADOW_STACK definition */
        if (vma->vm_flags & VM_SHADOW_STACK)
                return PAGE_SIZE;

        return 0;
}

static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
{
        unsigned long gap = stack_guard_start_gap(vma);
        unsigned long vm_start = vma->vm_start;

        vm_start -= gap;
        if (vm_start > vma->vm_start)
                vm_start = 0;
        return vm_start;
}

static inline unsigned long vm_end_gap(struct vm_area_struct *vma)
{
        unsigned long vm_end = vma->vm_end;

        if (vma->vm_flags & VM_GROWSUP) {
                vm_end += stack_guard_gap;
                if (vm_end < vma->vm_end)
                        vm_end = -PAGE_SIZE;
        }
        return vm_end;
}
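
/*
 * Worked example (illustrative): with the kernel's default stack_guard_gap of
 * 256UL << PAGE_SHIFT (1 MiB with 4 KiB pages), a VM_GROWSDOWN stack VMA
 * spanning [0x7f0000100000, 0x7f0000200000) gives:
 *
 *	vm_start_gap(vma) == 0x7f0000100000 - 0x100000 == 0x7f0000000000
 *
 * so free-space searches treat the guard gap as part of the VMA; underflow is
 * clamped to 0, and vm_end_gap() does the analogous thing for VM_GROWSUP.
 */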

static inline int is_hugepage_only_range(struct mm_struct *mm,
                                        unsigned long addr, unsigned long len)
{
        return 0;
}

static inline bool vma_is_accessible(struct vm_area_struct *vma)
{
        return vma->vm_flags & VM_ACCESS_FLAGS;
}

static inline bool capable(int cap)
{
        return true;
}

static inline bool mlock_future_ok(const struct mm_struct *mm,
                        vm_flags_t vm_flags, unsigned long bytes)
{
        unsigned long locked_pages, limit_pages;

        if (!(vm_flags & VM_LOCKED) || capable(CAP_IPC_LOCK))
                return true;

        locked_pages = bytes >> PAGE_SHIFT;
        locked_pages += mm->locked_vm;

        limit_pages = rlimit(RLIMIT_MEMLOCK);
        limit_pages >>= PAGE_SHIFT;

        return locked_pages <= limit_pages;
}
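
/*
 * Worked example (illustrative): with 4 KiB pages, RLIMIT_MEMLOCK == 8 MiB
 * (2048 limit pages) and mm->locked_vm == 1024 pages already locked, a further
 * VM_LOCKED mapping of 4 MiB (1024 pages) passes (1024 + 1024 <= 2048), while
 * one additional page on top of that would fail.
 */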

static inline int __anon_vma_prepare(struct vm_area_struct *vma)
{
        struct anon_vma *anon_vma = calloc(1, sizeof(struct anon_vma));

        if (!anon_vma)
                return -ENOMEM;

        anon_vma->root = anon_vma;
        vma->anon_vma = anon_vma;

        return 0;
}

static inline int anon_vma_prepare(struct vm_area_struct *vma)
{
        if (likely(vma->anon_vma))
                return 0;

        return __anon_vma_prepare(vma);
}

static inline void userfaultfd_unmap_complete(struct mm_struct *mm,
                                              struct list_head *uf)
{
}

#define ACCESS_PRIVATE(p, member) ((p)->member)

#define bitmap_size(nbits) (ALIGN(nbits, BITS_PER_LONG) / BITS_PER_BYTE)

static __always_inline void bitmap_zero(unsigned long *dst, unsigned int nbits)
{
        unsigned int len = bitmap_size(nbits);

        if (small_const_nbits(nbits))
                *dst = 0;
        else
                memset(dst, 0, len);
}

static inline bool mm_flags_test(int flag, const struct mm_struct *mm)
{
        return test_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags));
}

/* Clears all bits in the VMA flags bitmap, non-atomically. */
static inline void vma_flags_clear_all(vma_flags_t *flags)
{
        bitmap_zero(ACCESS_PRIVATE(flags, __vma_flags), NUM_VMA_FLAG_BITS);
}

/*
 * Copy value to the first system word of VMA flags, non-atomically.
 *
 * IMPORTANT: This does not overwrite bytes past the first system word. The
 * caller must account for this.
 */
static inline void vma_flags_overwrite_word(vma_flags_t *flags, unsigned long value)
{
        *ACCESS_PRIVATE(flags, __vma_flags) = value;
}

/*
 * Copy value to the first system word of VMA flags ONCE, non-atomically.
 *
 * IMPORTANT: This does not overwrite bytes past the first system word. The
 * caller must account for this.
 */
static inline void vma_flags_overwrite_word_once(vma_flags_t *flags, unsigned long value)
{
        unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);

        WRITE_ONCE(*bitmap, value);
}

/* Update the first system word of VMA flags setting bits, non-atomically. */
static inline void vma_flags_set_word(vma_flags_t *flags, unsigned long value)
{
        unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);

        *bitmap |= value;
}

/* Update the first system word of VMA flags clearing bits, non-atomically. */
static inline void vma_flags_clear_word(vma_flags_t *flags, unsigned long value)
{
        unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);

        *bitmap &= ~value;
}

/* Use when VMA is not part of the VMA tree and needs no locking */
static inline void vm_flags_init(struct vm_area_struct *vma,
                                 vm_flags_t flags)
{
        vma_flags_clear_all(&vma->flags);
        vma_flags_overwrite_word(&vma->flags, flags);
}

/*
 * Use when the VMA is part of the VMA tree and modifications need
 * coordination. Note: vm_flags_reset() and vm_flags_reset_once() do not lock
 * the VMA; it should be write-locked explicitly beforehand.
 */
static inline void vm_flags_reset(struct vm_area_struct *vma,
                                  vm_flags_t flags)
{
        vma_assert_write_locked(vma);
        vm_flags_init(vma, flags);
}

static inline void vm_flags_reset_once(struct vm_area_struct *vma,
                                       vm_flags_t flags)
{
        vma_assert_write_locked(vma);
        /*
         * The user should only be interested in avoiding reordering of
         * assignment to the first word.
         */
        vma_flags_clear_all(&vma->flags);
        vma_flags_overwrite_word_once(&vma->flags, flags);
}

static inline void vm_flags_set(struct vm_area_struct *vma,
                                vm_flags_t flags)
{
        vma_start_write(vma);
        vma_flags_set_word(&vma->flags, flags);
}

static inline void vm_flags_clear(struct vm_area_struct *vma,
                                  vm_flags_t flags)
{
        vma_start_write(vma);
        vma_flags_clear_word(&vma->flags, flags);
}
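
/*
 * Example usage (illustrative, not compiled) - initialise a detached VMA's
 * flags, then modify them once the VMA is in the tree:
 *
 *	vm_flags_init(vma, VM_READ | VM_WRITE);	// detached: no locking needed
 *	...
 *	vm_flags_set(vma, VM_LOCKED);		// write-locks the VMA first
 *	vm_flags_clear(vma, VM_LOCKED);
 */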

/*
 * Denies creating a writable executable mapping or gaining executable permissions.
 *
 * This denies the following:
 *
 * a)	mmap(PROT_WRITE | PROT_EXEC)
 *
 * b)	mmap(PROT_WRITE)
 *	mprotect(PROT_EXEC)
 *
 * c)	mmap(PROT_WRITE)
 *	mprotect(PROT_READ)
 *	mprotect(PROT_EXEC)
 *
 * But allows the following:
 *
 * d)	mmap(PROT_READ | PROT_EXEC)
 *	mmap(PROT_READ | PROT_EXEC | PROT_BTI)
 *
 * This is only applicable if the user has set the Memory-Deny-Write-Execute
 * (MDWE) protection mask for the current process.
 *
 * @old specifies the VMA flags the VMA originally possessed, and @new the ones
 * we propose to set.
 *
 * Return: false if proposed change is OK, true if not ok and should be denied.
 */
static inline bool map_deny_write_exec(unsigned long old, unsigned long new)
{
        /* If MDWE is disabled, we have nothing to deny. */
        if (!mm_flags_test(MMF_HAS_MDWE, current->mm))
                return false;

        /* If the new VMA is not executable, we have nothing to deny. */
        if (!(new & VM_EXEC))
                return false;

        /* Under MDWE we do not accept newly writably executable VMAs... */
        if (new & VM_WRITE)
                return true;

        /* ...nor previously non-executable VMAs becoming executable. */
        if (!(old & VM_EXEC))
                return true;

        return false;
}
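
/*
 * Example (illustrative): with MDWE enabled, for an old -> new transition:
 *
 *	map_deny_write_exec(VM_NONE, VM_WRITE | VM_EXEC)           == true
 *	map_deny_write_exec(VM_WRITE, VM_WRITE | VM_EXEC)          == true
 *	map_deny_write_exec(VM_READ, VM_READ | VM_EXEC)            == true
 *	map_deny_write_exec(VM_READ | VM_EXEC, VM_READ | VM_EXEC)  == false
 */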

static inline int mapping_map_writable(struct address_space *mapping)
{
        return atomic_inc_unless_negative(&mapping->i_mmap_writable) ?
                0 : -EPERM;
}

static inline unsigned long move_page_tables(struct pagetable_move_control *pmc)
{
        return 0;
}

static inline void free_pgd_range(struct mmu_gather *tlb,
                        unsigned long addr, unsigned long end,
                        unsigned long floor, unsigned long ceiling)
{
}

static inline int ksm_execve(struct mm_struct *mm)
{
        return 0;
}

static inline void ksm_exit(struct mm_struct *mm)
{
}

static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt)
{
        if (reset_refcnt)
                refcount_set(&vma->vm_refcnt, 0);
}

static inline void vma_numab_state_init(struct vm_area_struct *vma)
{
}

static inline void vma_numab_state_free(struct vm_area_struct *vma)
{
}

static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma,
                                     struct vm_area_struct *new_vma)
{
}

static inline void free_anon_vma_name(struct vm_area_struct *vma)
{
}

/* Declared in vma.h. */
static inline void set_vma_from_desc(struct vm_area_struct *vma,
                struct vm_area_desc *desc);

static inline void mmap_action_prepare(struct mmap_action *action,
                struct vm_area_desc *desc)
{
}

static inline int mmap_action_complete(struct mmap_action *action,
                struct vm_area_struct *vma)
{
        return 0;
}

static inline int __compat_vma_mmap(const struct file_operations *f_op,
                struct file *file, struct vm_area_struct *vma)
{
        struct vm_area_desc desc = {
                .mm = vma->vm_mm,
                .file = file,
                .start = vma->vm_start,
                .end = vma->vm_end,

                .pgoff = vma->vm_pgoff,
                .vm_file = vma->vm_file,
                .vm_flags = vma->vm_flags,
                .page_prot = vma->vm_page_prot,

                .action.type = MMAP_NOTHING, /* Default */
        };
        int err;

        err = f_op->mmap_prepare(&desc);
        if (err)
                return err;

        mmap_action_prepare(&desc.action, &desc);
        set_vma_from_desc(vma, &desc);
        return mmap_action_complete(&desc.action, vma);
}

static inline int compat_vma_mmap(struct file *file,
                struct vm_area_struct *vma)
{
        return __compat_vma_mmap(file->f_op, file, vma);
}
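
/*
 * Example (illustrative sketch - my_driver_mmap_prepare, MY_DRIVER_MAX_SIZE
 * and my_driver_vm_ops are hypothetical, not part of this header): a driver
 * using the .mmap_prepare hook validates and fills in the descriptor rather
 * than touching a VMA directly:
 *
 *	static int my_driver_mmap_prepare(struct vm_area_desc *desc)
 *	{
 *		if (desc->end - desc->start > MY_DRIVER_MAX_SIZE)
 *			return -EINVAL;
 *
 *		desc->vm_flags |= VM_DONTEXPAND;
 *		desc->vm_ops = &my_driver_vm_ops;
 *		return 0;	// action.type stays MMAP_NOTHING
 *	}
 *
 * __compat_vma_mmap() above bridges such hooks for callers that still pass a
 * fully-constructed VMA.
 */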

/* Did the driver provide valid mmap hook configuration? */
static inline bool can_mmap_file(struct file *file)
{
        bool has_mmap = file->f_op->mmap;
        bool has_mmap_prepare = file->f_op->mmap_prepare;

        /* Hooks are mutually exclusive. */
        if (WARN_ON_ONCE(has_mmap && has_mmap_prepare))
                return false;
        if (!has_mmap && !has_mmap_prepare)
                return false;

        return true;
}

static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma)
{
        if (file->f_op->mmap_prepare)
                return compat_vma_mmap(file, vma);

        return file->f_op->mmap(file, vma);
}

static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc *desc)
{
        return file->f_op->mmap_prepare(desc);
}

static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
{
}

static inline void vma_set_file(struct vm_area_struct *vma, struct file *file)
{
        /* Changing an anonymous vma with this is illegal */
        get_file(file);
        swap(vma->vm_file, file);
        fput(file);
}

static inline bool shmem_file(struct file *file)
{
        return false;
}

static inline vm_flags_t ksm_vma_flags(const struct mm_struct *mm,
                const struct file *file, vm_flags_t vm_flags)
{
        return vm_flags;
}

static inline void remap_pfn_range_prepare(struct vm_area_desc *desc, unsigned long pfn)
{
}

static inline int remap_pfn_range_complete(struct vm_area_struct *vma, unsigned long addr,
                unsigned long pfn, unsigned long size, pgprot_t pgprot)
{
        return 0;
}

static inline int do_munmap(struct mm_struct *, unsigned long, size_t,
                struct list_head *uf)
{
        return 0;
}

#endif /* __MM_VMA_INTERNAL_H */