GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/vm/vm_mmap.c
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include "opt_hwpmc_hooks.h"
#include "opt_hwt_hooks.h"
#include "opt_vm.h"

#define EXTERR_CATEGORY EXTERR_CAT_MMAP
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/capsicum.h>
#include <sys/exterrvar.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/elf.h>
#include <sys/filedesc.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/procctl.h>
#include <sys/racct.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/vmmeter.h>
#if defined(__amd64__) || defined(__i386__) /* for i386_read_exec */
#include <machine/md_var.h>
#endif

#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vm_page.h>
#include <vm/vnode_pager.h>

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

#ifdef HWT_HOOKS
#include <dev/hwt/hwt_hook.h>
#endif

int old_mlock = 0;
SYSCTL_INT(_vm, OID_AUTO, old_mlock, CTLFLAG_RWTUN, &old_mlock, 0,
    "Do not apply RLIMIT_MEMLOCK on mlockall");
static int mincore_mapped = 1;
SYSCTL_INT(_vm, OID_AUTO, mincore_mapped, CTLFLAG_RWTUN, &mincore_mapped, 0,
    "mincore reports mappings, not residency");
static int imply_prot_max = 0;
SYSCTL_INT(_vm, OID_AUTO, imply_prot_max, CTLFLAG_RWTUN, &imply_prot_max, 0,
    "Imply maximum page protections in mmap() when none are specified");

_Static_assert(MAXPAGESIZES <= 4, "MINCORE_SUPER too narrow");

#if defined(COMPAT_43)
int
ogetpagesize(struct thread *td, struct ogetpagesize_args *uap)
{

	td->td_retval[0] = PAGE_SIZE;
	return (0);
}
#endif /* COMPAT_43 */

/*
 * Memory Map (mmap) system call.  Note that the file offset
 * and address are allowed to be NOT page aligned, though if
 * the MAP_FIXED flag is set, both must have the same remainder
 * modulo the PAGE_SIZE (POSIX 1003.1b).  If the address is not
 * page-aligned, the actual mapping starts at trunc_page(addr)
 * and the return value is adjusted up by the page offset.
 *
 * Generally speaking, only character devices which are themselves
 * memory-based, such as a video framebuffer, can be mmap'd.  Otherwise
 * there would be no cache coherency between a descriptor and a VM mapping
 * both to the same character device.
 */
#ifndef _SYS_SYSPROTO_H_
struct mmap_args {
	void *addr;
	size_t len;
	int prot;
	int flags;
	int fd;
	long pad;
	off_t pos;
};
#endif

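/*
 * mmap(2) system call entry point: repackage the user-supplied arguments
 * into a struct mmap_req and let kern_mmap() do the real work.
 */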
int
sys_mmap(struct thread *td, struct mmap_args *uap)
{

	return (kern_mmap(td, &(struct mmap_req){
	    .mr_hint = (uintptr_t)uap->addr,
	    .mr_len = uap->len,
	    .mr_prot = uap->prot,
	    .mr_flags = uap->flags,
	    .mr_fd = uap->fd,
	    .mr_pos = uap->pos,
	    }));
}

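/*
 * Compute the maximum protection to use for a mapping whose caller did not
 * supply PROT_MAX() bits: the requested protection when PROTMAX is enabled
 * for the process (or vm.imply_prot_max is set), otherwise _PROT_ALL.
 */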
int
kern_mmap_maxprot(struct proc *p, int prot)
{

	if ((p->p_flag2 & P2_PROTMAX_DISABLE) != 0 ||
	    (p->p_fctl0 & NT_FREEBSD_FCTL_PROTMAX_DISABLE) != 0)
		return (_PROT_ALL);
	if (((p->p_flag2 & P2_PROTMAX_ENABLE) != 0 || imply_prot_max) &&
	    prot != PROT_NONE)
		return (prot);
	return (_PROT_ALL);
}

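/*
 * Guts of mmap(2), shared by the native system call and the compatibility
 * wrappers: validate the protection, flag and alignment arguments, choose a
 * hint address, and dispatch to the anonymous, guard or file-backed paths.
 */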
int
kern_mmap(struct thread *td, const struct mmap_req *mrp)
{
	struct vmspace *vms;
	struct file *fp;
	struct proc *p;
	off_t pos;
	vm_offset_t addr, orig_addr;
	vm_size_t len, pageoff, size;
	vm_prot_t cap_maxprot;
	int align, error, fd, flags, max_prot, prot;
	cap_rights_t rights;
	mmap_check_fp_fn check_fp_fn;

	orig_addr = addr = mrp->mr_hint;
	len = mrp->mr_len;
	prot = mrp->mr_prot;
	flags = mrp->mr_flags;
	fd = mrp->mr_fd;
	pos = mrp->mr_pos;
	check_fp_fn = mrp->mr_check_fp_fn;

	if ((prot & ~(_PROT_ALL | PROT_MAX(_PROT_ALL))) != 0) {
		return (EXTERROR(EINVAL, "unknown PROT bits"));
	}
	max_prot = PROT_MAX_EXTRACT(prot);
	prot = PROT_EXTRACT(prot);
	if (max_prot != 0 && (max_prot & prot) != prot) {
		return (EXTERROR(ENOTSUP, "prot is not subset of max_prot"));
	}

	p = td->td_proc;

	/*
	 * Always honor PROT_MAX if set.  If not, default to all
	 * permissions unless we're implying maximum permissions.
	 */
	if (max_prot == 0)
		max_prot = kern_mmap_maxprot(p, prot);

	vms = p->p_vmspace;
	fp = NULL;
	AUDIT_ARG_FD(fd);

	/*
	 * Ignore old flags that used to be defined but did not do anything.
	 */
	flags &= ~(MAP_RESERVED0020 | MAP_RESERVED0040);

	/*
	 * Enforce the constraints.
	 * Mapping of length 0 is only allowed for old binaries.
	 * Anonymous mapping shall specify -1 as file descriptor and
	 * zero position for new code.  Be nice to ancient a.out
	 * binaries and correct pos for anonymous mapping, since old
	 * ld.so sometimes issues anonymous map requests with non-zero
	 * pos.
	 */
	if (!SV_CURPROC_FLAG(SV_AOUT)) {
		if ((len == 0 && p->p_osrel >= P_OSREL_MAP_ANON) ||
		    ((flags & MAP_ANON) != 0 && (fd != -1 || pos != 0))) {
			return (EXTERROR(EINVAL,
			    "offset not zero/fd not -1 for MAP_ANON",
			    fd, pos));
		}
	} else {
		if ((flags & MAP_ANON) != 0)
			pos = 0;
	}

	if (flags & MAP_STACK) {
		if ((fd != -1) || ((prot & (PROT_READ | PROT_WRITE)) !=
		    (PROT_READ | PROT_WRITE))) {
			return (EXTERROR(EINVAL, "MAP_STACK with prot < rw",
			    prot));
		}
		flags |= MAP_ANON;
		pos = 0;
	}
	if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | MAP_HASSEMAPHORE |
	    MAP_STACK | MAP_NOSYNC | MAP_ANON | MAP_EXCL | MAP_NOCORE |
	    MAP_PREFAULT_READ | MAP_GUARD | MAP_32BIT |
	    MAP_ALIGNMENT_MASK)) != 0) {
		return (EXTERROR(EINVAL, "reserved flag set"));
	}
	if ((flags & (MAP_EXCL | MAP_FIXED)) == MAP_EXCL) {
		return (EXTERROR(EINVAL, "EXCL without FIXED"));
	}
	if ((flags & (MAP_SHARED | MAP_PRIVATE)) == (MAP_SHARED |
	    MAP_PRIVATE)) {
		return (EXTERROR(EINVAL, "both SHARED and PRIVATE set"));
	}
	if (prot != PROT_NONE &&
	    (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) != 0) {
		return (EXTERROR(EINVAL, "invalid prot", prot));
	}
	if ((flags & MAP_GUARD) != 0 && (prot != PROT_NONE || fd != -1 ||
	    pos != 0 || (flags & ~(MAP_FIXED | MAP_GUARD | MAP_EXCL |
	    MAP_32BIT | MAP_ALIGNMENT_MASK)) != 0)) {
		return (EXTERROR(EINVAL, "GUARD with wrong parameters"));
	}

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;

	/* Compute size from len by rounding (on both ends). */
	size = len + pageoff;			/* low end... */
	size = round_page(size);		/* hi end */
	/* Check for rounding up to zero. */
	if (len > size)
		return (ENOMEM);

	/* Ensure alignment is at least a page and fits in a pointer. */
	align = flags & MAP_ALIGNMENT_MASK;
	if (align != 0 && align != MAP_ALIGNED_SUPER &&
	    (align >> MAP_ALIGNMENT_SHIFT >= sizeof(void *) * NBBY ||
	    align >> MAP_ALIGNMENT_SHIFT < PAGE_SHIFT)) {
		return (EXTERROR(EINVAL, "bad alignment", align));
	}

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		addr -= pageoff;
		if ((addr & PAGE_MASK) != 0) {
			return (EXTERROR(EINVAL, "fixed mapping not aligned",
			    addr));
		}

		/* Address range must be all in user VM space. */
		if (!vm_map_range_valid(&vms->vm_map, addr, addr + size)) {
			EXTERROR(EINVAL, "mapping outside vm_map");
			return (EINVAL);
		}
		if (flags & MAP_32BIT && addr + size > MAP_32BIT_MAX_ADDR) {
			return (EXTERROR(EINVAL,
			    "fixed 32bit mapping does not fit into 4G"));
		}
	} else if (flags & MAP_32BIT) {
		/*
		 * For MAP_32BIT, override the hint if it is too high and
		 * do not bother moving the mapping past the heap (since
		 * the heap is usually above 2GB).
		 */
		if (addr + size > MAP_32BIT_MAX_ADDR)
			addr = 0;
	} else {
		/*
		 * XXX for non-fixed mappings where no hint is provided or
		 * the hint would fall in the potential heap space,
		 * place it after the end of the largest possible heap.
		 *
		 * For anonymous mappings within the address space of the
		 * calling process, the absence of a hint is handled at a
		 * lower level in order to implement different clustering
		 * strategies for ASLR.
		 */
		if (((flags & MAP_ANON) == 0 && addr == 0) ||
		    (addr >= round_page((vm_offset_t)vms->vm_taddr) &&
		    addr < round_page((vm_offset_t)vms->vm_daddr +
		    lim_max(td, RLIMIT_DATA))))
			addr = round_page((vm_offset_t)vms->vm_daddr +
			    lim_max(td, RLIMIT_DATA));
	}
	if (len == 0) {
		/*
		 * Return success without mapping anything for old
		 * binaries that request a page-aligned mapping of
		 * length 0.  For modern binaries, this function
		 * returns an error earlier.
		 */
		error = 0;
	} else if ((flags & MAP_GUARD) != 0) {
		error = vm_mmap_object(&vms->vm_map, &addr, size, VM_PROT_NONE,
		    VM_PROT_NONE, flags, NULL, pos, FALSE, td);
	} else if ((flags & MAP_ANON) != 0) {
		/*
		 * Mapping blank space is trivial.
		 *
		 * This relies on VM_PROT_* matching PROT_*.
		 */
		error = vm_mmap_object(&vms->vm_map, &addr, size, prot,
		    max_prot, flags, NULL, pos, FALSE, td);
	} else {
		/*
		 * Mapping file, get fp for validation and don't let the
		 * descriptor disappear on us if we block.  Check capability
		 * rights, but also return the maximum rights to be combined
		 * with maxprot later.
		 */
		cap_rights_init_one(&rights, CAP_MMAP);
		if (prot & PROT_READ)
			cap_rights_set_one(&rights, CAP_MMAP_R);
		if ((flags & MAP_SHARED) != 0) {
			if (prot & PROT_WRITE)
				cap_rights_set_one(&rights, CAP_MMAP_W);
		}
		if (prot & PROT_EXEC)
			cap_rights_set_one(&rights, CAP_MMAP_X);
		error = fget_mmap(td, fd, &rights, &cap_maxprot, &fp);
		if (error != 0)
			goto done;
		if ((flags & (MAP_SHARED | MAP_PRIVATE)) == 0 &&
		    p->p_osrel >= P_OSREL_MAP_FSTRICT) {
			EXTERROR(EINVAL, "neither SHARED nor PRIVATE req");
			error = EINVAL;
			goto done;
		}
		if (check_fp_fn != NULL) {
			error = check_fp_fn(fp, prot, max_prot & cap_maxprot,
			    flags);
			if (error != 0)
				goto done;
		}
		if (fp->f_ops == &shm_ops && shm_largepage(fp->f_data))
			addr = orig_addr;
		/* This relies on VM_PROT_* matching PROT_*. */
		error = fo_mmap(fp, &vms->vm_map, &addr, size, prot,
		    max_prot & cap_maxprot, flags, pos, td);
	}

	if (error == 0)
		td->td_retval[0] = addr + pageoff;
done:
	if (fp)
		fdrop(fp, td);

	return (error);
}

#if defined(COMPAT_FREEBSD6)
int
freebsd6_mmap(struct thread *td, struct freebsd6_mmap_args *uap)
{
	return (kern_mmap(td, &(struct mmap_req){
	    .mr_hint = (uintptr_t)uap->addr,
	    .mr_len = uap->len,
	    .mr_prot = uap->prot,
	    .mr_flags = uap->flags,
	    .mr_fd = uap->fd,
	    .mr_pos = uap->pos,
	    }));
}
#endif

#ifdef COMPAT_43
#ifndef _SYS_SYSPROTO_H_
struct ommap_args {
	caddr_t addr;
	int len;
	int prot;
	int flags;
	int fd;
	long pos;
};
#endif
int
ommap(struct thread *td, struct ommap_args *uap)
{
	return (kern_ommap(td, (uintptr_t)uap->addr, uap->len, uap->prot,
	    uap->flags, uap->fd, uap->pos));
}

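/*
 * Handle the historic 4.3BSD mmap() interface: translate the old protection
 * encoding and the OMAP_* flags into their modern equivalents, then call
 * kern_mmap().
 */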
int
kern_ommap(struct thread *td, uintptr_t hint, int len, int oprot,
    int oflags, int fd, long pos)
{
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC | PROT_WRITE,
		PROT_READ,
		PROT_EXEC | PROT_READ,
		PROT_WRITE | PROT_READ,
		PROT_EXEC | PROT_WRITE | PROT_READ,
	};
	int flags, prot;

	if (len < 0)
		return (EINVAL);

#define OMAP_ANON	0x0002
#define OMAP_COPY	0x0020
#define OMAP_SHARED	0x0010
#define OMAP_FIXED	0x0100

	prot = cvtbsdprot[oprot & 0x7];
#if (defined(COMPAT_FREEBSD32) && defined(__amd64__)) || defined(__i386__)
	if (i386_read_exec && SV_PROC_FLAG(td->td_proc, SV_ILP32) &&
	    prot != 0)
		prot |= PROT_EXEC;
#endif
	flags = 0;
	if (oflags & OMAP_ANON)
		flags |= MAP_ANON;
	if (oflags & OMAP_COPY)
		flags |= MAP_COPY;
	if (oflags & OMAP_SHARED)
		flags |= MAP_SHARED;
	else
		flags |= MAP_PRIVATE;
	if (oflags & OMAP_FIXED)
		flags |= MAP_FIXED;
	return (kern_mmap(td, &(struct mmap_req){
	    .mr_hint = hint,
	    .mr_len = len,
	    .mr_prot = prot,
	    .mr_flags = flags,
	    .mr_fd = fd,
	    .mr_pos = pos,
	    }));
}
#endif /* COMPAT_43 */

#ifndef _SYS_SYSPROTO_H_
struct msync_args {
	void *addr;
	size_t len;
	int flags;
};
#endif
int
sys_msync(struct thread *td, struct msync_args *uap)
{

	return (kern_msync(td, (uintptr_t)uap->addr, uap->len, uap->flags));
}

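/*
 * msync(2): flush (and optionally invalidate) the pages backing the given
 * range and map the vm_map_sync() status to an errno value.
 */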
int
kern_msync(struct thread *td, uintptr_t addr0, size_t size, int flags)
{
	vm_offset_t addr;
	vm_size_t pageoff;
	vm_map_t map;
	int rv;

	addr = addr0;
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	map = &td->td_proc->p_vmspace->vm_map;

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_sync(map, addr, addr + size, (flags & MS_ASYNC) == 0,
	    (flags & MS_INVALIDATE) != 0);
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
		return (ENOMEM);
	case KERN_INVALID_ARGUMENT:
		return (EBUSY);
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
}

#ifndef _SYS_SYSPROTO_H_
struct munmap_args {
	void *addr;
	size_t len;
};
#endif
int
sys_munmap(struct thread *td, struct munmap_args *uap)
{

	return (kern_munmap(td, (uintptr_t)uap->addr, uap->len));
}

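/*
 * munmap(2): remove all mappings in the page-rounded range, notifying the
 * hwpmc(4) and hwt(4) hooks when they are compiled in.
 */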
int
kern_munmap(struct thread *td, uintptr_t addr0, size_t size)
{
#ifdef HWPMC_HOOKS
	struct pmckern_map_out pkm;
	vm_map_entry_t entry;
	bool pmc_handled;
#endif
	vm_offset_t addr, end;
	vm_size_t pageoff;
	vm_map_t map;
	int rv;

	if (size == 0)
		return (EINVAL);

	addr = addr0;
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	end = addr + size;
	map = &td->td_proc->p_vmspace->vm_map;
	if (!vm_map_range_valid(map, addr, end))
		return (EINVAL);

	vm_map_lock(map);
#ifdef HWPMC_HOOKS
	pmc_handled = false;
	if (PMC_HOOK_INSTALLED(PMC_FN_MUNMAP)) {
		pmc_handled = true;
		/*
		 * Inform hwpmc if the address range being unmapped contains
		 * an executable region.
		 */
		pkm.pm_address = (uintptr_t) NULL;
		if (vm_map_lookup_entry(map, addr, &entry)) {
			for (; entry->start < end;
			    entry = vm_map_entry_succ(entry)) {
				if (vm_map_check_protection(map, entry->start,
				    entry->end, VM_PROT_EXECUTE) == TRUE) {
					pkm.pm_address = (uintptr_t) addr;
					pkm.pm_size = (size_t) size;
					break;
				}
			}
		}
	}
#endif
	rv = vm_map_delete(map, addr, end);

#ifdef HWT_HOOKS
	if (HWT_HOOK_INSTALLED && rv == KERN_SUCCESS) {
		struct hwt_record_entry ent;

		ent.addr = (uintptr_t) addr;
		ent.fullpath = NULL;
		ent.record_type = HWT_RECORD_MUNMAP;
		HWT_CALL_HOOK(td, HWT_RECORD, &ent);
	}
#endif

#ifdef HWPMC_HOOKS
	if (rv == KERN_SUCCESS && __predict_false(pmc_handled)) {
		/* downgrade the lock to prevent a LOR with the pmc-sx lock */
		vm_map_lock_downgrade(map);
		if (pkm.pm_address != (uintptr_t) NULL)
			PMC_CALL_HOOK(td, PMC_FN_MUNMAP, (void *) &pkm);
		vm_map_unlock_read(map);
	} else
#endif
		vm_map_unlock(map);

	return (vm_mmap_to_errno(rv));
}

#ifndef _SYS_SYSPROTO_H_
struct mprotect_args {
	const void *addr;
	size_t len;
	int prot;
};
#endif
int
sys_mprotect(struct thread *td, struct mprotect_args *uap)
{

	return (kern_mprotect(td, (uintptr_t)uap->addr, uap->len,
	    uap->prot, 0));
}

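/*
 * mprotect(2): change the protection, and the maximum protection if
 * PROT_MAX() bits were supplied, of the page-rounded range.
 */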
int
kern_mprotect(struct thread *td, uintptr_t addr0, size_t size, int prot,
    int flags)
{
	vm_offset_t addr;
	vm_size_t pageoff;
	int vm_error, max_prot;

	addr = addr0;
	if ((prot & ~(_PROT_ALL | PROT_MAX(_PROT_ALL))) != 0)
		return (EINVAL);
	max_prot = PROT_MAX_EXTRACT(prot);
	prot = PROT_EXTRACT(prot);
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
#ifdef COMPAT_FREEBSD32
	if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
		if (((addr + size) & 0xffffffff) < addr)
			return (EINVAL);
	} else
#endif
	if (addr + size < addr)
		return (EINVAL);

	flags |= VM_MAP_PROTECT_SET_PROT;
	if (max_prot != 0)
		flags |= VM_MAP_PROTECT_SET_MAXPROT;
	vm_error = vm_map_protect(&td->td_proc->p_vmspace->vm_map,
	    addr, addr + size, prot, max_prot, flags);

	switch (vm_error) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	case KERN_RESOURCE_SHORTAGE:
		return (ENOMEM);
	case KERN_OUT_OF_BOUNDS:
		return (ENOTSUP);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct minherit_args {
	void *addr;
	size_t len;
	int inherit;
};
#endif
int
sys_minherit(struct thread *td, struct minherit_args *uap)
{

	return (kern_minherit(td, (uintptr_t)uap->addr, uap->len,
	    uap->inherit));
}

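/*
 * minherit(2): set the fork-time inheritance attribute for every mapping in
 * the page-rounded range.
 */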
int
kern_minherit(struct thread *td, uintptr_t addr0, size_t len, int inherit0)
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_inherit_t inherit;

	addr = (vm_offset_t)addr0;
	size = len;
	inherit = inherit0;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_inherit(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct madvise_args {
	void *addr;
	size_t len;
	int behav;
};
#endif

int
sys_madvise(struct thread *td, struct madvise_args *uap)
{

	return (kern_madvise(td, (uintptr_t)uap->addr, uap->len, uap->behav));
}

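/*
 * madvise(2): route MADV_PROTECT to the process-protection code, otherwise
 * validate the range and pass the advice to vm_map_madvise().
 */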
int
kern_madvise(struct thread *td, uintptr_t addr0, size_t len, int behav)
{
	vm_map_t map;
	vm_offset_t addr, end, start;
	int flags;

	/*
	 * Check for our special case, advising the swap pager we are
	 * "immortal."
	 */
	if (behav == MADV_PROTECT) {
		flags = PPROT_SET;
		return (kern_procctl(td, P_PID, td->td_proc->p_pid,
		    PROC_SPROTECT, &flags));
	}

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	addr = addr0;
	if (!vm_map_range_valid(map, addr, addr + len))
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	start = trunc_page(addr);
	end = round_page(addr + len);

	/*
	 * vm_map_madvise() checks for illegal values of behav.
	 */
	return (vm_map_madvise(map, start, end, behav));
}

#ifndef _SYS_SYSPROTO_H_
struct mincore_args {
	const void *addr;
	size_t len;
	char *vec;
};
#endif

int
sys_mincore(struct thread *td, struct mincore_args *uap)
{

	return (kern_mincore(td, (uintptr_t)uap->addr, uap->len, uap->vec));
}

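/*
 * mincore(2): walk the map entries covering the request and report, one byte
 * per page in the user-supplied vector, whether each page is resident along
 * with its referenced/modified status.  The scan restarts whenever copying a
 * byte out to userspace changes the map timestamp.
 */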
int
kern_mincore(struct thread *td, uintptr_t addr0, size_t len, char *vec)
{
	pmap_t pmap;
	vm_map_t map;
	vm_map_entry_t current, entry;
	vm_object_t object;
	vm_offset_t addr, cend, end, first_addr;
	vm_paddr_t pa;
	vm_page_t m;
	vm_pindex_t pindex;
	int error, lastvecindex, mincoreinfo, vecindex;
	unsigned int timestamp;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page(addr0);
	end = round_page(addr0 + len);
	map = &td->td_proc->p_vmspace->vm_map;
	if (end > vm_map_max(map) || end < addr)
		return (ENOMEM);

	pmap = vmspace_pmap(td->td_proc->p_vmspace);

	vm_map_lock_read(map);
RestartScan:
	timestamp = map->timestamp;

	if (!vm_map_lookup_entry(map, addr, &entry)) {
		vm_map_unlock_read(map);
		return (ENOMEM);
	}

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current process's address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	while (entry->start < end) {
		/*
		 * check for contiguity
		 */
		current = entry;
		entry = vm_map_entry_succ(current);
		if (current->end < end &&
		    entry->start > current->end) {
			vm_map_unlock_read(map);
			return (ENOMEM);
		}

		/*
		 * ignore submaps (for now) or null objects
		 */
		if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) ||
		    current->object.vm_object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		for (; addr < cend; addr += PAGE_SIZE) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 */
			m = NULL;
			object = NULL;
retry:
			pa = 0;
			mincoreinfo = pmap_mincore(pmap, addr, &pa);
			if (mincore_mapped) {
				/*
				 * We only care about this pmap's
				 * mapping of the page, if any.
				 */
				;
			} else if (pa != 0) {
				/*
				 * The page is mapped by this process but not
				 * both accessed and modified.  It is also
				 * managed.  Acquire the object lock so that
				 * other mappings might be examined.  The page's
				 * identity may change at any point before its
				 * object lock is acquired, so re-validate if
				 * necessary.
				 */
				m = PHYS_TO_VM_PAGE(pa);
				while (object == NULL || m->object != object) {
					if (object != NULL)
						VM_OBJECT_WUNLOCK(object);
					object = atomic_load_ptr(&m->object);
					if (object == NULL)
						goto retry;
					VM_OBJECT_WLOCK(object);
				}
				if (pa != pmap_extract(pmap, addr))
					goto retry;
				KASSERT(vm_page_all_valid(m),
				    ("mincore: page %p is mapped but invalid",
				    m));
			} else if (mincoreinfo == 0) {
				/*
				 * The page is not mapped by this process.  If
				 * the object implements managed pages, then
				 * determine if the page is resident so that
				 * the mappings might be examined.
				 */
				if (current->object.vm_object != object) {
					if (object != NULL)
						VM_OBJECT_WUNLOCK(object);
					object = current->object.vm_object;
					VM_OBJECT_WLOCK(object);
				}
				if ((object->flags & OBJ_SWAP) != 0 ||
				    object->type == OBJT_VNODE) {
					pindex = OFF_TO_IDX(current->offset +
					    (addr - current->start));
					m = vm_page_lookup(object, pindex);
					if (m != NULL && vm_page_none_valid(m))
						m = NULL;
					if (m != NULL)
						mincoreinfo = MINCORE_INCORE;
				}
			}
			if (m != NULL) {
				VM_OBJECT_ASSERT_WLOCKED(m->object);

				/* Examine other mappings of the page. */
				if (m->dirty == 0 && pmap_is_modified(m))
					vm_page_dirty(m);
				if (m->dirty != 0)
					mincoreinfo |= MINCORE_MODIFIED_OTHER;

				/*
				 * The first test for PGA_REFERENCED is an
				 * optimization.  The second test is
				 * required because a concurrent pmap
				 * operation could clear the last reference
				 * and set PGA_REFERENCED before the call to
				 * pmap_is_referenced().
				 */
				if ((m->a.flags & PGA_REFERENCED) != 0 ||
				    pmap_is_referenced(m) ||
				    (m->a.flags & PGA_REFERENCED) != 0)
					mincoreinfo |= MINCORE_REFERENCED_OTHER;
			}
			if (object != NULL)
				VM_OBJECT_WUNLOCK(object);

			/*
			 * subyte may page fault.  In case it needs to modify
			 * the map, we release the lock.
			 */
			vm_map_unlock_read(map);

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = atop(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make sure
			 * that the byte vector is zeroed for those skipped
			 * entries.
			 */
			while ((lastvecindex + 1) < vecindex) {
				++lastvecindex;
				error = subyte(vec + lastvecindex, 0);
				if (error) {
					error = EFAULT;
					goto done2;
				}
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte(vec + vecindex, mincoreinfo);
			if (error) {
				error = EFAULT;
				goto done2;
			}

			/*
			 * If the map has changed, due to the subyte, the
			 * previous output may be invalid.
			 */
			vm_map_lock_read(map);
			if (timestamp != map->timestamp)
				goto RestartScan;

			lastvecindex = vecindex;
		}
	}

	/*
	 * subyte may page fault.  In case it needs to modify
	 * the map, we release the lock.
	 */
	vm_map_unlock_read(map);

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = atop(end - first_addr);
	while ((lastvecindex + 1) < vecindex) {
		++lastvecindex;
		error = subyte(vec + lastvecindex, 0);
		if (error) {
			error = EFAULT;
			goto done2;
		}
	}

	/*
	 * If the map has changed, due to the subyte, the previous
	 * output may be invalid.
	 */
	vm_map_lock_read(map);
	if (timestamp != map->timestamp)
		goto RestartScan;
	vm_map_unlock_read(map);
done2:
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct mlock_args {
	const void *addr;
	size_t len;
};
#endif
int
sys_mlock(struct thread *td, struct mlock_args *uap)
{

	return (kern_mlock(td->td_proc, td->td_ucred,
	    __DECONST(uintptr_t, uap->addr), uap->len));
}

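/*
 * mlock(2): wire the pages spanning the request after checking the mlock
 * privilege, vm.max_user_wired, RLIMIT_MEMLOCK and, when enabled, the racct
 * memlock accounting.
 */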
int
kern_mlock(struct proc *proc, struct ucred *cred, uintptr_t addr0, size_t len)
{
	vm_offset_t addr, end, last, start;
	vm_size_t npages, size;
	vm_map_t map;
	unsigned long nsize;
	int error;

	error = priv_check_cred(cred, PRIV_VM_MLOCK);
	if (error)
		return (error);
	addr = addr0;
	size = len;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	npages = atop(end - start);
	if (npages > vm_page_max_user_wired)
		return (ENOMEM);
	map = &proc->p_vmspace->vm_map;
	PROC_LOCK(proc);
	nsize = ptoa(npages + pmap_wired_count(map->pmap));
	if (nsize > lim_cur_proc(proc, RLIMIT_MEMLOCK)) {
		PROC_UNLOCK(proc);
		return (ENOMEM);
	}
	PROC_UNLOCK(proc);
#ifdef RACCT
	if (racct_enable) {
		PROC_LOCK(proc);
		error = racct_set(proc, RACCT_MEMLOCK, nsize);
		PROC_UNLOCK(proc);
		if (error != 0)
			return (ENOMEM);
	}
#endif
	error = vm_map_wire(map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (racct_enable && error != KERN_SUCCESS) {
		PROC_LOCK(proc);
		racct_set(proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(proc);
	}
#endif
	switch (error) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ARGUMENT:
		return (EINVAL);
	default:
		return (ENOMEM);
	}
}

#ifndef _SYS_SYSPROTO_H_
struct mlockall_args {
	int how;
};
#endif

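/*
 * mlockall(2): depending on the MCL_* flags, mark the map so that future
 * mappings are wired and/or wire everything currently mapped, subject to
 * RLIMIT_MEMLOCK and racct accounting.
 */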
int
sys_mlockall(struct thread *td, struct mlockall_args *uap)
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = priv_check(td, PRIV_VM_MLOCK);
	if (error)
		return (error);

	if ((uap->how == 0) || ((uap->how & ~(MCL_CURRENT|MCL_FUTURE)) != 0))
		return (EINVAL);

	/*
	 * If wiring all pages in the process would cause it to exceed
	 * a hard resource limit, return ENOMEM.
	 */
	if (!old_mlock && uap->how & MCL_CURRENT) {
		if (map->size > lim_cur(td, RLIMIT_MEMLOCK))
			return (ENOMEM);
	}
#ifdef RACCT
	if (racct_enable) {
		PROC_LOCK(td->td_proc);
		error = racct_set(td->td_proc, RACCT_MEMLOCK, map->size);
		PROC_UNLOCK(td->td_proc);
		if (error != 0)
			return (ENOMEM);
	}
#endif

	if (uap->how & MCL_FUTURE) {
		vm_map_lock(map);
		vm_map_modflags(map, MAP_WIREFUTURE, 0);
		vm_map_unlock(map);
		error = 0;
	}

	if (uap->how & MCL_CURRENT) {
		/*
		 * P1003.1-2001 mandates that all currently mapped pages
		 * will be memory resident and locked (wired) upon return
		 * from mlockall().  vm_map_wire() will wire pages, by
		 * calling vm_fault_wire() for each page in the region.
		 */
		error = vm_map_wire(map, vm_map_min(map), vm_map_max(map),
		    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
		if (error == KERN_SUCCESS)
			error = 0;
		else if (error == KERN_RESOURCE_SHORTAGE)
			error = ENOMEM;
		else
			error = EAGAIN;
	}
#ifdef RACCT
	if (racct_enable && error != KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct munlockall_args {
	register_t dummy;
};
#endif

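/*
 * munlockall(2): clear the wire-future flag and forcibly unwire every
 * mapping in the process address space.
 */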
int
sys_munlockall(struct thread *td, struct munlockall_args *uap)
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);

	/* Clear the MAP_WIREFUTURE flag from this vm_map. */
	vm_map_lock(map);
	vm_map_modflags(map, 0, MAP_WIREFUTURE);
	vm_map_unlock(map);

	/* Forcibly unwire all pages. */
	error = vm_map_unwire(map, vm_map_min(map), vm_map_max(map),
	    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
#ifdef RACCT
	if (racct_enable && error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK, 0);
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct munlock_args {
	const void *addr;
	size_t len;
};
#endif
int
sys_munlock(struct thread *td, struct munlock_args *uap)
{

	return (kern_munlock(td, (uintptr_t)uap->addr, uap->len));
}

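/*
 * munlock(2): unwire the pages spanning the request and update the racct
 * memlock accounting on success.
 */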
int
kern_munlock(struct thread *td, uintptr_t addr0, size_t size)
{
	vm_offset_t addr, end, last, start;
#ifdef RACCT
	vm_map_t map;
#endif
	int error;

	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);
	addr = addr0;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	error = vm_map_unwire(&td->td_proc->p_vmspace->vm_map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (racct_enable && error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		map = &td->td_proc->p_vmspace->vm_map;
		racct_set(td->td_proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(td->td_proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * vm_mmap_vnode()
 *
 * Helper function for vm_mmap.  Perform sanity check specific for mmap
 * operations on vnodes.
 */
int
vm_mmap_vnode(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct vnode *vp, vm_ooffset_t *foffp, vm_object_t *objp,
    boolean_t *writecounted)
{
	struct vattr va;
	vm_object_t obj;
	vm_ooffset_t foff;
	struct ucred *cred;
	int error, flags;
	bool writex;

	cred = td->td_ucred;
	writex = (*maxprotp & VM_PROT_WRITE) != 0 &&
	    (*flagsp & MAP_SHARED) != 0;
	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);
	AUDIT_ARG_VNODE1(vp);
	foff = *foffp;
	flags = *flagsp;
	obj = vp->v_object;
	if (vp->v_type == VREG) {
		/*
		 * Get the proper underlying object
		 */
		if (obj == NULL) {
			error = EINVAL;
			goto done;
		}
		if (obj->type == OBJT_VNODE && obj->handle != vp) {
			vput(vp);
			vp = (struct vnode *)obj->handle;
			/*
			 * Bypass filesystems obey the mpsafety of the
			 * underlying fs.  Tmpfs never bypasses.
			 */
			error = vget(vp, LK_SHARED);
			if (error != 0)
				return (error);
		}
		if (writex) {
			*writecounted = TRUE;
			vm_pager_update_writecount(obj, 0, objsize);
		}
	} else {
		error = EXTERROR(EINVAL, "non-reg file");
		goto done;
	}
	if ((error = VOP_GETATTR(vp, &va, cred)))
		goto done;
#ifdef MAC
	/* This relies on VM_PROT_* matching PROT_*. */
	error = mac_vnode_check_mmap(cred, vp, (int)prot, flags);
	if (error != 0)
		goto done;
#endif
	if ((flags & MAP_SHARED) != 0) {
		if ((va.va_flags & (SF_SNAPSHOT|IMMUTABLE|APPEND)) != 0) {
			if (prot & VM_PROT_WRITE) {
				error = EPERM;
				goto done;
			}
			*maxprotp &= ~VM_PROT_WRITE;
		}
	}
	/*
	 * If it is a regular file without any references
	 * we do not need to sync it.
	 * Adjust object size to be the size of actual file.
	 */
	objsize = round_page(va.va_size);
	if (va.va_nlink == 0)
		flags |= MAP_NOSYNC;
	if (obj->type == OBJT_VNODE) {
		obj = vm_pager_allocate(OBJT_VNODE, vp, objsize, prot, foff,
		    cred);
		if (obj == NULL) {
			error = ENOMEM;
			goto done;
		}
	} else {
		KASSERT((obj->flags & OBJ_SWAP) != 0, ("wrong object type"));
		vm_object_reference(obj);
#if VM_NRESERVLEVEL > 0
		if ((obj->flags & OBJ_COLORED) == 0) {
			VM_OBJECT_WLOCK(obj);
			vm_object_color(obj, 0);
			VM_OBJECT_WUNLOCK(obj);
		}
#endif
	}
	*objp = obj;
	*flagsp = flags;

	VOP_MMAPPED(vp);

done:
	if (error != 0 && *writecounted) {
		*writecounted = FALSE;
		vm_pager_update_writecount(obj, objsize, 0);
	}
	vput(vp);
	return (error);
}

/*
 * vm_mmap_cdev()
 *
 * Helper function for vm_mmap.  Perform sanity check specific for mmap
 * operations on cdevs.
 */
int
vm_mmap_cdev(struct thread *td, vm_size_t objsize, vm_prot_t prot,
    vm_prot_t *maxprotp, int *flagsp, struct cdev *cdev, struct cdevsw *dsw,
    vm_ooffset_t *foff, vm_object_t *objp)
{
	vm_object_t obj;
	int error, flags;

	flags = *flagsp;

	if (dsw->d_flags & D_MMAP_ANON) {
		*objp = NULL;
		*foff = 0;
		*maxprotp = VM_PROT_ALL;
		*flagsp |= MAP_ANON;
		return (0);
	}

	/*
	 * cdevs do not provide private mappings of any kind.
	 */
	if ((*maxprotp & VM_PROT_WRITE) == 0 &&
	    (prot & VM_PROT_WRITE) != 0)
		return (EACCES);
	if ((flags & (MAP_PRIVATE | MAP_COPY)) != 0) {
		return (EXTERROR(EINVAL, "cdev mapping must be shared"));
	}

	/*
	 * Force device mappings to be shared.
	 */
	flags |= MAP_SHARED;
#ifdef MAC_XXX
	error = mac_cdev_check_mmap(td->td_ucred, cdev, (int)prot);
	if (error != 0)
		return (error);
#endif
	/*
	 * First, try d_mmap_single().  If that is not implemented
	 * (returns ENODEV), fall back to using the device pager.
	 * Note that d_mmap_single() must return a reference to the
	 * object (it needs to bump the reference count of the object
	 * it returns somehow).
	 *
	 * XXX assumes VM_PROT_* == PROT_*
	 */
	error = dsw->d_mmap_single(cdev, foff, objsize, objp, (int)prot);
	if (error != ENODEV)
		return (error);
	obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff,
	    td->td_ucred);
	if (obj == NULL) {
		return (EXTERROR(EINVAL,
		    "cdev driver does not support mmap"));
	}
	*objp = obj;
	*flagsp = flags;
	return (0);
}

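/*
 * Kernel-internal mmap helper for vnode- and device-backed requests: run the
 * handle-specific checks (vm_mmap_vnode() or vm_mmap_cdev()) to obtain a VM
 * object, then map it with vm_mmap_object().
 */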
int
vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags,
    objtype_t handle_type, void *handle,
    vm_ooffset_t foff)
{
	vm_object_t object;
	struct thread *td = curthread;
	int error;
	boolean_t writecounted;

	if (size == 0) {
		return (EXTERROR(EINVAL, "zero-sized req"));
	}

	size = round_page(size);
	object = NULL;
	writecounted = FALSE;

	switch (handle_type) {
	case OBJT_DEVICE: {
		struct cdevsw *dsw;
		struct cdev *cdev;
		int ref;

		cdev = handle;
		dsw = dev_refthread(cdev, &ref);
		if (dsw == NULL)
			return (ENXIO);
		error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, cdev,
		    dsw, &foff, &object);
		dev_relthread(cdev, ref);
		break;
	}
	case OBJT_VNODE:
		error = vm_mmap_vnode(td, size, prot, &maxprot, &flags,
		    handle, &foff, &object, &writecounted);
		break;
	default:
		error = EXTERROR(EINVAL, "unsupported backing obj type",
		    handle_type);
		break;
	}
	if (error)
		return (error);

	error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object,
	    foff, writecounted, td);
	if (error != 0 && object != NULL) {
		/*
		 * If this mapping was accounted for in the vnode's
		 * writecount, then undo that now.
		 */
		if (writecounted)
			vm_pager_release_writecount(object, 0, size);
		vm_object_deallocate(object);
	}
	return (error);
}

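/*
 * Check that growing the map by "size" bytes stays within RLIMIT_VMEM (and
 * RLIMIT_MEMLOCK when MAP_WIREFUTURE is in effect) and charge the
 * corresponding racct resources.
 */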
int
kern_mmap_racct_check(struct thread *td, vm_map_t map, vm_size_t size)
{
	int error;

	RACCT_PROC_LOCK(td->td_proc);
	if (map->size + size > lim_cur(td, RLIMIT_VMEM)) {
		RACCT_PROC_UNLOCK(td->td_proc);
		return (ENOMEM);
	}
	if (racct_set(td->td_proc, RACCT_VMEM, map->size + size)) {
		RACCT_PROC_UNLOCK(td->td_proc);
		return (ENOMEM);
	}
	if (!old_mlock && map->flags & MAP_WIREFUTURE) {
		if (ptoa(pmap_wired_count(map->pmap)) + size >
		    lim_cur(td, RLIMIT_MEMLOCK)) {
			racct_set_force(td->td_proc, RACCT_VMEM, map->size);
			RACCT_PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		error = racct_set(td->td_proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)) + size);
		if (error != 0) {
			racct_set_force(td->td_proc, RACCT_VMEM, map->size);
			RACCT_PROC_UNLOCK(td->td_proc);
			return (error);
		}
	}
	RACCT_PROC_UNLOCK(td->td_proc);
	return (0);
}

/*
 * Internal version of mmap that maps a specific VM object into a
 * map.  Called by mmap for MAP_ANON, vm_mmap, shm_mmap, and vn_mmap.
 */
int
vm_mmap_object(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags, vm_object_t object, vm_ooffset_t foff,
    boolean_t writecounted, struct thread *td)
{
	vm_offset_t default_addr, max_addr;
	int docow, error, findspace, rv;
	bool curmap, fitit;

	curmap = map == &td->td_proc->p_vmspace->vm_map;
	if (curmap) {
		error = kern_mmap_racct_check(td, map, size);
		if (error != 0)
			return (error);
	}

	/*
	 * We currently can only deal with page aligned file offsets.
	 * The mmap() system call already enforces this by subtracting
	 * the page offset from the file offset, but checking here
	 * catches errors in device drivers (e.g. d_single_mmap()
	 * callbacks) and other internal mapping requests (such as in
	 * exec).
	 */
	if ((foff & PAGE_MASK) != 0) {
		return (EXTERROR(EINVAL, "offset not page-aligned", foff));
	}

	if ((flags & MAP_FIXED) == 0) {
		fitit = true;
		*addr = round_page(*addr);
	} else {
		if (*addr != trunc_page(*addr)) {
			return (EXTERROR(EINVAL,
			    "non-fixed mapping address not aligned", *addr));
		}
		fitit = false;
	}

	if (flags & MAP_ANON) {
		if (object != NULL) {
			return (EXTERROR(EINVAL,
			    "anon mapping backed by an object"));
		}
		if (foff != 0) {
			return (EXTERROR(EINVAL,
			    "anon mapping with non-zero offset"));
		}
		docow = 0;
	} else if (flags & MAP_PREFAULT_READ)
		docow = MAP_PREFAULT;
	else
		docow = MAP_PREFAULT_PARTIAL;

	if ((flags & (MAP_ANON|MAP_SHARED)) == 0)
		docow |= MAP_COPY_ON_WRITE;
	if (flags & MAP_NOSYNC)
		docow |= MAP_DISABLE_SYNCER;
	if (flags & MAP_NOCORE)
		docow |= MAP_DISABLE_COREDUMP;
	/* Shared memory is also shared with children. */
	if (flags & MAP_SHARED)
		docow |= MAP_INHERIT_SHARE;
	if (writecounted)
		docow |= MAP_WRITECOUNT;
	if (flags & MAP_STACK) {
		if (object != NULL) {
			return (EXTERROR(EINVAL,
			    "stack mapping backed by an object"));
		}
		docow |= MAP_STACK_AREA;
	}
	if ((flags & MAP_EXCL) != 0)
		docow |= MAP_CHECK_EXCL;
	if ((flags & MAP_GUARD) != 0)
		docow |= MAP_CREATE_GUARD;

	if (fitit) {
		if ((flags & MAP_ALIGNMENT_MASK) == MAP_ALIGNED_SUPER)
			findspace = VMFS_SUPER_SPACE;
		else if ((flags & MAP_ALIGNMENT_MASK) != 0)
			findspace = VMFS_ALIGNED_SPACE(flags >>
			    MAP_ALIGNMENT_SHIFT);
		else
			findspace = VMFS_OPTIMAL_SPACE;
		max_addr = 0;
		if ((flags & MAP_32BIT) != 0)
			max_addr = MAP_32BIT_MAX_ADDR;
		if (curmap) {
			default_addr =
			    round_page((vm_offset_t)td->td_proc->p_vmspace->
			    vm_daddr + lim_max(td, RLIMIT_DATA));
			if ((flags & MAP_32BIT) != 0)
				default_addr = 0;
			rv = vm_map_find_min(map, object, foff, addr, size,
			    default_addr, max_addr, findspace, prot, maxprot,
			    docow);
		} else {
			rv = vm_map_find(map, object, foff, addr, size,
			    max_addr, findspace, prot, maxprot, docow);
		}
	} else {
		rv = vm_map_fixed(map, object, foff, *addr, size,
		    prot, maxprot, docow);
	}

	if (rv == KERN_SUCCESS) {
		/*
		 * If the process has requested that all future mappings
		 * be wired, then heed this.
		 */
		if ((map->flags & MAP_WIREFUTURE) != 0) {
			vm_map_lock(map);
			if ((map->flags & MAP_WIREFUTURE) != 0)
				(void)vm_map_wire_locked(map, *addr,
				    *addr + size, VM_MAP_WIRE_USER |
				    ((flags & MAP_STACK) ? VM_MAP_WIRE_HOLESOK :
				    VM_MAP_WIRE_NOHOLES));
			vm_map_unlock(map);
		}
	}
	return (vm_mmap_to_errno(rv));
}

/*
 * Translate a Mach VM return code to zero on success or the appropriate errno
 * on failure.
 */
int
vm_mmap_to_errno(int rv)
{
	int error;

	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		error = ENOMEM;
		break;
	case KERN_PROTECTION_FAILURE:
		error = EACCES;
		break;
	default:
		error = EINVAL;
		break;
	}
	if ((curthread->td_pflags2 & (TDP2_UEXTERR | TDP2_EXTERR)) ==
	    TDP2_UEXTERR)
		EXTERROR(error, "mach error", rv);
	return (error);
}
