GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/vm/vm_object.c
1
/*-
2
* SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU)
3
*
4
* Copyright (c) 1991, 1993
5
* The Regents of the University of California. All rights reserved.
6
*
7
* This code is derived from software contributed to Berkeley by
8
* The Mach Operating System project at Carnegie-Mellon University.
9
*
10
* Redistribution and use in source and binary forms, with or without
11
* modification, are permitted provided that the following conditions
12
* are met:
13
* 1. Redistributions of source code must retain the above copyright
14
* notice, this list of conditions and the following disclaimer.
15
* 2. Redistributions in binary form must reproduce the above copyright
16
* notice, this list of conditions and the following disclaimer in the
17
* documentation and/or other materials provided with the distribution.
18
* 3. Neither the name of the University nor the names of its contributors
19
* may be used to endorse or promote products derived from this software
20
* without specific prior written permission.
21
*
22
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32
* SUCH DAMAGE.
33
*
34
*
35
* Copyright (c) 1987, 1990 Carnegie-Mellon University.
36
* All rights reserved.
37
*
38
* Authors: Avadis Tevanian, Jr., Michael Wayne Young
39
*
40
* Permission to use, copy, modify and distribute this software and
41
* its documentation is hereby granted, provided that both the copyright
42
* notice and this permission notice appear in all copies of the
43
* software, derivative works or modified versions, and any portions
44
* thereof, and that both notices appear in supporting documentation.
45
*
46
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
47
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
48
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
49
*
50
* Carnegie Mellon requests users of this software to return to
51
*
52
* Software Distribution Coordinator or [email protected]
53
* School of Computer Science
54
* Carnegie Mellon University
55
* Pittsburgh PA 15213-3890
56
*
57
* any improvements or extensions that they make and grant Carnegie the
58
* rights to redistribute these changes.
59
*/
60
61
/*
62
* Virtual memory object module.
63
*/
64
65
#include "opt_vm.h"
66
67
#include <sys/systm.h>
68
#include <sys/blockcount.h>
69
#include <sys/conf.h>
70
#include <sys/cpuset.h>
71
#include <sys/ipc.h>
72
#include <sys/jail.h>
73
#include <sys/limits.h>
74
#include <sys/lock.h>
75
#include <sys/mman.h>
76
#include <sys/mount.h>
77
#include <sys/kernel.h>
78
#include <sys/mutex.h>
79
#include <sys/pctrie.h>
80
#include <sys/proc.h>
81
#include <sys/refcount.h>
82
#include <sys/shm.h>
83
#include <sys/sx.h>
84
#include <sys/sysctl.h>
85
#include <sys/resourcevar.h>
87
#include <sys/rwlock.h>
88
#include <sys/user.h>
89
#include <sys/vnode.h>
90
#include <sys/vmmeter.h>
91
92
#include <vm/vm.h>
93
#include <vm/vm_param.h>
94
#include <vm/pmap.h>
95
#include <vm/vm_map.h>
96
#include <vm/vm_object.h>
97
#include <vm/vm_page.h>
98
#include <vm/vm_pageout.h>
99
#include <vm/vm_pager.h>
100
#include <vm/vm_phys.h>
101
#include <vm/vm_pagequeue.h>
102
#include <vm/swap_pager.h>
103
#include <vm/vm_kern.h>
104
#include <vm/vm_extern.h>
105
#include <vm/vm_radix.h>
106
#include <vm/vm_reserv.h>
107
#include <vm/uma.h>
108
109
static int old_msync;
110
SYSCTL_INT(_vm, OID_AUTO, old_msync, CTLFLAG_RW, &old_msync, 0,
111
"Use old (insecure) msync behavior");
112
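/*
 * Illustrative usage note (added for exposition, not in the upstream file):
 * the knob above is exposed as the vm.old_msync sysctl, e.g.
 *
 *	# sysctl vm.old_msync=1
 *
 * When set, vm_object_sync() below passes flags = 0 instead of
 * OBJPR_CLEANONLY, so msync() invalidation is not restricted to clean pages.
 */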
113
static boolean_t vm_object_page_remove_write(vm_page_t p, int flags,
114
boolean_t *allclean);
115
static void vm_object_backing_remove(vm_object_t object);
116
117
/*
118
* Virtual memory objects maintain the actual data
119
* associated with allocated virtual memory. A given
120
* page of memory exists within exactly one object.
121
*
122
* An object is only deallocated when all "references"
123
* are given up. Only one "reference" to a given
124
* region of an object should be writeable.
125
*
126
* Associated with each object is a list of all resident
127
* memory pages belonging to that object; this list is
128
* maintained by the "vm_page" module, and locked by the object's
129
* lock.
130
*
131
* Each object also records a "pager" routine which is
132
* used to retrieve (and store) pages to the proper backing
133
* storage. In addition, objects may be backed by other
134
* objects from which they were virtual-copied.
135
*
136
* The only items within the object structure which are
137
* modified after time of creation are:
138
* reference count locked by object's lock
139
* pager routine locked by object's lock
140
*
141
*/
142
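/*
 * Illustrative sketch of the reference rule described above (added for
 * exposition; the VM_OBJECT_EXAMPLES guard is hypothetical and keeps the
 * example out of any real build).  An object is only torn down once every
 * reference has been released.
 */
#ifdef VM_OBJECT_EXAMPLES
static void
example_object_lifecycle(void)
{
	vm_object_t obj;

	/* A fresh anonymous object starts with a single reference. */
	obj = vm_object_allocate_anon(atop(PAGE_SIZE), NULL, NULL);

	/* A second user, e.g. another mapping, takes its own reference. */
	vm_object_reference(obj);

	vm_object_deallocate(obj);	/* drops to one; pages are kept */
	vm_object_deallocate(obj);	/* last reference; object may be freed */
}
#endif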
143
struct object_q vm_object_list;
144
struct mtx vm_object_list_mtx; /* lock for object list and count */
145
146
struct vm_object kernel_object_store;
147
148
static SYSCTL_NODE(_vm_stats, OID_AUTO, object, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
149
"VM object stats");
150
151
static COUNTER_U64_DEFINE_EARLY(object_collapses);
152
SYSCTL_COUNTER_U64(_vm_stats_object, OID_AUTO, collapses, CTLFLAG_RD,
153
&object_collapses,
154
"VM object collapses");
155
156
static COUNTER_U64_DEFINE_EARLY(object_bypasses);
157
SYSCTL_COUNTER_U64(_vm_stats_object, OID_AUTO, bypasses, CTLFLAG_RD,
158
&object_bypasses,
159
"VM object bypasses");
160
161
static COUNTER_U64_DEFINE_EARLY(object_collapse_waits);
162
SYSCTL_COUNTER_U64(_vm_stats_object, OID_AUTO, collapse_waits, CTLFLAG_RD,
163
&object_collapse_waits,
164
"Number of sleeps for collapse");
165
166
static uma_zone_t obj_zone;
167
168
static int vm_object_zinit(void *mem, int size, int flags);
169
170
#ifdef INVARIANTS
171
static void vm_object_zdtor(void *mem, int size, void *arg);
172
173
static void
174
vm_object_zdtor(void *mem, int size, void *arg)
175
{
176
vm_object_t object;
177
178
object = (vm_object_t)mem;
179
KASSERT(object->ref_count == 0,
180
("object %p ref_count = %d", object, object->ref_count));
181
KASSERT(vm_radix_is_empty(&object->rtree),
182
("object %p has resident pages in its trie", object));
183
#if VM_NRESERVLEVEL > 0
184
KASSERT(LIST_EMPTY(&object->rvq),
185
("object %p has reservations",
186
object));
187
#endif
188
KASSERT(!vm_object_busied(object),
189
("object %p busy = %d", object, blockcount_read(&object->busy)));
190
KASSERT(object->resident_page_count == 0,
191
("object %p resident_page_count = %d",
192
object, object->resident_page_count));
193
KASSERT(atomic_load_int(&object->shadow_count) == 0,
194
("object %p shadow_count = %d",
195
object, atomic_load_int(&object->shadow_count)));
196
KASSERT(object->type == OBJT_DEAD,
197
("object %p has non-dead type %d",
198
object, object->type));
199
KASSERT(object->cred == NULL,
200
("object %p has non-zero charge cred %p",
201
object, object->cred));
202
}
203
#endif
204
205
static int
206
vm_object_zinit(void *mem, int size, int flags)
207
{
208
vm_object_t object;
209
210
object = (vm_object_t)mem;
211
rw_init_flags(&object->lock, "vmobject", RW_DUPOK | RW_NEW);
212
213
/* These are true for any object that has been freed */
214
object->type = OBJT_DEAD;
215
vm_radix_init(&object->rtree);
216
refcount_init(&object->ref_count, 0);
217
blockcount_init(&object->paging_in_progress);
218
blockcount_init(&object->busy);
219
object->resident_page_count = 0;
220
atomic_store_int(&object->shadow_count, 0);
221
object->flags = OBJ_DEAD;
222
223
mtx_lock(&vm_object_list_mtx);
224
TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
225
mtx_unlock(&vm_object_list_mtx);
226
return (0);
227
}
228
229
static void
230
_vm_object_allocate(objtype_t type, vm_pindex_t size, u_short flags,
231
vm_object_t object, void *handle)
232
{
233
LIST_INIT(&object->shadow_head);
234
235
object->type = type;
236
object->flags = flags;
237
if ((flags & OBJ_SWAP) != 0) {
238
pctrie_init(&object->un_pager.swp.swp_blks);
239
object->un_pager.swp.writemappings = 0;
240
}
241
242
/*
243
* Ensure that swap_pager_swapoff() iteration over object_list
244
* sees up to date type and pctrie head if it observed
245
* non-dead object.
246
*/
247
atomic_thread_fence_rel();
248
249
object->pg_color = 0;
250
object->size = size;
251
object->domain.dr_policy = NULL;
252
object->generation = 1;
253
object->cleangeneration = 1;
254
refcount_init(&object->ref_count, 1);
255
object->memattr = VM_MEMATTR_DEFAULT;
256
object->cred = NULL;
257
object->handle = handle;
258
object->backing_object = NULL;
259
object->backing_object_offset = (vm_ooffset_t) 0;
260
#if VM_NRESERVLEVEL > 0
261
LIST_INIT(&object->rvq);
262
#endif
263
umtx_shm_object_init(object);
264
}
265
266
/*
267
* vm_object_init:
268
*
269
* Initialize the VM objects module.
270
*/
271
void
272
vm_object_init(void)
273
{
274
TAILQ_INIT(&vm_object_list);
275
mtx_init(&vm_object_list_mtx, "vm object_list", NULL, MTX_DEF);
276
277
rw_init(&kernel_object->lock, "kernel vm object");
278
vm_radix_init(&kernel_object->rtree);
279
_vm_object_allocate(OBJT_PHYS, atop(VM_MAX_KERNEL_ADDRESS -
280
VM_MIN_KERNEL_ADDRESS), OBJ_UNMANAGED, kernel_object, NULL);
281
#if VM_NRESERVLEVEL > 0
282
kernel_object->flags |= OBJ_COLORED;
283
kernel_object->pg_color = (u_short)atop(VM_MIN_KERNEL_ADDRESS);
284
#endif
285
kernel_object->un_pager.phys.ops = &default_phys_pg_ops;
286
287
/*
288
* The lock portion of struct vm_object must be type stable due
289
* to vm_pageout_fallback_object_lock locking a vm object
290
* without holding any references to it.
291
*
292
* paging_in_progress is valid always. Lockless references to
293
* the objects may acquire pip and then check OBJ_DEAD.
294
*/
295
obj_zone = uma_zcreate("VM OBJECT", sizeof (struct vm_object), NULL,
296
#ifdef INVARIANTS
297
vm_object_zdtor,
298
#else
299
NULL,
300
#endif
301
vm_object_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
302
303
vm_radix_zinit();
304
}
305
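/*
 * Illustrative sketch of the uma_zcreate() pattern used for obj_zone above
 * (added for exposition; VM_OBJECT_EXAMPLES is a hypothetical guard).  The
 * zinit hook plus UMA_ZONE_NOFREE is what makes the embedded lock type
 * stable: items are initialized once and their memory is never returned
 * to the VM.
 */
#ifdef VM_OBJECT_EXAMPLES
struct example_item {
	struct mtx	lock;
	int		state;
};

static uma_zone_t example_zone;

static int
example_item_zinit(void *mem, int size, int flags)
{
	struct example_item *it;

	it = mem;
	mtx_init(&it->lock, "example item", NULL, MTX_DEF);
	it->state = 0;
	return (0);
}

static void
example_zone_create(void)
{
	example_zone = uma_zcreate("EXAMPLE ITEM", sizeof(struct example_item),
	    NULL, NULL, example_item_zinit, NULL, UMA_ALIGN_PTR,
	    UMA_ZONE_NOFREE);
}
#endif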
306
void
307
vm_object_clear_flag(vm_object_t object, u_short bits)
308
{
309
310
VM_OBJECT_ASSERT_WLOCKED(object);
311
object->flags &= ~bits;
312
}
313
314
/*
315
* Sets the default memory attribute for the specified object. Pages
316
* that are allocated to this object are by default assigned this memory
317
* attribute.
318
*
319
* Presently, this function must be called before any pages are allocated
320
* to the object. In the future, this requirement may be relaxed for
321
* "default" and "swap" objects.
322
*/
323
int
324
vm_object_set_memattr(vm_object_t object, vm_memattr_t memattr)
325
{
326
327
VM_OBJECT_ASSERT_WLOCKED(object);
328
329
if (object->type == OBJT_DEAD)
330
return (KERN_INVALID_ARGUMENT);
331
if (!vm_radix_is_empty(&object->rtree))
332
return (KERN_FAILURE);
333
334
object->memattr = memattr;
335
return (KERN_SUCCESS);
336
}
337
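/*
 * Illustrative sketch of the rule documented above: the memory attribute
 * must be set before any page is allocated to the object.  Added for
 * exposition under the hypothetical VM_OBJECT_EXAMPLES guard.
 */
#ifdef VM_OBJECT_EXAMPLES
static int
example_set_memattr(vm_object_t obj)
{
	int rv;

	VM_OBJECT_WLOCK(obj);
	/* Fails with KERN_FAILURE once the object has resident pages. */
	rv = vm_object_set_memattr(obj, VM_MEMATTR_DEFAULT);
	VM_OBJECT_WUNLOCK(obj);
	return (rv);
}
#endif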
338
void
339
vm_object_pip_add(vm_object_t object, short i)
340
{
341
342
if (i > 0)
343
blockcount_acquire(&object->paging_in_progress, i);
344
}
345
346
void
347
vm_object_pip_wakeup(vm_object_t object)
348
{
349
350
vm_object_pip_wakeupn(object, 1);
351
}
352
353
void
354
vm_object_pip_wakeupn(vm_object_t object, short i)
355
{
356
357
if (i > 0)
358
blockcount_release(&object->paging_in_progress, i);
359
}
360
361
/*
362
* Atomically drop the object lock and wait for pip to drain. This protects
363
* from sleep/wakeup races due to identity changes. The lock is not re-acquired
364
* on return.
365
*/
366
static void
367
vm_object_pip_sleep(vm_object_t object, const char *waitid)
368
{
369
370
(void)blockcount_sleep(&object->paging_in_progress, &object->lock,
371
waitid, PVM | PDROP);
372
}
373
374
void
375
vm_object_pip_wait(vm_object_t object, const char *waitid)
376
{
377
378
VM_OBJECT_ASSERT_WLOCKED(object);
379
380
blockcount_wait(&object->paging_in_progress, &object->lock, waitid,
381
PVM);
382
}
383
384
void
385
vm_object_pip_wait_unlocked(vm_object_t object, const char *waitid)
386
{
387
388
VM_OBJECT_ASSERT_UNLOCKED(object);
389
390
blockcount_wait(&object->paging_in_progress, NULL, waitid, PVM);
391
}
392
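/*
 * Illustrative sketch of the paging-in-progress pattern served by the
 * functions above (added for exposition; VM_OBJECT_EXAMPLES is a
 * hypothetical guard).  Holding pip keeps vm_object_terminate() from
 * completing while the object lock is dropped, the same trick
 * vm_object_deallocate_anon() uses below.
 */
#ifdef VM_OBJECT_EXAMPLES
static void
example_pip_window(vm_object_t obj)
{

	VM_OBJECT_WLOCK(obj);
	vm_object_pip_add(obj, 1);
	VM_OBJECT_WUNLOCK(obj);

	/*
	 * The lock is not held here, but the object cannot finish
	 * terminating until the pip count drains.
	 */

	vm_object_pip_wakeup(obj);
}
#endif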
393
/*
394
* vm_object_allocate:
395
*
396
* Returns a new object with the given size.
397
*/
398
vm_object_t
399
vm_object_allocate(objtype_t type, vm_pindex_t size)
400
{
401
vm_object_t object;
402
u_short flags;
403
404
switch (type) {
405
case OBJT_DEAD:
406
panic("vm_object_allocate: can't create OBJT_DEAD");
407
case OBJT_SWAP:
408
flags = OBJ_COLORED | OBJ_SWAP;
409
break;
410
case OBJT_DEVICE:
411
case OBJT_SG:
412
flags = OBJ_FICTITIOUS | OBJ_UNMANAGED;
413
break;
414
case OBJT_MGTDEVICE:
415
flags = OBJ_FICTITIOUS;
416
break;
417
case OBJT_PHYS:
418
flags = OBJ_UNMANAGED;
419
break;
420
case OBJT_VNODE:
421
flags = 0;
422
break;
423
default:
424
panic("vm_object_allocate: type %d is undefined or dynamic",
425
type);
426
}
427
object = (vm_object_t)uma_zalloc(obj_zone, M_WAITOK);
428
_vm_object_allocate(type, size, flags, object, NULL);
429
430
return (object);
431
}
432
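/*
 * Illustrative sketch (added for exposition; hypothetical VM_OBJECT_EXAMPLES
 * guard): object sizes are expressed in pages, so byte lengths go through
 * atop().  Passing OBJT_DEAD or a dynamic pager type to vm_object_allocate()
 * panics, as the switch above shows.
 */
#ifdef VM_OBJECT_EXAMPLES
static vm_object_t
example_alloc_swap_object(vm_size_t len)
{

	return (vm_object_allocate(OBJT_SWAP, atop(len)));
}
#endif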
433
vm_object_t
434
vm_object_allocate_dyn(objtype_t dyntype, vm_pindex_t size, u_short flags)
435
{
436
vm_object_t object;
437
438
MPASS(dyntype >= OBJT_FIRST_DYN /* && dyntype < nitems(pagertab) */);
439
object = (vm_object_t)uma_zalloc(obj_zone, M_WAITOK);
440
_vm_object_allocate(dyntype, size, flags, object, NULL);
441
442
return (object);
443
}
444
445
/*
446
* vm_object_allocate_anon:
447
*
448
* Returns a new default object of the given size and marked as
449
* anonymous memory for special split/collapse handling. Color
450
* to be initialized by the caller.
451
*/
452
vm_object_t
453
vm_object_allocate_anon(vm_pindex_t size, vm_object_t backing_object,
454
struct ucred *cred)
455
{
456
vm_object_t handle, object;
457
458
if (backing_object == NULL)
459
handle = NULL;
460
else if ((backing_object->flags & OBJ_ANON) != 0)
461
handle = backing_object->handle;
462
else
463
handle = backing_object;
464
object = uma_zalloc(obj_zone, M_WAITOK);
465
_vm_object_allocate(OBJT_SWAP, size,
466
OBJ_ANON | OBJ_ONEMAPPING | OBJ_SWAP, object, handle);
467
object->cred = cred;
468
return (object);
469
}
470
471
static void
472
vm_object_reference_vnode(vm_object_t object)
473
{
474
u_int old;
475
476
/*
477
* vnode objects need the lock for the first reference
478
* to serialize with vnode_object_deallocate().
479
*/
480
if (!refcount_acquire_if_gt(&object->ref_count, 0)) {
481
VM_OBJECT_RLOCK(object);
482
old = refcount_acquire(&object->ref_count);
483
if (object->type == OBJT_VNODE && old == 0)
484
vref(object->handle);
485
VM_OBJECT_RUNLOCK(object);
486
}
487
}
488
489
/*
490
* vm_object_reference:
491
*
492
* Acquires a reference to the given object.
493
*/
494
void
495
vm_object_reference(vm_object_t object)
496
{
497
498
if (object == NULL)
499
return;
500
501
if (object->type == OBJT_VNODE)
502
vm_object_reference_vnode(object);
503
else
504
refcount_acquire(&object->ref_count);
505
KASSERT((object->flags & OBJ_DEAD) == 0,
506
("vm_object_reference: Referenced dead object."));
507
}
508
509
/*
510
* vm_object_reference_locked:
511
*
512
* Gets another reference to the given object.
513
*
514
* The object must be locked.
515
*/
516
void
517
vm_object_reference_locked(vm_object_t object)
518
{
519
u_int old;
520
521
VM_OBJECT_ASSERT_LOCKED(object);
522
old = refcount_acquire(&object->ref_count);
523
if (object->type == OBJT_VNODE && old == 0)
524
vref(object->handle);
525
KASSERT((object->flags & OBJ_DEAD) == 0,
526
("vm_object_reference: Referenced dead object."));
527
}
528
529
/*
530
* Handle deallocating an object of type OBJT_VNODE.
531
*/
532
static void
533
vm_object_deallocate_vnode(vm_object_t object)
534
{
535
struct vnode *vp = (struct vnode *) object->handle;
536
bool last;
537
538
KASSERT(object->type == OBJT_VNODE,
539
("vm_object_deallocate_vnode: not a vnode object"));
540
KASSERT(vp != NULL, ("vm_object_deallocate_vnode: missing vp"));
541
542
/* Object lock to protect handle lookup. */
543
last = refcount_release(&object->ref_count);
544
VM_OBJECT_RUNLOCK(object);
545
546
if (!last)
547
return;
548
549
if (!umtx_shm_vnobj_persistent)
550
umtx_shm_object_terminated(object);
551
552
/* vrele may need the vnode lock. */
553
vrele(vp);
554
}
555
556
/*
557
* We dropped a reference on an object and discovered that it had a
558
* single remaining shadow. This is a sibling of the reference we
559
* dropped. Attempt to collapse the sibling and backing object.
560
*/
561
static vm_object_t
562
vm_object_deallocate_anon(vm_object_t backing_object)
563
{
564
vm_object_t object;
565
566
/* Fetch the final shadow. */
567
object = LIST_FIRST(&backing_object->shadow_head);
568
KASSERT(object != NULL &&
569
atomic_load_int(&backing_object->shadow_count) == 1,
570
("vm_object_anon_deallocate: ref_count: %d, shadow_count: %d",
571
backing_object->ref_count,
572
atomic_load_int(&backing_object->shadow_count)));
573
KASSERT((object->flags & OBJ_ANON) != 0,
574
("invalid shadow object %p", object));
575
576
if (!VM_OBJECT_TRYWLOCK(object)) {
577
/*
578
* Prevent object from disappearing since we do not have a
579
* reference.
580
*/
581
vm_object_pip_add(object, 1);
582
VM_OBJECT_WUNLOCK(backing_object);
583
VM_OBJECT_WLOCK(object);
584
vm_object_pip_wakeup(object);
585
} else
586
VM_OBJECT_WUNLOCK(backing_object);
587
588
/*
589
* Check for a collapse/terminate race with the last reference holder.
590
*/
591
if ((object->flags & (OBJ_DEAD | OBJ_COLLAPSING)) != 0 ||
592
!refcount_acquire_if_not_zero(&object->ref_count)) {
593
VM_OBJECT_WUNLOCK(object);
594
return (NULL);
595
}
596
backing_object = object->backing_object;
597
if (backing_object != NULL && (backing_object->flags & OBJ_ANON) != 0)
598
vm_object_collapse(object);
599
VM_OBJECT_WUNLOCK(object);
600
601
return (object);
602
}
603
604
/*
605
* vm_object_deallocate:
606
*
607
* Release a reference to the specified object,
608
* gained either through a vm_object_allocate
609
* or a vm_object_reference call. When all references
610
* are gone, storage associated with this object
611
* may be relinquished.
612
*
613
* No object may be locked.
614
*/
615
void
616
vm_object_deallocate(vm_object_t object)
617
{
618
vm_object_t temp;
619
bool released;
620
621
while (object != NULL) {
622
/*
623
* If the reference count goes to 0 we start calling
624
* vm_object_terminate() on the object chain. A ref count
625
* of 1 may be a special case depending on the shadow count
626
* being 0 or 1. These cases require a write lock on the
627
* object.
628
*/
629
if ((object->flags & OBJ_ANON) == 0)
630
released = refcount_release_if_gt(&object->ref_count, 1);
631
else
632
released = refcount_release_if_gt(&object->ref_count, 2);
633
if (released)
634
return;
635
636
if (object->type == OBJT_VNODE) {
637
VM_OBJECT_RLOCK(object);
638
if (object->type == OBJT_VNODE) {
639
vm_object_deallocate_vnode(object);
640
return;
641
}
642
VM_OBJECT_RUNLOCK(object);
643
}
644
645
VM_OBJECT_WLOCK(object);
646
KASSERT(object->ref_count > 0,
647
("vm_object_deallocate: object deallocated too many times: %d",
648
object->type));
649
650
/*
651
* If this is not the final reference to an anonymous
652
* object we may need to collapse the shadow chain.
653
*/
654
if (!refcount_release(&object->ref_count)) {
655
if (object->ref_count > 1 ||
656
atomic_load_int(&object->shadow_count) == 0) {
657
if ((object->flags & OBJ_ANON) != 0 &&
658
object->ref_count == 1)
659
vm_object_set_flag(object,
660
OBJ_ONEMAPPING);
661
VM_OBJECT_WUNLOCK(object);
662
return;
663
}
664
665
/* Handle collapsing last ref on anonymous objects. */
666
object = vm_object_deallocate_anon(object);
667
continue;
668
}
669
670
/*
671
* Handle the final reference to an object. We restart
672
* the loop with the backing object to avoid recursion.
673
*/
674
umtx_shm_object_terminated(object);
675
temp = object->backing_object;
676
if (temp != NULL) {
677
KASSERT(object->type == OBJT_SWAP,
678
("shadowed tmpfs v_object 2 %p", object));
679
vm_object_backing_remove(object);
680
}
681
682
KASSERT((object->flags & OBJ_DEAD) == 0,
683
("vm_object_deallocate: Terminating dead object."));
684
vm_object_set_flag(object, OBJ_DEAD);
685
vm_object_terminate(object);
686
object = temp;
687
}
688
}
689
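/*
 * Illustrative sketch of the "No object may be locked" rule above (added
 * for exposition; VM_OBJECT_EXAMPLES is a hypothetical guard).  Dropping
 * the final reference may terminate an entire shadow chain, so the caller
 * must not hold any object lock across vm_object_deallocate().
 */
#ifdef VM_OBJECT_EXAMPLES
static void
example_temporary_ref(vm_object_t obj)
{

	vm_object_reference(obj);

	/* ... use the object, locking and unlocking it as needed ... */

	vm_object_deallocate(obj);
}
#endif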
690
void
691
vm_object_destroy(vm_object_t object)
692
{
693
uma_zfree(obj_zone, object);
694
}
695
696
static void
697
vm_object_sub_shadow(vm_object_t object)
698
{
699
KASSERT(object->shadow_count >= 1,
700
("object %p sub_shadow count zero", object));
701
atomic_subtract_int(&object->shadow_count, 1);
702
}
703
704
static void
705
vm_object_backing_remove_locked(vm_object_t object)
706
{
707
vm_object_t backing_object;
708
709
backing_object = object->backing_object;
710
VM_OBJECT_ASSERT_WLOCKED(object);
711
VM_OBJECT_ASSERT_WLOCKED(backing_object);
712
713
KASSERT((object->flags & OBJ_COLLAPSING) == 0,
714
("vm_object_backing_remove: Removing collapsing object."));
715
716
vm_object_sub_shadow(backing_object);
717
if ((object->flags & OBJ_SHADOWLIST) != 0) {
718
LIST_REMOVE(object, shadow_list);
719
vm_object_clear_flag(object, OBJ_SHADOWLIST);
720
}
721
object->backing_object = NULL;
722
}
723
724
static void
725
vm_object_backing_remove(vm_object_t object)
726
{
727
vm_object_t backing_object;
728
729
VM_OBJECT_ASSERT_WLOCKED(object);
730
731
backing_object = object->backing_object;
732
if ((object->flags & OBJ_SHADOWLIST) != 0) {
733
VM_OBJECT_WLOCK(backing_object);
734
vm_object_backing_remove_locked(object);
735
VM_OBJECT_WUNLOCK(backing_object);
736
} else {
737
object->backing_object = NULL;
738
vm_object_sub_shadow(backing_object);
739
}
740
}
741
742
static void
743
vm_object_backing_insert_locked(vm_object_t object, vm_object_t backing_object)
744
{
745
746
VM_OBJECT_ASSERT_WLOCKED(object);
747
748
atomic_add_int(&backing_object->shadow_count, 1);
749
if ((backing_object->flags & OBJ_ANON) != 0) {
750
VM_OBJECT_ASSERT_WLOCKED(backing_object);
751
LIST_INSERT_HEAD(&backing_object->shadow_head, object,
752
shadow_list);
753
vm_object_set_flag(object, OBJ_SHADOWLIST);
754
}
755
object->backing_object = backing_object;
756
}
757
758
static void
759
vm_object_backing_insert(vm_object_t object, vm_object_t backing_object)
760
{
761
762
VM_OBJECT_ASSERT_WLOCKED(object);
763
764
if ((backing_object->flags & OBJ_ANON) != 0) {
765
VM_OBJECT_WLOCK(backing_object);
766
vm_object_backing_insert_locked(object, backing_object);
767
VM_OBJECT_WUNLOCK(backing_object);
768
} else {
769
object->backing_object = backing_object;
770
atomic_add_int(&backing_object->shadow_count, 1);
771
}
772
}
773
774
/*
775
* Insert an object into a backing_object's shadow list with an additional
776
* reference to the backing_object added.
777
*/
778
static void
779
vm_object_backing_insert_ref(vm_object_t object, vm_object_t backing_object)
780
{
781
782
VM_OBJECT_ASSERT_WLOCKED(object);
783
784
if ((backing_object->flags & OBJ_ANON) != 0) {
785
VM_OBJECT_WLOCK(backing_object);
786
KASSERT((backing_object->flags & OBJ_DEAD) == 0,
787
("shadowing dead anonymous object"));
788
vm_object_reference_locked(backing_object);
789
vm_object_backing_insert_locked(object, backing_object);
790
vm_object_clear_flag(backing_object, OBJ_ONEMAPPING);
791
VM_OBJECT_WUNLOCK(backing_object);
792
} else {
793
vm_object_reference(backing_object);
794
atomic_add_int(&backing_object->shadow_count, 1);
795
object->backing_object = backing_object;
796
}
797
}
798
799
/*
800
* Transfer a backing reference from backing_object to object.
801
*/
802
static void
803
vm_object_backing_transfer(vm_object_t object, vm_object_t backing_object)
804
{
805
vm_object_t new_backing_object;
806
807
/*
808
* Note that the reference to backing_object->backing_object
809
* moves from within backing_object to within object.
810
*/
811
vm_object_backing_remove_locked(object);
812
new_backing_object = backing_object->backing_object;
813
if (new_backing_object == NULL)
814
return;
815
if ((new_backing_object->flags & OBJ_ANON) != 0) {
816
VM_OBJECT_WLOCK(new_backing_object);
817
vm_object_backing_remove_locked(backing_object);
818
vm_object_backing_insert_locked(object, new_backing_object);
819
VM_OBJECT_WUNLOCK(new_backing_object);
820
} else {
821
/*
822
* shadow_count for new_backing_object is left
823
* unchanged, its reference provided by backing_object
824
* is replaced by object.
825
*/
826
object->backing_object = new_backing_object;
827
backing_object->backing_object = NULL;
828
}
829
}
830
831
/*
832
* Wait for a concurrent collapse to settle.
833
*/
834
static void
835
vm_object_collapse_wait(vm_object_t object)
836
{
837
838
VM_OBJECT_ASSERT_WLOCKED(object);
839
840
while ((object->flags & OBJ_COLLAPSING) != 0) {
841
vm_object_pip_wait(object, "vmcolwait");
842
counter_u64_add(object_collapse_waits, 1);
843
}
844
}
845
846
/*
847
* Waits for a backing object to clear a pending collapse and returns
848
* it locked if it is an ANON object.
849
*/
850
static vm_object_t
851
vm_object_backing_collapse_wait(vm_object_t object)
852
{
853
vm_object_t backing_object;
854
855
VM_OBJECT_ASSERT_WLOCKED(object);
856
857
for (;;) {
858
backing_object = object->backing_object;
859
if (backing_object == NULL ||
860
(backing_object->flags & OBJ_ANON) == 0)
861
return (NULL);
862
VM_OBJECT_WLOCK(backing_object);
863
if ((backing_object->flags & (OBJ_DEAD | OBJ_COLLAPSING)) == 0)
864
break;
865
VM_OBJECT_WUNLOCK(object);
866
vm_object_pip_sleep(backing_object, "vmbckwait");
867
counter_u64_add(object_collapse_waits, 1);
868
VM_OBJECT_WLOCK(object);
869
}
870
return (backing_object);
871
}
872
873
/*
874
* vm_object_terminate_single_page removes a pageable page from the object,
875
* and removes it from the paging queues and frees it, if it is not wired.
876
* It is invoked via callback from vm_object_terminate_pages.
877
*/
878
static void
879
vm_object_terminate_single_page(vm_page_t p, void *objectv)
880
{
881
vm_object_t object __diagused = objectv;
882
883
vm_page_assert_unbusied(p);
884
KASSERT(p->object == object &&
885
(p->ref_count & VPRC_OBJREF) != 0,
886
("%s: page %p is inconsistent", __func__, p));
887
p->object = NULL;
888
if (vm_page_drop(p, VPRC_OBJREF) == VPRC_OBJREF) {
889
KASSERT((object->flags & OBJ_UNMANAGED) != 0 ||
890
vm_page_astate_load(p).queue != PQ_NONE,
891
("%s: page %p does not belong to a queue", __func__, p));
892
VM_CNT_INC(v_pfree);
893
vm_page_free(p);
894
}
895
}
896
897
/*
898
* vm_object_terminate_pages removes any remaining pageable pages
899
* from the object and resets the object to an empty state.
900
*/
901
static void
902
vm_object_terminate_pages(vm_object_t object)
903
{
904
VM_OBJECT_ASSERT_WLOCKED(object);
905
906
/*
907
* If the object contained any pages, then reset it to an empty state.
908
* Rather than incrementally removing each page from the object, the
909
* page and object are reset to an empty state.
910
*/
911
if (object->resident_page_count == 0)
912
return;
913
914
vm_radix_reclaim_callback(&object->rtree,
915
vm_object_terminate_single_page, object);
916
object->resident_page_count = 0;
917
if (object->type == OBJT_VNODE)
918
vdrop(object->handle);
919
}
920
921
/*
922
* vm_object_terminate actually destroys the specified object, freeing
923
* up all previously used resources.
924
*
925
* The object must be locked.
926
* This routine may block.
927
*/
928
void
929
vm_object_terminate(vm_object_t object)
930
{
931
932
VM_OBJECT_ASSERT_WLOCKED(object);
933
KASSERT((object->flags & OBJ_DEAD) != 0,
934
("terminating non-dead obj %p", object));
935
KASSERT((object->flags & OBJ_COLLAPSING) == 0,
936
("terminating collapsing obj %p", object));
937
KASSERT(object->backing_object == NULL,
938
("terminating shadow obj %p", object));
939
940
/*
941
* Wait for the pageout daemon and other current users to be
942
* done with the object. Note that new paging_in_progress
943
* users can come after this wait, but they must check
944
* OBJ_DEAD flag set (without unlocking the object), and avoid
945
* the object being terminated.
946
*/
947
vm_object_pip_wait(object, "objtrm");
948
949
KASSERT(object->ref_count == 0,
950
("vm_object_terminate: object with references, ref_count=%d",
951
object->ref_count));
952
953
if ((object->flags & OBJ_PG_DTOR) == 0)
954
vm_object_terminate_pages(object);
955
956
#if VM_NRESERVLEVEL > 0
957
if (__predict_false(!LIST_EMPTY(&object->rvq)))
958
vm_reserv_break_all(object);
959
#endif
960
961
KASSERT(object->cred == NULL || (object->flags & OBJ_SWAP) != 0,
962
("%s: non-swap obj %p has cred", __func__, object));
963
964
/*
965
* Let the pager know object is dead.
966
*/
967
vm_pager_deallocate(object);
968
VM_OBJECT_WUNLOCK(object);
969
970
vm_object_destroy(object);
971
}
972
973
/*
974
* Make the page read-only so that we can clear the object flags. However, if
975
* this is a nosync mmap then the object is likely to stay dirty so do not
976
* mess with the page and do not clear the object flags. Returns TRUE if the
977
* page should be flushed, and FALSE otherwise.
978
*/
979
static boolean_t
980
vm_object_page_remove_write(vm_page_t p, int flags, boolean_t *allclean)
981
{
982
983
vm_page_assert_busied(p);
984
985
/*
986
* If we have been asked to skip nosync pages and this is a
987
* nosync page, skip it. Note that the object flags were not
988
* cleared in this case so we do not have to set them.
989
*/
990
if ((flags & OBJPC_NOSYNC) != 0 && (p->a.flags & PGA_NOSYNC) != 0) {
991
*allclean = FALSE;
992
return (FALSE);
993
} else {
994
pmap_remove_write(p);
995
return (p->dirty != 0);
996
}
997
}
998
999
static int
1000
vm_object_page_clean_flush(struct pctrie_iter *pages, vm_page_t p,
1001
int pagerflags, int flags, boolean_t *allclean, bool *eio)
1002
{
1003
vm_page_t ma[vm_pageout_page_count];
1004
int count, runlen;
1005
1006
vm_page_assert_xbusied(p);
1007
ma[0] = p;
1008
runlen = vm_radix_iter_lookup_range(pages, p->pindex + 1,
1009
&ma[1], vm_pageout_page_count - 1);
1010
for (count = 1; count <= runlen; count++) {
1011
p = ma[count];
1012
if (vm_page_tryxbusy(p) == 0)
1013
break;
1014
if (!vm_object_page_remove_write(p, flags, allclean)) {
1015
vm_page_xunbusy(p);
1016
break;
1017
}
1018
}
1019
1020
return (vm_pageout_flush(ma, count, pagerflags, eio));
1021
}
1022
1023
/*
1024
* vm_object_page_clean
1025
*
1026
* Clean all dirty pages in the specified range of object. Leaves page
1027
* on whatever queue it is currently on. If NOSYNC is set then do not
1028
* write out pages with PGA_NOSYNC set (originally comes from MAP_NOSYNC),
1029
* leaving the object dirty.
1030
*
1031
* For swap objects backing tmpfs regular files, do not flush anything,
1032
* but remove write protection on the mapped pages to update mtime through
1033
* mmaped writes.
1034
*
1035
* When stuffing pages asynchronously, allow clustering. XXX we need a
1036
* synchronous clustering mode implementation.
1037
*
1038
* Odd semantics: if start == end, we clean everything.
1039
*
1040
* The object must be locked.
1041
*
1042
* Returns FALSE if some page from the range was not written, as
1043
* reported by the pager, and TRUE otherwise.
1044
*/
1045
boolean_t
1046
vm_object_page_clean(vm_object_t object, vm_ooffset_t start, vm_ooffset_t end,
1047
int flags)
1048
{
1049
struct pctrie_iter pages;
1050
vm_page_t np, p;
1051
vm_pindex_t pi, tend, tstart;
1052
int curgeneration, n, pagerflags;
1053
boolean_t res, allclean;
1054
bool eio;
1055
1056
VM_OBJECT_ASSERT_WLOCKED(object);
1057
1058
if (!vm_object_mightbedirty(object) || object->resident_page_count == 0)
1059
return (TRUE);
1060
1061
pagerflags = (flags & (OBJPC_SYNC | OBJPC_INVAL)) != 0 ?
1062
VM_PAGER_PUT_SYNC : VM_PAGER_CLUSTER_OK;
1063
pagerflags |= (flags & OBJPC_INVAL) != 0 ? VM_PAGER_PUT_INVAL : 0;
1064
1065
tstart = OFF_TO_IDX(start);
1066
tend = (end == 0) ? object->size : OFF_TO_IDX(end + PAGE_MASK);
1067
allclean = tstart == 0 && tend >= object->size;
1068
res = TRUE;
1069
vm_page_iter_init(&pages, object);
1070
1071
rescan:
1072
curgeneration = object->generation;
1073
1074
for (p = vm_radix_iter_lookup_ge(&pages, tstart); p != NULL; p = np) {
1075
pi = p->pindex;
1076
if (pi >= tend)
1077
break;
1078
if (vm_page_none_valid(p)) {
1079
np = vm_radix_iter_step(&pages);
1080
continue;
1081
}
1082
if (!vm_page_busy_acquire(p, VM_ALLOC_WAITFAIL)) {
1083
pctrie_iter_reset(&pages);
1084
if (object->generation != curgeneration &&
1085
(flags & OBJPC_SYNC) != 0)
1086
goto rescan;
1087
np = vm_radix_iter_lookup_ge(&pages, pi);
1088
continue;
1089
}
1090
if (!vm_object_page_remove_write(p, flags, &allclean)) {
1091
np = vm_radix_iter_step(&pages);
1092
vm_page_xunbusy(p);
1093
continue;
1094
}
1095
if (object->type == OBJT_VNODE) {
1096
n = vm_object_page_clean_flush(&pages, p, pagerflags,
1097
flags, &allclean, &eio);
1098
pctrie_iter_reset(&pages);
1099
if (eio) {
1100
res = FALSE;
1101
allclean = FALSE;
1102
}
1103
if (object->generation != curgeneration &&
1104
(flags & OBJPC_SYNC) != 0)
1105
goto rescan;
1106
1107
/*
1108
* If the VOP_PUTPAGES() did a truncated write, so
1109
* that even the first page of the run is not fully
1110
* written, vm_pageout_flush() returns 0 as the run
1111
* length. Since the condition that caused truncated
1112
* write may be permanent, e.g. exhausted free space,
1113
* accepting n == 0 would cause an infinite loop.
1114
*
1115
* Forwarding the iterator leaves the unwritten page
1116
* behind, but there is not much we can do there if
1117
* the filesystem refuses to write it.
1118
*/
1119
if (n == 0) {
1120
n = 1;
1121
allclean = FALSE;
1122
}
1123
} else {
1124
n = 1;
1125
vm_page_xunbusy(p);
1126
}
1127
np = vm_radix_iter_lookup_ge(&pages, pi + n);
1128
}
1129
#if 0
1130
VOP_FSYNC(vp, (pagerflags & VM_PAGER_PUT_SYNC) ? MNT_WAIT : 0);
1131
#endif
1132
1133
/*
1134
* Leave updating cleangeneration for tmpfs objects to tmpfs
1135
* scan. It needs to update mtime, which happens for other
1136
* filesystems during page writeouts.
1137
*/
1138
if (allclean && object->type == OBJT_VNODE)
1139
object->cleangeneration = curgeneration;
1140
return (res);
1141
}
1142
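/*
 * Illustrative sketch of the "start == end cleans everything" semantics
 * documented above (added for exposition; hypothetical VM_OBJECT_EXAMPLES
 * guard).  The caller provides the write lock; OBJPC_SYNC requests a
 * synchronous writeout.
 */
#ifdef VM_OBJECT_EXAMPLES
static boolean_t
example_clean_whole_object(vm_object_t obj)
{
	boolean_t res;

	VM_OBJECT_WLOCK(obj);
	res = vm_object_page_clean(obj, 0, 0, OBJPC_SYNC);
	VM_OBJECT_WUNLOCK(obj);
	return (res);
}
#endif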
1143
/*
1144
* Note that there is absolutely no sense in writing out
1145
* anonymous objects, so we track down the vnode object
1146
* to write out.
1147
* We invalidate (remove) all pages from the address space
1148
* for semantic correctness.
1149
*
1150
* If the backing object is a device object with unmanaged pages, then any
1151
* mappings to the specified range of pages must be removed before this
1152
* function is called.
1153
*
1154
* Note: certain anonymous maps, such as MAP_NOSYNC maps,
1155
* may start out with a NULL object.
1156
*/
1157
boolean_t
1158
vm_object_sync(vm_object_t object, vm_ooffset_t offset, vm_size_t size,
1159
boolean_t syncio, boolean_t invalidate)
1160
{
1161
vm_object_t backing_object;
1162
struct vnode *vp;
1163
struct mount *mp;
1164
int error, flags, fsync_after;
1165
boolean_t res;
1166
1167
if (object == NULL)
1168
return (TRUE);
1169
res = TRUE;
1170
error = 0;
1171
VM_OBJECT_WLOCK(object);
1172
while ((backing_object = object->backing_object) != NULL) {
1173
VM_OBJECT_WLOCK(backing_object);
1174
offset += object->backing_object_offset;
1175
VM_OBJECT_WUNLOCK(object);
1176
object = backing_object;
1177
if (object->size < OFF_TO_IDX(offset + size))
1178
size = IDX_TO_OFF(object->size) - offset;
1179
}
1180
/*
1181
* Flush pages if writing is allowed, invalidate them
1182
* if invalidation requested. Pages undergoing I/O
1183
* will be ignored by vm_object_page_remove().
1184
*
1185
* We cannot lock the vnode and then wait for paging
1186
* to complete without deadlocking against vm_fault.
1187
* Instead we simply call vm_object_page_remove() and
1188
* allow it to block internally on a page-by-page
1189
* basis when it encounters pages undergoing async
1190
* I/O.
1191
*/
1192
if (object->type == OBJT_VNODE &&
1193
vm_object_mightbedirty(object) != 0 &&
1194
((vp = object->handle)->v_vflag & VV_NOSYNC) == 0) {
1195
VM_OBJECT_WUNLOCK(object);
1196
(void)vn_start_write(vp, &mp, V_WAIT);
1197
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1198
if (syncio && !invalidate && offset == 0 &&
1199
atop(size) == object->size) {
1200
/*
1201
* If syncing the whole mapping of the file,
1202
* it is faster to schedule all the writes in
1203
* async mode, also allowing the clustering,
1204
* and then wait for i/o to complete.
1205
*/
1206
flags = 0;
1207
fsync_after = TRUE;
1208
} else {
1209
flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
1210
flags |= invalidate ? (OBJPC_SYNC | OBJPC_INVAL) : 0;
1211
fsync_after = FALSE;
1212
}
1213
VM_OBJECT_WLOCK(object);
1214
res = vm_object_page_clean(object, offset, offset + size,
1215
flags);
1216
VM_OBJECT_WUNLOCK(object);
1217
if (fsync_after) {
1218
for (;;) {
1219
error = VOP_FSYNC(vp, MNT_WAIT, curthread);
1220
if (error != ERELOOKUP)
1221
break;
1222
1223
/*
1224
* Allow SU/bufdaemon to handle more
1225
* dependencies in the meantime.
1226
*/
1227
VOP_UNLOCK(vp);
1228
vn_finished_write(mp);
1229
1230
(void)vn_start_write(vp, &mp, V_WAIT);
1231
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1232
}
1233
}
1234
VOP_UNLOCK(vp);
1235
vn_finished_write(mp);
1236
if (error != 0)
1237
res = FALSE;
1238
VM_OBJECT_WLOCK(object);
1239
}
1240
if ((object->type == OBJT_VNODE ||
1241
object->type == OBJT_DEVICE) && invalidate) {
1242
if (object->type == OBJT_DEVICE)
1243
/*
1244
* The option OBJPR_NOTMAPPED must be passed here
1245
* because vm_object_page_remove() cannot remove
1246
* unmanaged mappings.
1247
*/
1248
flags = OBJPR_NOTMAPPED;
1249
else if (old_msync)
1250
flags = 0;
1251
else
1252
flags = OBJPR_CLEANONLY;
1253
vm_object_page_remove(object, OFF_TO_IDX(offset),
1254
OFF_TO_IDX(offset + size + PAGE_MASK), flags);
1255
}
1256
VM_OBJECT_WUNLOCK(object);
1257
return (res);
1258
}
1259
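/*
 * Illustrative sketch (added for exposition; hypothetical VM_OBJECT_EXAMPLES
 * guard): roughly what an msync(MS_SYNC | MS_INVALIDATE) request asks of the
 * object backing a mapping.  The object is passed in unlocked;
 * vm_object_sync() takes the locks it needs.
 */
#ifdef VM_OBJECT_EXAMPLES
static boolean_t
example_msync_object(vm_object_t obj, vm_ooffset_t off, vm_size_t len)
{

	return (vm_object_sync(obj, off, len, TRUE, TRUE));
}
#endif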
1260
/*
1261
* Determine whether the given advice can be applied to the object. Advice is
1262
* not applied to unmanaged pages since they never belong to page queues, and
1263
* since MADV_FREE is destructive, it can apply only to anonymous pages that
1264
* have been mapped at most once.
1265
*/
1266
static bool
1267
vm_object_advice_applies(vm_object_t object, int advice)
1268
{
1269
1270
if ((object->flags & OBJ_UNMANAGED) != 0)
1271
return (false);
1272
if (advice != MADV_FREE)
1273
return (true);
1274
return ((object->flags & (OBJ_ONEMAPPING | OBJ_ANON)) ==
1275
(OBJ_ONEMAPPING | OBJ_ANON));
1276
}
1277
1278
static void
1279
vm_object_madvise_freespace(vm_object_t object, int advice, vm_pindex_t pindex,
1280
vm_size_t size)
1281
{
1282
1283
if (advice == MADV_FREE)
1284
vm_pager_freespace(object, pindex, size);
1285
}
1286
1287
/*
1288
* vm_object_madvise:
1289
*
1290
* Implements the madvise function at the object/page level.
1291
*
1292
* MADV_WILLNEED (any object)
1293
*
1294
* Activate the specified pages if they are resident.
1295
*
1296
* MADV_DONTNEED (any object)
1297
*
1298
* Deactivate the specified pages if they are resident.
1299
*
1300
* MADV_FREE (OBJT_SWAP objects, OBJ_ONEMAPPING only)
1301
*
1302
* Deactivate and clean the specified pages if they are
1303
* resident. This permits the process to reuse the pages
1304
* without faulting or the kernel to reclaim the pages
1305
* without I/O.
1306
*/
1307
void
1308
vm_object_madvise(vm_object_t object, vm_pindex_t pindex, vm_pindex_t end,
1309
int advice)
1310
{
1311
struct pctrie_iter pages;
1312
vm_pindex_t tpindex;
1313
vm_object_t backing_object, tobject;
1314
vm_page_t m, tm;
1315
1316
if (object == NULL)
1317
return;
1318
1319
vm_page_iter_init(&pages, object);
1320
relookup:
1321
VM_OBJECT_WLOCK(object);
1322
if (!vm_object_advice_applies(object, advice)) {
1323
VM_OBJECT_WUNLOCK(object);
1324
return;
1325
}
1326
for (m = vm_radix_iter_lookup_ge(&pages, pindex); pindex < end;
1327
pindex++) {
1328
tobject = object;
1329
1330
/*
1331
* If the next page isn't resident in the top-level object, we
1332
* need to search the shadow chain. When applying MADV_FREE, we
1333
* take care to release any swap space used to store
1334
* non-resident pages.
1335
*/
1336
if (m == NULL || pindex < m->pindex) {
1337
/*
1338
* Optimize a common case: if the top-level object has
1339
* no backing object, we can skip over the non-resident
1340
* range in constant time.
1341
*/
1342
if (object->backing_object == NULL) {
1343
tpindex = (m != NULL && m->pindex < end) ?
1344
m->pindex : end;
1345
vm_object_madvise_freespace(object, advice,
1346
pindex, tpindex - pindex);
1347
if ((pindex = tpindex) == end)
1348
break;
1349
goto next_page;
1350
}
1351
1352
tpindex = pindex;
1353
do {
1354
vm_object_madvise_freespace(tobject, advice,
1355
tpindex, 1);
1356
/*
1357
* Prepare to search the next object in the
1358
* chain.
1359
*/
1360
backing_object = tobject->backing_object;
1361
if (backing_object == NULL)
1362
goto next_pindex;
1363
VM_OBJECT_WLOCK(backing_object);
1364
tpindex +=
1365
OFF_TO_IDX(tobject->backing_object_offset);
1366
if (tobject != object)
1367
VM_OBJECT_WUNLOCK(tobject);
1368
tobject = backing_object;
1369
if (!vm_object_advice_applies(tobject, advice))
1370
goto next_pindex;
1371
} while ((tm = vm_page_lookup(tobject, tpindex)) ==
1372
NULL);
1373
} else {
1374
next_page:
1375
tm = m;
1376
m = vm_radix_iter_step(&pages);
1377
}
1378
1379
/*
1380
* If the page is not in a normal state, skip it. The page
1381
* can not be invalidated while the object lock is held.
1382
*/
1383
if (!vm_page_all_valid(tm) || vm_page_wired(tm))
1384
goto next_pindex;
1385
KASSERT((tm->flags & PG_FICTITIOUS) == 0,
1386
("vm_object_madvise: page %p is fictitious", tm));
1387
KASSERT((tm->oflags & VPO_UNMANAGED) == 0,
1388
("vm_object_madvise: page %p is not managed", tm));
1389
if (vm_page_tryxbusy(tm) == 0) {
1390
if (object != tobject)
1391
VM_OBJECT_WUNLOCK(object);
1392
if (advice == MADV_WILLNEED) {
1393
/*
1394
* Reference the page before unlocking and
1395
* sleeping so that the page daemon is less
1396
* likely to reclaim it.
1397
*/
1398
vm_page_aflag_set(tm, PGA_REFERENCED);
1399
}
1400
if (!vm_page_busy_sleep(tm, "madvpo", 0))
1401
VM_OBJECT_WUNLOCK(tobject);
1402
pctrie_iter_reset(&pages);
1403
goto relookup;
1404
}
1405
vm_page_advise(tm, advice);
1406
vm_page_xunbusy(tm);
1407
vm_object_madvise_freespace(tobject, advice, tm->pindex, 1);
1408
next_pindex:
1409
if (tobject != object)
1410
VM_OBJECT_WUNLOCK(tobject);
1411
}
1412
VM_OBJECT_WUNLOCK(object);
1413
}
1414
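/*
 * Illustrative sketch of the object-level madvise interface documented
 * above (added for exposition; hypothetical VM_OBJECT_EXAMPLES guard).
 * The range is given in page indices and the object is passed unlocked.
 */
#ifdef VM_OBJECT_EXAMPLES
static void
example_dontneed_range(vm_object_t obj, vm_pindex_t start, vm_pindex_t end)
{

	vm_object_madvise(obj, start, end, MADV_DONTNEED);
}
#endif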
1415
/*
1416
* vm_object_shadow:
1417
*
1418
* Create a new object which is backed by the
1419
* specified existing object range. The source
1420
* object reference is deallocated.
1421
*
1422
* The new object and offset into that object
1423
* are returned in the source parameters.
1424
*/
1425
void
1426
vm_object_shadow(vm_object_t *object, vm_ooffset_t *offset, vm_size_t length,
1427
struct ucred *cred, bool shared)
1428
{
1429
vm_object_t source;
1430
vm_object_t result;
1431
1432
source = *object;
1433
1434
/*
1435
* Don't create the new object if the old object isn't shared.
1436
*
1437
* If we hold the only reference we can guarantee that it won't
1438
* increase while we have the map locked. Otherwise the race is
1439
* harmless and we will end up with an extra shadow object that
1440
* will be collapsed later.
1441
*/
1442
if (source != NULL && source->ref_count == 1 &&
1443
(source->flags & OBJ_ANON) != 0)
1444
return;
1445
1446
/*
1447
* Allocate a new object with the given length.
1448
*/
1449
result = vm_object_allocate_anon(atop(length), source, cred);
1450
1451
/*
1452
* Store the offset into the source object, and fix up the offset into
1453
* the new object.
1454
*/
1455
result->backing_object_offset = *offset;
1456
1457
if (shared || source != NULL) {
1458
VM_OBJECT_WLOCK(result);
1459
1460
/*
1461
* The new object shadows the source object, adding a
1462
* reference to it. Our caller changes his reference
1463
* to point to the new object, removing a reference to
1464
* the source object. Net result: no change of
1465
* reference count, unless the caller needs to add one
1466
* more reference due to forking a shared map entry.
1467
*/
1468
if (shared) {
1469
vm_object_reference_locked(result);
1470
vm_object_clear_flag(result, OBJ_ONEMAPPING);
1471
}
1472
1473
/*
1474
* Try to optimize the result object's page color when
1475
* shadowing in order to maintain page coloring
1476
* consistency in the combined shadowed object.
1477
*/
1478
if (source != NULL) {
1479
vm_object_backing_insert(result, source);
1480
result->domain = source->domain;
1481
#if VM_NRESERVLEVEL > 0
1482
vm_object_set_flag(result,
1483
(source->flags & OBJ_COLORED));
1484
result->pg_color = (source->pg_color +
1485
OFF_TO_IDX(*offset)) & ((1 << (VM_NFREEORDER -
1486
1)) - 1);
1487
#endif
1488
}
1489
VM_OBJECT_WUNLOCK(result);
1490
}
1491
1492
/*
1493
* Return the new object and offset through the caller's pointers.
1494
*/
1495
*offset = 0;
1496
*object = result;
1497
}
1498
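/*
 * Illustrative sketch of shadow creation (added for exposition; hypothetical
 * VM_OBJECT_EXAMPLES guard).  This mirrors what the vm_map code does for a
 * copy-on-write mapping: the caller's reference to the source object is
 * donated to the new shadow, and *object/*offset are updated in place.
 */
#ifdef VM_OBJECT_EXAMPLES
static void
example_shadow_source(void)
{
	vm_object_t obj, source;
	vm_ooffset_t off;

	source = vm_object_allocate_anon(atop(4 * PAGE_SIZE), NULL, NULL);
	vm_object_reference(source);	/* pretend a second mapping exists */

	obj = source;
	off = 0;
	vm_object_shadow(&obj, &off, 4 * PAGE_SIZE, NULL, false);

	/* obj now points at a new object backed by source; off is reset. */
	vm_object_deallocate(obj);	/* tears down the shadow */
	vm_object_deallocate(source);	/* drops the extra reference above */
}
#endif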
1499
/*
1500
* vm_object_split:
1501
*
1502
* Split the pages in a map entry into a new object. This affords
1503
* easier removal of unused pages, and keeps object inheritance from
1504
* being a negative impact on memory usage.
1505
*/
1506
void
1507
vm_object_split(vm_map_entry_t entry)
1508
{
1509
struct pctrie_iter pages;
1510
vm_page_t m;
1511
vm_object_t orig_object, new_object, backing_object;
1512
struct ucred *cred;
1513
vm_pindex_t offidxstart;
1514
vm_size_t size;
1515
1516
orig_object = entry->object.vm_object;
1517
KASSERT((orig_object->flags & OBJ_ONEMAPPING) != 0,
1518
("vm_object_split: Splitting object with multiple mappings."));
1519
if ((orig_object->flags & OBJ_ANON) == 0)
1520
return;
1521
if (orig_object->ref_count <= 1)
1522
return;
1523
VM_OBJECT_WUNLOCK(orig_object);
1524
1525
offidxstart = OFF_TO_IDX(entry->offset);
1526
size = atop(entry->end - entry->start);
1527
if (orig_object->cred != NULL) {
1528
/*
1529
* vm_object_split() is currently called from
1530
* vmspace_fork(), and it might be tempting to add the
1531
* charge for the split object to fork_charge. But
1532
* fork_charge is discharged on error when the copied
1533
* vmspace is destroyed. Since the split object is
1534
* inserted into the shadow hierarchy serving the
1535
* source vm_map, it is kept even after the
1536
* unsuccessful fork, meaning that we have to force
1537
* its swap usage.
1538
*/
1539
cred = curthread->td_ucred;
1540
crhold(cred);
1541
swap_reserve_force_by_cred(ptoa(size), cred);
1542
} else {
1543
cred = NULL;
1544
}
1545
1546
new_object = vm_object_allocate_anon(size, orig_object, cred);
1547
1548
/*
1549
* We must wait for the orig_object to complete any in-progress
1550
* collapse so that the swap blocks are stable below. The
1551
* additional reference on backing_object by new object will
1552
* prevent further collapse operations until split completes.
1553
*/
1554
VM_OBJECT_WLOCK(orig_object);
1555
vm_object_collapse_wait(orig_object);
1556
1557
/*
1558
* At this point, the new object is still private, so the order in
1559
* which the original and new objects are locked does not matter.
1560
*/
1561
VM_OBJECT_WLOCK(new_object);
1562
new_object->domain = orig_object->domain;
1563
backing_object = orig_object->backing_object;
1564
if (backing_object != NULL) {
1565
vm_object_backing_insert_ref(new_object, backing_object);
1566
new_object->backing_object_offset =
1567
orig_object->backing_object_offset + entry->offset;
1568
}
1569
1570
/*
1571
* Mark the split operation so that swap_pager_getpages() knows
1572
* that the object is in transition.
1573
*/
1574
vm_object_set_flag(orig_object, OBJ_SPLIT);
1575
vm_page_iter_limit_init(&pages, orig_object, offidxstart + size);
1576
retry:
1577
KASSERT(pctrie_iter_is_reset(&pages),
1578
("%s: pctrie_iter not reset for retry", __func__));
1579
for (m = vm_radix_iter_lookup_ge(&pages, offidxstart); m != NULL;
1580
m = vm_radix_iter_step(&pages)) {
1581
/*
1582
* We must wait for pending I/O to complete before we can
1583
* rename the page.
1584
*
1585
* We do not have to VM_PROT_NONE the page as mappings should
1586
* not be changed by this operation.
1587
*/
1588
if (vm_page_tryxbusy(m) == 0) {
1589
VM_OBJECT_WUNLOCK(new_object);
1590
if (vm_page_busy_sleep(m, "spltwt", 0))
1591
VM_OBJECT_WLOCK(orig_object);
1592
pctrie_iter_reset(&pages);
1593
VM_OBJECT_WLOCK(new_object);
1594
goto retry;
1595
}
1596
1597
/*
1598
* If the page was left invalid, it was likely placed there by
1599
* an incomplete fault. Just remove and ignore.
1600
*
1601
* One other possibility is that the map entry is wired, in
1602
* which case we must hang on to the page to avoid leaking it,
1603
* as the map entry owns the wiring. This case can arise if the
1604
* backing object is truncated by the pager.
1605
*/
1606
if (vm_page_none_valid(m) && entry->wired_count == 0) {
1607
if (vm_page_iter_remove(&pages, m))
1608
vm_page_free(m);
1609
continue;
1610
}
1611
1612
/* vm_page_iter_rename() will dirty the page if it is valid. */
1613
if (!vm_page_iter_rename(&pages, m, new_object, m->pindex -
1614
offidxstart)) {
1615
vm_page_xunbusy(m);
1616
VM_OBJECT_WUNLOCK(new_object);
1617
VM_OBJECT_WUNLOCK(orig_object);
1618
vm_radix_wait();
1619
pctrie_iter_reset(&pages);
1620
VM_OBJECT_WLOCK(orig_object);
1621
VM_OBJECT_WLOCK(new_object);
1622
goto retry;
1623
}
1624
1625
#if VM_NRESERVLEVEL > 0
1626
/*
1627
* If some of the reservation's allocated pages remain with
1628
* the original object, then transferring the reservation to
1629
* the new object is neither particularly beneficial nor
1630
* particularly harmful as compared to leaving the reservation
1631
* with the original object. If, however, all of the
1632
* reservation's allocated pages are transferred to the new
1633
* object, then transferring the reservation is typically
1634
* beneficial. Determining which of these two cases applies
1635
* would be more costly than unconditionally renaming the
1636
* reservation.
1637
*/
1638
vm_reserv_rename(m, new_object, orig_object, offidxstart);
1639
#endif
1640
}
1641
1642
/*
1643
* swap_pager_copy() can sleep, in which case the orig_object's
1644
* and new_object's locks are released and reacquired.
1645
*/
1646
swap_pager_copy(orig_object, new_object, offidxstart, 0);
1647
vm_page_iter_init(&pages, new_object);
1648
VM_RADIX_FOREACH(m, &pages)
1649
vm_page_xunbusy(m);
1650
1651
vm_object_clear_flag(orig_object, OBJ_SPLIT);
1652
VM_OBJECT_WUNLOCK(orig_object);
1653
VM_OBJECT_WUNLOCK(new_object);
1654
entry->object.vm_object = new_object;
1655
entry->offset = 0LL;
1656
vm_object_deallocate(orig_object);
1657
VM_OBJECT_WLOCK(new_object);
1658
}
1659
1660
static vm_page_t
1661
vm_object_collapse_scan_wait(struct pctrie_iter *pages, vm_object_t object,
1662
vm_page_t p)
1663
{
1664
vm_object_t backing_object;
1665
1666
VM_OBJECT_ASSERT_WLOCKED(object);
1667
backing_object = object->backing_object;
1668
VM_OBJECT_ASSERT_WLOCKED(backing_object);
1669
1670
KASSERT(p == NULL || p->object == object || p->object == backing_object,
1671
("invalid ownership %p %p %p", p, object, backing_object));
1672
/* The page is only NULL when rename fails. */
1673
if (p == NULL) {
1674
VM_OBJECT_WUNLOCK(object);
1675
VM_OBJECT_WUNLOCK(backing_object);
1676
vm_radix_wait();
1677
VM_OBJECT_WLOCK(object);
1678
} else if (p->object == object) {
1679
VM_OBJECT_WUNLOCK(backing_object);
1680
if (vm_page_busy_sleep(p, "vmocol", 0))
1681
VM_OBJECT_WLOCK(object);
1682
} else {
1683
VM_OBJECT_WUNLOCK(object);
1684
if (!vm_page_busy_sleep(p, "vmocol", 0))
1685
VM_OBJECT_WUNLOCK(backing_object);
1686
VM_OBJECT_WLOCK(object);
1687
}
1688
VM_OBJECT_WLOCK(backing_object);
1689
vm_page_iter_init(pages, backing_object);
1690
return (vm_radix_iter_lookup_ge(pages, 0));
1691
}
1692
1693
static void
1694
vm_object_collapse_scan(vm_object_t object)
1695
{
1696
struct pctrie_iter pages;
1697
vm_object_t backing_object;
1698
vm_page_t next, p, pp;
1699
vm_pindex_t backing_offset_index, new_pindex;
1700
1701
VM_OBJECT_ASSERT_WLOCKED(object);
1702
VM_OBJECT_ASSERT_WLOCKED(object->backing_object);
1703
1704
backing_object = object->backing_object;
1705
backing_offset_index = OFF_TO_IDX(object->backing_object_offset);
1706
1707
/*
1708
* Scan the backing object's resident pages, moving or freeing each one.
1709
*/
1710
vm_page_iter_init(&pages, backing_object);
1711
for (p = vm_radix_iter_lookup_ge(&pages, 0); p != NULL; p = next) {
1712
/*
1713
* Check for busy page
1714
*/
1715
if (vm_page_tryxbusy(p) == 0) {
1716
next = vm_object_collapse_scan_wait(&pages, object, p);
1717
continue;
1718
}
1719
1720
KASSERT(object->backing_object == backing_object,
1721
("vm_object_collapse_scan: backing object mismatch %p != %p",
1722
object->backing_object, backing_object));
1723
KASSERT(p->object == backing_object,
1724
("vm_object_collapse_scan: object mismatch %p != %p",
1725
p->object, backing_object));
1726
		if (p->pindex < backing_offset_index || object->size <=
		    (new_pindex = p->pindex - backing_offset_index)) {
			vm_pager_freespace(backing_object, p->pindex, 1);

			KASSERT(!pmap_page_is_mapped(p),
			    ("freeing mapped page %p", p));
			if (vm_page_iter_remove(&pages, p))
				vm_page_free(p);
			next = vm_radix_iter_step(&pages);
			continue;
		}

		if (!vm_page_all_valid(p)) {
			KASSERT(!pmap_page_is_mapped(p),
			    ("freeing mapped page %p", p));
			if (vm_page_iter_remove(&pages, p))
				vm_page_free(p);
			next = vm_radix_iter_step(&pages);
			continue;
		}

		pp = vm_page_lookup(object, new_pindex);
		if (pp != NULL && vm_page_tryxbusy(pp) == 0) {
			vm_page_xunbusy(p);
			/*
			 * The page in the parent is busy and possibly not
			 * (yet) valid.  Until its state is finalized by the
			 * busy bit owner, we can't tell whether it shadows the
			 * original page.
			 */
			next = vm_object_collapse_scan_wait(&pages, object, pp);
			continue;
		}

		if (pp != NULL && vm_page_none_valid(pp)) {
			/*
			 * The page was invalid in the parent.  Likely placed
			 * there by an incomplete fault.  Just remove and
			 * ignore.  p can replace it.
			 */
			if (vm_page_remove(pp))
				vm_page_free(pp);
			pp = NULL;
		}

		if (pp != NULL || vm_pager_has_page(object, new_pindex, NULL,
		    NULL)) {
			/*
			 * The page already exists in the parent OR swap exists
			 * for this location in the parent.  Leave the parent's
			 * page alone.  Destroy the original page from the
			 * backing object.
			 */
			vm_pager_freespace(backing_object, p->pindex, 1);
			KASSERT(!pmap_page_is_mapped(p),
			    ("freeing mapped page %p", p));
			if (pp != NULL)
				vm_page_xunbusy(pp);
			if (vm_page_iter_remove(&pages, p))
				vm_page_free(p);
			next = vm_radix_iter_step(&pages);
			continue;
		}

		/*
		 * Page does not exist in parent, rename the page from the
		 * backing object to the main object.
		 *
		 * If the page was mapped to a process, it can remain mapped
		 * through the rename.  vm_page_iter_rename() will dirty the
		 * page.
		 */
		if (!vm_page_iter_rename(&pages, p, object, new_pindex)) {
			vm_page_xunbusy(p);
			next = vm_object_collapse_scan_wait(&pages, object,
			    NULL);
			continue;
		}

		/* Use the old pindex to free the right page. */
		vm_pager_freespace(backing_object, new_pindex +
		    backing_offset_index, 1);

#if VM_NRESERVLEVEL > 0
		/*
		 * Rename the reservation.
		 */
		vm_reserv_rename(p, object, backing_object,
		    backing_offset_index);
#endif
		vm_page_xunbusy(p);
		next = vm_radix_iter_step(&pages);
	}
	return;
}

/*
 *	vm_object_collapse:
 *
 *	Collapse an object with the object backing it.
 *	Pages in the backing object are moved into the
 *	parent, and the backing object is deallocated.
 */
void
vm_object_collapse(vm_object_t object)
{
	vm_object_t backing_object, new_backing_object;

	VM_OBJECT_ASSERT_WLOCKED(object);

	while (TRUE) {
		KASSERT((object->flags & (OBJ_DEAD | OBJ_ANON)) == OBJ_ANON,
		    ("collapsing invalid object"));

		/*
		 * Wait for the backing_object to finish any pending
		 * collapse so that the caller sees the shortest possible
		 * shadow chain.
		 */
		backing_object = vm_object_backing_collapse_wait(object);
		if (backing_object == NULL)
			return;

		KASSERT(object->ref_count > 0 &&
		    object->ref_count > atomic_load_int(&object->shadow_count),
		    ("collapse with invalid ref %d or shadow %d count.",
		    object->ref_count, atomic_load_int(&object->shadow_count)));
		KASSERT((backing_object->flags &
		    (OBJ_COLLAPSING | OBJ_DEAD)) == 0,
		    ("vm_object_collapse: Backing object already collapsing."));
		KASSERT((object->flags & (OBJ_COLLAPSING | OBJ_DEAD)) == 0,
		    ("vm_object_collapse: object is already collapsing."));

		/*
		 * We know that we can either collapse the backing object if
		 * the parent is the only reference to it, or (perhaps) have
		 * the parent bypass the object if the parent happens to shadow
		 * all the resident pages in the entire backing object.
		 */
		if (backing_object->ref_count == 1) {
			KASSERT(atomic_load_int(&backing_object->shadow_count)
			    == 1,
			    ("vm_object_collapse: shadow_count: %d",
			    atomic_load_int(&backing_object->shadow_count)));
			vm_object_pip_add(object, 1);
			vm_object_set_flag(object, OBJ_COLLAPSING);
			vm_object_pip_add(backing_object, 1);
			vm_object_set_flag(backing_object, OBJ_DEAD);

			/*
			 * If there is exactly one reference to the backing
			 * object, we can collapse it into the parent.
			 */
			vm_object_collapse_scan(object);

			/*
			 * Move the pager from backing_object to object.
			 *
			 * swap_pager_copy() can sleep, in which case the
			 * backing_object's and object's locks are released and
			 * reacquired.
			 */
			swap_pager_copy(backing_object, object,
			    OFF_TO_IDX(object->backing_object_offset), TRUE);

			/*
			 * Object now shadows whatever backing_object did.
			 */
			vm_object_clear_flag(object, OBJ_COLLAPSING);
			vm_object_backing_transfer(object, backing_object);
			object->backing_object_offset +=
			    backing_object->backing_object_offset;
			VM_OBJECT_WUNLOCK(object);
			vm_object_pip_wakeup(object);

			/*
			 * Discard backing_object.
			 *
			 * Since the backing object has no pages, no pager left,
			 * and no object references within it, all that is
			 * necessary is to dispose of it.
			 */
			KASSERT(backing_object->ref_count == 1, (
"backing_object %p was somehow re-referenced during collapse!",
			    backing_object));
			vm_object_pip_wakeup(backing_object);
			(void)refcount_release(&backing_object->ref_count);
			umtx_shm_object_terminated(backing_object);
			vm_object_terminate(backing_object);
			counter_u64_add(object_collapses, 1);
			VM_OBJECT_WLOCK(object);
		} else {
			/*
			 * If we do not entirely shadow the backing object,
			 * there is nothing we can do so we give up.
			 *
			 * The object lock and backing_object lock must not
			 * be dropped during this sequence.
			 */
			if (!swap_pager_scan_all_shadowed(object)) {
				VM_OBJECT_WUNLOCK(backing_object);
				break;
			}

			/*
			 * Make the parent shadow the next object in the
			 * chain.  Deallocating backing_object will not remove
			 * it, since its reference count is at least 2.
			 */
			vm_object_backing_remove_locked(object);
			new_backing_object = backing_object->backing_object;
			if (new_backing_object != NULL) {
				vm_object_backing_insert_ref(object,
				    new_backing_object);
				object->backing_object_offset +=
				    backing_object->backing_object_offset;
			}

			/*
			 * Drop the reference count on backing_object.  Since
			 * its ref_count was at least 2, it will not vanish.
			 */
			(void)refcount_release(&backing_object->ref_count);
			KASSERT(backing_object->ref_count >= 1, (
"backing_object %p was somehow dereferenced during collapse!",
			    backing_object));
			VM_OBJECT_WUNLOCK(backing_object);
			counter_u64_add(object_bypasses, 1);
		}

		/*
		 * Try again with this object's new backing object.
		 */
	}
}
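
/*
 * A minimal usage sketch (illustration only, hence the "#if 0" guard): the
 * caller is expected to hold the object's write lock, and collapsing is
 * meaningful only for live anonymous objects.  The example_collapse_chain()
 * name is hypothetical and not part of this file.
 */
#if 0
static void
example_collapse_chain(vm_object_t object)
{

	VM_OBJECT_WLOCK(object);
	/* Collapse applies only to anonymous objects that are not dead. */
	if ((object->flags & (OBJ_DEAD | OBJ_ANON)) == OBJ_ANON)
		vm_object_collapse(object);
	VM_OBJECT_WUNLOCK(object);
}
#endif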

/*
 *	vm_object_page_remove:
 *
 *	For the given object, either frees or invalidates each of the
 *	specified pages.  In general, a page is freed.  However, if a page is
 *	wired for any reason other than the existence of a managed, wired
 *	mapping, then it may be invalidated but not removed from the object.
 *	Pages are specified by the given range ["start", "end") and the option
 *	OBJPR_CLEANONLY.  As a special case, if "end" is zero, then the range
 *	extends from "start" to the end of the object.  If the option
 *	OBJPR_CLEANONLY is specified, then only the non-dirty pages within the
 *	specified range are affected.  If the option OBJPR_NOTMAPPED is
 *	specified, then the pages within the specified range must have no
 *	mappings.  Otherwise, if this option is not specified, any mappings to
 *	the specified pages are removed before the pages are freed or
 *	invalidated.
 *
 *	In general, this operation should only be performed on objects that
 *	contain managed pages.  There are, however, two exceptions.  First, it
 *	is performed on the kernel and kmem objects by vm_map_entry_delete().
 *	Second, it is used by msync(..., MS_INVALIDATE) to invalidate device-
 *	backed pages.  In both of these cases, the option OBJPR_CLEANONLY must
 *	not be specified and the option OBJPR_NOTMAPPED must be specified.
 *
 *	The object must be locked.
 */
void
vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
    int options)
{
	struct pctrie_iter pages;
	vm_page_t p;

	VM_OBJECT_ASSERT_WLOCKED(object);
	KASSERT((object->flags & OBJ_UNMANAGED) == 0 ||
	    (options & (OBJPR_CLEANONLY | OBJPR_NOTMAPPED)) == OBJPR_NOTMAPPED,
	    ("vm_object_page_remove: illegal options for object %p", object));
	if (object->resident_page_count == 0)
		goto remove_pager;
	vm_object_pip_add(object, 1);
	vm_page_iter_limit_init(&pages, object, end);
again:
	KASSERT(pctrie_iter_is_reset(&pages),
	    ("%s: pctrie_iter not reset for retry", __func__));
	for (p = vm_radix_iter_lookup_ge(&pages, start); p != NULL;
	    p = vm_radix_iter_step(&pages)) {
		/*
		 * Skip invalid pages if asked to do so.  Try to avoid acquiring
		 * the busy lock, as some consumers rely on this to avoid
		 * deadlocks.
		 *
		 * A thread may concurrently transition the page from invalid to
		 * valid using only the busy lock, so the result of this check
		 * is immediately stale.  It is up to consumers to handle this,
		 * for instance by ensuring that all invalid->valid transitions
		 * happen with a mutex held, as may be possible for a
		 * filesystem.
		 */
		if ((options & OBJPR_VALIDONLY) != 0 && vm_page_none_valid(p))
			continue;

		/*
		 * If the page is wired for any reason besides the existence
		 * of managed, wired mappings, then it cannot be freed.  For
		 * example, fictitious pages, which represent device memory,
		 * are inherently wired and cannot be freed.  They can,
		 * however, be invalidated if the option OBJPR_CLEANONLY is
		 * not specified.
		 */
		if (vm_page_tryxbusy(p) == 0) {
			if (vm_page_busy_sleep(p, "vmopar", 0))
				VM_OBJECT_WLOCK(object);
			pctrie_iter_reset(&pages);
			goto again;
		}
		if ((options & OBJPR_VALIDONLY) != 0 && vm_page_none_valid(p)) {
			vm_page_xunbusy(p);
			continue;
		}
		if (vm_page_wired(p)) {
wired:
			if ((options & OBJPR_NOTMAPPED) == 0 &&
			    object->ref_count != 0)
				pmap_remove_all(p);
			if ((options & OBJPR_CLEANONLY) == 0) {
				vm_page_invalid(p);
				vm_page_undirty(p);
			}
			vm_page_xunbusy(p);
			continue;
		}
		KASSERT((p->flags & PG_FICTITIOUS) == 0,
		    ("vm_object_page_remove: page %p is fictitious", p));
		if ((options & OBJPR_CLEANONLY) != 0 &&
		    !vm_page_none_valid(p)) {
			if ((options & OBJPR_NOTMAPPED) == 0 &&
			    object->ref_count != 0 &&
			    !vm_page_try_remove_write(p))
				goto wired;
			if (p->dirty != 0) {
				vm_page_xunbusy(p);
				continue;
			}
		}
		if ((options & OBJPR_NOTMAPPED) == 0 &&
		    object->ref_count != 0 && !vm_page_try_remove_all(p))
			goto wired;
		vm_page_iter_free(&pages, p);
	}
	vm_object_pip_wakeup(object);

remove_pager:
	vm_pager_freespace(object, start, (end == 0 ? object->size : end) -
	    start);
}
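
/*
 * A minimal usage sketch (illustration only): freeing every page from
 * "newsize" to the end of the object, as a truncation-style caller might.
 * The example_truncate_object() name is hypothetical.
 */
#if 0
static void
example_truncate_object(vm_object_t object, vm_pindex_t newsize)
{

	VM_OBJECT_WLOCK(object);
	/* An "end" of zero means "through the end of the object". */
	vm_object_page_remove(object, newsize, 0, 0);
	VM_OBJECT_WUNLOCK(object);
}
#endif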

/*
 *	vm_object_page_noreuse:
 *
 *	For the given object, attempt to move the specified pages to
 *	the head of the inactive queue.  This bypasses regular LRU
 *	operation and allows the pages to be reused quickly under memory
 *	pressure.  If a page is wired for any reason, then it will not
 *	be queued.  Pages are specified by the range ["start", "end").
 *	As a special case, if "end" is zero, then the range extends from
 *	"start" to the end of the object.
 *
 *	This operation should only be performed on objects that
 *	contain non-fictitious, managed pages.
 *
 *	The object must be locked.
 */
void
vm_object_page_noreuse(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
{
	struct pctrie_iter pages;
	vm_page_t p;

	VM_OBJECT_ASSERT_LOCKED(object);
	KASSERT((object->flags & (OBJ_FICTITIOUS | OBJ_UNMANAGED)) == 0,
	    ("vm_object_page_noreuse: illegal object %p", object));
	if (object->resident_page_count == 0)
		return;

	vm_page_iter_limit_init(&pages, object, end);
	VM_RADIX_FOREACH_FROM(p, &pages, start)
		vm_page_deactivate_noreuse(p);
}
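
/*
 * A minimal usage sketch (illustration only), e.g. for an advisory
 * "don't need" style hint over a page range.  The example_dontneed_range()
 * name is hypothetical.
 */
#if 0
static void
example_dontneed_range(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
{

	/* A read lock is sufficient; the routine only requeues pages. */
	VM_OBJECT_RLOCK(object);
	vm_object_page_noreuse(object, start, end);
	VM_OBJECT_RUNLOCK(object);
}
#endif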

/*
 * Populate the specified range of the object with valid pages.  Returns
 * TRUE if the range is successfully populated and FALSE otherwise.
 *
 * Note: This function should be optimized to pass a larger array of
 * pages to vm_pager_get_pages() before it is applied to a non-
 * OBJT_DEVICE object.
 *
 * The object must be locked.
 */
boolean_t
vm_object_populate(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
{
	struct pctrie_iter pages;
	vm_page_t m;
	vm_pindex_t pindex;
	int rv;

	vm_page_iter_init(&pages, object);
	VM_OBJECT_ASSERT_WLOCKED(object);
	for (pindex = start; pindex < end; pindex++) {
		rv = vm_page_grab_valid_iter(&m, object, pindex,
		    VM_ALLOC_NORMAL, &pages);
		if (rv != VM_PAGER_OK)
			break;

		/*
		 * Keep "m" busy because a subsequent iteration may unlock
		 * the object.
		 */
	}
	if (pindex > start) {
		pages.limit = pindex;
		VM_RADIX_FORALL_FROM(m, &pages, start)
			vm_page_xunbusy(m);
	}
	return (pindex == end);
}
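
/*
 * A minimal usage sketch (illustration only): pre-faulting a range of the
 * object, as a driver or image activator might.  The object lock may be
 * dropped and reacquired internally while pages are read in.  The
 * example_populate_range() name is hypothetical.
 */
#if 0
static boolean_t
example_populate_range(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
{
	boolean_t ok;

	VM_OBJECT_WLOCK(object);
	ok = vm_object_populate(object, start, end);
	VM_OBJECT_WUNLOCK(object);
	return (ok);
}
#endif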

/*
 *	Routine:	vm_object_coalesce
 *	Function:	Coalesces two objects backing up adjoining
 *			regions of memory into a single object.
 *
 *	returns TRUE if objects were combined.
 *
 *	NOTE:	Only works at the moment if the second object is NULL -
 *		if it's not, which object do we lock first?
 *
 *	Parameters:
 *		prev_object	First object to coalesce
 *		prev_offset	Offset into prev_object
 *		prev_size	Size of reference to prev_object
 *		next_size	Size of reference to the second object
 *		cflags		OBJCO_* flags indicating whether the
 *				extension region already has swap
 *				accounted for
 *
 *	Conditions:
 *	The object must *not* be locked.
 */
boolean_t
vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset,
    vm_size_t prev_size, vm_size_t next_size, int cflags)
{
	vm_pindex_t next_end, next_pindex;

	if (prev_object == NULL)
		return (TRUE);
	if ((prev_object->flags & OBJ_ANON) == 0)
		return (FALSE);

	VM_OBJECT_WLOCK(prev_object);
	/*
	 * Try to collapse the object first.
	 */
	vm_object_collapse(prev_object);

	/*
	 * Can't coalesce if the object has more than one reference, is
	 * paged out, shadows another object, or has a copy elsewhere
	 * (any of which mean that the pages not mapped to prev_entry
	 * may be in use anyway).
	 */
	if (prev_object->backing_object != NULL) {
		VM_OBJECT_WUNLOCK(prev_object);
		return (FALSE);
	}

	prev_size >>= PAGE_SHIFT;
	next_size >>= PAGE_SHIFT;
	next_pindex = OFF_TO_IDX(prev_offset) + prev_size;

	if (prev_object->ref_count > 1 &&
	    prev_object->size != next_pindex &&
	    (prev_object->flags & OBJ_ONEMAPPING) == 0) {
		VM_OBJECT_WUNLOCK(prev_object);
		return (FALSE);
	}

	next_end = next_pindex + next_size;

	/*
	 * Account for the charge.
	 */
	if (prev_object->cred != NULL && (cflags & OBJCO_NO_CHARGE) == 0) {
		/*
		 * If prev_object was charged, then this mapping,
		 * although not charged now, may become writable
		 * later.  Non-NULL cred in the object would prevent
		 * swap reservation during enabling of the write
		 * access, so reserve swap now.  A failed reservation
		 * causes allocation of a separate object for the map
		 * entry, and swap reservation for that entry is
		 * managed at the appropriate time.
		 */
		if (next_end > prev_object->size) {
			vm_size_t charge = ptoa(next_end - prev_object->size);

			if ((cflags & OBJCO_CHARGED) == 0) {
				if (!swap_reserve_by_cred(charge,
				    prev_object->cred)) {
					VM_OBJECT_WUNLOCK(prev_object);
					return (FALSE);
				}
			} else if (prev_object->size > next_pindex) {
				/*
				 * The caller charged, but:
				 * - the object has already accounted for the
				 *   space,
				 * - and the object end is between previous
				 *   mapping end and next_end.
				 */
				swap_release_by_cred(ptoa(prev_object->size -
				    next_pindex), prev_object->cred);
			}
		} else if ((cflags & OBJCO_CHARGED) != 0) {
			/*
			 * The caller charged, but the object has
			 * already accounted for the space.  The whole
			 * new mapping charge should be released.
			 */
			swap_release_by_cred(ptoa(next_size),
			    prev_object->cred);
		}
	}

	/*
	 * Remove any pages that may still be in the object from a previous
	 * deallocation.
	 */
	if (next_pindex < prev_object->size)
		vm_object_page_remove(prev_object, next_pindex, next_end, 0);

	/*
	 * Extend the object if necessary.
	 */
	if (next_end > prev_object->size)
		prev_object->size = next_end;

#ifdef INVARIANTS
	/*
	 * Re-check: there must be no pages in the next range backed
	 * by prev_entry's object.  Otherwise, the resulting
	 * corruption is the same as faulting in a non-zeroed page.
	 */
	if (vm_check_pg_zero) {
		vm_pindex_t pidx;

		pidx = swap_pager_seek_data(prev_object, next_pindex);
		KASSERT(pidx >= next_end,
		    ("found obj %p pindex %#jx e %#jx %#jx %#jx",
		    prev_object, pidx, (uintmax_t)prev_offset,
		    (uintmax_t)prev_size, (uintmax_t)next_size));
	}
#endif

	VM_OBJECT_WUNLOCK(prev_object);
	return (TRUE);
}
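
/*
 * A minimal usage sketch (illustration only): growing an existing anonymous
 * mapping by "grow_size" bytes immediately after its current extent, in the
 * style of a map-entry extension.  The example_extend_anon_mapping() name is
 * hypothetical.
 */
#if 0
static bool
example_extend_anon_mapping(vm_object_t prev_object, vm_ooffset_t prev_offset,
    vm_size_t prev_size, vm_size_t grow_size)
{

	/*
	 * The object must be unlocked; vm_object_coalesce() locks it.  With
	 * cflags == 0, swap for the extension is reserved here against
	 * prev_object's credential when needed.
	 */
	return (vm_object_coalesce(prev_object, prev_offset, prev_size,
	    grow_size, 0) == TRUE);
}
#endif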

/*
 * Fill in the ma_dst array with up to *rbehind optional pages before ma_src[0]
 * and up to *rahead optional pages after ma_src[count - 1].  In both cases,
 * stop the filling-in short on encountering a cached page, an object boundary
 * limit, or an allocation error.  Update *rbehind and *rahead to indicate the
 * number of pages allocated.  Copy elements of ma_src into array elements from
 * ma_dst[*rbehind] to ma_dst[*rbehind + count - 1].
 */
void
vm_object_prepare_buf_pages(vm_object_t object, vm_page_t *ma_dst, int count,
    int *rbehind, int *rahead, vm_page_t *ma_src)
{
	struct pctrie_iter pages;
	vm_pindex_t pindex;
	vm_page_t m, mpred, msucc;

	vm_page_iter_init(&pages, object);
	VM_OBJECT_ASSERT_LOCKED(object);
	if (*rbehind != 0) {
		m = ma_src[0];
		pindex = m->pindex;
		mpred = vm_radix_iter_lookup_lt(&pages, pindex);
		*rbehind = MIN(*rbehind,
		    pindex - (mpred != NULL ? mpred->pindex + 1 : 0));
		for (int i = 0; i < *rbehind; i++) {
			m = vm_page_alloc_iter(object, pindex - i - 1,
			    VM_ALLOC_NORMAL, &pages);
			if (m == NULL) {
				/* Shift the array. */
				for (int j = 0; j < i; j++)
					ma_dst[j] = ma_dst[j + *rbehind - i];
				*rbehind = i;
				*rahead = 0;
				break;
			}
			ma_dst[*rbehind - i - 1] = m;
		}
	}
	for (int i = 0; i < count; i++)
		ma_dst[*rbehind + i] = ma_src[i];
	if (*rahead != 0) {
		m = ma_src[count - 1];
		pindex = m->pindex + 1;
		msucc = vm_radix_iter_lookup_ge(&pages, pindex);
		*rahead = MIN(*rahead,
		    (msucc != NULL ? msucc->pindex : object->size) - pindex);
		for (int i = 0; i < *rahead; i++) {
			m = vm_page_alloc_iter(object, pindex + i,
			    VM_ALLOC_NORMAL, &pages);
			if (m == NULL) {
				*rahead = i;
				break;
			}
			ma_dst[*rbehind + count + i] = m;
		}
	}
}
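
/*
 * A minimal usage sketch (illustration only): a pager-style caller growing a
 * small run of pages into a larger read cluster.  The EXAMPLE_CLUSTER_PAGES
 * constant and example_build_read_cluster() name are hypothetical, and
 * "count" is assumed to be no larger than EXAMPLE_CLUSTER_PAGES.
 */
#if 0
#define	EXAMPLE_CLUSTER_PAGES	16

static void
example_build_read_cluster(vm_object_t object, vm_page_t *ma_src, int count)
{
	vm_page_t ma[EXAMPLE_CLUSTER_PAGES];
	int rbehind, rahead;

	rbehind = (EXAMPLE_CLUSTER_PAGES - count) / 2;
	rahead = EXAMPLE_CLUSTER_PAGES - count - rbehind;
	VM_OBJECT_WLOCK(object);
	vm_object_prepare_buf_pages(object, ma, count, &rbehind, &rahead,
	    ma_src);
	VM_OBJECT_WUNLOCK(object);
	/*
	 * ma[] now holds rbehind + count + rahead pages, with the original
	 * run occupying ma[rbehind .. rbehind + count - 1].
	 */
}
#endif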

void
vm_object_set_writeable_dirty_(vm_object_t object)
{
	atomic_add_int(&object->generation, 1);
}

bool
vm_object_mightbedirty_(vm_object_t object)
{
	return (object->generation != object->cleangeneration);
}

/*
 *	vm_object_unwire:
 *
 *	For each page offset within the specified range of the given object,
 *	find the highest-level page in the shadow chain and unwire it.  A page
 *	must exist at every page offset, and the highest-level page must be
 *	wired.
 */
void
vm_object_unwire(vm_object_t object, vm_ooffset_t offset, vm_size_t length,
    uint8_t queue)
{
	struct pctrie_iter pages;
	vm_object_t tobject, t1object;
	vm_page_t m, tm;
	vm_pindex_t end_pindex, pindex, tpindex;
	int depth, locked_depth;

	KASSERT((offset & PAGE_MASK) == 0,
	    ("vm_object_unwire: offset is not page aligned"));
	KASSERT((length & PAGE_MASK) == 0,
	    ("vm_object_unwire: length is not a multiple of PAGE_SIZE"));
	/* The wired count of a fictitious page never changes. */
	if ((object->flags & OBJ_FICTITIOUS) != 0)
		return;
	pindex = OFF_TO_IDX(offset);
	end_pindex = pindex + atop(length);
	vm_page_iter_init(&pages, object);
again:
	locked_depth = 1;
	VM_OBJECT_RLOCK(object);
	m = vm_radix_iter_lookup_ge(&pages, pindex);
	while (pindex < end_pindex) {
		if (m == NULL || pindex < m->pindex) {
			/*
			 * The first object in the shadow chain doesn't
			 * contain a page at the current index.  Therefore,
			 * the page must exist in a backing object.
			 */
			tobject = object;
			tpindex = pindex;
			depth = 0;
			do {
				tpindex +=
				    OFF_TO_IDX(tobject->backing_object_offset);
				tobject = tobject->backing_object;
				KASSERT(tobject != NULL,
				    ("vm_object_unwire: missing page"));
				if ((tobject->flags & OBJ_FICTITIOUS) != 0)
					goto next_page;
				depth++;
				if (depth == locked_depth) {
					locked_depth++;
					VM_OBJECT_RLOCK(tobject);
				}
			} while ((tm = vm_page_lookup(tobject, tpindex)) ==
			    NULL);
		} else {
			tm = m;
			m = vm_radix_iter_step(&pages);
		}
		if (vm_page_trysbusy(tm) == 0) {
			for (tobject = object; locked_depth >= 1;
			    locked_depth--) {
				t1object = tobject->backing_object;
				if (tm->object != tobject)
					VM_OBJECT_RUNLOCK(tobject);
				tobject = t1object;
			}
			tobject = tm->object;
			if (!vm_page_busy_sleep(tm, "unwbo",
			    VM_ALLOC_IGN_SBUSY))
				VM_OBJECT_RUNLOCK(tobject);
			pctrie_iter_reset(&pages);
			goto again;
		}
		vm_page_unwire(tm, queue);
		vm_page_sunbusy(tm);
next_page:
		pindex++;
	}
	/* Release the accumulated object locks. */
	for (tobject = object; locked_depth >= 1; locked_depth--) {
		t1object = tobject->backing_object;
		VM_OBJECT_RUNLOCK(tobject);
		tobject = t1object;
	}
}
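
/*
 * A minimal usage sketch (illustration only): releasing the wiring on a
 * page-aligned range that was previously wired, returning the pages to the
 * active queue.  The example_unwire_range() name is hypothetical.
 */
#if 0
static void
example_unwire_range(vm_object_t object, vm_ooffset_t offset, vm_size_t len)
{

	/*
	 * "offset" and "len" must be page aligned, every offset in the range
	 * must resolve to a wired page somewhere in the shadow chain, and
	 * the object must be unlocked; the routine takes its own read locks.
	 */
	vm_object_unwire(object, offset, len, PQ_ACTIVE);
}
#endif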

/*
 * Return the vnode for the given object, or NULL if none exists.
 * For tmpfs objects, the function may return NULL if there is
 * no vnode allocated at the time of the call.
 */
struct vnode *
vm_object_vnode(vm_object_t object)
{
	struct vnode *vp;

	VM_OBJECT_ASSERT_LOCKED(object);
	vm_pager_getvp(object, &vp, NULL);
	return (vp);
}

/*
 * Busy the vm object.  This prevents new pages belonging to the object from
 * becoming busy.  Existing pages persist as busy.  Callers are responsible
 * for checking page state before proceeding.
 */
void
vm_object_busy(vm_object_t obj)
{

	VM_OBJECT_ASSERT_LOCKED(obj);

	blockcount_acquire(&obj->busy, 1);
	/* The fence is required to order loads of page busy. */
	atomic_thread_fence_acq_rel();
}

void
vm_object_unbusy(vm_object_t obj)
{

	blockcount_release(&obj->busy, 1);
}

void
vm_object_busy_wait(vm_object_t obj, const char *wmesg)
{

	VM_OBJECT_ASSERT_UNLOCKED(obj);

	(void)blockcount_sleep(&obj->busy, NULL, wmesg, PVM);
}
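
/*
 * A minimal usage sketch (illustration only) of the busy protocol: busy the
 * object under its lock, drop the lock for a lockless inspection pass, then
 * release the busy count.  The example_busy_scan() name is hypothetical.
 */
#if 0
static void
example_busy_scan(vm_object_t obj)
{

	VM_OBJECT_RLOCK(obj);
	vm_object_busy(obj);
	VM_OBJECT_RUNLOCK(obj);

	/*
	 * While the object is busied, no page belonging to it can newly
	 * become busy, so unlocked checks of page state are stable in that
	 * respect.
	 */

	vm_object_unbusy(obj);
}
#endif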

/*
 * This function aims to determine if the object is mapped,
 * specifically, if it is referenced by a vm_map_entry.  Because
 * objects occasionally acquire transient references that do not
 * represent a mapping, the method used here is inexact.  However, it
 * has very low overhead and is good enough for the advisory
 * vm.vmtotal sysctl.
 */
bool
vm_object_is_active(vm_object_t obj)
{

	return (obj->ref_count > atomic_load_int(&obj->shadow_count));
}

static int
vm_object_list_handler(struct sysctl_req *req, bool swap_only)
{
	struct pctrie_iter pages;
	struct kinfo_vmobject *kvo;
	char *fullpath, *freepath;
	struct vnode *vp;
	struct vattr va;
	vm_object_t obj;
	vm_page_t m;
	u_long sp;
	int count, error;
	key_t key;
	unsigned short seq;
	bool want_path;

	if (req->oldptr == NULL) {
		/*
		 * If an old buffer has not been provided, generate an
		 * estimate of the space needed for a subsequent call.
		 */
		mtx_lock(&vm_object_list_mtx);
		count = 0;
		TAILQ_FOREACH(obj, &vm_object_list, object_list) {
			if (obj->type == OBJT_DEAD)
				continue;
			count++;
		}
		mtx_unlock(&vm_object_list_mtx);
		return (SYSCTL_OUT(req, NULL, sizeof(struct kinfo_vmobject) *
		    count * 11 / 10));
	}

	want_path = !(swap_only || jailed(curthread->td_ucred));
	kvo = malloc(sizeof(*kvo), M_TEMP, M_WAITOK | M_ZERO);
	error = 0;

	/*
	 * VM objects are type stable and are never removed from the
	 * list once added.  This allows us to safely read obj->object_list
	 * after reacquiring the VM object lock.
	 */
	mtx_lock(&vm_object_list_mtx);
	TAILQ_FOREACH(obj, &vm_object_list, object_list) {
		if (obj->type == OBJT_DEAD ||
		    (swap_only && (obj->flags & (OBJ_ANON | OBJ_SWAP)) == 0))
			continue;
		VM_OBJECT_RLOCK(obj);
		if (obj->type == OBJT_DEAD ||
		    (swap_only && (obj->flags & (OBJ_ANON | OBJ_SWAP)) == 0)) {
			VM_OBJECT_RUNLOCK(obj);
			continue;
		}
		mtx_unlock(&vm_object_list_mtx);

		memset(kvo, 0, sizeof(*kvo));
		kvo->kvo_size = ptoa(obj->size);
		kvo->kvo_resident = obj->resident_page_count;
		kvo->kvo_ref_count = obj->ref_count;
		kvo->kvo_shadow_count = atomic_load_int(&obj->shadow_count);
		kvo->kvo_memattr = obj->memattr;
		if (!swap_only) {
			vm_page_iter_init(&pages, obj);
			VM_RADIX_FOREACH(m, &pages) {
				/*
				 * A page may belong to the object but be
				 * dequeued and set to PQ_NONE while the
				 * object lock is not held.  This makes the
				 * reads of m->queue below racy, and we do not
				 * count pages set to PQ_NONE.  However, this
				 * sysctl is only meant to give an
				 * approximation of the system anyway.
				 */
				if (vm_page_active(m))
					kvo->kvo_active++;
				else if (vm_page_inactive(m))
					kvo->kvo_inactive++;
				else if (vm_page_in_laundry(m))
					kvo->kvo_laundry++;

				if (vm_page_wired(m))
					kvo->kvo_wired++;
			}
		}

		freepath = NULL;
		fullpath = "";
		vp = NULL;
		kvo->kvo_type = vm_object_kvme_type(obj, want_path ? &vp :
		    NULL);
		if (vp != NULL) {
			vref(vp);
		} else if ((obj->flags & OBJ_ANON) != 0) {
			MPASS(kvo->kvo_type == KVME_TYPE_SWAP);
			kvo->kvo_me = (uintptr_t)obj;
			/* tmpfs objs are reported as vnodes */
			kvo->kvo_backing_obj = (uintptr_t)obj->backing_object;
			sp = swap_pager_swapped_pages(obj);
			kvo->kvo_swapped = sp > UINT32_MAX ? UINT32_MAX : sp;
		}
		if (obj->type == OBJT_DEVICE || obj->type == OBJT_MGTDEVICE) {
			cdev_pager_get_path(obj, kvo->kvo_path,
			    sizeof(kvo->kvo_path));
		}
		VM_OBJECT_RUNLOCK(obj);
		if ((obj->flags & OBJ_SYSVSHM) != 0) {
			kvo->kvo_flags |= KVMO_FLAG_SYSVSHM;
			shmobjinfo(obj, &key, &seq);
			kvo->kvo_vn_fileid = key;
			kvo->kvo_vn_fsid_freebsd11 = seq;
		}
		if ((obj->flags & OBJ_POSIXSHM) != 0) {
			kvo->kvo_flags |= KVMO_FLAG_POSIXSHM;
			shm_get_path(obj, kvo->kvo_path,
			    sizeof(kvo->kvo_path));
		}
		if (vp != NULL) {
			vn_fullpath(vp, &fullpath, &freepath);
			vn_lock(vp, LK_SHARED | LK_RETRY);
			if (VOP_GETATTR(vp, &va, curthread->td_ucred) == 0) {
				kvo->kvo_vn_fileid = va.va_fileid;
				kvo->kvo_vn_fsid = va.va_fsid;
				kvo->kvo_vn_fsid_freebsd11 = va.va_fsid;
				/* truncate */
			}
			vput(vp);
			strlcpy(kvo->kvo_path, fullpath, sizeof(kvo->kvo_path));
			free(freepath, M_TEMP);
		}

		/* Pack record size down */
		kvo->kvo_structsize = offsetof(struct kinfo_vmobject, kvo_path)
		    + strlen(kvo->kvo_path) + 1;
		kvo->kvo_structsize = roundup(kvo->kvo_structsize,
		    sizeof(uint64_t));
		error = SYSCTL_OUT(req, kvo, kvo->kvo_structsize);
		maybe_yield();
		mtx_lock(&vm_object_list_mtx);
		if (error)
			break;
	}
	mtx_unlock(&vm_object_list_mtx);
	free(kvo, M_TEMP);
	return (error);
}

static int
sysctl_vm_object_list(SYSCTL_HANDLER_ARGS)
{
	return (vm_object_list_handler(req, false));
}

SYSCTL_PROC(_vm, OID_AUTO, objects, CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_SKIP |
    CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_object_list, "S,kinfo_vmobject",
    "List of VM objects");

static int
sysctl_vm_object_list_swap(SYSCTL_HANDLER_ARGS)
{
	return (vm_object_list_handler(req, true));
}

/*
 * This sysctl returns the list of anonymous or swap objects.  The intent
 * is to provide a stripped, optimized list that is useful for analyzing
 * swap use.  Since non-swap (default) objects technically participate in
 * shadow chains and are converted to the swap type by the swap pager as
 * needed, they must be reported as well.
 */
SYSCTL_PROC(_vm, OID_AUTO, swap_objects,
    CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_SKIP | CTLFLAG_MPSAFE, NULL, 0,
    sysctl_vm_object_list_swap, "S,kinfo_vmobject",
    "List of swap VM objects");

#include "opt_ddb.h"
#ifdef DDB
#include <sys/kernel.h>

#include <sys/cons.h>

#include <ddb/ddb.h>

static int
_vm_object_in_map(vm_map_t map, vm_object_t object, vm_map_entry_t entry)
{
	vm_map_t tmpm;
	vm_map_entry_t tmpe;
	vm_object_t obj;

	if (map == 0)
		return 0;

	if (entry == 0) {
		VM_MAP_ENTRY_FOREACH(tmpe, map) {
			if (_vm_object_in_map(map, object, tmpe)) {
				return 1;
			}
		}
	} else if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
		tmpm = entry->object.sub_map;
		VM_MAP_ENTRY_FOREACH(tmpe, tmpm) {
			if (_vm_object_in_map(tmpm, object, tmpe)) {
				return 1;
			}
		}
	} else if ((obj = entry->object.vm_object) != NULL) {
		for (; obj; obj = obj->backing_object)
			if (obj == object) {
				return 1;
			}
	}
	return 0;
}

static int
vm_object_in_map(vm_object_t object)
{
	struct proc *p;

	/* sx_slock(&allproc_lock); */
	FOREACH_PROC_IN_SYSTEM(p) {
		if (!p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */)
			continue;
		if (_vm_object_in_map(&p->p_vmspace->vm_map, object, 0)) {
			/* sx_sunlock(&allproc_lock); */
			return 1;
		}
	}
	/* sx_sunlock(&allproc_lock); */
	if (_vm_object_in_map(kernel_map, object, 0))
		return 1;
	return 0;
}

DB_SHOW_COMMAND_FLAGS(vmochk, vm_object_check, DB_CMD_MEMSAFE)
{
	vm_object_t object;

	/*
	 * make sure that internal objs are in a map somewhere
	 * and none have zero ref counts.
	 */
	TAILQ_FOREACH(object, &vm_object_list, object_list) {
		if ((object->flags & OBJ_ANON) != 0) {
			if (object->ref_count == 0) {
				db_printf(
			"vmochk: internal obj has zero ref count: %lu\n",
				    (u_long)object->size);
			}
			if (!vm_object_in_map(object)) {
				db_printf(
			"vmochk: internal obj is not in a map: "
			"ref: %d, size: %lu: 0x%lx, backing_object: %p\n",
				    object->ref_count, (u_long)object->size,
				    (u_long)object->size,
				    (void *)object->backing_object);
			}
		}
		if (db_pager_quit)
			return;
	}
}

/*
 *	vm_object_print:	[ debug ]
 */
DB_SHOW_COMMAND(object, vm_object_print_static)
{
	struct pctrie_iter pages;
	/* XXX convert args. */
	vm_object_t object = (vm_object_t)addr;
	boolean_t full = have_addr;

	vm_page_t p;

	/* XXX count is an (unused) arg.  Avoid shadowing it. */
#define	count	was_count

	int count;

	if (object == NULL)
		return;

	db_iprintf("Object %p: type=%d, size=0x%jx, res=%d, ref=%d, flags=0x%x",
	    object, (int)object->type, (uintmax_t)object->size,
	    object->resident_page_count, object->ref_count, object->flags);
	db_iprintf(" ruid %d\n",
	    object->cred ? object->cred->cr_ruid : -1);
	db_iprintf(" sref=%d, backing_object(%d)=(%p)+0x%jx\n",
	    atomic_load_int(&object->shadow_count),
	    object->backing_object ? object->backing_object->ref_count : 0,
	    object->backing_object, (uintmax_t)object->backing_object_offset);

	if (!full)
		return;

	db_indent += 2;
	count = 0;
	vm_page_iter_init(&pages, object);
	VM_RADIX_FOREACH(p, &pages) {
		if (count == 0)
			db_iprintf("memory:=");
		else if (count == 6) {
			db_printf("\n");
			db_iprintf(" ...");
			count = 0;
		} else
			db_printf(",");
		count++;

		db_printf("(off=0x%jx,page=0x%jx)",
		    (uintmax_t)p->pindex, (uintmax_t)VM_PAGE_TO_PHYS(p));

		if (db_pager_quit)
			break;
	}
	if (count != 0)
		db_printf("\n");
	db_indent -= 2;
}

/* XXX. */
#undef count

/* XXX need this non-static entry for calling from vm_map_print. */
void
vm_object_print(
	/* db_expr_t */ long addr,
	boolean_t have_addr,
	/* db_expr_t */ long count,
	char *modif)
{
	vm_object_print_static(addr, have_addr, count, modif);
}

DB_SHOW_COMMAND_FLAGS(vmopag, vm_object_print_pages, DB_CMD_MEMSAFE)
{
	struct pctrie_iter pages;
	vm_object_t object;
	vm_page_t m, start_m;
	int rcount;

	TAILQ_FOREACH(object, &vm_object_list, object_list) {
		db_printf("new object: %p\n", (void *)object);
		if (db_pager_quit)
			return;
		start_m = NULL;
		vm_page_iter_init(&pages, object);
		VM_RADIX_FOREACH(m, &pages) {
			if (start_m == NULL) {
				start_m = m;
				rcount = 0;
			} else if (start_m->pindex + rcount != m->pindex ||
			    VM_PAGE_TO_PHYS(start_m) + ptoa(rcount) !=
			    VM_PAGE_TO_PHYS(m)) {
				db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
				    (long)start_m->pindex, rcount,
				    (long)VM_PAGE_TO_PHYS(start_m));
				if (db_pager_quit)
					return;
				start_m = m;
				rcount = 0;
			}
			rcount++;
		}
		if (start_m != NULL) {
			db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
			    (long)start_m->pindex, rcount,
			    (long)VM_PAGE_TO_PHYS(start_m));
			if (db_pager_quit)
				return;
		}
	}
}
#endif /* DDB */