Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/usr.sbin/bhyve/mevent.c
105240 views
1
/*-
2
* SPDX-License-Identifier: BSD-2-Clause
3
*
4
* Copyright (c) 2011 NetApp, Inc.
5
* All rights reserved.
6
*
7
* Redistribution and use in source and binary forms, with or without
8
* modification, are permitted provided that the following conditions
9
* are met:
10
* 1. Redistributions of source code must retain the above copyright
11
* notice, this list of conditions and the following disclaimer.
12
* 2. Redistributions in binary form must reproduce the above copyright
13
* notice, this list of conditions and the following disclaimer in the
14
* documentation and/or other materials provided with the distribution.
15
*
16
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26
* SUCH DAMAGE.
27
*/
28
29
/*
30
* Micro event library for FreeBSD, designed for a single i/o thread
31
* using kqueue, and having events be persistent by default.
32
*/
33
34
#include <sys/cdefs.h>
35
#include <assert.h>
36
#ifndef WITHOUT_CAPSICUM
37
#include <capsicum_helpers.h>
38
#endif
39
#include <err.h>
40
#include <errno.h>
41
#include <stdbool.h>
42
#include <stdlib.h>
43
#include <stdio.h>
44
#include <string.h>
45
#include <sysexits.h>
46
#include <unistd.h>
47
48
#include <sys/types.h>
49
#ifndef WITHOUT_CAPSICUM
50
#include <sys/capsicum.h>
51
#endif
52
#include <sys/event.h>
53
#include <sys/time.h>
54
55
#include <pthread.h>
56
#include <pthread_np.h>
57
58
#include "bhyverun.h"
59
#include "mevent.h"
60
61
/*
 * Size of the kevent change/event arrays used by the dispatch loop and of
 * the buffer used to read the wakeup pipe.
 */
#define	MEVENT_MAX	64

/* The kqueue descriptor, created once by mevent_init(). */
static int mfd;
/* Wakeup pipe: [0] is watched by the i/o thread, [1] is written by others. */
static int mevent_pipefd[2];
/* Thread running mevent_dispatch(); recorded when the loop starts. */
static pthread_t mevent_tid;
/* Guards the one-time call to mevent_init(). */
static pthread_once_t mevent_once = PTHREAD_ONCE_INIT;
/* Next kqueue identifier to hand out to an EVF_TIMER event. */
static int mevent_timid = 43;
/* Serializes access to the event lists and the entries linked on them. */
static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER;
69
70
struct mevent {
71
void (*me_func)(int, enum ev_type, void *);
72
#define me_msecs me_fd
73
int me_fd;
74
int me_timid;
75
enum ev_type me_type;
76
void *me_param;
77
int me_cq;
78
int me_state; /* Desired kevent flags. */
79
int me_closefd;
80
int me_fflags;
81
LIST_ENTRY(mevent) me_list;
82
};
83
84
/* Kinds of change mevent_update() can apply to a registered event. */
enum mevent_update_type {
	UPDATE_ENABLE,		/* Set EV_ENABLE, clear EV_DISABLE. */
	UPDATE_DISABLE,		/* Set EV_DISABLE, clear EV_ENABLE. */
	UPDATE_TIMER,		/* Re-add a timer with a new value. */
};
89
90
/* Events that are registered and have no change waiting to be applied. */
static LIST_HEAD(listhead, mevent) global_head;
/* Events whose state change has yet to be pushed to the kqueue. */
static struct listhead change_head;
91
92
static void
93
mevent_qlock(void)
94
{
95
pthread_mutex_lock(&mevent_lmutex);
96
}
97
98
static void
99
mevent_qunlock(void)
100
{
101
pthread_mutex_unlock(&mevent_lmutex);
102
}
103
104
static void
105
mevent_pipe_read(int fd, enum ev_type type __unused, void *param __unused)
106
{
107
char buf[MEVENT_MAX];
108
int status;
109
110
/*
111
* Drain the pipe read side. The fd is non-blocking so this is
112
* safe to do.
113
*/
114
do {
115
status = read(fd, buf, sizeof(buf));
116
} while (status == MEVENT_MAX);
117
}
118
119
static void
120
mevent_notify(void)
121
{
122
char c = '\0';
123
124
/*
125
* If calling from outside the i/o thread, write a byte on the
126
* pipe to force the i/o thread to exit the blocking kevent call.
127
*/
128
if (mevent_pipefd[1] != 0 && pthread_self() != mevent_tid) {
129
write(mevent_pipefd[1], &c, 1);
130
}
131
}
132
133
static void
134
mevent_init(void)
135
{
136
#ifndef WITHOUT_CAPSICUM
137
cap_rights_t rights;
138
#endif
139
140
mfd = kqueue();
141
assert(mfd > 0);
142
143
#ifndef WITHOUT_CAPSICUM
144
cap_rights_init(&rights, CAP_KQUEUE);
145
if (caph_rights_limit(mfd, &rights) == -1)
146
errx(EX_OSERR, "Unable to apply rights for sandbox");
147
#endif
148
149
LIST_INIT(&change_head);
150
LIST_INIT(&global_head);
151
}
152
153
static int
154
mevent_kq_filter(struct mevent *mevp)
155
{
156
int retval;
157
158
retval = 0;
159
160
if (mevp->me_type == EVF_READ)
161
retval = EVFILT_READ;
162
163
if (mevp->me_type == EVF_WRITE)
164
retval = EVFILT_WRITE;
165
166
if (mevp->me_type == EVF_TIMER)
167
retval = EVFILT_TIMER;
168
169
if (mevp->me_type == EVF_SIGNAL)
170
retval = EVFILT_SIGNAL;
171
172
if (mevp->me_type == EVF_VNODE)
173
retval = EVFILT_VNODE;
174
175
return (retval);
176
}
177
178
static int
179
mevent_kq_flags(struct mevent *mevp)
180
{
181
int retval;
182
183
retval = mevp->me_state;
184
185
if (mevp->me_type == EVF_VNODE)
186
retval |= EV_CLEAR;
187
188
return (retval);
189
}
190
191
static int
192
mevent_kq_fflags(struct mevent *mevp)
193
{
194
int retval;
195
196
retval = 0;
197
198
switch (mevp->me_type) {
199
case EVF_VNODE:
200
if ((mevp->me_fflags & EVFF_ATTRIB) != 0)
201
retval |= NOTE_ATTRIB;
202
break;
203
case EVF_READ:
204
case EVF_WRITE:
205
case EVF_TIMER:
206
case EVF_SIGNAL:
207
break;
208
}
209
210
return (retval);
211
}
212
213
/*
 * Fill in the kevent describing mevp's desired state.
 *
 * Timers are identified by their timer id and carry the timer value as
 * data; every other event is identified by its descriptor and carries no
 * data.  mevp itself is stored as the user data so the event can be found
 * again when the kqueue reports it.
 *
 * EV_SET is used instead of member-by-member assignment so that every
 * member of *kev is initialized, including any the structure has beyond
 * the six named here; callers pass in uninitialized storage.
 */
static void
mevent_populate(struct mevent *mevp, struct kevent *kev)
{
	int data, ident;

	if (mevp->me_type == EVF_TIMER) {
		ident = mevp->me_timid;
		data = mevp->me_msecs;
	} else {
		ident = mevp->me_fd;
		data = 0;
	}

	EV_SET(kev, ident, mevent_kq_filter(mevp), mevent_kq_flags(mevp),
	    mevent_kq_fflags(mevp), data, mevp);
}
228
229
static int
230
mevent_build(struct kevent *kev)
231
{
232
struct mevent *mevp, *tmpp;
233
int i;
234
235
i = 0;
236
237
mevent_qlock();
238
239
LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) {
240
if (mevp->me_closefd) {
241
/*
242
* A close of the file descriptor will remove the
243
* event
244
*/
245
close(mevp->me_fd);
246
} else {
247
mevent_populate(mevp, &kev[i]);
248
i++;
249
}
250
251
mevp->me_cq = 0;
252
LIST_REMOVE(mevp, me_list);
253
254
if (mevp->me_state & EV_DELETE) {
255
free(mevp);
256
} else {
257
LIST_INSERT_HEAD(&global_head, mevp, me_list);
258
}
259
260
assert(i < MEVENT_MAX);
261
}
262
263
mevent_qunlock();
264
265
return (i);
266
}
267
268
static void
269
mevent_handle(struct kevent *kev, int numev)
270
{
271
struct mevent *mevp;
272
int i;
273
274
for (i = 0; i < numev; i++) {
275
mevp = kev[i].udata;
276
277
/* XXX check for EV_ERROR ? */
278
279
(*mevp->me_func)(mevp->me_fd, mevp->me_type, mevp->me_param);
280
}
281
}
282
283
static struct mevent *
284
mevent_add_state(int tfd, enum ev_type type,
285
void (*func)(int, enum ev_type, void *), void *param,
286
int state, int fflags)
287
{
288
struct kevent kev;
289
struct mevent *lp, *mevp;
290
int ret;
291
292
if (tfd < 0 || func == NULL) {
293
return (NULL);
294
}
295
296
mevp = NULL;
297
298
pthread_once(&mevent_once, mevent_init);
299
300
mevent_qlock();
301
302
/*
303
* Verify that the fd/type tuple is not present in any list
304
*/
305
LIST_FOREACH(lp, &global_head, me_list) {
306
if (type != EVF_TIMER && lp->me_fd == tfd &&
307
lp->me_type == type) {
308
goto exit;
309
}
310
}
311
312
LIST_FOREACH(lp, &change_head, me_list) {
313
if (type != EVF_TIMER && lp->me_fd == tfd &&
314
lp->me_type == type) {
315
goto exit;
316
}
317
}
318
319
/*
320
* Allocate an entry and populate it.
321
*/
322
mevp = calloc(1, sizeof(struct mevent));
323
if (mevp == NULL) {
324
goto exit;
325
}
326
327
if (type == EVF_TIMER) {
328
mevp->me_msecs = tfd;
329
mevp->me_timid = mevent_timid++;
330
} else
331
mevp->me_fd = tfd;
332
mevp->me_type = type;
333
mevp->me_func = func;
334
mevp->me_param = param;
335
mevp->me_state = state;
336
mevp->me_fflags = fflags;
337
338
/*
339
* Try to add the event. If this fails, report the failure to
340
* the caller.
341
*/
342
mevent_populate(mevp, &kev);
343
ret = kevent(mfd, &kev, 1, NULL, 0, NULL);
344
if (ret == -1) {
345
free(mevp);
346
mevp = NULL;
347
goto exit;
348
}
349
350
mevp->me_state &= ~EV_ADD;
351
LIST_INSERT_HEAD(&global_head, mevp, me_list);
352
353
exit:
354
mevent_qunlock();
355
356
return (mevp);
357
}
358
359
struct mevent *
360
mevent_add(int tfd, enum ev_type type,
361
void (*func)(int, enum ev_type, void *), void *param)
362
{
363
364
return (mevent_add_state(tfd, type, func, param, EV_ADD, 0));
365
}
366
367
struct mevent *
368
mevent_add_flags(int tfd, enum ev_type type, int fflags,
369
void (*func)(int, enum ev_type, void *), void *param)
370
{
371
372
return (mevent_add_state(tfd, type, func, param, EV_ADD, fflags));
373
}
374
375
struct mevent *
376
mevent_add_disabled(int tfd, enum ev_type type,
377
void (*func)(int, enum ev_type, void *), void *param)
378
{
379
380
return (mevent_add_state(tfd, type, func, param, EV_ADD | EV_DISABLE, 0));
381
}
382
383
static int
384
mevent_update(struct mevent *evp, enum mevent_update_type type, int msecs)
385
{
386
int newstate;
387
388
mevent_qlock();
389
390
/*
391
* It's not possible to update a deleted event
392
*/
393
assert((evp->me_state & EV_DELETE) == 0);
394
395
newstate = evp->me_state;
396
if (type == UPDATE_ENABLE) {
397
newstate |= EV_ENABLE;
398
newstate &= ~EV_DISABLE;
399
} else if (type == UPDATE_DISABLE) {
400
newstate |= EV_DISABLE;
401
newstate &= ~EV_ENABLE;
402
} else {
403
assert(type == UPDATE_TIMER);
404
assert(evp->me_type == EVF_TIMER);
405
newstate |= EV_ADD;
406
evp->me_msecs = msecs;
407
}
408
409
/*
410
* No update needed if enable/disable had no effect
411
*/
412
if (evp->me_state != newstate || type == UPDATE_TIMER) {
413
evp->me_state = newstate;
414
415
/*
416
* Place the entry onto the changed list if not
417
* already there.
418
*/
419
if (evp->me_cq == 0) {
420
evp->me_cq = 1;
421
LIST_REMOVE(evp, me_list);
422
LIST_INSERT_HEAD(&change_head, evp, me_list);
423
mevent_notify();
424
}
425
}
426
427
mevent_qunlock();
428
429
return (0);
430
}
431
432
int
433
mevent_enable(struct mevent *evp)
434
{
435
return (mevent_update(evp, UPDATE_ENABLE, -1));
436
}
437
438
int
439
mevent_disable(struct mevent *evp)
440
{
441
return (mevent_update(evp, UPDATE_DISABLE, -1));
442
}
443
444
int
445
mevent_timer_update(struct mevent *evp, int msecs)
446
{
447
return (mevent_update(evp, UPDATE_TIMER, msecs));
448
}
449
450
static int
451
mevent_delete_event(struct mevent *evp, int closefd)
452
{
453
mevent_qlock();
454
455
/*
456
* Place the entry onto the changed list if not already there, and
457
* mark as to be deleted.
458
*/
459
if (evp->me_cq == 0) {
460
evp->me_cq = 1;
461
LIST_REMOVE(evp, me_list);
462
LIST_INSERT_HEAD(&change_head, evp, me_list);
463
mevent_notify();
464
}
465
evp->me_state = EV_DELETE;
466
467
if (closefd)
468
evp->me_closefd = 1;
469
470
mevent_qunlock();
471
472
return (0);
473
}
474
475
/*
 * Queue deletion of an event, leaving its descriptor open.
 * Always returns 0.
 */
int
mevent_delete(struct mevent *evp)
{
	int rc;

	rc = mevent_delete_event(evp, 0);
	return (rc);
}
481
482
/*
 * Queue deletion of an event and have its descriptor closed as well.
 * Always returns 0.
 */
int
mevent_delete_close(struct mevent *evp)
{
	int rc;

	rc = mevent_delete_event(evp, 1);
	return (rc);
}
488
489
static void
490
mevent_set_name(void)
491
{
492
493
pthread_set_name_np(mevent_tid, "mevent");
494
}
495
496
void
497
mevent_dispatch(void)
498
{
499
struct kevent changelist[MEVENT_MAX];
500
struct kevent eventlist[MEVENT_MAX];
501
struct mevent *pipev;
502
int numev;
503
int ret;
504
#ifndef WITHOUT_CAPSICUM
505
cap_rights_t rights;
506
#endif
507
508
mevent_tid = pthread_self();
509
mevent_set_name();
510
511
pthread_once(&mevent_once, mevent_init);
512
513
/*
514
* Open the pipe that will be used for other threads to force
515
* the blocking kqueue call to exit by writing to it. Set the
516
* descriptor to non-blocking.
517
*/
518
ret = pipe(mevent_pipefd);
519
if (ret < 0) {
520
perror("pipe");
521
exit(BHYVE_EXIT_ERROR);
522
}
523
524
#ifndef WITHOUT_CAPSICUM
525
cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
526
if (caph_rights_limit(mevent_pipefd[0], &rights) == -1)
527
errx(EX_OSERR, "Unable to apply rights for sandbox");
528
if (caph_rights_limit(mevent_pipefd[1], &rights) == -1)
529
errx(EX_OSERR, "Unable to apply rights for sandbox");
530
#endif
531
532
/*
533
* Add internal event handler for the pipe write fd
534
*/
535
pipev = mevent_add(mevent_pipefd[0], EVF_READ, mevent_pipe_read, NULL);
536
assert(pipev != NULL);
537
538
for (;;) {
539
/*
540
* Build changelist if required.
541
* XXX the changelist can be put into the blocking call
542
* to eliminate the extra syscall. Currently better for
543
* debug.
544
*/
545
numev = mevent_build(changelist);
546
if (numev) {
547
ret = kevent(mfd, changelist, numev, NULL, 0, NULL);
548
if (ret == -1) {
549
perror("Error return from kevent change");
550
}
551
}
552
553
/*
554
* Block awaiting events
555
*/
556
ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL);
557
if (ret == -1 && errno != EINTR) {
558
perror("Error return from kevent monitor");
559
}
560
561
/*
562
* Handle reported events
563
*/
564
mevent_handle(eventlist, ret);
565
}
566
}
567
568