GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2016, Joyent, Inc. All rights reserved.
 * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
 */

/*
 * DTrace - Dynamic Tracing for Solaris
 *
 * This is the implementation of the Solaris Dynamic Tracing framework
 * (DTrace). The user-visible interface to DTrace is described at length in
 * the "Solaris Dynamic Tracing Guide". The interfaces between the libdtrace
 * library, the in-kernel DTrace framework, and the DTrace providers are
 * described in the block comments in the <sys/dtrace.h> header file. The
 * internal architecture of DTrace is described in the block comments in the
 * <sys/dtrace_impl.h> header file. The comments contained within the DTrace
 * implementation very much assume mastery of all of these sources; if one has
 * an unanswered question about the implementation, one should consult them
 * first.
 *
 * The functions here are ordered roughly as follows:
 *
 *   - Probe context functions
 *   - Probe hashing functions
 *   - Non-probe context utility functions
 *   - Matching functions
 *   - Provider-to-Framework API functions
 *   - Probe management functions
 *   - DIF object functions
 *   - Format functions
 *   - Predicate functions
 *   - ECB functions
 *   - Buffer functions
 *   - Enabling functions
 *   - DOF functions
 *   - Anonymous enabling functions
 *   - Consumer state functions
 *   - Helper functions
 *   - Hook functions
 *   - Driver cookbook functions
 *
 * Each group of functions begins with a block comment labelled the "DTrace
 * [Group] Functions", allowing one to find each block by searching forward
 * on capital-f functions.
 */
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/types.h>
#ifndef illumos
#include <sys/time.h>
#endif
#include <sys/stat.h>
#include <sys/conf.h>
#include <sys/systm.h>
#include <sys/endian.h>
#ifdef illumos
#include <sys/ddi.h>
#include <sys/sunddi.h>
#endif
#include <sys/cpuvar.h>
#include <sys/kmem.h>
#ifdef illumos
#include <sys/strsubr.h>
#endif
#include <sys/sysmacros.h>
#include <sys/dtrace_impl.h>
#include <sys/atomic.h>
#include <sys/cmn_err.h>
#ifdef illumos
#include <sys/mutex_impl.h>
#include <sys/rwlock_impl.h>
#endif
#include <sys/ctf_api.h>
#ifdef illumos
#include <sys/panic.h>
#include <sys/priv_impl.h>
#endif
#ifdef illumos
#include <sys/cred_impl.h>
#include <sys/procfs_isa.h>
#endif
#include <sys/taskq.h>
#ifdef illumos
#include <sys/mkdev.h>
#include <sys/kdi.h>
#endif
#include <sys/zone.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include "strtolctype.h"

/* FreeBSD includes: */
#ifndef illumos
#include <sys/callout.h>
#include <sys/ctype.h>
#include <sys/eventhandler.h>
#include <sys/limits.h>
#include <sys/linker.h>
#include <sys/kdb.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/ptrace.h>
#include <sys/random.h>
#include <sys/rwlock.h>
#include <sys/sx.h>
#include <sys/sysctl.h>


#include <sys/mount.h>
#undef AT_UID
#undef AT_GID
#include <sys/vnode.h>
#include <sys/cred.h>

#include <sys/dtrace_bsd.h>

#include <netinet/in.h>

#include "dtrace_cddl.h"
#include "dtrace_debug.c"
#endif

#include "dtrace_xoroshiro128_plus.h"

/*
 * DTrace Tunable Variables
 *
 * The following variables may be tuned by adding a line to /etc/system that
 * includes both the name of the DTrace module ("dtrace") and the name of the
 * variable. For example:
 *
 *	set dtrace:dtrace_destructive_disallow = 1
 *
 * In general, the only variables that one should be tuning this way are those
 * that affect system-wide DTrace behavior, and for which the default behavior
 * is undesirable. Most of these variables are tunable on a per-consumer
 * basis using DTrace options, and need not be tuned on a system-wide basis.
 * When tuning these variables, avoid pathological values; while some attempt
 * is made to verify the integrity of these variables, they are not considered
 * part of the supported interface to DTrace, and they are therefore not
 * checked comprehensively. Further, these variables should not be tuned
 * dynamically via "mdb -kw" or other means; they should only be tuned via
 * /etc/system.
 */
int dtrace_destructive_disallow = 0;
#ifndef illumos
/* Positive logic version of dtrace_destructive_disallow for loader tunable */
int dtrace_allow_destructive = 1;
#endif
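/*
 * As a further (hypothetical) illustration of the /etc/system syntax given
 * above, any of the tunables below could be set the same way on illumos,
 * e.g.:
 *
 *	set dtrace:dtrace_helper_actions_max = 256
 *
 * (On FreeBSD, the positive-logic dtrace_allow_destructive variable above is
 * wired to a loader tunable instead; the exact knob name lives in the
 * FreeBSD glue code rather than in this file.)
 */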
dtrace_optval_t dtrace_nonroot_maxsize = (16 * 1024 * 1024);
size_t dtrace_difo_maxsize = (256 * 1024);
dtrace_optval_t dtrace_dof_maxsize = (8 * 1024 * 1024);
size_t dtrace_statvar_maxsize = (16 * 1024);
size_t dtrace_actions_max = (16 * 1024);
size_t dtrace_retain_max = 1024;
dtrace_optval_t dtrace_helper_actions_max = 128;
dtrace_optval_t dtrace_helper_providers_max = 32;
dtrace_optval_t dtrace_dstate_defsize = (1 * 1024 * 1024);
size_t dtrace_strsize_default = 256;
dtrace_optval_t dtrace_cleanrate_default = 9900990;		/* 101 hz */
dtrace_optval_t dtrace_cleanrate_min = 200000;			/* 5000 hz */
dtrace_optval_t dtrace_cleanrate_max = (uint64_t)60 * NANOSEC;	/* 1/minute */
dtrace_optval_t dtrace_aggrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t dtrace_statusrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t dtrace_statusrate_max = (hrtime_t)10 * NANOSEC;	/* 6/minute */
dtrace_optval_t dtrace_switchrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t dtrace_nspec_default = 1;
dtrace_optval_t dtrace_specsize_default = 32 * 1024;
dtrace_optval_t dtrace_stackframes_default = 20;
dtrace_optval_t dtrace_ustackframes_default = 20;
dtrace_optval_t dtrace_jstackframes_default = 50;
dtrace_optval_t dtrace_jstackstrsize_default = 512;
int dtrace_msgdsize_max = 128;
hrtime_t dtrace_chill_max = MSEC2NSEC(500);			/* 500 ms */
hrtime_t dtrace_chill_interval = NANOSEC;			/* 1000 ms */
int dtrace_devdepth_max = 32;
int dtrace_err_verbose;
hrtime_t dtrace_deadman_interval = NANOSEC;
hrtime_t dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC;
hrtime_t dtrace_deadman_user = (hrtime_t)30 * NANOSEC;
hrtime_t dtrace_unregister_defunct_reap = (hrtime_t)60 * NANOSEC;
#ifndef illumos
int dtrace_memstr_max = 4096;
int dtrace_bufsize_max_frac = 128;
#endif

/*
 * DTrace External Variables
 *
 * As dtrace(7D) is a kernel module, any DTrace variables are obviously
 * available to DTrace consumers via the backtick (`) syntax. One of these,
 * dtrace_zero, is made deliberately so: it is provided as a source of
 * well-known, zero-filled memory. While this variable is not documented,
 * it is used by some translators as an implementation detail.
 */
const char dtrace_zero[256] = { 0 };	/* zero-filled memory */

/*
 * DTrace Internal Variables
 */
#ifdef illumos
static dev_info_t *dtrace_devi;		/* device info */
#endif
#ifdef illumos
static vmem_t *dtrace_arena;		/* probe ID arena */
static vmem_t *dtrace_minor;		/* minor number arena */
#else
static taskq_t *dtrace_taskq;		/* task queue */
static struct unrhdr *dtrace_arena;	/* Probe ID number. */
#endif
static dtrace_probe_t **dtrace_probes;	/* array of all probes */
static int dtrace_nprobes;		/* number of probes */
static dtrace_provider_t *dtrace_provider;	/* provider list */
static dtrace_meta_t *dtrace_meta_pid;	/* user-land meta provider */
static int dtrace_opens;		/* number of opens */
static int dtrace_helpers;		/* number of helpers */
static int dtrace_getf;			/* number of unpriv getf()s */
#ifdef illumos
static void *dtrace_softstate;		/* softstate pointer */
#endif
static dtrace_hash_t *dtrace_bymod;	/* probes hashed by module */
static dtrace_hash_t *dtrace_byfunc;	/* probes hashed by function */
static dtrace_hash_t *dtrace_byname;	/* probes hashed by name */
static dtrace_toxrange_t *dtrace_toxrange;	/* toxic range array */
static int dtrace_toxranges;		/* number of toxic ranges */
static int dtrace_toxranges_max;	/* size of toxic range array */
static dtrace_anon_t dtrace_anon;	/* anonymous enabling */
static kmem_cache_t *dtrace_state_cache;	/* cache for dynamic state */
static uint64_t dtrace_vtime_references;	/* number of vtimestamp refs */
static kthread_t *dtrace_panicked;	/* panicking thread */
static dtrace_ecb_t *dtrace_ecb_create_cache;	/* cached created ECB */
static dtrace_genid_t dtrace_probegen;	/* current probe generation */
static dtrace_helpers_t *dtrace_deferred_pid;	/* deferred helper list */
static dtrace_enabling_t *dtrace_retained;	/* list of retained enablings */
static dtrace_genid_t dtrace_retained_gen;	/* current retained enab gen */
static dtrace_dynvar_t dtrace_dynhash_sink;	/* end of dynamic hash chains */
static int dtrace_dynvar_failclean;	/* dynvars failed to clean */
#ifndef illumos
static struct mtx dtrace_unr_mtx;
MTX_SYSINIT(dtrace_unr_mtx, &dtrace_unr_mtx, "Unique resource identifier", MTX_DEF);
static eventhandler_tag dtrace_kld_load_tag;
static eventhandler_tag dtrace_kld_unload_try_tag;
#endif

/*
 * DTrace Locking
 * DTrace is protected by three (relatively coarse-grained) locks:
 *
 * (1) dtrace_lock is required to manipulate essentially any DTrace state,
 *     including enabling state, probes, ECBs, consumer state, helper state,
 *     etc. Importantly, dtrace_lock is _not_ required when in probe context;
 *     probe context is lock-free -- synchronization is handled via the
 *     dtrace_sync() cross call mechanism.
 *
 * (2) dtrace_provider_lock is required when manipulating provider state, or
 *     when provider state must be held constant.
 *
 * (3) dtrace_meta_lock is required when manipulating meta provider state, or
 *     when meta provider state must be held constant.
 *
 * The lock ordering between these three locks is dtrace_meta_lock before
 * dtrace_provider_lock before dtrace_lock. (In particular, there are
 * several places where dtrace_provider_lock is held by the framework as it
 * calls into the providers -- which then call back into the framework,
 * grabbing dtrace_lock.)
 *
 * There are two other locks in the mix: mod_lock and cpu_lock. With respect
 * to dtrace_provider_lock and dtrace_lock, cpu_lock continues its historical
 * role as a coarse-grained lock; it is acquired before both of these locks.
 * With respect to dtrace_meta_lock, its behavior is stranger: cpu_lock must
 * be acquired _between_ dtrace_meta_lock and any other DTrace locks.
 * mod_lock is similar with respect to dtrace_provider_lock in that it must be
 * acquired _between_ dtrace_provider_lock and dtrace_lock.
 */
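/*
 * A sketch of the resulting total acquisition order when all five locks are
 * needed (no single code path below necessarily takes all of them):
 *
 *	mutex_enter(&dtrace_meta_lock);
 *	mutex_enter(&cpu_lock);
 *	mutex_enter(&dtrace_provider_lock);
 *	mutex_enter(&mod_lock);
 *	mutex_enter(&dtrace_lock);
 *	...
 *	mutex_exit(&dtrace_lock);	(and so on, in reverse order)
 */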
static kmutex_t dtrace_lock;		/* probe state lock */
static kmutex_t dtrace_provider_lock;	/* provider state lock */
static kmutex_t dtrace_meta_lock;	/* meta-provider state lock */

#ifndef illumos
/* XXX FreeBSD hacks. */
#define cr_suid cr_svuid
#define cr_sgid cr_svgid
#define ipaddr_t in_addr_t
#define mod_modname pathname
#define vuprintf vprintf
#ifndef crgetzoneid
#define crgetzoneid(_a) 0
#endif
#define ttoproc(_a) ((_a)->td_proc)
#define SNOCD 0
#define CPU_ON_INTR(_a) 0

#define PRIV_EFFECTIVE (1 << 0)
#define PRIV_DTRACE_KERNEL (1 << 1)
#define PRIV_DTRACE_PROC (1 << 2)
#define PRIV_DTRACE_USER (1 << 3)
#define PRIV_PROC_OWNER (1 << 4)
#define PRIV_PROC_ZONE (1 << 5)
#define PRIV_ALL ~0

SYSCTL_DECL(_debug_dtrace);
SYSCTL_DECL(_kern_dtrace);
#endif

#ifdef illumos
#define curcpu CPU->cpu_id
#endif


/*
 * DTrace Provider Variables
 *
 * These are the variables relating to DTrace as a provider (that is, the
 * provider of the BEGIN, END, and ERROR probes).
 */
static dtrace_pattr_t dtrace_provider_attr = {
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
};

static void
dtrace_nullop(void)
{}

static dtrace_pops_t dtrace_provider_ops = {
	.dtps_provide = (void (*)(void *, dtrace_probedesc_t *))dtrace_nullop,
	.dtps_provide_module = (void (*)(void *, modctl_t *))dtrace_nullop,
	.dtps_enable = (void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	.dtps_disable = (void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	.dtps_suspend = (void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	.dtps_resume = (void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	.dtps_getargdesc = NULL,
	.dtps_getargval = NULL,
	.dtps_usermode = NULL,
	.dtps_destroy = (void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
};

static dtrace_id_t dtrace_probeid_begin;	/* special BEGIN probe */
static dtrace_id_t dtrace_probeid_end;		/* special END probe */
dtrace_id_t dtrace_probeid_error;		/* special ERROR probe */

/*
 * DTrace Helper Tracing Variables
 *
 * These variables should be set dynamically to enable helper tracing. The
 * only variables that should be set are dtrace_helptrace_enable (which should
 * be set to a non-zero value to allocate helper tracing buffers on the next
 * open of /dev/dtrace) and dtrace_helptrace_disable (which should be set to a
 * non-zero value to deallocate helper tracing buffers on the next close of
 * /dev/dtrace). When (and only when) helper tracing is disabled, the
 * buffer size may also be set via dtrace_helptrace_bufsize.
 */
int dtrace_helptrace_enable = 0;
int dtrace_helptrace_disable = 0;
int dtrace_helptrace_bufsize = 16 * 1024 * 1024;
uint32_t dtrace_helptrace_nlocals;
static dtrace_helptrace_t *dtrace_helptrace_buffer;
static uint32_t dtrace_helptrace_next = 0;
static int dtrace_helptrace_wrapped = 0;
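/*
 * For example, on an illumos DEBUG kernel one might arm helper tracing by
 * setting dtrace_helptrace_enable to 1 before the next open of /dev/dtrace;
 * a hypothetical mdb invocation to do so would be:
 *
 *	# echo 'dtrace_helptrace_enable/W 1' | mdb -kw
 */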

/*
 * DTrace Error Hashing
 *
 * On DEBUG kernels, DTrace will track the errors that it has seen in a hash
 * table. This is very useful for checking coverage of tests that are
 * expected to induce DIF or DOF processing errors, and may be useful for
 * debugging problems in the DIF code generator or in DOF generation. The
 * error hash may be examined with the ::dtrace_errhash MDB dcmd.
 */
#ifdef DEBUG
static dtrace_errhash_t dtrace_errhash[DTRACE_ERRHASHSZ];
static const char *dtrace_errlast;
static kthread_t *dtrace_errthread;
static kmutex_t dtrace_errlock;
#endif

/*
 * DTrace Macros and Constants
 *
 * These are various macros that are useful in various spots in the
 * implementation, along with a few random constants that have no meaning
 * outside of the implementation. There is no real structure to this cpp
 * mishmash -- but is there ever?
 */
#define DTRACE_HASHSTR(hash, probe) \
	dtrace_hash_str(*((char **)((uintptr_t)(probe) + (hash)->dth_stroffs)))

#define DTRACE_HASHNEXT(hash, probe) \
	(dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_nextoffs)

#define DTRACE_HASHPREV(hash, probe) \
	(dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_prevoffs)

#define DTRACE_HASHEQ(hash, lhs, rhs) \
	(strcmp(*((char **)((uintptr_t)(lhs) + (hash)->dth_stroffs)), \
	*((char **)((uintptr_t)(rhs) + (hash)->dth_stroffs))) == 0)

#define DTRACE_AGGHASHSIZE_SLEW 17

#define DTRACE_V4MAPPED_OFFSET (sizeof (uint32_t) * 3)

/*
 * The key for a thread-local variable consists of the lower 61 bits of the
 * t_did, plus the 3 bits of the highest active interrupt above LOCK_LEVEL.
 * We add DIF_VARIABLE_MAX to t_did to assure that the thread key is never
 * equal to a variable identifier. This is necessary (but not sufficient) to
 * assure that global associative arrays never collide with thread-local
 * variables. To guarantee that they cannot collide, we must also define the
 * order for keying dynamic variables. That order is:
 *
 *	[ key0 ] ... [ keyn ] [ variable-key ] [ tls-key ]
 *
 * Because the variable-key and the tls-key are in orthogonal spaces, there is
 * no way for a global variable key signature to match a thread-local key
 * signature.
 */
#ifdef illumos
#define DTRACE_TLS_THRKEY(where) { \
	uint_t intr = 0; \
	uint_t actv = CPU->cpu_intr_actv >> (LOCK_LEVEL + 1); \
	for (; actv; actv >>= 1) \
		intr++; \
	ASSERT(intr < (1 << 3)); \
	(where) = ((curthread->t_did + DIF_VARIABLE_MAX) & \
	    (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \
}
#else
#define DTRACE_TLS_THRKEY(where) { \
	solaris_cpu_t *_c = &solaris_cpu[curcpu]; \
	uint_t intr = 0; \
	uint_t actv = _c->cpu_intr_actv; \
	for (; actv; actv >>= 1) \
		intr++; \
	ASSERT(intr < (1 << 3)); \
	(where) = ((curthread->td_tid + DIF_VARIABLE_MAX) & \
	    (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \
}
#endif
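/*
 * For example, a thread running at base level (no active high-level
 * interrupts, so intr == 0) with a thread ID of 42 would be keyed as
 * (42 + DIF_VARIABLE_MAX) & ((1ULL << 61) - 1) -- the DIF_VARIABLE_MAX
 * offset being what keeps the key out of the variable-identifier space.
 */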

#define DT_BSWAP_8(x)	((x) & 0xff)
#define DT_BSWAP_16(x)	((DT_BSWAP_8(x) << 8) | DT_BSWAP_8((x) >> 8))
#define DT_BSWAP_32(x)	((DT_BSWAP_16(x) << 16) | DT_BSWAP_16((x) >> 16))
#define DT_BSWAP_64(x)	((DT_BSWAP_32(x) << 32) | DT_BSWAP_32((x) >> 32))
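/*
 * These expand to pure shift-and-mask arithmetic; for example,
 * DT_BSWAP_16(0x1234) yields 0x3412 and DT_BSWAP_32(0x12345678) yields
 * 0x78563412.
 */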

#define DT_MASK_LO 0x00000000FFFFFFFFULL

#define DTRACE_STORE(type, tomax, offset, what) \
	*((type *)((uintptr_t)(tomax) + (size_t)offset)) = (type)(what);

#if !defined(__x86) && !defined(__aarch64__)
#define DTRACE_ALIGNCHECK(addr, size, flags) \
	if (addr & (size - 1)) { \
		*flags |= CPU_DTRACE_BADALIGN; \
		cpu_core[curcpu].cpuc_dtrace_illval = addr; \
		return (0); \
	}
#else
#define DTRACE_ALIGNCHECK(addr, size, flags)
#endif

/*
 * Test whether a range of memory starting at testaddr of size testsz falls
 * within the range of memory described by addr, sz. We take care to avoid
 * problems with overflow and underflow of the unsigned quantities, and
 * disallow all negative sizes. Ranges of size 0 are allowed.
 */
#define DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz) \
	((testaddr) - (uintptr_t)(baseaddr) < (basesz) && \
	(testaddr) + (testsz) - (uintptr_t)(baseaddr) <= (basesz) && \
	(testaddr) + (testsz) >= (testaddr))
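/*
 * The third clause is what rejects wrapping ranges: with testaddr near the
 * top of the address space (say, (uintptr_t)-2) and testsz == 4,
 * testaddr + testsz overflows to a value below testaddr, so the macro
 * evaluates to false rather than treating the wrapped range as valid.
 */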

#define DTRACE_RANGE_REMAIN(remp, addr, baseaddr, basesz) \
	do { \
		if ((remp) != NULL) { \
			*(remp) = (uintptr_t)(baseaddr) + (basesz) - (addr); \
		} \
	} while (0)


/*
 * Test whether alloc_sz bytes will fit in the scratch region. We isolate
 * alloc_sz on the righthand side of the comparison in order to avoid overflow
 * or underflow in the comparison with it. This is simpler than the INRANGE
 * check above, because we know that the dtms_scratch_ptr is valid in the
 * range. Allocations of size zero are allowed.
 */
#define DTRACE_INSCRATCH(mstate, alloc_sz) \
	((mstate)->dtms_scratch_base + (mstate)->dtms_scratch_size - \
	(mstate)->dtms_scratch_ptr >= (alloc_sz))
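/*
 * Keeping alloc_sz alone on the right-hand side matters: dtms_scratch_ptr
 * always lies within [base, base + size], so the left-hand side is a small
 * non-negative remainder, whereas computing dtms_scratch_ptr + alloc_sz
 * could wrap for a hostile alloc_sz and falsely pass the check.
 */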

#define DTRACE_INSCRATCHPTR(mstate, ptr, howmany) \
	((ptr) >= (mstate)->dtms_scratch_base && \
	(ptr) <= \
	((mstate)->dtms_scratch_base + (mstate)->dtms_scratch_size - (howmany)))

#define DTRACE_LOADFUNC(bits) \
/*CSTYLED*/ \
uint##bits##_t \
dtrace_load##bits(uintptr_t addr) \
{ \
	size_t size = bits / NBBY; \
	/*CSTYLED*/ \
	uint##bits##_t rval; \
	int i; \
	volatile uint16_t *flags = (volatile uint16_t *) \
	    &cpu_core[curcpu].cpuc_dtrace_flags; \
\
	DTRACE_ALIGNCHECK(addr, size, flags); \
\
	for (i = 0; i < dtrace_toxranges; i++) { \
		if (addr >= dtrace_toxrange[i].dtt_limit) \
			continue; \
\
		if (addr + size <= dtrace_toxrange[i].dtt_base) \
			continue; \
\
		/* \
		 * This address falls within a toxic region; return 0. \
		 */ \
		*flags |= CPU_DTRACE_BADADDR; \
		cpu_core[curcpu].cpuc_dtrace_illval = addr; \
		return (0); \
	} \
\
	__compiler_membar(); \
	*flags |= CPU_DTRACE_NOFAULT; \
	/*CSTYLED*/ \
	rval = *((volatile uint##bits##_t *)addr); \
	*flags &= ~CPU_DTRACE_NOFAULT; \
	__compiler_membar(); \
\
	return (!(*flags & CPU_DTRACE_FAULT) ? rval : 0); \
}

#ifdef _LP64
#define dtrace_loadptr dtrace_load64
#else
#define dtrace_loadptr dtrace_load32
#endif

#define DTRACE_DYNHASH_FREE	0
#define DTRACE_DYNHASH_SINK	1
#define DTRACE_DYNHASH_VALID	2

#define DTRACE_MATCH_NEXT	0
#define DTRACE_MATCH_DONE	1
#define DTRACE_ANCHORED(probe)	((probe)->dtpr_func[0] != '\0')
#define DTRACE_STATE_ALIGN	64

#define DTRACE_FLAGS2FLT(flags) \
	(((flags) & CPU_DTRACE_BADADDR) ? DTRACEFLT_BADADDR : \
	((flags) & CPU_DTRACE_ILLOP) ? DTRACEFLT_ILLOP : \
	((flags) & CPU_DTRACE_DIVZERO) ? DTRACEFLT_DIVZERO : \
	((flags) & CPU_DTRACE_KPRIV) ? DTRACEFLT_KPRIV : \
	((flags) & CPU_DTRACE_UPRIV) ? DTRACEFLT_UPRIV : \
	((flags) & CPU_DTRACE_TUPOFLOW) ? DTRACEFLT_TUPOFLOW : \
	((flags) & CPU_DTRACE_BADALIGN) ? DTRACEFLT_BADALIGN : \
	((flags) & CPU_DTRACE_NOSCRATCH) ? DTRACEFLT_NOSCRATCH : \
	((flags) & CPU_DTRACE_BADSTACK) ? DTRACEFLT_BADSTACK : \
	DTRACEFLT_UNKNOWN)

#define DTRACEACT_ISSTRING(act) \
	((act)->dta_kind == DTRACEACT_DIFEXPR && \
	(act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING)

/* Function prototype definitions: */
static size_t dtrace_strlen(const char *, size_t);
static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id);
static void dtrace_enabling_provide(dtrace_provider_t *);
static int dtrace_enabling_match(dtrace_enabling_t *, int *);
static void dtrace_enabling_matchall(void);
static void dtrace_enabling_matchall_task(void *);
static void dtrace_enabling_reap(void *);
static dtrace_state_t *dtrace_anon_grab(void);
static uint64_t dtrace_helper(int, dtrace_mstate_t *,
    dtrace_state_t *, uint64_t, uint64_t);
static dtrace_helpers_t *dtrace_helpers_create(proc_t *);
static void dtrace_buffer_drop(dtrace_buffer_t *);
static int dtrace_buffer_consumed(dtrace_buffer_t *, hrtime_t when);
static ssize_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t,
    dtrace_state_t *, dtrace_mstate_t *);
static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
    dtrace_optval_t);
static int dtrace_ecb_create_enable(dtrace_probe_t *, void *);
static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);
uint16_t dtrace_load16(uintptr_t);
uint32_t dtrace_load32(uintptr_t);
uint64_t dtrace_load64(uintptr_t);
uint8_t dtrace_load8(uintptr_t);
void dtrace_dynvar_clean(dtrace_dstate_t *);
dtrace_dynvar_t *dtrace_dynvar(dtrace_dstate_t *, uint_t, dtrace_key_t *,
    size_t, dtrace_dynvar_op_t, dtrace_mstate_t *, dtrace_vstate_t *);
uintptr_t dtrace_dif_varstr(uintptr_t, dtrace_state_t *, dtrace_mstate_t *);
static int dtrace_priv_proc(dtrace_state_t *);
static void dtrace_getf_barrier(void);
static int dtrace_canload_remains(uint64_t, size_t, size_t *,
    dtrace_mstate_t *, dtrace_vstate_t *);
static int dtrace_canstore_remains(uint64_t, size_t, size_t *,
    dtrace_mstate_t *, dtrace_vstate_t *);

/*
 * DTrace Probe Context Functions
 *
 * These functions are called from probe context. Because probe context is
 * any context in which C may be called, arbitrary locks may be held,
 * interrupts may be disabled, we may be in arbitrary dispatched state, etc.
 * As a result, functions called from probe context may only call other DTrace
 * support functions -- they may not interact at all with the system at large.
 * (Note that the ASSERT macro is made probe-context safe by redefining it in
 * terms of dtrace_assfail(), a probe-context safe function.) If arbitrary
 * loads are to be performed from probe context, they _must_ be in terms of
 * the safe dtrace_load*() variants.
 *
 * Some functions in this block are not actually called from probe context;
 * for these functions, there will be a comment above the function reading
 * "Note: not called from probe context."
 */
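/*
 * Concretely, a probe-context dereference such as:
 *
 *	val = *(uint32_t *)addr;	(unsafe: may fault)
 *
 * must instead be written as:
 *
 *	val = dtrace_load32(addr);	(fault-checked safe load)
 */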
void
dtrace_panic(const char *format, ...)
{
	va_list alist;

	va_start(alist, format);
#ifdef __FreeBSD__
	vpanic(format, alist);
#else
	dtrace_vpanic(format, alist);
#endif
	va_end(alist);
}

int
dtrace_assfail(const char *a, const char *f, int l)
{
	dtrace_panic("assertion failed: %s, file: %s, line: %d", a, f, l);

	/*
	 * We just need something here that even the most clever compiler
	 * cannot optimize away.
	 */
	return (a[(uintptr_t)f]);
}

/*
 * Atomically increment a specified error counter from probe context.
 */
static void
dtrace_error(uint32_t *counter)
{
	/*
	 * Most counters stored to in probe context are per-CPU counters.
	 * However, there are some error conditions that are sufficiently
	 * arcane that they don't merit per-CPU storage. If these counters
	 * are incremented concurrently on different CPUs, scalability will be
	 * adversely affected -- but we don't expect them to be white-hot in a
	 * correctly constructed enabling...
	 */
	uint32_t oval, nval;

	do {
		oval = *counter;

		if ((nval = oval + 1) == 0) {
			/*
			 * If the counter would wrap, set it to 1 -- assuring
			 * that the counter is never zero when we have seen
			 * errors. (The counter must be 32-bits because we
			 * aren't guaranteed a 64-bit compare&swap operation.)
			 * To save this code both the infamy of being fingered
			 * by a priggish news story and the indignity of being
			 * the target of a neo-puritan witch trial, we're
			 * carefully avoiding any colorful description of the
			 * likelihood of this condition -- but suffice it to
			 * say that it is only slightly more likely than the
			 * overflow of predicate cache IDs, as discussed in
			 * dtrace_predicate_create().
			 */
			nval = 1;
		}
	} while (dtrace_cas32(counter, oval, nval) != oval);
}

void
dtrace_xcall(processorid_t cpu, dtrace_xcall_t func, void *arg)
{
	cpuset_t cpus;

	if (cpu == DTRACE_CPUALL)
		cpus = all_cpus;
	else
		CPU_SETOF(cpu, &cpus);

	smp_rendezvous_cpus(cpus, smp_no_rendezvous_barrier, func,
	    smp_no_rendezvous_barrier, arg);
}

static void
dtrace_sync_func(void)
{
}

void
dtrace_sync(void)
{
	dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)dtrace_sync_func, NULL);
}

/*
 * Use the DTRACE_LOADFUNC macro to define functions for each of loading a
 * uint8_t, a uint16_t, a uint32_t and a uint64_t.
 */
/* BEGIN CSTYLED */
DTRACE_LOADFUNC(8)
DTRACE_LOADFUNC(16)
DTRACE_LOADFUNC(32)
DTRACE_LOADFUNC(64)
/* END CSTYLED */

static int
dtrace_inscratch(uintptr_t dest, size_t size, dtrace_mstate_t *mstate)
{
	if (dest < mstate->dtms_scratch_base)
		return (0);

	if (dest + size < dest)
		return (0);

	if (dest + size > mstate->dtms_scratch_ptr)
		return (0);

	return (1);
}

static int
dtrace_canstore_statvar(uint64_t addr, size_t sz, size_t *remain,
    dtrace_statvar_t **svars, int nsvars)
{
	int i;
	size_t maxglobalsize, maxlocalsize;

	if (nsvars == 0)
		return (0);

	maxglobalsize = dtrace_statvar_maxsize + sizeof (uint64_t);
	maxlocalsize = maxglobalsize * (mp_maxid + 1);

	for (i = 0; i < nsvars; i++) {
		dtrace_statvar_t *svar = svars[i];
		uint8_t scope;
		size_t size;

		if (svar == NULL || (size = svar->dtsv_size) == 0)
			continue;

		scope = svar->dtsv_var.dtdv_scope;

		/*
		 * We verify that our size is valid in the spirit of providing
		 * defense in depth: we want to prevent attackers from using
		 * DTrace to escalate an orthogonal kernel heap corruption bug
		 * into the ability to store to arbitrary locations in memory.
		 */
		VERIFY((scope == DIFV_SCOPE_GLOBAL && size <= maxglobalsize) ||
		    (scope == DIFV_SCOPE_LOCAL && size <= maxlocalsize));

		if (DTRACE_INRANGE(addr, sz, svar->dtsv_data,
		    svar->dtsv_size)) {
			DTRACE_RANGE_REMAIN(remain, addr, svar->dtsv_data,
			    svar->dtsv_size);
			return (1);
		}
	}

	return (0);
}

/*
 * Check to see if the address is within a memory region to which a store may
 * be issued. This includes the DTrace scratch areas, and any DTrace variable
 * region. The caller of dtrace_canstore() is responsible for performing any
 * alignment checks that are needed before stores are actually executed.
 */
static int
dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate)
{
	return (dtrace_canstore_remains(addr, sz, NULL, mstate, vstate));
}

/*
 * Implementation of dtrace_canstore which communicates the upper bound of the
 * allowed memory region.
 */
static int
dtrace_canstore_remains(uint64_t addr, size_t sz, size_t *remain,
    dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
{
	/*
	 * First, check to see if the address is in scratch space...
	 */
	if (DTRACE_INRANGE(addr, sz, mstate->dtms_scratch_base,
	    mstate->dtms_scratch_size)) {
		DTRACE_RANGE_REMAIN(remain, addr, mstate->dtms_scratch_base,
		    mstate->dtms_scratch_size);
		return (1);
	}

	/*
	 * Now check to see if it's a dynamic variable. This check will pick
	 * up both thread-local variables and any global dynamically-allocated
	 * variables.
	 */
	if (DTRACE_INRANGE(addr, sz, vstate->dtvs_dynvars.dtds_base,
	    vstate->dtvs_dynvars.dtds_size)) {
		dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
		uintptr_t base = (uintptr_t)dstate->dtds_base +
		    (dstate->dtds_hashsize * sizeof (dtrace_dynhash_t));
		uintptr_t chunkoffs;
		dtrace_dynvar_t *dvar;

		/*
		 * Before we assume that we can store here, we need to make
		 * sure that it isn't in our metadata -- storing to our
		 * dynamic variable metadata would corrupt our state. For
		 * the range to not include any dynamic variable metadata,
		 * it must:
		 *
		 *	(1) Start above the hash table that is at the base of
		 *	the dynamic variable space
		 *
		 *	(2) Have a starting chunk offset that is beyond the
		 *	dtrace_dynvar_t that is at the base of every chunk
		 *
		 *	(3) Not span a chunk boundary
		 *
		 *	(4) Not be in the tuple space of a dynamic variable
		 *
		 */
		if (addr < base)
			return (0);

		chunkoffs = (addr - base) % dstate->dtds_chunksize;

		if (chunkoffs < sizeof (dtrace_dynvar_t))
			return (0);

		if (chunkoffs + sz > dstate->dtds_chunksize)
			return (0);

		dvar = (dtrace_dynvar_t *)((uintptr_t)addr - chunkoffs);

		if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE)
			return (0);

		if (chunkoffs < sizeof (dtrace_dynvar_t) +
		    ((dvar->dtdv_tuple.dtt_nkeys - 1) * sizeof (dtrace_key_t)))
			return (0);

		DTRACE_RANGE_REMAIN(remain, addr, dvar, dstate->dtds_chunksize);
		return (1);
	}

	/*
	 * Finally, check the static local and global variables. These checks
	 * take the longest, so we perform them last.
	 */
	if (dtrace_canstore_statvar(addr, sz, remain,
	    vstate->dtvs_locals, vstate->dtvs_nlocals))
		return (1);

	if (dtrace_canstore_statvar(addr, sz, remain,
	    vstate->dtvs_globals, vstate->dtvs_nglobals))
		return (1);

	return (0);
}


/*
 * Convenience routine to check to see if the address is within a memory
 * region in which a load may be issued given the user's privilege level;
 * if not, it sets the appropriate error flags and loads 'addr' into the
 * illegal value slot.
 *
 * DTrace subroutines (DIF_SUBR_*) should use this helper to implement
 * appropriate memory access protection.
 */
static int
dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate)
{
	return (dtrace_canload_remains(addr, sz, NULL, mstate, vstate));
}

/*
 * Implementation of dtrace_canload which communicates the upper bound of the
 * allowed memory region.
 */
static int
dtrace_canload_remains(uint64_t addr, size_t sz, size_t *remain,
    dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
{
	volatile uintptr_t *illval = &cpu_core[curcpu].cpuc_dtrace_illval;
	file_t *fp;

	/*
	 * If we hold the privilege to read from kernel memory, then
	 * everything is readable.
	 */
	if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) {
		DTRACE_RANGE_REMAIN(remain, addr, addr, sz);
		return (1);
	}

	/*
	 * You can obviously read that which you can store.
	 */
	if (dtrace_canstore_remains(addr, sz, remain, mstate, vstate))
		return (1);

	/*
	 * We're allowed to read from our own string table.
	 */
	if (DTRACE_INRANGE(addr, sz, mstate->dtms_difo->dtdo_strtab,
	    mstate->dtms_difo->dtdo_strlen)) {
		DTRACE_RANGE_REMAIN(remain, addr,
		    mstate->dtms_difo->dtdo_strtab,
		    mstate->dtms_difo->dtdo_strlen);
		return (1);
	}

	if (vstate->dtvs_state != NULL &&
	    dtrace_priv_proc(vstate->dtvs_state)) {
		proc_t *p;

		/*
		 * When we have privileges to the current process, there are
		 * several context-related kernel structures that are safe to
		 * read, even absent the privilege to read from kernel memory.
		 * These reads are safe because these structures contain only
		 * state that (1) we're permitted to read, (2) is harmless or
		 * (3) contains pointers to additional kernel state that we're
		 * not permitted to read (and as such, do not present an
		 * opportunity for privilege escalation). Finally (and
		 * critically), because of the nature of their relation with
		 * the current thread context, the memory associated with these
		 * structures cannot change over the duration of probe context,
		 * and it is therefore impossible for this memory to be
		 * deallocated and reallocated as something else while it's
		 * being operated upon.
		 */
		if (DTRACE_INRANGE(addr, sz, curthread, sizeof (kthread_t))) {
			DTRACE_RANGE_REMAIN(remain, addr, curthread,
			    sizeof (kthread_t));
			return (1);
		}

		if ((p = curthread->t_procp) != NULL && DTRACE_INRANGE(addr,
		    sz, curthread->t_procp, sizeof (proc_t))) {
			DTRACE_RANGE_REMAIN(remain, addr, curthread->t_procp,
			    sizeof (proc_t));
			return (1);
		}

		if (curthread->t_cred != NULL && DTRACE_INRANGE(addr, sz,
		    curthread->t_cred, sizeof (cred_t))) {
			DTRACE_RANGE_REMAIN(remain, addr, curthread->t_cred,
			    sizeof (cred_t));
			return (1);
		}

#ifdef illumos
		if (p != NULL && p->p_pidp != NULL && DTRACE_INRANGE(addr, sz,
		    &(p->p_pidp->pid_id), sizeof (pid_t))) {
			DTRACE_RANGE_REMAIN(remain, addr, &(p->p_pidp->pid_id),
			    sizeof (pid_t));
			return (1);
		}

		if (curthread->t_cpu != NULL && DTRACE_INRANGE(addr, sz,
		    curthread->t_cpu, offsetof(cpu_t, cpu_pause_thread))) {
			DTRACE_RANGE_REMAIN(remain, addr, curthread->t_cpu,
			    offsetof(cpu_t, cpu_pause_thread));
			return (1);
		}
#endif
	}

	if ((fp = mstate->dtms_getf) != NULL) {
		uintptr_t psz = sizeof (void *);
		vnode_t *vp;
		vnodeops_t *op;

		/*
		 * When getf() returns a file_t, the enabling is implicitly
		 * granted the (transient) right to read the returned file_t
		 * as well as the v_path and v_op->vnop_name of the underlying
		 * vnode. These accesses are allowed after a successful
		 * getf() because the members that they refer to cannot change
		 * once set -- and the barrier logic in the kernel's closef()
		 * path assures that the file_t and its referenced vnode_t
		 * cannot themselves be stale (that is, it is impossible for
		 * either dtms_getf itself or its f_vnode member to reference
		 * freed memory).
		 */
		if (DTRACE_INRANGE(addr, sz, fp, sizeof (file_t))) {
			DTRACE_RANGE_REMAIN(remain, addr, fp, sizeof (file_t));
			return (1);
		}

		if ((vp = fp->f_vnode) != NULL) {
			size_t slen;
#ifdef illumos
			if (DTRACE_INRANGE(addr, sz, &vp->v_path, psz)) {
				DTRACE_RANGE_REMAIN(remain, addr, &vp->v_path,
				    psz);
				return (1);
			}
			slen = strlen(vp->v_path) + 1;
			if (DTRACE_INRANGE(addr, sz, vp->v_path, slen)) {
				DTRACE_RANGE_REMAIN(remain, addr, vp->v_path,
				    slen);
				return (1);
			}
#endif

			if (DTRACE_INRANGE(addr, sz, &vp->v_op, psz)) {
				DTRACE_RANGE_REMAIN(remain, addr, &vp->v_op,
				    psz);
				return (1);
			}

#ifdef illumos
			if ((op = vp->v_op) != NULL &&
			    DTRACE_INRANGE(addr, sz, &op->vnop_name, psz)) {
				DTRACE_RANGE_REMAIN(remain, addr,
				    &op->vnop_name, psz);
				return (1);
			}

			if (op != NULL && op->vnop_name != NULL &&
			    DTRACE_INRANGE(addr, sz, op->vnop_name,
			    (slen = strlen(op->vnop_name) + 1))) {
				DTRACE_RANGE_REMAIN(remain, addr,
				    op->vnop_name, slen);
				return (1);
			}
#endif
		}
	}

	DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV);
	*illval = addr;
	return (0);
}

/*
 * Convenience routine to check to see if a given string is within a memory
 * region in which a load may be issued given the user's privilege level;
 * this exists so that we don't need to issue unnecessary dtrace_strlen()
 * calls in the event that the user has all privileges.
 */
static int
dtrace_strcanload(uint64_t addr, size_t sz, size_t *remain,
    dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
{
	size_t rsize;

	/*
	 * If we hold the privilege to read from kernel memory, then
	 * everything is readable.
	 */
	if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) {
		DTRACE_RANGE_REMAIN(remain, addr, addr, sz);
		return (1);
	}

	/*
	 * Even if the caller is uninterested in querying the remaining valid
	 * range, it is required to ensure that the access is allowed.
	 */
	if (remain == NULL) {
		remain = &rsize;
	}
	if (dtrace_canload_remains(addr, 0, remain, mstate, vstate)) {
		size_t strsz;
		/*
		 * Perform the strlen after determining the length of the
		 * memory region which is accessible. This prevents timing
		 * information from being used to find NULs in memory which is
		 * not accessible to the caller.
		 */
		strsz = 1 + dtrace_strlen((char *)(uintptr_t)addr,
		    MIN(sz, *remain));
		if (strsz <= *remain) {
			return (1);
		}
	}

	return (0);
}

/*
 * Convenience routine to check to see if a given variable is within a memory
 * region in which a load may be issued given the user's privilege level.
 */
static int
dtrace_vcanload(void *src, dtrace_diftype_t *type, size_t *remain,
    dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
{
	size_t sz;
	ASSERT(type->dtdt_flags & DIF_TF_BYREF);

	/*
	 * Calculate the max size before performing any checks since even
	 * DTRACE_ACCESS_KERNEL-credentialed callers expect that this function
	 * return the max length via 'remain'.
	 */
	if (type->dtdt_kind == DIF_TYPE_STRING) {
		dtrace_state_t *state = vstate->dtvs_state;

		if (state != NULL) {
			sz = state->dts_options[DTRACEOPT_STRSIZE];
		} else {
			/*
			 * In helper context, we have a NULL state; fall back
			 * to using the system-wide default for the string size
			 * in this case.
			 */
			sz = dtrace_strsize_default;
		}
	} else {
		sz = type->dtdt_size;
	}

	/*
	 * If we hold the privilege to read from kernel memory, then
	 * everything is readable.
	 */
	if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) {
		DTRACE_RANGE_REMAIN(remain, (uintptr_t)src, src, sz);
		return (1);
	}

	if (type->dtdt_kind == DIF_TYPE_STRING) {
		return (dtrace_strcanload((uintptr_t)src, sz, remain, mstate,
		    vstate));
	}
	return (dtrace_canload_remains((uintptr_t)src, sz, remain, mstate,
	    vstate));
}

/*
 * Convert a string to a signed integer using safe loads.
 *
 * NOTE: This function uses various macros from strtolctype.h to manipulate
 * digit values, etc -- these have all been checked to ensure they make
 * no additional function calls.
 */
static int64_t
dtrace_strtoll(char *input, int base, size_t limit)
{
	uintptr_t pos = (uintptr_t)input;
	int64_t val = 0;
	int x;
	boolean_t neg = B_FALSE;
	char c, cc, ccc;
	uintptr_t end = pos + limit;

	/*
	 * Consume any whitespace preceding digits.
	 */
	while ((c = dtrace_load8(pos)) == ' ' || c == '\t')
		pos++;

	/*
	 * Handle an explicit sign if one is present.
	 */
	if (c == '-' || c == '+') {
		if (c == '-')
			neg = B_TRUE;
		c = dtrace_load8(++pos);
	}

	/*
	 * Check for an explicit hexadecimal prefix ("0x" or "0X") and skip it
	 * if present.
	 */
	if (base == 16 && c == '0' && ((cc = dtrace_load8(pos + 1)) == 'x' ||
	    cc == 'X') && isxdigit(ccc = dtrace_load8(pos + 2))) {
		pos += 2;
		c = ccc;
	}

	/*
	 * Read in contiguous digits until the first non-digit character.
	 */
	for (; pos < end && c != '\0' && lisalnum(c) && (x = DIGIT(c)) < base;
	    c = dtrace_load8(++pos))
		val = val * base + x;

	return (neg ? -val : val);
}
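/*
 * For example, dtrace_strtoll("0x1f", 16, limit) skips the "0x" prefix and
 * returns 31, while dtrace_strtoll(" -42", 10, limit) consumes the leading
 * whitespace and sign and returns -42 (assuming limit covers the string).
 */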

/*
 * Compare two strings using safe loads.
 */
static int
dtrace_strncmp(char *s1, char *s2, size_t limit)
{
	uint8_t c1, c2;
	volatile uint16_t *flags;

	if (s1 == s2 || limit == 0)
		return (0);

	flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags;

	do {
		if (s1 == NULL) {
			c1 = '\0';
		} else {
			c1 = dtrace_load8((uintptr_t)s1++);
		}

		if (s2 == NULL) {
			c2 = '\0';
		} else {
			c2 = dtrace_load8((uintptr_t)s2++);
		}

		if (c1 != c2)
			return (c1 - c2);
	} while (--limit && c1 != '\0' && !(*flags & CPU_DTRACE_FAULT));

	return (0);
}

/*
 * Compute strlen(s) for a string using safe memory accesses. The additional
 * len parameter is used to specify a maximum length to ensure completion.
 */
static size_t
dtrace_strlen(const char *s, size_t lim)
{
	uint_t len;

	for (len = 0; len != lim; len++) {
		if (dtrace_load8((uintptr_t)s++) == '\0')
			break;
	}

	return (len);
}

/*
 * Check if an address falls within a toxic region.
 */
static int
dtrace_istoxic(uintptr_t kaddr, size_t size)
{
	uintptr_t taddr, tsize;
	int i;

	for (i = 0; i < dtrace_toxranges; i++) {
		taddr = dtrace_toxrange[i].dtt_base;
		tsize = dtrace_toxrange[i].dtt_limit - taddr;

		if (kaddr - taddr < tsize) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[curcpu].cpuc_dtrace_illval = kaddr;
			return (1);
		}

		if (taddr - kaddr < size) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[curcpu].cpuc_dtrace_illval = taddr;
			return (1);
		}
	}

	return (0);
}

/*
 * Copy src to dst using safe memory accesses. The src is assumed to be unsafe
 * memory specified by the DIF program. The dst is assumed to be safe memory
 * that we can store to directly because it is managed by DTrace. As with
 * standard bcopy, overlapping copies are handled properly.
 */
static void
dtrace_bcopy(const void *src, void *dst, size_t len)
{
	if (len != 0) {
		uint8_t *s1 = dst;
		const uint8_t *s2 = src;

		if (s1 <= s2) {
			do {
				*s1++ = dtrace_load8((uintptr_t)s2++);
			} while (--len != 0);
		} else {
			s2 += len;
			s1 += len;

			do {
				*--s1 = dtrace_load8((uintptr_t)--s2);
			} while (--len != 0);
		}
	}
}

/*
 * Copy src to dst using safe memory accesses, up to either the specified
 * length, or the point that a nul byte is encountered. The src is assumed to
 * be unsafe memory specified by the DIF program. The dst is assumed to be
 * safe memory that we can store to directly because it is managed by DTrace.
 * Unlike dtrace_bcopy(), overlapping regions are not handled.
 */
static void
dtrace_strcpy(const void *src, void *dst, size_t len)
{
	if (len != 0) {
		uint8_t *s1 = dst, c;
		const uint8_t *s2 = src;

		do {
			*s1++ = c = dtrace_load8((uintptr_t)s2++);
		} while (--len != 0 && c != '\0');
	}
}

/*
 * Copy src to dst, deriving the size and type from the specified (BYREF)
 * variable type. The src is assumed to be unsafe memory specified by the DIF
 * program. The dst is assumed to be DTrace variable memory that is of the
 * specified type; we assume that we can store to directly.
 */
static void
dtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type, size_t limit)
{
	ASSERT(type->dtdt_flags & DIF_TF_BYREF);

	if (type->dtdt_kind == DIF_TYPE_STRING) {
		dtrace_strcpy(src, dst, MIN(type->dtdt_size, limit));
	} else {
		dtrace_bcopy(src, dst, MIN(type->dtdt_size, limit));
	}
}

/*
 * Compare s1 to s2 using safe memory accesses. The s1 data is assumed to be
 * unsafe memory specified by the DIF program. The s2 data is assumed to be
 * safe memory that we can access directly because it is managed by DTrace.
 */
static int
dtrace_bcmp(const void *s1, const void *s2, size_t len)
{
	volatile uint16_t *flags;

	flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags;

	if (s1 == s2)
		return (0);

	if (s1 == NULL || s2 == NULL)
		return (1);

	if (s1 != s2 && len != 0) {
		const uint8_t *ps1 = s1;
		const uint8_t *ps2 = s2;

		do {
			if (dtrace_load8((uintptr_t)ps1++) != *ps2++)
				return (1);
		} while (--len != 0 && !(*flags & CPU_DTRACE_FAULT));
	}
	return (0);
}

/*
 * Zero the specified region using a simple byte-by-byte loop. Note that this
 * is for safe DTrace-managed memory only.
 */
static void
dtrace_bzero(void *dst, size_t len)
{
	uchar_t *cp;

	for (cp = dst; len != 0; len--)
		*cp++ = 0;
}

static void
dtrace_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum)
{
	uint64_t result[2];

	result[0] = addend1[0] + addend2[0];
	result[1] = addend1[1] + addend2[1] +
	    (result[0] < addend1[0] || result[0] < addend2[0] ? 1 : 0);

	sum[0] = result[0];
	sum[1] = result[1];
}
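/*
 * The comparison above detects carry out of the low word: for example,
 * adding {0xffffffffffffffff, 0} to {1, 0} gives result[0] == 0, which is
 * less than either addend's low word, so 1 is carried into result[1] and
 * the sum is {0, 1} -- i.e. 2^64.
 */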

/*
 * Shift the 128-bit value in a by b. If b is positive, shift left.
 * If b is negative, shift right.
 */
static void
dtrace_shift_128(uint64_t *a, int b)
{
	uint64_t mask;

	if (b == 0)
		return;

	if (b < 0) {
		b = -b;
		if (b >= 64) {
			a[0] = a[1] >> (b - 64);
			a[1] = 0;
		} else {
			a[0] >>= b;
			mask = 1LL << (64 - b);
			mask -= 1;
			a[0] |= ((a[1] & mask) << (64 - b));
			a[1] >>= b;
		}
	} else {
		if (b >= 64) {
			a[1] = a[0] << (b - 64);
			a[0] = 0;
		} else {
			a[1] <<= b;
			mask = a[0] >> (64 - b);
			a[1] |= mask;
			a[0] <<= b;
		}
	}
}
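/*
 * For example, dtrace_shift_128(a, 32) on a == {0x100000000, 0} moves the
 * set bit up into the high word: a becomes {0, 1}, i.e. 2^64.
 */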

/*
 * The basic idea is to break the 2 64-bit values into 4 32-bit values,
 * use native multiplication on those, and then re-combine into the
 * resulting 128-bit value.
 *
 * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) =
 *     hi1 * hi2 << 64 +
 *     hi1 * lo2 << 32 +
 *     hi2 * lo1 << 32 +
 *     lo1 * lo2
 */
static void
dtrace_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product)
{
	uint64_t hi1, hi2, lo1, lo2;
	uint64_t tmp[2];

	hi1 = factor1 >> 32;
	hi2 = factor2 >> 32;

	lo1 = factor1 & DT_MASK_LO;
	lo2 = factor2 & DT_MASK_LO;

	product[0] = lo1 * lo2;
	product[1] = hi1 * hi2;

	tmp[0] = hi1 * lo2;
	tmp[1] = 0;
	dtrace_shift_128(tmp, 32);
	dtrace_add_128(product, tmp, product);

	tmp[0] = hi2 * lo1;
	tmp[1] = 0;
	dtrace_shift_128(tmp, 32);
	dtrace_add_128(product, tmp, product);
}
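/*
 * For example, squaring 2^32 + 1 (hi == 1, lo == 1 for both factors):
 * product starts as {lo1 * lo2, hi1 * hi2} == {1, 1}, and each of the two
 * cross terms contributes 1 << 32 to the low word, giving
 * {0x200000001, 1} == 2^64 + 2^33 + 1 == (2^32 + 1)^2.
 */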

/*
 * This privilege check should be used by actions and subroutines to
 * verify that the user credentials of the process that enabled the
 * invoking ECB match the target credentials
 */
static int
dtrace_priv_proc_common_user(dtrace_state_t *state)
{
	cred_t *cr, *s_cr = state->dts_cred.dcr_cred;

	/*
	 * We should always have a non-NULL state cred here, since if cred
	 * is null (anonymous tracing), we fast-path bypass this routine.
	 */
	ASSERT(s_cr != NULL);

	if ((cr = CRED()) != NULL &&
	    s_cr->cr_uid == cr->cr_uid &&
	    s_cr->cr_uid == cr->cr_ruid &&
	    s_cr->cr_uid == cr->cr_suid &&
	    s_cr->cr_gid == cr->cr_gid &&
	    s_cr->cr_gid == cr->cr_rgid &&
	    s_cr->cr_gid == cr->cr_sgid)
		return (1);

	return (0);
}

/*
 * This privilege check should be used by actions and subroutines to
 * verify that the zone of the process that enabled the invoking ECB
 * matches the target credentials
 */
static int
dtrace_priv_proc_common_zone(dtrace_state_t *state)
{
#ifdef illumos
	cred_t *cr, *s_cr = state->dts_cred.dcr_cred;

	/*
	 * We should always have a non-NULL state cred here, since if cred
	 * is null (anonymous tracing), we fast-path bypass this routine.
	 */
	ASSERT(s_cr != NULL);

	if ((cr = CRED()) != NULL && s_cr->cr_zone == cr->cr_zone)
		return (1);

	return (0);
#else
	return (1);
#endif
}

/*
 * This privilege check should be used by actions and subroutines to
 * verify that the process has not setuid or changed credentials.
 */
static int
dtrace_priv_proc_common_nocd(void)
{
	proc_t *proc;

	if ((proc = ttoproc(curthread)) != NULL &&
	    !(proc->p_flag & SNOCD))
		return (1);

	return (0);
}

static int
dtrace_priv_proc_destructive(dtrace_state_t *state)
{
	int action = state->dts_cred.dcr_action;

	if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE) == 0) &&
	    dtrace_priv_proc_common_zone(state) == 0)
		goto bad;

	if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER) == 0) &&
	    dtrace_priv_proc_common_user(state) == 0)
		goto bad;

	if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG) == 0) &&
	    dtrace_priv_proc_common_nocd() == 0)
		goto bad;

	return (1);

bad:
	cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

	return (0);
}

static int
dtrace_priv_proc_control(dtrace_state_t *state)
{
	if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL)
		return (1);

	if (dtrace_priv_proc_common_zone(state) &&
	    dtrace_priv_proc_common_user(state) &&
	    dtrace_priv_proc_common_nocd())
		return (1);

	cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

	return (0);
}

static int
dtrace_priv_proc(dtrace_state_t *state)
{
	if (state->dts_cred.dcr_action & DTRACE_CRA_PROC)
		return (1);

	cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

	return (0);
}

static int
dtrace_priv_kernel(dtrace_state_t *state)
{
	if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL)
		return (1);

	cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;

	return (0);
}

static int
dtrace_priv_kernel_destructive(dtrace_state_t *state)
{
	if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL_DESTRUCTIVE)
		return (1);

	cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;

	return (0);
}

/*
 * Determine if the dte_cond of the specified ECB allows for processing of
 * the current probe to continue. Note that this routine may allow continued
 * processing, but with access(es) stripped from the mstate's dtms_access
 * field.
 */
static int
dtrace_priv_probe(dtrace_state_t *state, dtrace_mstate_t *mstate,
    dtrace_ecb_t *ecb)
{
	dtrace_probe_t *probe = ecb->dte_probe;
	dtrace_provider_t *prov = probe->dtpr_provider;
	dtrace_pops_t *pops = &prov->dtpv_pops;
	int mode = DTRACE_MODE_NOPRIV_DROP;

	ASSERT(ecb->dte_cond);

#ifdef illumos
	if (pops->dtps_mode != NULL) {
		mode = pops->dtps_mode(prov->dtpv_arg,
		    probe->dtpr_id, probe->dtpr_arg);

		ASSERT((mode & DTRACE_MODE_USER) ||
		    (mode & DTRACE_MODE_KERNEL));
		ASSERT((mode & DTRACE_MODE_NOPRIV_RESTRICT) ||
		    (mode & DTRACE_MODE_NOPRIV_DROP));
	}

	/*
	 * If the dte_cond bits indicate that this consumer is only allowed to
	 * see user-mode firings of this probe, call the provider's dtps_mode()
	 * entry point to check that the probe was fired while in a user
	 * context. If that's not the case, use the policy specified by the
	 * provider to determine if we drop the probe or merely restrict
	 * operation.
	 */
	if (ecb->dte_cond & DTRACE_COND_USERMODE) {
		ASSERT(mode != DTRACE_MODE_NOPRIV_DROP);

		if (!(mode & DTRACE_MODE_USER)) {
			if (mode & DTRACE_MODE_NOPRIV_DROP)
				return (0);

			mstate->dtms_access &= ~DTRACE_ACCESS_ARGS;
		}
	}
#endif

	/*
	 * This is more subtle than it looks. We have to be absolutely certain
	 * that CRED() isn't going to change out from under us so it's only
	 * legit to examine that structure if we're in constrained situations.
	 * Currently, the only time we'll do this check is if a non-super-user
	 * has enabled the profile or syscall providers -- providers that
	 * allow visibility of all processes. For the profile case, the check
	 * above will ensure that we're examining a user context.
	 */
	if (ecb->dte_cond & DTRACE_COND_OWNER) {
		cred_t *cr;
		cred_t *s_cr = state->dts_cred.dcr_cred;
		proc_t *proc;

		ASSERT(s_cr != NULL);

		if ((cr = CRED()) == NULL ||
		    s_cr->cr_uid != cr->cr_uid ||
		    s_cr->cr_uid != cr->cr_ruid ||
		    s_cr->cr_uid != cr->cr_suid ||
		    s_cr->cr_gid != cr->cr_gid ||
		    s_cr->cr_gid != cr->cr_rgid ||
		    s_cr->cr_gid != cr->cr_sgid ||
		    (proc = ttoproc(curthread)) == NULL ||
		    (proc->p_flag & SNOCD)) {
			if (mode & DTRACE_MODE_NOPRIV_DROP)
				return (0);

#ifdef illumos
			mstate->dtms_access &= ~DTRACE_ACCESS_PROC;
#endif
		}
	}

#ifdef illumos
	/*
	 * If our dte_cond is set to DTRACE_COND_ZONEOWNER and we are not
	 * in our zone, check to see if our mode policy is to restrict rather
	 * than to drop; if to restrict, strip away both DTRACE_ACCESS_PROC
	 * and DTRACE_ACCESS_ARGS
	 */
	if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) {
		cred_t *cr;
		cred_t *s_cr = state->dts_cred.dcr_cred;

		ASSERT(s_cr != NULL);

		if ((cr = CRED()) == NULL ||
		    s_cr->cr_zone->zone_id != cr->cr_zone->zone_id) {
			if (mode & DTRACE_MODE_NOPRIV_DROP)
				return (0);

			mstate->dtms_access &=
			    ~(DTRACE_ACCESS_PROC | DTRACE_ACCESS_ARGS);
		}
	}
#endif

	return (1);
}

/*
 * Note: not called from probe context. This function is called
 * asynchronously (and at a regular interval) from outside of probe context to
 * clean the dirty dynamic variable lists on all CPUs. Dynamic variable
 * cleaning is explained in detail in <sys/dtrace_impl.h>.
 */
void
dtrace_dynvar_clean(dtrace_dstate_t *dstate)
{
	dtrace_dynvar_t *dirty;
	dtrace_dstate_percpu_t *dcpu;
	dtrace_dynvar_t **rinsep;
	int i, j, work = 0;

	CPU_FOREACH(i) {
		dcpu = &dstate->dtds_percpu[i];
		rinsep = &dcpu->dtdsc_rinsing;

		/*
		 * If the dirty list is NULL, there is no dirty work to do.
		 */
		if (dcpu->dtdsc_dirty == NULL)
			continue;

		if (dcpu->dtdsc_rinsing != NULL) {
1786
			/*
			 * If the rinsing list is non-NULL, then it is because
			 * this CPU was selected to accept another CPU's
			 * dirty list -- and since that time, dirty buffers
			 * have accumulated.  This is a highly unlikely
			 * condition, but we choose to ignore the dirty
			 * buffers -- they'll be picked up by a future cleanse.
			 */
			continue;
		}

		if (dcpu->dtdsc_clean != NULL) {
			/*
			 * If the clean list is non-NULL, then we're in a
			 * situation where a CPU has done deallocations (we
			 * have a non-NULL dirty list) but no allocations (we
			 * also have a non-NULL clean list).  We can't simply
			 * move the dirty list into the clean list on this
			 * CPU, yet we also don't want to allow this condition
			 * to persist, lest a short clean list prevent a
			 * massive dirty list from being cleaned (which in
			 * turn could lead to otherwise avoidable dynamic
			 * drops).  To deal with this, we look for some CPU
			 * with a NULL clean list, NULL dirty list, and NULL
			 * rinsing list -- and then we borrow this CPU to
			 * rinse our dirty list.
			 */
			CPU_FOREACH(j) {
				dtrace_dstate_percpu_t *rinser;

				rinser = &dstate->dtds_percpu[j];

				if (rinser->dtdsc_rinsing != NULL)
					continue;

				if (rinser->dtdsc_dirty != NULL)
					continue;

				if (rinser->dtdsc_clean != NULL)
					continue;

				rinsep = &rinser->dtdsc_rinsing;
				break;
			}

			if (j > mp_maxid) {
				/*
				 * We were unable to find another CPU that
				 * could accept this dirty list -- we are
				 * therefore unable to clean it now.
				 */
				dtrace_dynvar_failclean++;
				continue;
			}
		}

		work = 1;

		/*
		 * Atomically move the dirty list aside.
		 */
		do {
			dirty = dcpu->dtdsc_dirty;

			/*
			 * Before we zap the dirty list, set the rinsing list.
			 * (This allows for a potential assertion in
			 * dtrace_dynvar():  if a free dynamic variable appears
			 * on a hash chain, either the dirty list or the
			 * rinsing list for some CPU must be non-NULL.)
			 */
			*rinsep = dirty;
			dtrace_membar_producer();
		} while (dtrace_casptr(&dcpu->dtdsc_dirty,
		    dirty, NULL) != dirty);
	}

	if (!work) {
		/*
		 * We have no work to do; we can simply return.
		 */
		return;
	}

	dtrace_sync();

	CPU_FOREACH(i) {
		dcpu = &dstate->dtds_percpu[i];

		if (dcpu->dtdsc_rinsing == NULL)
			continue;

		/*
		 * We are now guaranteed that no hash chain contains a pointer
		 * into this dirty list; we can make it clean.
		 */
		ASSERT(dcpu->dtdsc_clean == NULL);
		dcpu->dtdsc_clean = dcpu->dtdsc_rinsing;
		dcpu->dtdsc_rinsing = NULL;
	}

	/*
	 * Before we actually set the state to be DTRACE_DSTATE_CLEAN, make
	 * sure that all CPUs have seen all of the dtdsc_clean pointers.
	 * This prevents a race whereby a CPU incorrectly decides that
	 * the state should be something other than DTRACE_DSTATE_CLEAN
	 * after dtrace_dynvar_clean() has completed.
	 */
	dtrace_sync();

	dstate->dtds_state = DTRACE_DSTATE_CLEAN;
}
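
/*
 * A sketch of the lifecycle implemented above (described in full in
 * <sys/dtrace_impl.h>):  a freed dynamic variable chunk moves through
 * per-CPU lists before it may be reallocated:
 *
 *	dtdsc_dirty -> dtdsc_rinsing -> dtdsc_clean -> (reallocation)
 *
 * dtrace_dynvar() frees chunks to the dirty list; this cleaner atomically
 * moves each dirty list aside to a rinsing list, issues a dtrace_sync() so
 * that no CPU can still reach those chunks through a stale hash chain, and
 * only then promotes the rinsed buffers to the clean list, from which
 * allocations may once again be satisfied.
 */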

/*
 * Depending on the value of the op parameter, this function looks up,
 * allocates or deallocates an arbitrarily-keyed dynamic variable.  If an
 * allocation is requested, this function will return a pointer to a
 * dtrace_dynvar_t corresponding to the allocated variable -- or NULL if no
 * variable can be allocated.  If NULL is returned, the appropriate counter
 * will be incremented.
 */
dtrace_dynvar_t *
dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys,
    dtrace_key_t *key, size_t dsize, dtrace_dynvar_op_t op,
    dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
{
	uint64_t hashval = DTRACE_DYNHASH_VALID;
	dtrace_dynhash_t *hash = dstate->dtds_hash;
	dtrace_dynvar_t *free, *new_free, *next, *dvar, *start, *prev = NULL;
	processorid_t me = curcpu, cpu = me;
	dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[me];
	size_t bucket, ksize;
	size_t chunksize = dstate->dtds_chunksize;
	uintptr_t kdata, lock, nstate;
	uint_t i;

	ASSERT(nkeys != 0);

	/*
	 * Hash the key.  As with aggregations, we use Jenkins' "One-at-a-time"
	 * algorithm.  For the by-value portions, we perform the algorithm in
	 * 16-bit chunks (as opposed to 8-bit chunks).  This speeds things up a
	 * bit, and seems to have only a minute effect on distribution.  For
	 * the by-reference data, we perform "One-at-a-time" iterating (safely)
	 * over each referenced byte.  It's painful to do this, but it's much
	 * better than pathological hash distribution.  The efficacy of the
	 * hashing algorithm (and a comparison with other algorithms) may be
	 * found by running the ::dtrace_dynstat MDB dcmd.
	 */
	for (i = 0; i < nkeys; i++) {
		if (key[i].dttk_size == 0) {
			uint64_t val = key[i].dttk_value;

			hashval += (val >> 48) & 0xffff;
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			hashval += (val >> 32) & 0xffff;
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			hashval += (val >> 16) & 0xffff;
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			hashval += val & 0xffff;
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);
		} else {
			/*
			 * This is incredibly painful, but it beats the hell
			 * out of the alternative.
			 */
			uint64_t j, size = key[i].dttk_size;
			uintptr_t base = (uintptr_t)key[i].dttk_value;

			if (!dtrace_canload(base, size, mstate, vstate))
				break;

			for (j = 0; j < size; j++) {
				hashval += dtrace_load8(base + j);
				hashval += (hashval << 10);
				hashval ^= (hashval >> 6);
			}
		}
	}

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
		return (NULL);

	hashval += (hashval << 3);
	hashval ^= (hashval >> 11);
	hashval += (hashval << 15);
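
	/*
	 * In isolation, the mixing above is Jenkins' "One-at-a-time" hash;
	 * a minimal byte-at-a-time sketch of the same algorithm is:
	 *
	 *	uint32_t h = 0;
	 *	for (i = 0; i < len; i++) {
	 *		h += key[i];
	 *		h += (h << 10);
	 *		h ^= (h >> 6);
	 *	}
	 *	h += (h << 3);
	 *	h ^= (h >> 11);
	 *	h += (h << 15);
	 *
	 * (Here 'key' and 'len' are hypothetical; the code above feeds the
	 * tuple in 16-bit or byte units as described in the block comment.)
	 */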

	/*
	 * There is a remote chance (ideally, 1 in 2^31) that our hashval
	 * comes out to be one of our two sentinel hash values.  If this
	 * actually happens, we set the hashval to be a value known to be a
	 * non-sentinel value.
	 */
	if (hashval == DTRACE_DYNHASH_FREE || hashval == DTRACE_DYNHASH_SINK)
		hashval = DTRACE_DYNHASH_VALID;

	/*
	 * Yes, it's painful to do a divide here.  If the cycle count becomes
	 * important here, tricks can be pulled to reduce it.  (However, it's
	 * critical that hash collisions be kept to an absolute minimum;
	 * they're much more painful than a divide.)  It's better to have a
	 * solution that generates few collisions and still keeps things
	 * relatively simple.
	 */
	bucket = hashval % dstate->dtds_hashsize;

	if (op == DTRACE_DYNVAR_DEALLOC) {
		volatile uintptr_t *lockp = &hash[bucket].dtdh_lock;

		for (;;) {
			while ((lock = *lockp) & 1)
				continue;

			if (dtrace_casptr((volatile void *)lockp,
			    (volatile void *)lock, (volatile void *)(lock + 1)) == (void *)lock)
				break;
		}

		dtrace_membar_producer();
	}

top:
	prev = NULL;
	lock = hash[bucket].dtdh_lock;

	dtrace_membar_consumer();

	start = hash[bucket].dtdh_chain;
	ASSERT(start != NULL && (start->dtdv_hashval == DTRACE_DYNHASH_SINK ||
	    start->dtdv_hashval != DTRACE_DYNHASH_FREE ||
	    op != DTRACE_DYNVAR_DEALLOC));

	for (dvar = start; dvar != NULL; dvar = dvar->dtdv_next) {
		dtrace_tuple_t *dtuple = &dvar->dtdv_tuple;
		dtrace_key_t *dkey = &dtuple->dtt_key[0];

		if (dvar->dtdv_hashval != hashval) {
			if (dvar->dtdv_hashval == DTRACE_DYNHASH_SINK) {
				/*
				 * We've reached the sink, and therefore the
				 * end of the hash chain; we can kick out of
				 * the loop knowing that we have seen a valid
				 * snapshot of state.
				 */
				ASSERT(dvar->dtdv_next == NULL);
				ASSERT(dvar == &dtrace_dynhash_sink);
				break;
			}

			if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE) {
				/*
				 * We've gone off the rails:  somewhere along
				 * the line, one of the members of this hash
				 * chain was deleted.  Note that we could also
				 * detect this by simply letting this loop run
				 * to completion, as we would eventually hit
				 * the end of the dirty list.  However, we
				 * want to avoid running the length of the
				 * dirty list unnecessarily (it might be quite
				 * long), so we catch this as early as
				 * possible by detecting the hash marker.  In
				 * this case, we simply set dvar to NULL and
				 * break; the conditional after the loop will
				 * send us back to top.
				 */
				dvar = NULL;
				break;
			}

			goto next;
		}

		if (dtuple->dtt_nkeys != nkeys)
			goto next;

		for (i = 0; i < nkeys; i++, dkey++) {
			if (dkey->dttk_size != key[i].dttk_size)
				goto next; /* size or type mismatch */

			if (dkey->dttk_size != 0) {
				if (dtrace_bcmp(
				    (void *)(uintptr_t)key[i].dttk_value,
				    (void *)(uintptr_t)dkey->dttk_value,
				    dkey->dttk_size))
					goto next;
			} else {
				if (dkey->dttk_value != key[i].dttk_value)
					goto next;
			}
		}

		if (op != DTRACE_DYNVAR_DEALLOC)
			return (dvar);

		ASSERT(dvar->dtdv_next == NULL ||
		    dvar->dtdv_next->dtdv_hashval != DTRACE_DYNHASH_FREE);

		if (prev != NULL) {
			ASSERT(hash[bucket].dtdh_chain != dvar);
			ASSERT(start != dvar);
			ASSERT(prev->dtdv_next == dvar);
			prev->dtdv_next = dvar->dtdv_next;
		} else {
			if (dtrace_casptr(&hash[bucket].dtdh_chain,
			    start, dvar->dtdv_next) != start) {
				/*
				 * We have failed to atomically swing the
				 * hash table head pointer, presumably because
				 * of a conflicting allocation on another CPU.
				 * We need to reread the hash chain and try
				 * again.
				 */
				goto top;
			}
		}

		dtrace_membar_producer();

		/*
		 * Now set the hash value to indicate that it's free.
		 */
		ASSERT(hash[bucket].dtdh_chain != dvar);
		dvar->dtdv_hashval = DTRACE_DYNHASH_FREE;

		dtrace_membar_producer();

		/*
		 * Set the next pointer to point at the dirty list, and
		 * atomically swing the dirty pointer to the newly freed dvar.
		 */
		do {
			next = dcpu->dtdsc_dirty;
			dvar->dtdv_next = next;
		} while (dtrace_casptr(&dcpu->dtdsc_dirty, next, dvar) != next);

		/*
		 * Finally, unlock this hash bucket.
		 */
		ASSERT(hash[bucket].dtdh_lock == lock);
		ASSERT(lock & 1);
		hash[bucket].dtdh_lock++;

		return (NULL);
next:
		prev = dvar;
		continue;
	}

	if (dvar == NULL) {
		/*
		 * If dvar is NULL, it is because we went off the rails:
		 * one of the elements that we traversed in the hash chain
		 * was deleted while we were traversing it.  In this case,
		 * we assert that we aren't doing a dealloc (deallocs lock
		 * the hash bucket to prevent themselves from racing with
		 * one another), and retry the hash chain traversal.
		 */
		ASSERT(op != DTRACE_DYNVAR_DEALLOC);
		goto top;
	}

	if (op != DTRACE_DYNVAR_ALLOC) {
		/*
		 * If we are not to allocate a new variable, we want to
		 * return NULL now.  Before we return, check that the value
		 * of the lock word hasn't changed.  If it has, we may have
		 * seen an inconsistent snapshot.
		 */
		if (op == DTRACE_DYNVAR_NOALLOC) {
			if (hash[bucket].dtdh_lock != lock)
				goto top;
		} else {
			ASSERT(op == DTRACE_DYNVAR_DEALLOC);
			ASSERT(hash[bucket].dtdh_lock == lock);
			ASSERT(lock & 1);
			hash[bucket].dtdh_lock++;
		}

		return (NULL);
	}

	/*
	 * We need to allocate a new dynamic variable.  The size we need is the
	 * size of dtrace_dynvar plus the size of nkeys dtrace_key_t's plus the
	 * size of any auxiliary key data (rounded up to 8-byte alignment) plus
	 * the size of any referred-to data (dsize).  We then round the final
	 * size up to the chunksize for allocation.
	 */
	for (ksize = 0, i = 0; i < nkeys; i++)
		ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t));
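
	/*
	 * A worked example with hypothetical values:  for nkeys == 2, one
	 * by-value key (dttk_size == 0) and one 13-byte by-reference key,
	 * ksize is 0 + P2ROUNDUP(13, 8) == 16, and the allocation below must
	 * fit sizeof (dtrace_dynvar_t) + sizeof (dtrace_key_t) + 16 + dsize
	 * within the chunksize.
	 */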

	/*
	 * This should be pretty much impossible, but could happen if, say,
	 * strange DIF specified the tuple.  Ideally, this should be an
	 * assertion and not an error condition -- but that requires that the
	 * chunksize calculation in dtrace_difo_chunksize() be absolutely
	 * bullet-proof.  (That is, it must not be able to be fooled by
	 * malicious DIF.)  Given the lack of backwards branches in DIF,
	 * solving this would presumably not amount to solving the Halting
	 * Problem -- but it still seems awfully hard.
	 */
	if (sizeof (dtrace_dynvar_t) + sizeof (dtrace_key_t) * (nkeys - 1) +
	    ksize + dsize > chunksize) {
		dcpu->dtdsc_drops++;
		return (NULL);
	}

	nstate = DTRACE_DSTATE_EMPTY;

	do {
retry:
		free = dcpu->dtdsc_free;

		if (free == NULL) {
			dtrace_dynvar_t *clean = dcpu->dtdsc_clean;
			void *rval;

			if (clean == NULL) {
				/*
				 * We're out of dynamic variable space on
				 * this CPU.  Unless we have tried all CPUs,
				 * we'll try to allocate from a different
				 * CPU.
				 */
				switch (dstate->dtds_state) {
				case DTRACE_DSTATE_CLEAN: {
					void *sp = &dstate->dtds_state;

					if (++cpu > mp_maxid)
						cpu = 0;

					if (dcpu->dtdsc_dirty != NULL &&
					    nstate == DTRACE_DSTATE_EMPTY)
						nstate = DTRACE_DSTATE_DIRTY;

					if (dcpu->dtdsc_rinsing != NULL)
						nstate = DTRACE_DSTATE_RINSING;

					dcpu = &dstate->dtds_percpu[cpu];

					if (cpu != me)
						goto retry;

					(void) dtrace_cas32(sp,
					    DTRACE_DSTATE_CLEAN, nstate);

					/*
					 * To increment the correct bean
					 * counter, take another lap.
					 */
					goto retry;
				}

				case DTRACE_DSTATE_DIRTY:
					dcpu->dtdsc_dirty_drops++;
					break;

				case DTRACE_DSTATE_RINSING:
					dcpu->dtdsc_rinsing_drops++;
					break;

				case DTRACE_DSTATE_EMPTY:
					dcpu->dtdsc_drops++;
					break;
				}

				DTRACE_CPUFLAG_SET(CPU_DTRACE_DROP);
				return (NULL);
			}

			/*
			 * The clean list appears to be non-empty.  We want to
			 * move the clean list to the free list; we start by
			 * moving the clean pointer aside.
			 */
			if (dtrace_casptr(&dcpu->dtdsc_clean,
			    clean, NULL) != clean) {
				/*
				 * We are in one of two situations:
				 *
				 *  (a)	The clean list was switched to the
				 *	free list by another CPU.
				 *
				 *  (b)	The clean list was added to by the
				 *	cleansing cyclic.
				 *
				 * In either of these situations, we can
				 * just reattempt the free list allocation.
				 */
				goto retry;
			}

			ASSERT(clean->dtdv_hashval == DTRACE_DYNHASH_FREE);

			/*
			 * Now we'll move the clean list to our free list.
			 * It's impossible for this to fail:  the only way
			 * the free list can be updated is through this
			 * code path, and only one CPU can own the clean list.
			 * Thus, it would only be possible for this to fail if
			 * this code were racing with dtrace_dynvar_clean().
			 * (That is, if dtrace_dynvar_clean() updated the clean
			 * list, and we ended up racing to update the free
			 * list.)  This race is prevented by the dtrace_sync()
			 * in dtrace_dynvar_clean() -- which flushes the
			 * owners of the clean lists out before resetting
			 * the clean lists.
			 */
			dcpu = &dstate->dtds_percpu[me];
			rval = dtrace_casptr(&dcpu->dtdsc_free, NULL, clean);
			ASSERT(rval == NULL);
			goto retry;
		}

		dvar = free;
		new_free = dvar->dtdv_next;
	} while (dtrace_casptr(&dcpu->dtdsc_free, free, new_free) != free);

	/*
	 * We have now allocated a new chunk.  We copy the tuple keys into the
	 * tuple array and copy any referenced key data into the data space
	 * following the tuple array.  As we do this, we relocate dttk_value
	 * in the final tuple to point to the key data address in the chunk.
	 */
	kdata = (uintptr_t)&dvar->dtdv_tuple.dtt_key[nkeys];
	dvar->dtdv_data = (void *)(kdata + ksize);
	dvar->dtdv_tuple.dtt_nkeys = nkeys;

	for (i = 0; i < nkeys; i++) {
		dtrace_key_t *dkey = &dvar->dtdv_tuple.dtt_key[i];
		size_t kesize = key[i].dttk_size;

		if (kesize != 0) {
			dtrace_bcopy(
			    (const void *)(uintptr_t)key[i].dttk_value,
			    (void *)kdata, kesize);
			dkey->dttk_value = kdata;
			kdata += P2ROUNDUP(kesize, sizeof (uint64_t));
		} else {
			dkey->dttk_value = key[i].dttk_value;
		}

		dkey->dttk_size = kesize;
	}

	ASSERT(dvar->dtdv_hashval == DTRACE_DYNHASH_FREE);
	dvar->dtdv_hashval = hashval;
	dvar->dtdv_next = start;

	if (dtrace_casptr(&hash[bucket].dtdh_chain, start, dvar) == start)
		return (dvar);

	/*
	 * The cas has failed.  Either another CPU is adding an element to
	 * this hash chain, or another CPU is deleting an element from this
	 * hash chain.  The simplest way to deal with both of these cases
	 * (though not necessarily the most efficient) is to free our
	 * allocated block and re-attempt it all.  Note that the free is
	 * to the dirty list and _not_ to the free list.  This is to prevent
	 * races with allocators, above.
	 */
	dvar->dtdv_hashval = DTRACE_DYNHASH_FREE;

	dtrace_membar_producer();

	do {
		free = dcpu->dtdsc_dirty;
		dvar->dtdv_next = free;
	} while (dtrace_casptr(&dcpu->dtdsc_dirty, free, dvar) != free);

	goto top;
}

/*ARGSUSED*/
static void
dtrace_aggregate_min(uint64_t *oval, uint64_t nval, uint64_t arg)
{
	if ((int64_t)nval < (int64_t)*oval)
		*oval = nval;
}

/*ARGSUSED*/
static void
dtrace_aggregate_max(uint64_t *oval, uint64_t nval, uint64_t arg)
{
	if ((int64_t)nval > (int64_t)*oval)
		*oval = nval;
}

static void
dtrace_aggregate_quantize(uint64_t *quanta, uint64_t nval, uint64_t incr)
{
	int i, zero = DTRACE_QUANTIZE_ZEROBUCKET;
	int64_t val = (int64_t)nval;

	if (val < 0) {
		for (i = 0; i < zero; i++) {
			if (val <= DTRACE_QUANTIZE_BUCKETVAL(i)) {
				quanta[i] += incr;
				return;
			}
		}
	} else {
		for (i = zero + 1; i < DTRACE_QUANTIZE_NBUCKETS; i++) {
			if (val < DTRACE_QUANTIZE_BUCKETVAL(i)) {
				quanta[i - 1] += incr;
				return;
			}
		}

		quanta[DTRACE_QUANTIZE_NBUCKETS - 1] += incr;
		return;
	}

	ASSERT(0);
}
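
/*
 * A worked example of the bucket logic above:  the bucket values are the
 * zero-centered powers of two ..., -4, -2, -1, 0, 1, 2, 4, ...; a positive
 * value is counted in the highest bucket whose value does not exceed it, so
 * val == 7 increments the bucket whose value is 4 (the [4, 8) range) and
 * val == 8 increments the bucket whose value is 8.
 */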

static void
dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr)
{
	uint64_t arg = *lquanta++;
	int32_t base = DTRACE_LQUANTIZE_BASE(arg);
	uint16_t step = DTRACE_LQUANTIZE_STEP(arg);
	uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg);
	int32_t val = (int32_t)nval, level;

	ASSERT(step != 0);
	ASSERT(levels != 0);

	if (val < base) {
		/*
		 * This is an underflow.
		 */
		lquanta[0] += incr;
		return;
	}

	level = (val - base) / step;

	if (level < levels) {
		lquanta[level + 1] += incr;
		return;
	}

	/*
	 * This is an overflow.
	 */
	lquanta[levels + 1] += incr;
}
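
/*
 * A worked example with hypothetical parameters:  for base == 0, step == 10
 * and levels == 10, a value of 37 yields level == (37 - 0) / 10 == 3 and
 * increments lquanta[4]; lquanta[0] is the underflow bucket and
 * lquanta[levels + 1] the overflow bucket.
 */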

static int
dtrace_aggregate_llquantize_bucket(uint16_t factor, uint16_t low,
    uint16_t high, uint16_t nsteps, int64_t value)
{
	int64_t this = 1, last, next;
	int base = 1, order;

	ASSERT(factor <= nsteps);
	ASSERT(nsteps % factor == 0);

	for (order = 0; order < low; order++)
		this *= factor;

	/*
	 * If our value is less than our factor taken to the power of the
	 * low order of magnitude, it goes into the zeroth bucket.
	 */
	if (value < (last = this))
		return (0);

	for (this *= factor; order <= high; order++) {
		int nbuckets = this > nsteps ? nsteps : this;

		if ((next = this * factor) < this) {
			/*
			 * We should not generally get log/linear quantizations
			 * with a high magnitude that allows 64-bits to
			 * overflow, but we nonetheless protect against this
			 * by explicitly checking for overflow, and clamping
			 * our value accordingly.
			 */
			value = this - 1;
		}

		if (value < this) {
			/*
			 * If our value lies within this order of magnitude,
			 * determine its position by taking the offset within
			 * the order of magnitude, dividing by the bucket
			 * width, and adding to our (accumulated) base.
			 */
			return (base + (value - last) / (this / nbuckets));
		}

		base += nbuckets - (nbuckets / factor);
		last = this;
		this = next;
	}

	/*
	 * Our value is greater than or equal to our factor taken to the
	 * power of one plus the high magnitude -- return the top bucket.
	 */
	return (base);
}
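
/*
 * A worked example with hypothetical parameters:  for factor == 10,
 * low == 0, high == 2 and nsteps == 10, values below 1 map to bucket 0;
 * each order of magnitude then contributes nsteps - nsteps / factor == 9
 * buckets, so a value of 42 falls within the [10, 100) order and maps to
 * bucket 10 + (42 - 10) / 10 == 13.
 */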

static void
dtrace_aggregate_llquantize(uint64_t *llquanta, uint64_t nval, uint64_t incr)
{
	uint64_t arg = *llquanta++;
	uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg);
	uint16_t low = DTRACE_LLQUANTIZE_LOW(arg);
	uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg);
	uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg);

	llquanta[dtrace_aggregate_llquantize_bucket(factor,
	    low, high, nsteps, nval)] += incr;
}

/*ARGSUSED*/
static void
dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg)
{
	data[0]++;
	data[1] += nval;
}

/*ARGSUSED*/
static void
dtrace_aggregate_stddev(uint64_t *data, uint64_t nval, uint64_t arg)
{
	int64_t snval = (int64_t)nval;
	uint64_t tmp[2];

	data[0]++;
	data[1] += nval;

	/*
	 * What we want to say here is:
	 *
	 * data[2] += nval * nval;
	 *
	 * But given that nval is 64-bit, we could easily overflow, so
	 * we do this as 128-bit arithmetic.
	 */
	if (snval < 0)
		snval = -snval;

	dtrace_multiply_128((uint64_t)snval, (uint64_t)snval, tmp);
	dtrace_add_128(data + 2, tmp, data + 2);
}
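
/*
 * The three accumulated moments allow the consumer to derive the standard
 * deviation without the kernel doing any division:  with n == data[0],
 * sum(x) == data[1] and sum(x^2) held in the 128-bit value at data[2..3],
 *
 *	stddev = sqrt((n * sum(x^2) - sum(x)^2) / n^2)
 *
 * (the usual population standard deviation, computed at consumption time).
 */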

/*ARGSUSED*/
static void
dtrace_aggregate_count(uint64_t *oval, uint64_t nval, uint64_t arg)
{
	*oval = *oval + 1;
}

/*ARGSUSED*/
static void
dtrace_aggregate_sum(uint64_t *oval, uint64_t nval, uint64_t arg)
{
	*oval += nval;
}

/*
 * Aggregate given the tuple in the principal data buffer, and the aggregating
 * action denoted by the specified dtrace_aggregation_t.  The aggregation
 * buffer is specified as the buf parameter.  This routine does not return
 * failure; if there is no space in the aggregation buffer, the data will be
 * dropped, and a corresponding counter incremented.
 */
static void
dtrace_aggregate(dtrace_aggregation_t *agg, dtrace_buffer_t *dbuf,
    intptr_t offset, dtrace_buffer_t *buf, uint64_t expr, uint64_t arg)
{
	dtrace_recdesc_t *rec = &agg->dtag_action.dta_rec;
	uint32_t i, ndx, size, fsize;
	uint32_t align = sizeof (uint64_t) - 1;
	dtrace_aggbuffer_t *agb;
	dtrace_aggkey_t *key;
	uint32_t hashval = 0, limit, isstr;
	caddr_t tomax, data, kdata;
	dtrace_actkind_t action;
	dtrace_action_t *act;
	size_t offs;

	if (buf == NULL)
		return;

	if (!agg->dtag_hasarg) {
		/*
		 * Currently, only quantize() and lquantize() take additional
		 * arguments, and they have the same semantics:  an increment
		 * value that defaults to 1 when not present.  If additional
		 * aggregating actions take arguments, the setting of the
		 * default argument value will presumably have to become more
		 * sophisticated...
		 */
		arg = 1;
	}

	action = agg->dtag_action.dta_kind - DTRACEACT_AGGREGATION;
	size = rec->dtrd_offset - agg->dtag_base;
	fsize = size + rec->dtrd_size;

	ASSERT(dbuf->dtb_tomax != NULL);
	data = dbuf->dtb_tomax + offset + agg->dtag_base;

	if ((tomax = buf->dtb_tomax) == NULL) {
		dtrace_buffer_drop(buf);
		return;
	}

	/*
	 * The metastructure is always at the bottom of the buffer.
	 */
	agb = (dtrace_aggbuffer_t *)(tomax + buf->dtb_size -
	    sizeof (dtrace_aggbuffer_t));

	if (buf->dtb_offset == 0) {
		/*
		 * We just kludge up approximately 1/8th of the size to be
		 * buckets.  If this guess ends up being routinely
		 * off-the-mark, we may need to dynamically readjust this
		 * based on past performance.
		 */
		uintptr_t hashsize = (buf->dtb_size >> 3) / sizeof (uintptr_t);

		if ((uintptr_t)agb - hashsize * sizeof (dtrace_aggkey_t *) <
		    (uintptr_t)tomax || hashsize == 0) {
			/*
			 * We've been given a ludicrously small buffer;
			 * increment our drop count and leave.
			 */
			dtrace_buffer_drop(buf);
			return;
		}

		/*
		 * And now, a pathetic attempt to try to get an odd (or
		 * perchance, a prime) hash size for better hash distribution.
		 */
		if (hashsize > (DTRACE_AGGHASHSIZE_SLEW << 3))
			hashsize -= DTRACE_AGGHASHSIZE_SLEW;

		agb->dtagb_hashsize = hashsize;
		agb->dtagb_hash = (dtrace_aggkey_t **)((uintptr_t)agb -
		    agb->dtagb_hashsize * sizeof (dtrace_aggkey_t *));
		agb->dtagb_free = (uintptr_t)agb->dtagb_hash;

		for (i = 0; i < agb->dtagb_hashsize; i++)
			agb->dtagb_hash[i] = NULL;
	}

	ASSERT(agg->dtag_first != NULL);
	ASSERT(agg->dtag_first->dta_intuple);

	/*
	 * Calculate the hash value based on the key.  Note that we _don't_
	 * include the aggid in the hashing (but we will store it as part of
	 * the key).  The hashing algorithm is Bob Jenkins' "One-at-a-time"
	 * algorithm: a simple, quick algorithm that has no known funnels, and
	 * gets good distribution in practice.  The efficacy of the hashing
	 * algorithm (and a comparison with other algorithms) may be found by
	 * running the ::dtrace_aggstat MDB dcmd.
	 */
	for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
		i = act->dta_rec.dtrd_offset - agg->dtag_base;
		limit = i + act->dta_rec.dtrd_size;
		ASSERT(limit <= size);
		isstr = DTRACEACT_ISSTRING(act);

		for (; i < limit; i++) {
			hashval += data[i];
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			if (isstr && data[i] == '\0')
				break;
		}
	}

	hashval += (hashval << 3);
	hashval ^= (hashval >> 11);
	hashval += (hashval << 15);

	/*
	 * Yes, the divide here is expensive -- but it's generally the least
	 * of the performance issues given the amount of data that we iterate
	 * over to compute hash values, compare data, etc.
	 */
	ndx = hashval % agb->dtagb_hashsize;

	for (key = agb->dtagb_hash[ndx]; key != NULL; key = key->dtak_next) {
		ASSERT((caddr_t)key >= tomax);
		ASSERT((caddr_t)key < tomax + buf->dtb_size);

		if (hashval != key->dtak_hashval || key->dtak_size != size)
			continue;

		kdata = key->dtak_data;
		ASSERT(kdata >= tomax && kdata < tomax + buf->dtb_size);

		for (act = agg->dtag_first; act->dta_intuple;
		    act = act->dta_next) {
			i = act->dta_rec.dtrd_offset - agg->dtag_base;
			limit = i + act->dta_rec.dtrd_size;
			ASSERT(limit <= size);
			isstr = DTRACEACT_ISSTRING(act);

			for (; i < limit; i++) {
				if (kdata[i] != data[i])
					goto next;

				if (isstr && data[i] == '\0')
					break;
			}
		}

		if (action != key->dtak_action) {
			/*
			 * We are aggregating on the same value in the same
			 * aggregation with two different aggregating actions.
			 * (This should have been picked up in the compiler,
			 * so we may be dealing with errant or devious DIF.)
			 * This is an error condition; we indicate as much,
			 * and return.
			 */
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			return;
		}

		/*
		 * This is a hit:  we need to apply the aggregator to
		 * the value at this key.
		 */
		agg->dtag_aggregate((uint64_t *)(kdata + size), expr, arg);
		return;
next:
		continue;
	}

	/*
	 * We didn't find it.  We need to allocate some zero-filled space,
	 * link it into the hash table appropriately, and apply the aggregator
	 * to the (zero-filled) value.
	 */
	offs = buf->dtb_offset;
	while (offs & (align - 1))
		offs += sizeof (uint32_t);

	/*
	 * If we don't have enough room to both allocate a new key _and_
	 * its associated data, increment the drop count and return.
	 */
	if ((uintptr_t)tomax + offs + fsize >
	    agb->dtagb_free - sizeof (dtrace_aggkey_t)) {
		dtrace_buffer_drop(buf);
		return;
	}

	/*CONSTCOND*/
	ASSERT(!(sizeof (dtrace_aggkey_t) & (sizeof (uintptr_t) - 1)));
	key = (dtrace_aggkey_t *)(agb->dtagb_free - sizeof (dtrace_aggkey_t));
	agb->dtagb_free -= sizeof (dtrace_aggkey_t);

	key->dtak_data = kdata = tomax + offs;
	buf->dtb_offset = offs + fsize;

	/*
	 * Now copy the data across.
	 */
	*((dtrace_aggid_t *)kdata) = agg->dtag_id;

	for (i = sizeof (dtrace_aggid_t); i < size; i++)
		kdata[i] = data[i];

	/*
	 * Because strings are not zeroed out by default, we need to iterate
	 * looking for actions that store strings, and we need to explicitly
	 * pad these strings out with zeroes.
	 */
	for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
		int nul;

		if (!DTRACEACT_ISSTRING(act))
			continue;

		i = act->dta_rec.dtrd_offset - agg->dtag_base;
		limit = i + act->dta_rec.dtrd_size;
		ASSERT(limit <= size);

		for (nul = 0; i < limit; i++) {
			if (nul) {
				kdata[i] = '\0';
				continue;
			}

			if (data[i] != '\0')
				continue;

			nul = 1;
		}
	}

	for (i = size; i < fsize; i++)
		kdata[i] = 0;

	key->dtak_hashval = hashval;
	key->dtak_size = size;
	key->dtak_action = action;
	key->dtak_next = agb->dtagb_hash[ndx];
	agb->dtagb_hash[ndx] = key;

	/*
	 * Finally, apply the aggregator.
	 */
	*((uint64_t *)(key->dtak_data + size)) = agg->dtag_initial;
	agg->dtag_aggregate((uint64_t *)(key->dtak_data + size), expr, arg);
}
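
/*
 * For reference, the layout of an aggregation key as assembled above:
 * dtak_data points at [ dtrace_aggid_t | tuple key data ... | value ],
 * with the 64-bit aggregating value(s) beginning at dtak_data + dtak_size
 * and seeded from dtag_initial before the aggregator is first applied.
 */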

/*
 * Given consumer state, this routine finds a speculation in the INACTIVE
 * state and transitions it into the ACTIVE state.  If there is no speculation
 * in the INACTIVE state, 0 is returned.  In this case, no error counter is
 * incremented -- it is up to the caller to take appropriate action.
 */
static int
dtrace_speculation(dtrace_state_t *state)
{
	int i = 0;
	dtrace_speculation_state_t curstate;
	uint32_t *stat = &state->dts_speculations_unavail, count;

	while (i < state->dts_nspeculations) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];

		curstate = spec->dtsp_state;

		if (curstate != DTRACESPEC_INACTIVE) {
			if (curstate == DTRACESPEC_COMMITTINGMANY ||
			    curstate == DTRACESPEC_COMMITTING ||
			    curstate == DTRACESPEC_DISCARDING)
				stat = &state->dts_speculations_busy;
			i++;
			continue;
		}

		if (dtrace_cas32((uint32_t *)&spec->dtsp_state,
		    curstate, DTRACESPEC_ACTIVE) == curstate)
			return (i + 1);
	}

	/*
	 * We couldn't find a speculation.  If we found as much as a single
	 * busy speculation buffer, we'll attribute this failure as "busy"
	 * instead of "unavail".
	 */
	do {
		count = *stat;
	} while (dtrace_cas32(stat, count, count + 1) != count);

	return (0);
}
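
/*
 * For reference, a minimal sketch of the D-level usage that drives this
 * state machine (following the pattern in the Solaris Dynamic Tracing
 * Guide; the probe names are illustrative):
 *
 *	syscall::open:entry
 *	{
 *		self->spec = speculation();
 *	}
 *
 *	syscall::open:entry
 *	/self->spec/
 *	{
 *		speculate(self->spec);
 *		printf("%s", copyinstr(arg0));
 *	}
 *
 *	syscall::open:return
 *	/self->spec && errno != 0/
 *	{
 *		commit(self->spec);
 *		self->spec = 0;
 *	}
 *
 *	syscall::open:return
 *	/self->spec/
 *	{
 *		discard(self->spec);
 *		self->spec = 0;
 *	}
 *
 * speculation() reaches this function; speculate(), commit() and discard()
 * map onto dtrace_speculation_buffer(), dtrace_speculation_commit() and
 * dtrace_speculation_discard() below.
 */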

/*
 * This routine commits an active speculation.  If the specified speculation
 * is not in a valid state to perform a commit(), this routine will silently do
 * nothing.  The state of the specified speculation is transitioned according
 * to the state transition diagram outlined in <sys/dtrace_impl.h>
 */
static void
dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	dtrace_buffer_t *src, *dest;
	uintptr_t daddr, saddr, dlimit, slimit;
	dtrace_speculation_state_t curstate, new = 0;
	ssize_t offs;
	uint64_t timestamp;

	if (which == 0)
		return;

	if (which > state->dts_nspeculations) {
		cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return;
	}

	spec = &state->dts_speculations[which - 1];
	src = &spec->dtsp_buffer[cpu];
	dest = &state->dts_buffer[cpu];

	do {
		curstate = spec->dtsp_state;

		if (curstate == DTRACESPEC_COMMITTINGMANY)
			break;

		switch (curstate) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_DISCARDING:
			return;

		case DTRACESPEC_COMMITTING:
			/*
			 * This is only possible if we are (a) commit()'ing
			 * without having done a prior speculate() on this CPU
			 * and (b) racing with another commit() on a different
			 * CPU.  There's nothing to do -- we just assert that
			 * our offset is 0.
			 */
			ASSERT(src->dtb_offset == 0);
			return;

		case DTRACESPEC_ACTIVE:
			new = DTRACESPEC_COMMITTING;
			break;

		case DTRACESPEC_ACTIVEONE:
			/*
			 * This speculation is active on one CPU.  If our
			 * buffer offset is non-zero, we know that the one CPU
			 * must be us.  Otherwise, we are committing on a
			 * different CPU from the speculate(), and we must
			 * rely on being asynchronously cleaned.
			 */
			if (src->dtb_offset != 0) {
				new = DTRACESPEC_COMMITTING;
				break;
			}
			/*FALLTHROUGH*/

		case DTRACESPEC_ACTIVEMANY:
			new = DTRACESPEC_COMMITTINGMANY;
			break;

		default:
			ASSERT(0);
		}
	} while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
	    curstate, new) != curstate);

	/*
	 * We have set the state to indicate that we are committing this
	 * speculation.  Now reserve the necessary space in the destination
	 * buffer.
	 */
	if ((offs = dtrace_buffer_reserve(dest, src->dtb_offset,
	    sizeof (uint64_t), state, NULL)) < 0) {
		dtrace_buffer_drop(dest);
		goto out;
	}

	/*
	 * We have sufficient space to copy the speculative buffer into the
	 * primary buffer.  First, modify the speculative buffer, filling
	 * in the timestamp of all entries with the current time.  The data
	 * must have the commit() time rather than the time it was traced,
	 * so that all entries in the primary buffer are in timestamp order.
	 */
	timestamp = dtrace_gethrtime();
	saddr = (uintptr_t)src->dtb_tomax;
	slimit = saddr + src->dtb_offset;
	while (saddr < slimit) {
		size_t size;
		dtrace_rechdr_t *dtrh = (dtrace_rechdr_t *)saddr;

		if (dtrh->dtrh_epid == DTRACE_EPIDNONE) {
			saddr += sizeof (dtrace_epid_t);
			continue;
		}
		ASSERT3U(dtrh->dtrh_epid, <=, state->dts_necbs);
		size = state->dts_ecbs[dtrh->dtrh_epid - 1]->dte_size;

		ASSERT3U(saddr + size, <=, slimit);
		ASSERT3U(size, >=, sizeof (dtrace_rechdr_t));
		ASSERT3U(DTRACE_RECORD_LOAD_TIMESTAMP(dtrh), ==, UINT64_MAX);

		DTRACE_RECORD_STORE_TIMESTAMP(dtrh, timestamp);

		saddr += size;
	}

	/*
	 * Copy the buffer across.  (Note that this is a
	 * highly suboptimal bcopy(); in the unlikely event that this becomes
	 * a serious performance issue, a high-performance DTrace-specific
	 * bcopy() should obviously be invented.)
	 */
	daddr = (uintptr_t)dest->dtb_tomax + offs;
	dlimit = daddr + src->dtb_offset;
	saddr = (uintptr_t)src->dtb_tomax;

	/*
	 * First, the aligned portion.
	 */
	while (dlimit - daddr >= sizeof (uint64_t)) {
		*((uint64_t *)daddr) = *((uint64_t *)saddr);

		daddr += sizeof (uint64_t);
		saddr += sizeof (uint64_t);
	}

	/*
	 * Now any left-over bit...
	 */
	while (dlimit - daddr)
		*((uint8_t *)daddr++) = *((uint8_t *)saddr++);

	/*
	 * Finally, commit the reserved space in the destination buffer.
	 */
	dest->dtb_offset = offs + src->dtb_offset;

out:
	/*
	 * If we're lucky enough to be the only active CPU on this speculation
	 * buffer, we can just set the state back to DTRACESPEC_INACTIVE.
	 */
	if (curstate == DTRACESPEC_ACTIVE ||
	    (curstate == DTRACESPEC_ACTIVEONE && new == DTRACESPEC_COMMITTING)) {
		uint32_t rval = dtrace_cas32((uint32_t *)&spec->dtsp_state,
		    DTRACESPEC_COMMITTING, DTRACESPEC_INACTIVE);

		ASSERT(rval == DTRACESPEC_COMMITTING);
	}

	src->dtb_offset = 0;
	src->dtb_xamot_drops += src->dtb_drops;
	src->dtb_drops = 0;
}

/*
 * This routine discards an active speculation.  If the specified speculation
 * is not in a valid state to perform a discard(), this routine will silently
 * do nothing.  The state of the specified speculation is transitioned
 * according to the state transition diagram outlined in <sys/dtrace_impl.h>
 */
static void
dtrace_speculation_discard(dtrace_state_t *state, processorid_t cpu,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	dtrace_speculation_state_t curstate, new = 0;
	dtrace_buffer_t *buf;

	if (which == 0)
		return;

	if (which > state->dts_nspeculations) {
		cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return;
	}

	spec = &state->dts_speculations[which - 1];
	buf = &spec->dtsp_buffer[cpu];

	do {
		curstate = spec->dtsp_state;

		switch (curstate) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_COMMITTINGMANY:
		case DTRACESPEC_COMMITTING:
		case DTRACESPEC_DISCARDING:
			return;

		case DTRACESPEC_ACTIVE:
		case DTRACESPEC_ACTIVEMANY:
			new = DTRACESPEC_DISCARDING;
			break;

		case DTRACESPEC_ACTIVEONE:
			if (buf->dtb_offset != 0) {
				new = DTRACESPEC_INACTIVE;
			} else {
				new = DTRACESPEC_DISCARDING;
			}
			break;

		default:
			ASSERT(0);
		}
	} while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
	    curstate, new) != curstate);

	buf->dtb_offset = 0;
	buf->dtb_drops = 0;
}

/*
 * Note: not called from probe context.  This function is called
 * asynchronously from cross call context to clean any speculations that are
 * in the COMMITTINGMANY or DISCARDING states.  These speculations may not be
 * transitioned back to the INACTIVE state until all CPUs have cleaned the
 * speculation.
 */
static void
dtrace_speculation_clean_here(dtrace_state_t *state)
{
	dtrace_icookie_t cookie;
	processorid_t cpu = curcpu;
	dtrace_buffer_t *dest = &state->dts_buffer[cpu];
	dtrace_specid_t i;

	cookie = dtrace_interrupt_disable();

	if (dest->dtb_tomax == NULL) {
		dtrace_interrupt_enable(cookie);
		return;
	}

	for (i = 0; i < state->dts_nspeculations; i++) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];
		dtrace_buffer_t *src = &spec->dtsp_buffer[cpu];

		if (src->dtb_tomax == NULL)
			continue;

		if (spec->dtsp_state == DTRACESPEC_DISCARDING) {
			src->dtb_offset = 0;
			continue;
		}

		if (spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
			continue;

		if (src->dtb_offset == 0)
			continue;

		dtrace_speculation_commit(state, cpu, i + 1);
	}

	dtrace_interrupt_enable(cookie);
}

/*
 * Note: not called from probe context.  This function is called
 * asynchronously (and at a regular interval) to clean any speculations that
 * are in the COMMITTINGMANY or DISCARDING states.  If it discovers that there
 * is work to be done, it cross calls all CPUs to perform that work;
 * COMMITTINGMANY and DISCARDING speculations may not be transitioned back to
 * the INACTIVE state until they have been cleaned by all CPUs.
 */
static void
dtrace_speculation_clean(dtrace_state_t *state)
{
	int work = 0, rv;
	dtrace_specid_t i;

	for (i = 0; i < state->dts_nspeculations; i++) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];

		ASSERT(!spec->dtsp_cleaning);

		if (spec->dtsp_state != DTRACESPEC_DISCARDING &&
		    spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
			continue;

		work++;
		spec->dtsp_cleaning = 1;
	}

	if (!work)
		return;

	dtrace_xcall(DTRACE_CPUALL,
	    (dtrace_xcall_t)dtrace_speculation_clean_here, state);

	/*
	 * We now know that all CPUs have committed or discarded their
	 * speculation buffers, as appropriate.  We can now set the state
	 * to inactive.
	 */
	for (i = 0; i < state->dts_nspeculations; i++) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];
		dtrace_speculation_state_t curstate, new;

		if (!spec->dtsp_cleaning)
			continue;

		curstate = spec->dtsp_state;
		ASSERT(curstate == DTRACESPEC_DISCARDING ||
		    curstate == DTRACESPEC_COMMITTINGMANY);

		new = DTRACESPEC_INACTIVE;

		rv = dtrace_cas32((uint32_t *)&spec->dtsp_state, curstate, new);
		ASSERT(rv == curstate);
		spec->dtsp_cleaning = 0;
	}
}

/*
 * Called as part of a speculate() to get the speculative buffer associated
 * with a given speculation.  Returns NULL if the specified speculation is not
 * in an ACTIVE state.  If the speculation is in the ACTIVEONE state -- and
 * the active CPU is not the specified CPU -- the speculation will be
 * atomically transitioned into the ACTIVEMANY state.
 */
static dtrace_buffer_t *
dtrace_speculation_buffer(dtrace_state_t *state, processorid_t cpuid,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	dtrace_speculation_state_t curstate, new = 0;
	dtrace_buffer_t *buf;

	if (which == 0)
		return (NULL);

	if (which > state->dts_nspeculations) {
		cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return (NULL);
	}

	spec = &state->dts_speculations[which - 1];
	buf = &spec->dtsp_buffer[cpuid];

	do {
		curstate = spec->dtsp_state;

		switch (curstate) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_COMMITTINGMANY:
		case DTRACESPEC_DISCARDING:
			return (NULL);

		case DTRACESPEC_COMMITTING:
			ASSERT(buf->dtb_offset == 0);
			return (NULL);

		case DTRACESPEC_ACTIVEONE:
			/*
			 * This speculation is currently active on one CPU.
			 * Check the offset in the buffer; if it's non-zero,
			 * that CPU must be us (and we leave the state alone).
			 * If it's zero, assume that we're starting on a new
			 * CPU -- and change the state to indicate that the
			 * speculation is active on more than one CPU.
			 */
			if (buf->dtb_offset != 0)
				return (buf);

			new = DTRACESPEC_ACTIVEMANY;
			break;

		case DTRACESPEC_ACTIVEMANY:
			return (buf);

		case DTRACESPEC_ACTIVE:
			new = DTRACESPEC_ACTIVEONE;
			break;

		default:
			ASSERT(0);
		}
	} while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
	    curstate, new) != curstate);

	ASSERT(new == DTRACESPEC_ACTIVEONE || new == DTRACESPEC_ACTIVEMANY);
	return (buf);
}
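
/*
 * In summary, the CAS loops in the routines above implement the speculation
 * state machine (diagrammed in full in <sys/dtrace_impl.h>):
 *
 *	INACTIVE -> ACTIVE			(dtrace_speculation())
 *	ACTIVE -> ACTIVEONE -> ACTIVEMANY	(speculative writes)
 *	ACTIVE{,ONE,MANY} -> COMMITTING[MANY]	(commit())
 *	ACTIVE{,ONE,MANY} -> DISCARDING		(discard())
 *
 * with the exception that an ACTIVEONE speculation discarded on the CPU that
 * owns its data moves directly back to INACTIVE.  COMMITTINGMANY and
 * DISCARDING are drained back to INACTIVE by the asynchronous cleaner.
 */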

/*
 * Return a string.  In the event that the user lacks the privilege to access
 * arbitrary kernel memory, we copy the string out to scratch memory so that we
 * don't fail access checking.
 *
 * dtrace_dif_variable() uses this routine as a helper for various
 * builtin values such as 'execname' and 'probefunc.'
 */
uintptr_t
dtrace_dif_varstr(uintptr_t addr, dtrace_state_t *state,
    dtrace_mstate_t *mstate)
{
	uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
	uintptr_t ret;
	size_t strsz;

	/*
	 * The easy case:  this probe is allowed to read all of memory, so
	 * we can just return this as a vanilla pointer.
	 */
	if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
		return (addr);

	/*
	 * This is the tougher case:  we copy the string in question from
	 * kernel memory into scratch memory and return it that way:  this
	 * ensures that we won't trip up when access checking tests the
	 * BYREF return value.
	 */
	strsz = dtrace_strlen((char *)addr, size) + 1;

	if (mstate->dtms_scratch_ptr + strsz >
	    mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
		return (0);
	}

	dtrace_strcpy((const void *)addr, (void *)mstate->dtms_scratch_ptr,
	    strsz);
	ret = mstate->dtms_scratch_ptr;
	mstate->dtms_scratch_ptr += strsz;
	return (ret);
}

/*
 * Return a string from a memory address which is known to have one or
 * more concatenated, individually zero-terminated, sub-strings.
 * In the event that the user lacks the privilege to access
 * arbitrary kernel memory, we copy the string out to scratch memory so that we
 * don't fail access checking.
 *
 * dtrace_dif_variable() uses this routine as a helper for various
 * builtin values such as 'execargs'.
 */
static uintptr_t
dtrace_dif_varstrz(uintptr_t addr, size_t strsz, dtrace_state_t *state,
    dtrace_mstate_t *mstate)
{
	char *p;
	size_t i;
	uintptr_t ret;

	if (mstate->dtms_scratch_ptr + strsz >
	    mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
		return (0);
	}

	dtrace_bcopy((const void *)addr, (void *)mstate->dtms_scratch_ptr,
	    strsz);

	/* Replace sub-string termination characters with a space. */
	for (p = (char *) mstate->dtms_scratch_ptr, i = 0; i < strsz - 1;
	    p++, i++)
		if (*p == '\0')
			*p = ' ';

	ret = mstate->dtms_scratch_ptr;
	mstate->dtms_scratch_ptr += strsz;
	return (ret);
}
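
/*
 * For example, the execargs variable is backed by a pargs buffer of the
 * form "ls\0-l\0/tmp\0"; the copy-and-substitute loop above hands the
 * caller "ls -l /tmp" in scratch space.
 */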

/*
 * This function implements the DIF emulator's variable lookups.  The emulator
 * passes a reserved variable identifier and optional built-in array index.
 */
static uint64_t
dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
    uint64_t ndx)
{
	/*
	 * If we're accessing one of the uncached arguments, we'll turn this
	 * into a reference in the args array.
	 */
	if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) {
		ndx = v - DIF_VAR_ARG0;
		v = DIF_VAR_ARGS;
	}

	switch (v) {
	case DIF_VAR_ARGS:
		ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS);
		if (ndx >= sizeof (mstate->dtms_arg) /
		    sizeof (mstate->dtms_arg[0])) {
			int aframes = mstate->dtms_probe->dtpr_aframes + 2;
			dtrace_provider_t *pv;
			uint64_t val;

			pv = mstate->dtms_probe->dtpr_provider;
			if (pv->dtpv_pops.dtps_getargval != NULL)
				val = pv->dtpv_pops.dtps_getargval(pv->dtpv_arg,
				    mstate->dtms_probe->dtpr_id,
				    mstate->dtms_probe->dtpr_arg, ndx, aframes);
			else
				val = dtrace_getarg(ndx, aframes);

			/*
			 * This is regrettably required to keep the compiler
			 * from tail-optimizing the call to dtrace_getarg().
			 * The condition always evaluates to true, but the
			 * compiler has no way of figuring that out a priori.
			 * (None of this would be necessary if the compiler
			 * could be relied upon to _always_ tail-optimize
			 * the call to dtrace_getarg() -- but it can't.)
			 */
			if (mstate->dtms_probe != NULL)
				return (val);

			ASSERT(0);
		}

		return (mstate->dtms_arg[ndx]);

	case DIF_VAR_REGS:
	case DIF_VAR_UREGS: {
		struct trapframe *tframe;

		if (!dtrace_priv_proc(state))
			return (0);

		if (v == DIF_VAR_REGS)
			tframe = curthread->t_dtrace_trapframe;
		else
			tframe = curthread->td_frame;

		if (tframe == NULL) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[curcpu].cpuc_dtrace_illval = 0;
			return (0);
		}

		return (dtrace_getreg(tframe, ndx));
	}

	case DIF_VAR_CURTHREAD:
		if (!dtrace_priv_proc(state))
			return (0);
		return ((uint64_t)(uintptr_t)curthread);

	case DIF_VAR_TIMESTAMP:
		if (!(mstate->dtms_present & DTRACE_MSTATE_TIMESTAMP)) {
			mstate->dtms_timestamp = dtrace_gethrtime();
			mstate->dtms_present |= DTRACE_MSTATE_TIMESTAMP;
		}
		return (mstate->dtms_timestamp);

	case DIF_VAR_VTIMESTAMP:
		ASSERT(dtrace_vtime_references != 0);
		return (curthread->t_dtrace_vtime);

	case DIF_VAR_WALLTIMESTAMP:
		if (!(mstate->dtms_present & DTRACE_MSTATE_WALLTIMESTAMP)) {
			mstate->dtms_walltimestamp = dtrace_gethrestime();
			mstate->dtms_present |= DTRACE_MSTATE_WALLTIMESTAMP;
		}
		return (mstate->dtms_walltimestamp);

#ifdef illumos
	case DIF_VAR_IPL:
		if (!dtrace_priv_kernel(state))
			return (0);
		if (!(mstate->dtms_present & DTRACE_MSTATE_IPL)) {
			mstate->dtms_ipl = dtrace_getipl();
			mstate->dtms_present |= DTRACE_MSTATE_IPL;
		}
		return (mstate->dtms_ipl);
#endif

	case DIF_VAR_EPID:
		ASSERT(mstate->dtms_present & DTRACE_MSTATE_EPID);
		return (mstate->dtms_epid);

	case DIF_VAR_ID:
		ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
		return (mstate->dtms_probe->dtpr_id);

	case DIF_VAR_STACKDEPTH:
		if (!dtrace_priv_kernel(state))
			return (0);
		if (!(mstate->dtms_present & DTRACE_MSTATE_STACKDEPTH)) {
			int aframes = mstate->dtms_probe->dtpr_aframes + 2;

			mstate->dtms_stackdepth = dtrace_getstackdepth(aframes);
			mstate->dtms_present |= DTRACE_MSTATE_STACKDEPTH;
		}
		return (mstate->dtms_stackdepth);

	case DIF_VAR_USTACKDEPTH:
		if (!dtrace_priv_proc(state))
			return (0);
		if (!(mstate->dtms_present & DTRACE_MSTATE_USTACKDEPTH)) {
			/*
			 * See comment in DIF_VAR_PID.
			 */
			if (DTRACE_ANCHORED(mstate->dtms_probe) &&
			    CPU_ON_INTR(CPU)) {
				mstate->dtms_ustackdepth = 0;
			} else {
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
				mstate->dtms_ustackdepth =
				    dtrace_getustackdepth();
				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
			}
			mstate->dtms_present |= DTRACE_MSTATE_USTACKDEPTH;
		}
		return (mstate->dtms_ustackdepth);

	case DIF_VAR_CALLER:
		if (!dtrace_priv_kernel(state))
			return (0);
		if (!(mstate->dtms_present & DTRACE_MSTATE_CALLER)) {
			int aframes = mstate->dtms_probe->dtpr_aframes + 2;

			if (!DTRACE_ANCHORED(mstate->dtms_probe)) {
				/*
				 * If this is an unanchored probe, we are
				 * required to go through the slow path:
				 * dtrace_caller() only guarantees correct
				 * results for anchored probes.
				 */
				pc_t caller[2] = {0, 0};

				dtrace_getpcstack(caller, 2, aframes,
				    (uint32_t *)(uintptr_t)mstate->dtms_arg[0]);
				mstate->dtms_caller = caller[1];
			} else if ((mstate->dtms_caller =
			    dtrace_caller(aframes)) == -1) {
				/*
				 * We have failed to do this the quick way;
				 * we must resort to the slower approach of
				 * calling dtrace_getpcstack().
				 */
				pc_t caller = 0;

				dtrace_getpcstack(&caller, 1, aframes, NULL);
				mstate->dtms_caller = caller;
			}

			mstate->dtms_present |= DTRACE_MSTATE_CALLER;
		}
		return (mstate->dtms_caller);

	case DIF_VAR_UCALLER:
		if (!dtrace_priv_proc(state))
			return (0);

		if (!(mstate->dtms_present & DTRACE_MSTATE_UCALLER)) {
			uint64_t ustack[3];

			/*
			 * dtrace_getupcstack() fills in the first uint64_t
			 * with the current PID.  The second uint64_t will
			 * be the program counter at user-level.  The third
			 * uint64_t will contain the caller, which is what
			 * we're after.
			 */
			ustack[2] = 0;
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
			dtrace_getupcstack(ustack, 3);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
			mstate->dtms_ucaller = ustack[2];
			mstate->dtms_present |= DTRACE_MSTATE_UCALLER;
		}

		return (mstate->dtms_ucaller);

	case DIF_VAR_PROBEPROV:
		ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
		return (dtrace_dif_varstr(
		    (uintptr_t)mstate->dtms_probe->dtpr_provider->dtpv_name,
		    state, mstate));

	case DIF_VAR_PROBEMOD:
		ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
		return (dtrace_dif_varstr(
		    (uintptr_t)mstate->dtms_probe->dtpr_mod,
		    state, mstate));

	case DIF_VAR_PROBEFUNC:
		ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
		return (dtrace_dif_varstr(
		    (uintptr_t)mstate->dtms_probe->dtpr_func,
		    state, mstate));

	case DIF_VAR_PROBENAME:
		ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
		return (dtrace_dif_varstr(
		    (uintptr_t)mstate->dtms_probe->dtpr_name,
		    state, mstate));

	case DIF_VAR_PID:
		if (!dtrace_priv_proc(state))
			return (0);

#ifdef illumos
		/*
		 * Note that we are assuming that an unanchored probe is
		 * always due to a high-level interrupt.  (And we're assuming
		 * that there is only a single high level interrupt.)
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			return (pid0.pid_id);

		/*
		 * It is always safe to dereference one's own t_procp pointer:
		 * it always points to a valid, allocated proc structure.
		 * Further, it is always safe to dereference the p_pidp member
		 * of one's own proc structure.  (These are truisms because
		 * threads and processes don't clean up their own state --
		 * they leave that task to whomever reaps them.)
		 */
3592
return ((uint64_t)curthread->t_procp->p_pidp->pid_id);
3593
#else
3594
return ((uint64_t)curproc->p_pid);
3595
#endif
3596
3597
	case DIF_VAR_PPID:
		if (!dtrace_priv_proc(state))
			return (0);

#ifdef illumos
		/*
		 * See comment in DIF_VAR_PID.
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			return (pid0.pid_id);

		/*
		 * It is always safe to dereference one's own t_procp pointer:
		 * it always points to a valid, allocated proc structure.
		 * (This is true because threads don't clean up their own
		 * state -- they leave that task to whomever reaps them.)
		 */
		return ((uint64_t)curthread->t_procp->p_ppid);
#else
		if (curproc->p_pid == proc0.p_pid)
			return (curproc->p_pid);
		else
			return (curproc->p_pptr->p_pid);
#endif

	case DIF_VAR_TID:
#ifdef illumos
		/*
		 * See comment in DIF_VAR_PID.
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			return (0);
#endif

		return ((uint64_t)curthread->t_tid);

	case DIF_VAR_EXECARGS: {
		struct pargs *p_args = curthread->td_proc->p_args;

		if (p_args == NULL)
			return (0);

		return (dtrace_dif_varstrz(
		    (uintptr_t) p_args->ar_args, p_args->ar_length, state, mstate));
	}

	case DIF_VAR_EXECNAME:
#ifdef illumos
		if (!dtrace_priv_proc(state))
			return (0);

		/*
		 * See comment in DIF_VAR_PID.
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			return ((uint64_t)(uintptr_t)p0.p_user.u_comm);

		/*
		 * It is always safe to dereference one's own t_procp pointer:
		 * it always points to a valid, allocated proc structure.
		 * (This is true because threads don't clean up their own
		 * state -- they leave that task to whomever reaps them.)
		 */
		return (dtrace_dif_varstr(
		    (uintptr_t)curthread->t_procp->p_user.u_comm,
		    state, mstate));
#else
		return (dtrace_dif_varstr(
		    (uintptr_t) curthread->td_proc->p_comm, state, mstate));
#endif

	case DIF_VAR_ZONENAME:
#ifdef illumos
		if (!dtrace_priv_proc(state))
			return (0);

		/*
		 * See comment in DIF_VAR_PID.
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			return ((uint64_t)(uintptr_t)p0.p_zone->zone_name);

		/*
		 * It is always safe to dereference one's own t_procp pointer:
		 * it always points to a valid, allocated proc structure.
		 * (This is true because threads don't clean up their own
		 * state -- they leave that task to whomever reaps them.)
		 */
		return (dtrace_dif_varstr(
		    (uintptr_t)curthread->t_procp->p_zone->zone_name,
		    state, mstate));
#elif defined(__FreeBSD__)
	/*
	 * On FreeBSD, we provide compatibility for zonename by falling
	 * through into jailname.
	 */
	case DIF_VAR_JAILNAME:
		if (!dtrace_priv_kernel(state))
			return (0);

		return (dtrace_dif_varstr(
		    (uintptr_t)curthread->td_ucred->cr_prison->pr_name,
		    state, mstate));

	case DIF_VAR_JID:
		if (!dtrace_priv_kernel(state))
			return (0);

		return ((uint64_t)curthread->td_ucred->cr_prison->pr_id);
#else
		return (0);
#endif

	case DIF_VAR_UID:
		if (!dtrace_priv_proc(state))
			return (0);

#ifdef illumos
		/*
		 * See comment in DIF_VAR_PID.
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			return ((uint64_t)p0.p_cred->cr_uid);

		/*
		 * It is always safe to dereference one's own t_procp pointer:
		 * it always points to a valid, allocated proc structure.
		 * (This is true because threads don't clean up their own
		 * state -- they leave that task to whomever reaps them.)
		 *
		 * Additionally, it is safe to dereference one's own process
		 * credential, since this is never NULL after process birth.
		 */
		return ((uint64_t)curthread->t_procp->p_cred->cr_uid);
#else
		return ((uint64_t)curthread->td_ucred->cr_uid);
#endif

	case DIF_VAR_GID:
		if (!dtrace_priv_proc(state))
			return (0);

#ifdef illumos
		/*
		 * See comment in DIF_VAR_PID.
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			return ((uint64_t)p0.p_cred->cr_gid);

		/*
		 * It is always safe to dereference one's own t_procp pointer:
		 * it always points to a valid, allocated proc structure.
		 * (This is true because threads don't clean up their own
		 * state -- they leave that task to whomever reaps them.)
		 *
		 * Additionally, it is safe to dereference one's own process
		 * credential, since this is never NULL after process birth.
		 */
		return ((uint64_t)curthread->t_procp->p_cred->cr_gid);
#else
		return ((uint64_t)curthread->td_ucred->cr_gid);
#endif

	case DIF_VAR_ERRNO: {
#ifdef illumos
		klwp_t *lwp;
		if (!dtrace_priv_proc(state))
			return (0);

		/*
		 * See comment in DIF_VAR_PID.
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			return (0);

		/*
		 * It is always safe to dereference one's own t_lwp pointer in
		 * the event that this pointer is non-NULL.  (This is true
		 * because threads and lwps don't clean up their own state --
		 * they leave that task to whomever reaps them.)
		 */
		if ((lwp = curthread->t_lwp) == NULL)
			return (0);

		return ((uint64_t)lwp->lwp_errno);
#else
		return (curthread->td_errno);
#endif
	}
#ifndef illumos
	case DIF_VAR_CPU: {
		return curcpu;
	}
#endif
	default:
		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
		return (0);
	}
}


typedef enum dtrace_json_state {
	DTRACE_JSON_REST = 1,
	DTRACE_JSON_OBJECT,
	DTRACE_JSON_STRING,
	DTRACE_JSON_STRING_ESCAPE,
	DTRACE_JSON_STRING_ESCAPE_UNICODE,
	DTRACE_JSON_COLON,
	DTRACE_JSON_COMMA,
	DTRACE_JSON_VALUE,
	DTRACE_JSON_IDENTIFIER,
	DTRACE_JSON_NUMBER,
	DTRACE_JSON_NUMBER_FRAC,
	DTRACE_JSON_NUMBER_EXP,
	DTRACE_JSON_COLLECT_OBJECT
} dtrace_json_state_t;

/*
 * This function possesses just enough knowledge about JSON to extract a single
 * value from a JSON string and store it in the scratch buffer.  It is able
 * to extract nested object values, and members of arrays by index.
 *
 * elemlist is a list of JSON keys, stored as packed NUL-terminated strings, to
 * be looked up as we descend into the object tree.  e.g.
 *
 *   foo[0].bar.baz[32] --> "foo" NUL "0" NUL "bar" NUL "baz" NUL "32" NUL
 *   with nelems = 5.
 *
 * The run time of this function must be bounded above by strsize to limit the
 * amount of work done in probe context.  As such, it is implemented as a
 * simple state machine, reading one character at a time using safe loads
 * until we find the requested element, hit a parsing error or run off the
 * end of the object or string.
 *
 * As there is no way for a subroutine to return an error without interrupting
 * clause execution, we simply return NULL in the event of a missing key or any
 * other error condition.  Each NULL return in this function is commented with
 * the error condition it represents -- parsing or otherwise.
 *
 * The set of states for the state machine closely matches the JSON
 * specification (http://json.org/).  Briefly:
 *
 *   DTRACE_JSON_REST:
 *     Skip whitespace until we find either a top-level Object, moving
 *     to DTRACE_JSON_OBJECT; or an Array, moving to DTRACE_JSON_VALUE.
 *
 *   DTRACE_JSON_OBJECT:
 *     Locate the next key String in an Object.  Sets a flag to denote
 *     the next String as a key string and moves to DTRACE_JSON_STRING.
 *
 *   DTRACE_JSON_COLON:
 *     Skip whitespace until we find the colon that separates key Strings
 *     from their values.  Once found, move to DTRACE_JSON_VALUE.
 *
 *   DTRACE_JSON_VALUE:
 *     Detects the type of the next value (String, Number, Identifier, Object
 *     or Array) and routes to the states that process that type.  Here we also
 *     deal with the element selector list if we are requested to traverse down
 *     into the object tree.
 *
 *   DTRACE_JSON_COMMA:
 *     Skip whitespace until we find the comma that separates key-value pairs
 *     in Objects (returning to DTRACE_JSON_OBJECT) or values in Arrays
 *     (similarly DTRACE_JSON_VALUE).  All following literal value processing
 *     states return to this state at the end of their value, unless otherwise
 *     noted.
 *
 *   DTRACE_JSON_NUMBER, DTRACE_JSON_NUMBER_FRAC, DTRACE_JSON_NUMBER_EXP:
 *     Processes a Number literal from the JSON, including any exponent
 *     component that may be present.  Numbers are returned as strings, which
 *     may be passed to strtoll() if an integer is required.
 *
 *   DTRACE_JSON_IDENTIFIER:
 *     Processes a "true", "false" or "null" literal in the JSON.
 *
 *   DTRACE_JSON_STRING, DTRACE_JSON_STRING_ESCAPE,
 *   DTRACE_JSON_STRING_ESCAPE_UNICODE:
 *     Processes a String literal from the JSON, whether the String denotes
 *     a key, a value or part of a larger Object.  Handles all escape sequences
 *     present in the specification, including four-digit unicode characters,
 *     but merely includes the escape sequence without converting it to the
 *     actual escaped character.  If the String is flagged as a key, we
 *     move to DTRACE_JSON_COLON rather than DTRACE_JSON_COMMA.
 *
 *   DTRACE_JSON_COLLECT_OBJECT:
 *     This state collects an entire Object (or Array), correctly handling
 *     embedded strings.  If the full element selector list matches this nested
 *     object, we return the Object in full as a string.  If not, we use this
 *     state to skip to the next value at this level and continue processing.
 *
 * NOTE: This function uses various macros from strtolctype.h to manipulate
 * digit values, etc -- these have all been checked to ensure they make
 * no additional function calls.
 */
3891
static char *
3892
dtrace_json(uint64_t size, uintptr_t json, char *elemlist, int nelems,
3893
char *dest)
3894
{
3895
dtrace_json_state_t state = DTRACE_JSON_REST;
3896
int64_t array_elem = INT64_MIN;
3897
int64_t array_pos = 0;
3898
uint8_t escape_unicount = 0;
3899
boolean_t string_is_key = B_FALSE;
3900
boolean_t collect_object = B_FALSE;
3901
boolean_t found_key = B_FALSE;
3902
boolean_t in_array = B_FALSE;
3903
uint32_t braces = 0, brackets = 0;
3904
char *elem = elemlist;
3905
char *dd = dest;
3906
uintptr_t cur;
3907
3908
for (cur = json; cur < json + size; cur++) {
3909
char cc = dtrace_load8(cur);
3910
if (cc == '\0')
3911
return (NULL);
3912
3913
switch (state) {
3914
case DTRACE_JSON_REST:
3915
if (isspace(cc))
3916
break;
3917
3918
if (cc == '{') {
3919
state = DTRACE_JSON_OBJECT;
3920
break;
3921
}
3922
3923
if (cc == '[') {
3924
in_array = B_TRUE;
3925
array_pos = 0;
3926
array_elem = dtrace_strtoll(elem, 10, size);
3927
found_key = array_elem == 0 ? B_TRUE : B_FALSE;
3928
state = DTRACE_JSON_VALUE;
3929
break;
3930
}
3931
3932
/*
3933
* ERROR: expected to find a top-level object or array.
3934
*/
3935
return (NULL);
3936
case DTRACE_JSON_OBJECT:
3937
if (isspace(cc))
3938
break;
3939
3940
if (cc == '"') {
3941
state = DTRACE_JSON_STRING;
3942
string_is_key = B_TRUE;
3943
break;
3944
}
3945
3946
/*
3947
* ERROR: either the object did not start with a key
3948
* string, or we've run off the end of the object
3949
* without finding the requested key.
3950
*/
3951
return (NULL);
3952
case DTRACE_JSON_STRING:
3953
if (cc == '\\') {
3954
*dd++ = '\\';
3955
state = DTRACE_JSON_STRING_ESCAPE;
3956
break;
3957
}
3958
3959
if (cc == '"') {
3960
if (collect_object) {
3961
/*
3962
* We don't reset the dest here, as
3963
* the string is part of a larger
3964
* object being collected.
3965
*/
3966
*dd++ = cc;
3967
collect_object = B_FALSE;
3968
state = DTRACE_JSON_COLLECT_OBJECT;
3969
break;
3970
}
3971
*dd = '\0';
3972
dd = dest; /* reset string buffer */
3973
if (string_is_key) {
3974
if (dtrace_strncmp(dest, elem,
3975
size) == 0)
3976
found_key = B_TRUE;
3977
} else if (found_key) {
3978
if (nelems > 1) {
3979
/*
3980
* We expected an object, not
3981
* this string.
3982
*/
3983
return (NULL);
3984
}
3985
return (dest);
3986
}
3987
state = string_is_key ? DTRACE_JSON_COLON :
3988
DTRACE_JSON_COMMA;
3989
string_is_key = B_FALSE;
3990
break;
3991
}
3992
3993
*dd++ = cc;
3994
break;
3995
case DTRACE_JSON_STRING_ESCAPE:
3996
*dd++ = cc;
3997
if (cc == 'u') {
3998
escape_unicount = 0;
3999
state = DTRACE_JSON_STRING_ESCAPE_UNICODE;
4000
} else {
4001
state = DTRACE_JSON_STRING;
4002
}
4003
break;
4004
case DTRACE_JSON_STRING_ESCAPE_UNICODE:
4005
if (!isxdigit(cc)) {
4006
/*
4007
* ERROR: invalid unicode escape, expected
4008
* four valid hexidecimal digits.
4009
*/
4010
return (NULL);
4011
}
4012
4013
*dd++ = cc;
4014
if (++escape_unicount == 4)
4015
state = DTRACE_JSON_STRING;
4016
break;
4017
		case DTRACE_JSON_COLON:
			if (isspace(cc))
				break;

			if (cc == ':') {
				state = DTRACE_JSON_VALUE;
				break;
			}

			/*
			 * ERROR: expected a colon.
			 */
			return (NULL);
		case DTRACE_JSON_COMMA:
			if (isspace(cc))
				break;

			if (cc == ',') {
				if (in_array) {
					state = DTRACE_JSON_VALUE;
					if (++array_pos == array_elem)
						found_key = B_TRUE;
				} else {
					state = DTRACE_JSON_OBJECT;
				}
				break;
			}

			/*
			 * ERROR: either we hit an unexpected character, or
			 * we reached the end of the object or array without
			 * finding the requested key.
			 */
			return (NULL);
		case DTRACE_JSON_IDENTIFIER:
			if (islower(cc)) {
				*dd++ = cc;
				break;
			}

			*dd = '\0';
			dd = dest; /* reset string buffer */

			if (dtrace_strncmp(dest, "true", 5) == 0 ||
			    dtrace_strncmp(dest, "false", 6) == 0 ||
			    dtrace_strncmp(dest, "null", 5) == 0) {
				if (found_key) {
					if (nelems > 1) {
						/*
						 * ERROR: We expected an object,
						 * not this identifier.
						 */
						return (NULL);
					}
					return (dest);
				} else {
					cur--;
					state = DTRACE_JSON_COMMA;
					break;
				}
			}

			/*
			 * ERROR: we did not recognise the identifier as one
			 * of those in the JSON specification.
			 */
			return (NULL);
		case DTRACE_JSON_NUMBER:
			if (cc == '.') {
				*dd++ = cc;
				state = DTRACE_JSON_NUMBER_FRAC;
				break;
			}

			if (cc == 'x' || cc == 'X') {
				/*
				 * ERROR: specification explicitly excludes
				 * hexadecimal or octal numbers.
				 */
				return (NULL);
			}

			/* FALLTHRU */
		case DTRACE_JSON_NUMBER_FRAC:
			if (cc == 'e' || cc == 'E') {
				*dd++ = cc;
				state = DTRACE_JSON_NUMBER_EXP;
				break;
			}

			if (cc == '+' || cc == '-') {
				/*
				 * ERROR: expect sign as part of exponent only.
				 */
				return (NULL);
			}
			/* FALLTHRU */
		case DTRACE_JSON_NUMBER_EXP:
			if (isdigit(cc) || cc == '+' || cc == '-') {
				*dd++ = cc;
				break;
			}

			*dd = '\0';
			dd = dest; /* reset string buffer */
			if (found_key) {
				if (nelems > 1) {
					/*
					 * ERROR: We expected an object, not
					 * this number.
					 */
					return (NULL);
				}
				return (dest);
			}

			cur--;
			state = DTRACE_JSON_COMMA;
			break;
		case DTRACE_JSON_VALUE:
			if (isspace(cc))
				break;

			if (cc == '{' || cc == '[') {
				if (nelems > 1 && found_key) {
					in_array = cc == '[' ? B_TRUE : B_FALSE;
					/*
					 * If our element selector directs us
					 * to descend into this nested object,
					 * then move to the next selector
					 * element in the list and restart the
					 * state machine.
					 */
					while (*elem != '\0')
						elem++;
					elem++; /* skip the inter-element NUL */
					nelems--;
					dd = dest;
					if (in_array) {
						state = DTRACE_JSON_VALUE;
						array_pos = 0;
						array_elem = dtrace_strtoll(
						    elem, 10, size);
						found_key = array_elem == 0 ?
						    B_TRUE : B_FALSE;
					} else {
						found_key = B_FALSE;
						state = DTRACE_JSON_OBJECT;
					}
					break;
				}

				/*
				 * Otherwise, we wish to either skip this
				 * nested object or return it in full.
				 */
				if (cc == '[')
					brackets = 1;
				else
					braces = 1;
				*dd++ = cc;
				state = DTRACE_JSON_COLLECT_OBJECT;
				break;
			}

			if (cc == '"') {
				state = DTRACE_JSON_STRING;
				break;
			}

			if (islower(cc)) {
				/*
				 * Here we deal with true, false and null.
				 */
				*dd++ = cc;
				state = DTRACE_JSON_IDENTIFIER;
				break;
			}

			if (cc == '-' || isdigit(cc)) {
				*dd++ = cc;
				state = DTRACE_JSON_NUMBER;
				break;
			}

			/*
			 * ERROR: unexpected character at start of value.
			 */
			return (NULL);
		case DTRACE_JSON_COLLECT_OBJECT:
			if (cc == '\0')
				/*
				 * ERROR: unexpected end of input.
				 */
				return (NULL);

			*dd++ = cc;
			if (cc == '"') {
				collect_object = B_TRUE;
				state = DTRACE_JSON_STRING;
				break;
			}

			if (cc == ']') {
				if (brackets-- == 0) {
					/*
					 * ERROR: unbalanced brackets.
					 */
					return (NULL);
				}
			} else if (cc == '}') {
				if (braces-- == 0) {
					/*
					 * ERROR: unbalanced braces.
					 */
					return (NULL);
				}
			} else if (cc == '{') {
				braces++;
			} else if (cc == '[') {
				brackets++;
			}

			if (brackets == 0 && braces == 0) {
				if (found_key) {
					*dd = '\0';
					return (dest);
				}
				dd = dest; /* reset string buffer */
				state = DTRACE_JSON_COMMA;
			}
			break;
		}
	}
	return (NULL);
}
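
/*
 * Illustrative sketch (not part of the original source): a worked example of
 * the inputs dtrace_json() expects.  For the JSON string
 *
 *	{"a": {"b": [7, 8]}}
 *
 * and the selector "a.b[1]", the caller passes the packed element list
 * "a" NUL "b" NUL "1" NUL with nelems = 3; the function copies "8" into dest
 * and returns dest.  The disabled fragment below packs such a selector by
 * hand; the names sel, packed and nelems are hypothetical.
 */
#if 0
	const char *sel = "a.b[1]";
	char packed[32];
	size_t i, j = 0;
	int nelems = 1;

	for (i = 0; sel[i] != '\0'; i++) {
		char cc = sel[i];

		if ((i == 0 && cc == '[') || cc == ']')
			continue;		/* brackets are not copied */
		if (cc == '.' || cc == '[') {
			nelems++;		/* selector boundary */
			cc = '\0';
		}
		packed[j++] = cc;
	}
	packed[j] = '\0';			/* "a\0b\0" "1\0", nelems == 3 */
#endif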

/*
 * Emulate the execution of DTrace ID subroutines invoked by the call opcode.
 * Notice that we don't bother validating the proper number of arguments or
 * their types in the tuple stack.  This isn't needed because all argument
 * interpretation is safe because of our load safety -- the worst that can
 * happen is that a bogus program can obtain bogus results.
 */
static void
dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
    dtrace_key_t *tupregs, int nargs,
    dtrace_mstate_t *mstate, dtrace_state_t *state)
{
	volatile uint16_t *flags = &cpu_core[curcpu].cpuc_dtrace_flags;
	volatile uintptr_t *illval = &cpu_core[curcpu].cpuc_dtrace_illval;
	dtrace_vstate_t *vstate = &state->dts_vstate;

#ifdef illumos
	union {
		mutex_impl_t mi;
		uint64_t mx;
	} m;

	union {
		krwlock_t ri;
		uintptr_t rw;
	} r;
#else
	struct thread *lowner;
	union {
		struct lock_object *li;
		uintptr_t lx;
	} l;
#endif

	switch (subr) {
	case DIF_SUBR_RAND:
		regs[rd] = dtrace_xoroshiro128_plus_next(
		    state->dts_rstate[curcpu]);
		break;

#ifdef illumos
	case DIF_SUBR_MUTEX_OWNED:
		if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
		    mstate, vstate)) {
			regs[rd] = 0;
			break;
		}

		m.mx = dtrace_load64(tupregs[0].dttk_value);
		if (MUTEX_TYPE_ADAPTIVE(&m.mi))
			regs[rd] = MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER;
		else
			regs[rd] = LOCK_HELD(&m.mi.m_spin.m_spinlock);
		break;

	case DIF_SUBR_MUTEX_OWNER:
		if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
		    mstate, vstate)) {
			regs[rd] = 0;
			break;
		}

		m.mx = dtrace_load64(tupregs[0].dttk_value);
		if (MUTEX_TYPE_ADAPTIVE(&m.mi) &&
		    MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER)
			regs[rd] = (uintptr_t)MUTEX_OWNER(&m.mi);
		else
			regs[rd] = 0;
		break;

	case DIF_SUBR_MUTEX_TYPE_ADAPTIVE:
		if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
		    mstate, vstate)) {
			regs[rd] = 0;
			break;
		}

		m.mx = dtrace_load64(tupregs[0].dttk_value);
		regs[rd] = MUTEX_TYPE_ADAPTIVE(&m.mi);
		break;

	case DIF_SUBR_MUTEX_TYPE_SPIN:
		if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
		    mstate, vstate)) {
			regs[rd] = 0;
			break;
		}

		m.mx = dtrace_load64(tupregs[0].dttk_value);
		regs[rd] = MUTEX_TYPE_SPIN(&m.mi);
		break;

	case DIF_SUBR_RW_READ_HELD: {
		uintptr_t tmp;

		if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t),
		    mstate, vstate)) {
			regs[rd] = 0;
			break;
		}

		r.rw = dtrace_loadptr(tupregs[0].dttk_value);
		regs[rd] = _RW_READ_HELD(&r.ri, tmp);
		break;
	}

	case DIF_SUBR_RW_WRITE_HELD:
		if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t),
		    mstate, vstate)) {
			regs[rd] = 0;
			break;
		}

		r.rw = dtrace_loadptr(tupregs[0].dttk_value);
		regs[rd] = _RW_WRITE_HELD(&r.ri);
		break;

	case DIF_SUBR_RW_ISWRITER:
		if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t),
		    mstate, vstate)) {
			regs[rd] = 0;
			break;
		}

		r.rw = dtrace_loadptr(tupregs[0].dttk_value);
		regs[rd] = _RW_ISWRITER(&r.ri);
		break;

#else /* !illumos */
	case DIF_SUBR_MUTEX_OWNED:
		if (!dtrace_canload(tupregs[0].dttk_value,
		    sizeof (struct lock_object), mstate, vstate)) {
			regs[rd] = 0;
			break;
		}
		l.lx = dtrace_loadptr((uintptr_t)&tupregs[0].dttk_value);
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		regs[rd] = LOCK_CLASS(l.li)->lc_owner(l.li, &lowner);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
		break;

	case DIF_SUBR_MUTEX_OWNER:
		if (!dtrace_canload(tupregs[0].dttk_value,
		    sizeof (struct lock_object), mstate, vstate)) {
			regs[rd] = 0;
			break;
		}
		l.lx = dtrace_loadptr((uintptr_t)&tupregs[0].dttk_value);
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		LOCK_CLASS(l.li)->lc_owner(l.li, &lowner);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
		regs[rd] = (uintptr_t)lowner;
		break;

	case DIF_SUBR_MUTEX_TYPE_ADAPTIVE:
		if (!dtrace_canload(tupregs[0].dttk_value, sizeof (struct mtx),
		    mstate, vstate)) {
			regs[rd] = 0;
			break;
		}
		l.lx = dtrace_loadptr((uintptr_t)&tupregs[0].dttk_value);
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		regs[rd] = (LOCK_CLASS(l.li)->lc_flags & LC_SLEEPLOCK) != 0;
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
		break;

	case DIF_SUBR_MUTEX_TYPE_SPIN:
		if (!dtrace_canload(tupregs[0].dttk_value, sizeof (struct mtx),
		    mstate, vstate)) {
			regs[rd] = 0;
			break;
		}
		l.lx = dtrace_loadptr((uintptr_t)&tupregs[0].dttk_value);
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		regs[rd] = (LOCK_CLASS(l.li)->lc_flags & LC_SPINLOCK) != 0;
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
		break;

	case DIF_SUBR_RW_READ_HELD:
	case DIF_SUBR_SX_SHARED_HELD:
		if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t),
		    mstate, vstate)) {
			regs[rd] = 0;
			break;
		}
		l.lx = dtrace_loadptr((uintptr_t)&tupregs[0].dttk_value);
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		regs[rd] = LOCK_CLASS(l.li)->lc_owner(l.li, &lowner) &&
		    lowner == NULL;
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
		break;

	case DIF_SUBR_RW_WRITE_HELD:
	case DIF_SUBR_SX_EXCLUSIVE_HELD:
		if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t),
		    mstate, vstate)) {
			regs[rd] = 0;
			break;
		}
		l.lx = dtrace_loadptr(tupregs[0].dttk_value);
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		regs[rd] = LOCK_CLASS(l.li)->lc_owner(l.li, &lowner) &&
		    lowner != NULL;
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
		break;

	case DIF_SUBR_RW_ISWRITER:
	case DIF_SUBR_SX_ISEXCLUSIVE:
		if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t),
		    mstate, vstate)) {
			regs[rd] = 0;
			break;
		}
		l.lx = dtrace_loadptr(tupregs[0].dttk_value);
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		LOCK_CLASS(l.li)->lc_owner(l.li, &lowner);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
		regs[rd] = (lowner == curthread);
		break;
#endif /* illumos */

	case DIF_SUBR_BCOPY: {
		/*
		 * We need to be sure that the destination is in the scratch
		 * region -- no other region is allowed.
		 */
		uintptr_t src = tupregs[0].dttk_value;
		uintptr_t dest = tupregs[1].dttk_value;
		size_t size = tupregs[2].dttk_value;

		if (!dtrace_inscratch(dest, size, mstate)) {
			*flags |= CPU_DTRACE_BADADDR;
			*illval = regs[rd];
			break;
		}

		if (!dtrace_canload(src, size, mstate, vstate)) {
			regs[rd] = 0;
			break;
		}

		dtrace_bcopy((void *)src, (void *)dest, size);
		break;
	}

	case DIF_SUBR_ALLOCA:
	case DIF_SUBR_COPYIN: {
		uintptr_t dest = P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
		uint64_t size =
		    tupregs[subr == DIF_SUBR_ALLOCA ? 0 : 1].dttk_value;
		size_t scratch_size = (dest - mstate->dtms_scratch_ptr) + size;

		/*
		 * This action doesn't require any credential checks since
		 * probes will not activate in user contexts to which the
		 * enabling user does not have permissions.
		 */

		/*
		 * Rounding up the user allocation size could have overflowed
		 * a large, bogus allocation (like -1ULL) to 0.
		 */
		if (scratch_size < size ||
		    !DTRACE_INSCRATCH(mstate, scratch_size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = 0;
			break;
		}

		if (subr == DIF_SUBR_COPYIN) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
			dtrace_copyin(tupregs[0].dttk_value, dest, size, flags);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
		}

		mstate->dtms_scratch_ptr += scratch_size;
		regs[rd] = dest;
		break;
	}
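
	/*
	 * Illustrative sketch (not part of the original source): why the
	 * scratch_size < size test above catches overflow.  A bogus size
	 * such as (uint64_t)-1 wraps around zero once the alignment slop is
	 * added, leaving the computed total smaller than the request itself
	 * -- impossible for a well-formed allocation, hence rejected.
	 */
#if 0
	uint64_t req = (uint64_t)-1;		/* bogus allocation size */
	uint64_t slop = 7;			/* worst-case alignment pad */
	uint64_t total = slop + req;		/* wraps to 6 */

	if (total < req)
		/* overflow detected: refuse the allocation */;
#endif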

	case DIF_SUBR_COPYINTO: {
		uint64_t size = tupregs[1].dttk_value;
		uintptr_t dest = tupregs[2].dttk_value;

		/*
		 * This action doesn't require any credential checks since
		 * probes will not activate in user contexts to which the
		 * enabling user does not have permissions.
		 */
		if (!dtrace_inscratch(dest, size, mstate)) {
			*flags |= CPU_DTRACE_BADADDR;
			*illval = regs[rd];
			break;
		}

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		dtrace_copyin(tupregs[0].dttk_value, dest, size, flags);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
		break;
	}

	case DIF_SUBR_COPYINSTR: {
		uintptr_t dest = mstate->dtms_scratch_ptr;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];

		if (nargs > 1 && tupregs[1].dttk_value < size)
			size = tupregs[1].dttk_value + 1;

		/*
		 * This action doesn't require any credential checks since
		 * probes will not activate in user contexts to which the
		 * enabling user does not have permissions.
		 */
		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = 0;
			break;
		}

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		dtrace_copyinstr(tupregs[0].dttk_value, dest, size, flags);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

		((char *)dest)[size - 1] = '\0';
		mstate->dtms_scratch_ptr += size;
		regs[rd] = dest;
		break;
	}

#ifdef illumos
	case DIF_SUBR_MSGSIZE:
	case DIF_SUBR_MSGDSIZE: {
		uintptr_t baddr = tupregs[0].dttk_value, daddr;
		uintptr_t wptr, rptr;
		size_t count = 0;
		int cont = 0;

		while (baddr != 0 && !(*flags & CPU_DTRACE_FAULT)) {

			if (!dtrace_canload(baddr, sizeof (mblk_t), mstate,
			    vstate)) {
				regs[rd] = 0;
				break;
			}

			wptr = dtrace_loadptr(baddr +
			    offsetof(mblk_t, b_wptr));

			rptr = dtrace_loadptr(baddr +
			    offsetof(mblk_t, b_rptr));

			if (wptr < rptr) {
				*flags |= CPU_DTRACE_BADADDR;
				*illval = tupregs[0].dttk_value;
				break;
			}

			daddr = dtrace_loadptr(baddr +
			    offsetof(mblk_t, b_datap));

			baddr = dtrace_loadptr(baddr +
			    offsetof(mblk_t, b_cont));

			/*
			 * We want to protect against denial-of-service here,
			 * so we're only going to search the list for
			 * dtrace_msgdsize_max mblks.
			 */
			if (cont++ > dtrace_msgdsize_max) {
				*flags |= CPU_DTRACE_ILLOP;
				break;
			}

			if (subr == DIF_SUBR_MSGDSIZE) {
				if (dtrace_load8(daddr +
				    offsetof(dblk_t, db_type)) != M_DATA)
					continue;
			}

			count += wptr - rptr;
		}

		if (!(*flags & CPU_DTRACE_FAULT))
			regs[rd] = count;

		break;
	}
#endif

	case DIF_SUBR_PROGENYOF: {
		pid_t pid = tupregs[0].dttk_value;
		proc_t *p;
		int rval = 0;

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);

		for (p = curthread->t_procp; p != NULL; p = p->p_parent) {
#ifdef illumos
			if (p->p_pidp->pid_id == pid) {
#else
			if (p->p_pid == pid) {
#endif
				rval = 1;
				break;
			}
		}

		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

		regs[rd] = rval;
		break;
	}

	case DIF_SUBR_SPECULATION:
		regs[rd] = dtrace_speculation(state);
		break;

	case DIF_SUBR_COPYOUT: {
		uintptr_t kaddr = tupregs[0].dttk_value;
		uintptr_t uaddr = tupregs[1].dttk_value;
		uint64_t size = tupregs[2].dttk_value;

		if (!dtrace_destructive_disallow &&
		    dtrace_priv_proc_control(state) &&
		    !dtrace_istoxic(kaddr, size) &&
		    dtrace_canload(kaddr, size, mstate, vstate)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
			dtrace_copyout(kaddr, uaddr, size, flags);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
		}
		break;
	}

	case DIF_SUBR_COPYOUTSTR: {
		uintptr_t kaddr = tupregs[0].dttk_value;
		uintptr_t uaddr = tupregs[1].dttk_value;
		uint64_t size = tupregs[2].dttk_value;
		size_t lim;

		if (!dtrace_destructive_disallow &&
		    dtrace_priv_proc_control(state) &&
		    !dtrace_istoxic(kaddr, size) &&
		    dtrace_strcanload(kaddr, size, &lim, mstate, vstate)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
			dtrace_copyoutstr(kaddr, uaddr, lim, flags);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
		}
		break;
	}

	case DIF_SUBR_STRLEN: {
		size_t size = state->dts_options[DTRACEOPT_STRSIZE];
		uintptr_t addr = (uintptr_t)tupregs[0].dttk_value;
		size_t lim;

		if (!dtrace_strcanload(addr, size, &lim, mstate, vstate)) {
			regs[rd] = 0;
			break;
		}

		regs[rd] = dtrace_strlen((char *)addr, lim);
		break;
	}

	case DIF_SUBR_STRCHR:
	case DIF_SUBR_STRRCHR: {
		/*
		 * We're going to iterate over the string looking for the
		 * specified character.  We will iterate until we have reached
		 * the string length or we have found the character.  If this
		 * is DIF_SUBR_STRRCHR, we will look for the last occurrence
		 * of the specified character instead of the first.
		 */
		uintptr_t addr = tupregs[0].dttk_value;
		uintptr_t addr_limit;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
		size_t lim;
		char c, target = (char)tupregs[1].dttk_value;

		if (!dtrace_strcanload(addr, size, &lim, mstate, vstate)) {
			regs[rd] = 0;
			break;
		}
		addr_limit = addr + lim;

		for (regs[rd] = 0; addr < addr_limit; addr++) {
			if ((c = dtrace_load8(addr)) == target) {
				regs[rd] = addr;

				if (subr == DIF_SUBR_STRCHR)
					break;
			}

			if (c == '\0')
				break;
		}
		break;
	}

	case DIF_SUBR_STRSTR:
	case DIF_SUBR_INDEX:
	case DIF_SUBR_RINDEX: {
		/*
		 * We're going to iterate over the string looking for the
		 * specified string.  We will iterate until we have reached
		 * the string length or we have found the string.  (Yes, this
		 * is done in the most naive way possible -- but considering
		 * that the string we're searching for is likely to be
		 * relatively short, the complexity of Rabin-Karp or similar
		 * hardly seems merited.)
		 */
		char *addr = (char *)(uintptr_t)tupregs[0].dttk_value;
		char *substr = (char *)(uintptr_t)tupregs[1].dttk_value;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
		size_t len = dtrace_strlen(addr, size);
		size_t sublen = dtrace_strlen(substr, size);
		char *limit = addr + len, *orig = addr;
		int notfound = subr == DIF_SUBR_STRSTR ? 0 : -1;
		int inc = 1;

		regs[rd] = notfound;

		if (!dtrace_canload((uintptr_t)addr, len + 1, mstate, vstate)) {
			regs[rd] = 0;
			break;
		}

		if (!dtrace_canload((uintptr_t)substr, sublen + 1, mstate,
		    vstate)) {
			regs[rd] = 0;
			break;
		}

		/*
		 * strstr() and index()/rindex() have similar semantics if
		 * both strings are the empty string: strstr() returns a
		 * pointer to the (empty) string, and index() and rindex()
		 * both return index 0 (regardless of any position argument).
		 */
		if (sublen == 0 && len == 0) {
			if (subr == DIF_SUBR_STRSTR)
				regs[rd] = (uintptr_t)addr;
			else
				regs[rd] = 0;
			break;
		}

		if (subr != DIF_SUBR_STRSTR) {
			if (subr == DIF_SUBR_RINDEX) {
				limit = orig - 1;
				addr += len;
				inc = -1;
			}

			/*
			 * Both index() and rindex() take an optional position
			 * argument that denotes the starting position.
			 */
			if (nargs == 3) {
				int64_t pos = (int64_t)tupregs[2].dttk_value;

				/*
				 * If the position argument to index() is
				 * negative, Perl implicitly clamps it at
				 * zero.  This semantic is a little surprising
				 * given the special meaning of negative
				 * positions to similar Perl functions like
				 * substr(), but it appears to reflect a
				 * notion that index() can start from a
				 * negative index and increment its way up to
				 * the string.  Given this notion, Perl's
				 * rindex() is at least self-consistent in
				 * that it implicitly clamps positions greater
				 * than the string length to be the string
				 * length.  Where Perl completely loses
				 * coherence, however, is when the specified
				 * substring is the empty string ("").  In
				 * this case, even if the position is
				 * negative, rindex() returns 0 -- and even if
				 * the position is greater than the length,
				 * index() returns the string length.  These
				 * semantics violate the notion that index()
				 * should never return a value less than the
				 * specified position and that rindex() should
				 * never return a value greater than the
				 * specified position.  (One assumes that
				 * these semantics are artifacts of Perl's
				 * implementation and not the results of
				 * deliberate design -- it beggars belief that
				 * even Larry Wall could desire such oddness.)
				 * While in the abstract one would wish for
				 * consistent position semantics across
				 * substr(), index() and rindex() -- or at the
				 * very least self-consistent position
				 * semantics for index() and rindex() -- we
				 * instead opt to keep with the extant Perl
				 * semantics, in all their broken glory.  (Do
				 * we have more desire to maintain Perl's
				 * semantics than Perl does?  Probably.)
				 */
				if (subr == DIF_SUBR_RINDEX) {
					if (pos < 0) {
						if (sublen == 0)
							regs[rd] = 0;
						break;
					}

					if (pos > len)
						pos = len;
				} else {
					if (pos < 0)
						pos = 0;

					if (pos >= len) {
						if (sublen == 0)
							regs[rd] = len;
						break;
					}
				}

				addr = orig + pos;
			}
		}

		for (regs[rd] = notfound; addr != limit; addr += inc) {
			if (dtrace_strncmp(addr, substr, sublen) == 0) {
				if (subr != DIF_SUBR_STRSTR) {
					/*
					 * As D index() and rindex() are
					 * modeled on Perl (and not on awk),
					 * we return a zero-based (and not a
					 * one-based) index.  (For you Perl
					 * weenies: no, we're not going to add
					 * $[ -- and shouldn't you be at a con
					 * or something?)
					 */
					regs[rd] = (uintptr_t)(addr - orig);
					break;
				}

				ASSERT(subr == DIF_SUBR_STRSTR);
				regs[rd] = (uintptr_t)addr;
				break;
			}
		}

		break;
	}
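
	/*
	 * Illustrative sketch (not part of the original source): the Perl
	 * position clamping described above, restated as a standalone
	 * fragment.  Here len, sublen, pos and is_rindex are hypothetical
	 * stand-ins for the values computed in the case above.
	 */
#if 0
	if (is_rindex) {
		if (pos < 0)
			return (sublen == 0 ? 0 : -1);	/* "" still matches */
		if (pos > len)
			pos = len;			/* clamp high */
	} else {
		if (pos < 0)
			pos = 0;			/* clamp low */
		if (pos >= len)
			return (sublen == 0 ? len : -1);
	}
#endif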

	case DIF_SUBR_STRTOK: {
		uintptr_t addr = tupregs[0].dttk_value;
		uintptr_t tokaddr = tupregs[1].dttk_value;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
		uintptr_t limit, toklimit;
		size_t clim;
		uint8_t c = 0, tokmap[32];	/* 256 / 8 */
		char *dest = (char *)mstate->dtms_scratch_ptr;
		int i;

		/*
		 * Check both the token buffer and (later) the input buffer,
		 * since both could be non-scratch addresses.
		 */
		if (!dtrace_strcanload(tokaddr, size, &clim, mstate, vstate)) {
			regs[rd] = 0;
			break;
		}
		toklimit = tokaddr + clim;

		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = 0;
			break;
		}

		if (addr == 0) {
			/*
			 * If the address specified is NULL, we use our saved
			 * strtok pointer from the mstate.  Note that this
			 * means that the saved strtok pointer is _only_
			 * valid within multiple enablings of the same probe --
			 * it behaves like an implicit clause-local variable.
			 */
			addr = mstate->dtms_strtok;
			limit = mstate->dtms_strtok_limit;
		} else {
			/*
			 * If the user-specified address is non-NULL we must
			 * access check it.  This is the only time we have
			 * a chance to do so, since this address may reside
			 * in the string table of this clause -- future calls
			 * (when we fetch addr from mstate->dtms_strtok)
			 * would fail this access check.
			 */
			if (!dtrace_strcanload(addr, size, &clim, mstate,
			    vstate)) {
				regs[rd] = 0;
				break;
			}
			limit = addr + clim;
		}

		/*
		 * First, zero the token map, and then process the token
		 * string -- setting a bit in the map for every character
		 * found in the token string.
		 */
		for (i = 0; i < sizeof (tokmap); i++)
			tokmap[i] = 0;

		for (; tokaddr < toklimit; tokaddr++) {
			if ((c = dtrace_load8(tokaddr)) == '\0')
				break;

			ASSERT((c >> 3) < sizeof (tokmap));
			tokmap[c >> 3] |= (1 << (c & 0x7));
		}

		for (; addr < limit; addr++) {
			/*
			 * We're looking for a character that is _not_
			 * contained in the token string.
			 */
			if ((c = dtrace_load8(addr)) == '\0')
				break;

			if (!(tokmap[c >> 3] & (1 << (c & 0x7))))
				break;
		}

		if (c == '\0') {
			/*
			 * We reached the end of the string without finding
			 * any character that was not in the token string.
			 * We return NULL in this case, and we set the saved
			 * address to NULL as well.
			 */
			regs[rd] = 0;
			mstate->dtms_strtok = 0;
			mstate->dtms_strtok_limit = 0;
			break;
		}

		/*
		 * From here on, we're copying into the destination string.
		 */
		for (i = 0; addr < limit && i < size - 1; addr++) {
			if ((c = dtrace_load8(addr)) == '\0')
				break;

			if (tokmap[c >> 3] & (1 << (c & 0x7)))
				break;

			ASSERT(i < size);
			dest[i++] = c;
		}

		ASSERT(i < size);
		dest[i] = '\0';
		regs[rd] = (uintptr_t)dest;
		mstate->dtms_scratch_ptr += size;
		mstate->dtms_strtok = addr;
		mstate->dtms_strtok_limit = limit;
		break;
	}
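
	/*
	 * Illustrative sketch (not part of the original source): the tokmap
	 * above is a 256-bit membership set, one bit per possible byte value.
	 * The same technique in standalone form:
	 */
#if 0
	uint8_t map[32] = { 0 };			/* 256 bits */
	const unsigned char *t;
	unsigned char c;

	for (t = (const unsigned char *)",; "; *t != '\0'; t++)
		map[*t >> 3] |= 1 << (*t & 0x7);	/* insert */

	c = ';';
	if (map[c >> 3] & (1 << (c & 0x7)))
		/* c is a delimiter */;
#endif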

	case DIF_SUBR_SUBSTR: {
		uintptr_t s = tupregs[0].dttk_value;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
		char *d = (char *)mstate->dtms_scratch_ptr;
		int64_t index = (int64_t)tupregs[1].dttk_value;
		int64_t remaining = (int64_t)tupregs[2].dttk_value;
		size_t len = dtrace_strlen((char *)s, size);
		int64_t i;

		if (!dtrace_canload(s, len + 1, mstate, vstate)) {
			regs[rd] = 0;
			break;
		}

		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = 0;
			break;
		}

		if (nargs <= 2)
			remaining = (int64_t)size;

		if (index < 0) {
			index += len;

			if (index < 0 && index + remaining > 0) {
				remaining += index;
				index = 0;
			}
		}

		if (index >= len || index < 0) {
			remaining = 0;
		} else if (remaining < 0) {
			remaining += len - index;
		} else if (index + remaining > size) {
			remaining = size - index;
		}

		for (i = 0; i < remaining; i++) {
			if ((d[i] = dtrace_load8(s + index + i)) == '\0')
				break;
		}

		d[i] = '\0';

		mstate->dtms_scratch_ptr += size;
		regs[rd] = (uintptr_t)d;
		break;
	}
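
	/*
	 * Illustrative worked examples (not part of the original source) of
	 * the index/remaining normalization above, with "DTrace" (len = 6)
	 * as the source string:
	 *
	 *	substr("DTrace", -3)	-> index becomes 3, yielding "ace"
	 *	substr("DTrace", 2, -1)	-> remaining becomes 3, yielding "rac"
	 *	substr("DTrace", 9)	-> remaining becomes 0, yielding ""
	 */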

	case DIF_SUBR_JSON: {
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
		uintptr_t json = tupregs[0].dttk_value;
		size_t jsonlen = dtrace_strlen((char *)json, size);
		uintptr_t elem = tupregs[1].dttk_value;
		size_t elemlen = dtrace_strlen((char *)elem, size);

		char *dest = (char *)mstate->dtms_scratch_ptr;
		char *elemlist = (char *)mstate->dtms_scratch_ptr + jsonlen + 1;
		char *ee = elemlist;
		int nelems = 1;
		uintptr_t cur;

		if (!dtrace_canload(json, jsonlen + 1, mstate, vstate) ||
		    !dtrace_canload(elem, elemlen + 1, mstate, vstate)) {
			regs[rd] = 0;
			break;
		}

		if (!DTRACE_INSCRATCH(mstate, jsonlen + 1 + elemlen + 1)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = 0;
			break;
		}

		/*
		 * Read the element selector and split it up into a packed list
		 * of strings.
		 */
		for (cur = elem; cur < elem + elemlen; cur++) {
			char cc = dtrace_load8(cur);

			if (cur == elem && cc == '[') {
				/*
				 * If the first element selector key is
				 * actually an array index then ignore the
				 * bracket.
				 */
				continue;
			}

			if (cc == ']')
				continue;

			if (cc == '.' || cc == '[') {
				nelems++;
				cc = '\0';
			}

			*ee++ = cc;
		}
		*ee++ = '\0';

		if ((regs[rd] = (uintptr_t)dtrace_json(size, json, elemlist,
		    nelems, dest)) != 0)
			mstate->dtms_scratch_ptr += jsonlen + 1;
		break;
	}

	case DIF_SUBR_TOUPPER:
	case DIF_SUBR_TOLOWER: {
		uintptr_t s = tupregs[0].dttk_value;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
		char *dest = (char *)mstate->dtms_scratch_ptr, c;
		size_t len = dtrace_strlen((char *)s, size);
		char lower, upper, convert;
		int64_t i;

		if (subr == DIF_SUBR_TOUPPER) {
			lower = 'a';
			upper = 'z';
			convert = 'A';
		} else {
			lower = 'A';
			upper = 'Z';
			convert = 'a';
		}

		if (!dtrace_canload(s, len + 1, mstate, vstate)) {
			regs[rd] = 0;
			break;
		}

		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = 0;
			break;
		}

		for (i = 0; i < size - 1; i++) {
			if ((c = dtrace_load8(s + i)) == '\0')
				break;

			if (c >= lower && c <= upper)
				c = convert + (c - lower);

			dest[i] = c;
		}

		ASSERT(i < size);
		dest[i] = '\0';
		regs[rd] = (uintptr_t)dest;
		mstate->dtms_scratch_ptr += size;
		break;
	}

#ifdef illumos
	case DIF_SUBR_GETMAJOR:
#ifdef _LP64
		regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR64) & MAXMAJ64;
#else
		regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR) & MAXMAJ;
#endif
		break;

	case DIF_SUBR_GETMINOR:
#ifdef _LP64
		regs[rd] = tupregs[0].dttk_value & MAXMIN64;
#else
		regs[rd] = tupregs[0].dttk_value & MAXMIN;
#endif
		break;

	case DIF_SUBR_DDI_PATHNAME: {
		/*
		 * This one is a galactic mess.  We are going to roughly
		 * emulate ddi_pathname(), but it's made more complicated
		 * by the fact that we (a) want to include the minor name and
		 * (b) must proceed iteratively instead of recursively.
		 */
		uintptr_t dest = mstate->dtms_scratch_ptr;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
		char *start = (char *)dest, *end = start + size - 1;
		uintptr_t daddr = tupregs[0].dttk_value;
		int64_t minor = (int64_t)tupregs[1].dttk_value;
		char *s;
		int i, len, depth = 0;

		/*
		 * Due to all the pointer jumping we do and context we must
		 * rely upon, we just mandate that the user must have kernel
		 * read privileges to use this routine.
		 */
		if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) == 0) {
			*flags |= CPU_DTRACE_KPRIV;
			*illval = daddr;
			regs[rd] = 0;
		}

		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = 0;
			break;
		}

		*end = '\0';

		/*
		 * We want to have a name for the minor.  In order to do this,
		 * we need to walk the minor list from the devinfo.  We want
		 * to be sure that we don't infinitely walk a circular list,
		 * so we check for circularity by sending a scout pointer
		 * ahead two elements for every element that we iterate over;
		 * if the list is circular, these will ultimately point to the
		 * same element.  You may recognize this little trick as the
		 * answer to a stupid interview question -- one that always
		 * seems to be asked by those who had to have it laboriously
		 * explained to them, and who can't even concisely describe
		 * the conditions under which one would be forced to resort to
		 * this technique.  Needless to say, those conditions are
		 * found here -- and probably only here.  Is this the only use
		 * of this infamous trick in shipping, production code?  If it
		 * isn't, it probably should be...
		 */
		if (minor != -1) {
			uintptr_t maddr = dtrace_loadptr(daddr +
			    offsetof(struct dev_info, devi_minor));

			uintptr_t next = offsetof(struct ddi_minor_data, next);
			uintptr_t name = offsetof(struct ddi_minor_data,
			    d_minor) + offsetof(struct ddi_minor, name);
			uintptr_t dev = offsetof(struct ddi_minor_data,
			    d_minor) + offsetof(struct ddi_minor, dev);
			uintptr_t scout;

			if (maddr != NULL)
				scout = dtrace_loadptr(maddr + next);

			while (maddr != NULL && !(*flags & CPU_DTRACE_FAULT)) {
				uint64_t m;
#ifdef _LP64
				m = dtrace_load64(maddr + dev) & MAXMIN64;
#else
				m = dtrace_load32(maddr + dev) & MAXMIN;
#endif
				if (m != minor) {
					maddr = dtrace_loadptr(maddr + next);

					if (scout == NULL)
						continue;

					scout = dtrace_loadptr(scout + next);

					if (scout == NULL)
						continue;

					scout = dtrace_loadptr(scout + next);

					if (scout == NULL)
						continue;

					if (scout == maddr) {
						*flags |= CPU_DTRACE_ILLOP;
						break;
					}

					continue;
				}

				/*
				 * We have the minor data.  Now we need to
				 * copy the minor's name into the end of the
				 * pathname.
				 */
				s = (char *)dtrace_loadptr(maddr + name);
				len = dtrace_strlen(s, size);

				if (*flags & CPU_DTRACE_FAULT)
					break;

				if (len != 0) {
					if ((end -= (len + 1)) < start)
						break;

					*end = ':';
				}

				for (i = 1; i <= len; i++)
					end[i] = dtrace_load8((uintptr_t)s++);
				break;
			}
		}

		while (daddr != NULL && !(*flags & CPU_DTRACE_FAULT)) {
			ddi_node_state_t devi_state;

			devi_state = dtrace_load32(daddr +
			    offsetof(struct dev_info, devi_node_state));

			if (*flags & CPU_DTRACE_FAULT)
				break;

			if (devi_state >= DS_INITIALIZED) {
				s = (char *)dtrace_loadptr(daddr +
				    offsetof(struct dev_info, devi_addr));
				len = dtrace_strlen(s, size);

				if (*flags & CPU_DTRACE_FAULT)
					break;

				if (len != 0) {
					if ((end -= (len + 1)) < start)
						break;

					*end = '@';
				}

				for (i = 1; i <= len; i++)
					end[i] = dtrace_load8((uintptr_t)s++);
			}

			/*
			 * Now for the node name...
			 */
			s = (char *)dtrace_loadptr(daddr +
			    offsetof(struct dev_info, devi_node_name));

			daddr = dtrace_loadptr(daddr +
			    offsetof(struct dev_info, devi_parent));

			/*
			 * If our parent is NULL (that is, if we're the root
			 * node), we're going to use the special path
			 * "devices".
			 */
			if (daddr == 0)
				s = "devices";

			len = dtrace_strlen(s, size);
			if (*flags & CPU_DTRACE_FAULT)
				break;

			if ((end -= (len + 1)) < start)
				break;

			for (i = 1; i <= len; i++)
				end[i] = dtrace_load8((uintptr_t)s++);
			*end = '/';

			if (depth++ > dtrace_devdepth_max) {
				*flags |= CPU_DTRACE_ILLOP;
				break;
			}
		}

		if (end < start)
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);

		if (daddr == 0) {
			regs[rd] = (uintptr_t)end;
			mstate->dtms_scratch_ptr += size;
		}

		break;
	}
#endif
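
	/*
	 * Illustrative sketch (not part of the original source): the scout
	 * pointer above is Floyd's tortoise-and-hare cycle detection, which
	 * finds a cycle in O(n) time with two pointers of extra state.  In
	 * standalone form, with a hypothetical node type and list head:
	 */
#if 0
	struct node { struct node *next; };
	struct node *slow = head, *fast = head;
	int cyclic = 0;

	while (fast != NULL && fast->next != NULL) {
		slow = slow->next;		/* advance one */
		fast = fast->next->next;	/* advance two */
		if (slow == fast) {
			cyclic = 1;		/* the pointers met */
			break;
		}
	}
#endif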

	case DIF_SUBR_STRJOIN: {
		char *d = (char *)mstate->dtms_scratch_ptr;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
		uintptr_t s1 = tupregs[0].dttk_value;
		uintptr_t s2 = tupregs[1].dttk_value;
		int i = 0, j = 0;
		size_t lim1, lim2;
		char c;

		if (!dtrace_strcanload(s1, size, &lim1, mstate, vstate) ||
		    !dtrace_strcanload(s2, size, &lim2, mstate, vstate)) {
			regs[rd] = 0;
			break;
		}

		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = 0;
			break;
		}

		for (;;) {
			if (i >= size) {
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
				regs[rd] = 0;
				break;
			}
			c = (i >= lim1) ? '\0' : dtrace_load8(s1++);
			if ((d[i++] = c) == '\0') {
				i--;
				break;
			}
		}

		for (;;) {
			if (i >= size) {
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
				regs[rd] = 0;
				break;
			}

			c = (j++ >= lim2) ? '\0' : dtrace_load8(s2++);
			if ((d[i++] = c) == '\0')
				break;
		}

		if (i < size) {
			mstate->dtms_scratch_ptr += i;
			regs[rd] = (uintptr_t)d;
		}

		break;
	}
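
	/*
	 * Illustrative sketch (not part of the original source): strjoin()
	 * above copies s1 up to its load limit, backs up over its
	 * terminator, then appends s2, failing outright if the result would
	 * exceed the scratch size.  A simplified userland equivalent that
	 * truncates instead of failing (d, s1, s2 and size hypothetical):
	 */
#if 0
	size_t i = 0, j;

	while (i < size - 1 && s1[i] != '\0') {
		d[i] = s1[i];			/* copy the first string */
		i++;
	}
	for (j = 0; i < size - 1 && s2[j] != '\0'; i++, j++)
		d[i] = s2[j];			/* append the second */
	d[i] = '\0';
#endif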

	case DIF_SUBR_STRTOLL: {
		uintptr_t s = tupregs[0].dttk_value;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
		size_t lim;
		int base = 10;

		if (nargs > 1) {
			if ((base = tupregs[1].dttk_value) <= 1 ||
			    base > ('z' - 'a' + 1) + ('9' - '0' + 1)) {
				*flags |= CPU_DTRACE_ILLOP;
				break;
			}
		}

		if (!dtrace_strcanload(s, size, &lim, mstate, vstate)) {
			regs[rd] = INT64_MIN;
			break;
		}

		regs[rd] = dtrace_strtoll((char *)s, base, lim);
		break;
	}

	case DIF_SUBR_LLTOSTR: {
		int64_t i = (int64_t)tupregs[0].dttk_value;
		uint64_t val, digit;
		uint64_t size = 65;	/* enough room for 2^64 in binary */
		char *end = (char *)mstate->dtms_scratch_ptr + size - 1;
		int base = 10;

		if (nargs > 1) {
			if ((base = tupregs[1].dttk_value) <= 1 ||
			    base > ('z' - 'a' + 1) + ('9' - '0' + 1)) {
				*flags |= CPU_DTRACE_ILLOP;
				break;
			}
		}

		val = (base == 10 && i < 0) ? i * -1 : i;

		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = 0;
			break;
		}

		for (*end-- = '\0'; val; val /= base) {
			if ((digit = val % base) <= '9' - '0') {
				*end-- = '0' + digit;
			} else {
				*end-- = 'a' + (digit - ('9' - '0') - 1);
			}
		}

		if (i == 0 && base == 16)
			*end-- = '0';

		if (base == 16)
			*end-- = 'x';

		if (i == 0 || base == 8 || base == 16)
			*end-- = '0';

		if (i < 0 && base == 10)
			*end-- = '-';

		regs[rd] = (uintptr_t)end + 1;
		mstate->dtms_scratch_ptr += size;
		break;
	}
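
	/*
	 * Illustrative sketch (not part of the original source): lltostr()
	 * above emits digits least-significant first into the tail of the
	 * buffer and returns a pointer just past the last character written,
	 * which avoids a separate string-reversal pass.  Standalone form:
	 */
#if 0
	char buf[65], *p = &buf[64];
	uint64_t v = 0xdeadbeef;
	uint64_t dig;
	int b = 16;

	for (*p-- = '\0'; v != 0; v /= b) {
		dig = v % b;
		*p-- = dig < 10 ? '0' + dig : 'a' + (dig - 10);
	}
	*p-- = 'x';
	*p-- = '0';
	/* p + 1 now points at the string "0xdeadbeef" */
#endif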

	case DIF_SUBR_HTONS:
	case DIF_SUBR_NTOHS:
#if BYTE_ORDER == BIG_ENDIAN
		regs[rd] = (uint16_t)tupregs[0].dttk_value;
#else
		regs[rd] = DT_BSWAP_16((uint16_t)tupregs[0].dttk_value);
#endif
		break;


	case DIF_SUBR_HTONL:
	case DIF_SUBR_NTOHL:
#if BYTE_ORDER == BIG_ENDIAN
		regs[rd] = (uint32_t)tupregs[0].dttk_value;
#else
		regs[rd] = DT_BSWAP_32((uint32_t)tupregs[0].dttk_value);
#endif
		break;


	case DIF_SUBR_HTONLL:
	case DIF_SUBR_NTOHLL:
#if BYTE_ORDER == BIG_ENDIAN
		regs[rd] = (uint64_t)tupregs[0].dttk_value;
#else
		regs[rd] = DT_BSWAP_64((uint64_t)tupregs[0].dttk_value);
#endif
		break;

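	/*
	 * Illustrative sketch (not part of the original source): on a
	 * little-endian host the hton*()/ntoh*() pairs above reduce to a
	 * byte swap.  A 32-bit swap in the style of DT_BSWAP_32:
	 */
#if 0
	uint32_t x = 0x0a0b0c0d;
	uint32_t swapped =
	    ((x & 0x000000ffU) << 24) |
	    ((x & 0x0000ff00U) <<  8) |
	    ((x & 0x00ff0000U) >>  8) |
	    ((x & 0xff000000U) >> 24);		/* 0x0d0c0b0a */
#endif
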
	case DIF_SUBR_DIRNAME:
	case DIF_SUBR_BASENAME: {
		char *dest = (char *)mstate->dtms_scratch_ptr;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
		uintptr_t src = tupregs[0].dttk_value;
		int i, j, len = dtrace_strlen((char *)src, size);
		int lastbase = -1, firstbase = -1, lastdir = -1;
		int start, end;

		if (!dtrace_canload(src, len + 1, mstate, vstate)) {
			regs[rd] = 0;
			break;
		}

		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = 0;
			break;
		}

		/*
		 * The basename and dirname for a zero-length string are
		 * defined to be "."
		 */
		if (len == 0) {
			len = 1;
			src = (uintptr_t)".";
		}

		/*
		 * Start from the back of the string, moving back toward the
		 * front until we see a character that isn't a slash.  That
		 * character is the last character in the basename.
		 */
		for (i = len - 1; i >= 0; i--) {
			if (dtrace_load8(src + i) != '/')
				break;
		}

		if (i >= 0)
			lastbase = i;

		/*
		 * Starting from the last character in the basename, move
		 * towards the front until we find a slash.  The character
		 * that we processed immediately before that is the first
		 * character in the basename.
		 */
		for (; i >= 0; i--) {
			if (dtrace_load8(src + i) == '/')
				break;
		}

		if (i >= 0)
			firstbase = i + 1;

		/*
		 * Now keep going until we find a non-slash character.  That
		 * character is the last character in the dirname.
		 */
		for (; i >= 0; i--) {
			if (dtrace_load8(src + i) != '/')
				break;
		}

		if (i >= 0)
			lastdir = i;

		ASSERT(!(lastbase == -1 && firstbase != -1));
		ASSERT(!(firstbase == -1 && lastdir != -1));

		if (lastbase == -1) {
			/*
			 * We didn't find a non-slash character.  We know that
			 * the length is non-zero, so the whole string must be
			 * slashes.  In either the dirname or the basename
			 * case, we return '/'.
			 */
			ASSERT(firstbase == -1);
			firstbase = lastbase = lastdir = 0;
		}

		if (firstbase == -1) {
			/*
			 * The entire string consists only of a basename
			 * component.  If we're looking for dirname, we need
			 * to change our string to be just "."; if we're
			 * looking for a basename, we'll just set the first
			 * character of the basename to be 0.
			 */
			if (subr == DIF_SUBR_DIRNAME) {
				ASSERT(lastdir == -1);
				src = (uintptr_t)".";
				lastdir = 0;
			} else {
				firstbase = 0;
			}
		}

		if (subr == DIF_SUBR_DIRNAME) {
			if (lastdir == -1) {
				/*
				 * We know that we have a slash in the name --
				 * or lastdir would be set to 0, above.  And
				 * because lastdir is -1, we know that this
				 * slash must be the first character.  (That
				 * is, the full string must be of the form
				 * "/basename".)  In this case, the last
				 * character of the directory name is 0.
				 */
				lastdir = 0;
			}

			start = 0;
			end = lastdir;
		} else {
			ASSERT(subr == DIF_SUBR_BASENAME);
			ASSERT(firstbase != -1 && lastbase != -1);
			start = firstbase;
			end = lastbase;
		}

		for (i = start, j = 0; i <= end && j < size - 1; i++, j++)
			dest[j] = dtrace_load8(src + i);

		dest[j] = '\0';
		regs[rd] = (uintptr_t)dest;
		mstate->dtms_scratch_ptr += size;
		break;
	}
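
	/*
	 * Illustrative worked example (not part of the original source) of
	 * the three backward scans above on "/usr//bin/" (len = 10):
	 *
	 *	scan 1 skips the trailing '/', so lastbase = 8 ('n');
	 *	scan 2 stops at the '/' at index 5, so firstbase = 6 ('b');
	 *	scan 3 skips the doubled slashes, so lastdir = 3 ('r').
	 *
	 * basename() copies [firstbase, lastbase] and yields "bin";
	 * dirname() copies [0, lastdir] and yields "/usr".
	 */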

	case DIF_SUBR_GETF: {
		uintptr_t fd = tupregs[0].dttk_value;
		struct filedesc *fdp;
		file_t *fp;

		if (!dtrace_priv_proc(state)) {
			regs[rd] = 0;
			break;
		}
		fdp = curproc->p_fd;
		FILEDESC_SLOCK(fdp);
		/*
		 * XXXMJG this looks broken as no ref is taken.
		 */
		fp = fget_noref(fdp, fd);
		mstate->dtms_getf = fp;
		regs[rd] = (uintptr_t)fp;
		FILEDESC_SUNLOCK(fdp);
		break;
	}

	case DIF_SUBR_CLEANPATH: {
		char *dest = (char *)mstate->dtms_scratch_ptr, c;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
		uintptr_t src = tupregs[0].dttk_value;
		size_t lim;
		int i = 0, j = 0;
#ifdef illumos
		zone_t *z;
#endif

		if (!dtrace_strcanload(src, size, &lim, mstate, vstate)) {
			regs[rd] = 0;
			break;
		}

		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = 0;
			break;
		}

		/*
		 * Move forward, loading each character.
		 */
		do {
			c = (i >= lim) ? '\0' : dtrace_load8(src + i++);
next:
			if (j + 5 >= size)	/* 5 = strlen("/..c\0") */
				break;

			if (c != '/') {
				dest[j++] = c;
				continue;
			}

			c = (i >= lim) ? '\0' : dtrace_load8(src + i++);

			if (c == '/') {
				/*
				 * We have two slashes -- we can just advance
				 * to the next character.
				 */
				goto next;
			}

			if (c != '.') {
				/*
				 * This is not "." and it's not ".." -- we can
				 * just store the "/" and this character and
				 * drive on.
				 */
				dest[j++] = '/';
				dest[j++] = c;
				continue;
			}

			c = (i >= lim) ? '\0' : dtrace_load8(src + i++);

			if (c == '/') {
				/*
				 * This is a "/./" component.  We're not going
				 * to store anything in the destination buffer;
				 * we're just going to go to the next component.
				 */
				goto next;
			}

			if (c != '.') {
				/*
				 * This is not ".." -- we can just store the
				 * "/." and this character and continue
				 * processing.
				 */
				dest[j++] = '/';
				dest[j++] = '.';
				dest[j++] = c;
				continue;
			}

			c = (i >= lim) ? '\0' : dtrace_load8(src + i++);

			if (c != '/' && c != '\0') {
				/*
				 * This is not ".." -- it's "..[mumble]".
				 * We'll store the "/.." and this character
				 * and continue processing.
				 */
				dest[j++] = '/';
				dest[j++] = '.';
				dest[j++] = '.';
				dest[j++] = c;
				continue;
			}

			/*
			 * This is "/../" or "/..\0".  We need to back up
			 * our destination pointer until we find a "/".
			 */
			i--;
			while (j != 0 && dest[--j] != '/')
				continue;

			if (c == '\0')
				dest[++j] = '/';
		} while (c != '\0');

		dest[j] = '\0';

#ifdef illumos
		if (mstate->dtms_getf != NULL &&
		    !(mstate->dtms_access & DTRACE_ACCESS_KERNEL) &&
		    (z = state->dts_cred.dcr_cred->cr_zone) != kcred->cr_zone) {
			/*
			 * If we've done a getf() as a part of this ECB and we
			 * don't have kernel access (and we're not in the global
			 * zone), check if the path we cleaned up begins with
			 * the zone's root path, and trim it off if so.  Note
			 * that this is an output cleanliness issue, not a
			 * security issue: knowing one's zone root path does
			 * not enable privilege escalation.
			 */
			if (strstr(dest, z->zone_rootpath) == dest)
				dest += strlen(z->zone_rootpath) - 1;
		}
#endif

		regs[rd] = (uintptr_t)dest;
		mstate->dtms_scratch_ptr += size;
		break;
	}
5827
	case DIF_SUBR_INET_NTOA:
	case DIF_SUBR_INET_NTOA6:
	case DIF_SUBR_INET_NTOP: {
		size_t size;
		int af, argi, i;
		char *base, *end;

		if (subr == DIF_SUBR_INET_NTOP) {
			af = (int)tupregs[0].dttk_value;
			argi = 1;
		} else {
			af = subr == DIF_SUBR_INET_NTOA ? AF_INET: AF_INET6;
			argi = 0;
		}

		if (af == AF_INET) {
			ipaddr_t ip4;
			uint8_t *ptr8, val;

			if (!dtrace_canload(tupregs[argi].dttk_value,
			    sizeof (ipaddr_t), mstate, vstate)) {
				regs[rd] = 0;
				break;
			}

			/*
			 * Safely load the IPv4 address.
			 */
			ip4 = dtrace_load32(tupregs[argi].dttk_value);

			/*
			 * Check an IPv4 string will fit in scratch.
			 */
			size = INET_ADDRSTRLEN;
			if (!DTRACE_INSCRATCH(mstate, size)) {
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
				regs[rd] = 0;
				break;
			}
			base = (char *)mstate->dtms_scratch_ptr;
			end = (char *)mstate->dtms_scratch_ptr + size - 1;

			/*
			 * Stringify as a dotted decimal quad.
			 */
			*end-- = '\0';
			ptr8 = (uint8_t *)&ip4;
			for (i = 3; i >= 0; i--) {
				val = ptr8[i];

				if (val == 0) {
					*end-- = '0';
				} else {
					for (; val; val /= 10) {
						*end-- = '0' + (val % 10);
					}
				}

				if (i > 0)
					*end-- = '.';
			}
			ASSERT(end + 1 >= base);

		} else if (af == AF_INET6) {
			struct in6_addr ip6;
			int firstzero, tryzero, numzero, v6end;
			uint16_t val;
			const char digits[] = "0123456789abcdef";

			/*
			 * Stringify using RFC 1884 convention 2 - 16 bit
			 * hexadecimal values with a zero-run compression.
			 * Lower case hexadecimal digits are used.
			 * 	eg, fe80::214:4fff:fe0b:76c8.
			 * The IPv4 embedded form is returned for inet_ntop,
			 * just the IPv4 string is returned for inet_ntoa6.
			 */

			if (!dtrace_canload(tupregs[argi].dttk_value,
			    sizeof (struct in6_addr), mstate, vstate)) {
				regs[rd] = 0;
				break;
			}

			/*
			 * Safely load the IPv6 address.
			 */
			dtrace_bcopy(
			    (void *)(uintptr_t)tupregs[argi].dttk_value,
			    (void *)(uintptr_t)&ip6, sizeof (struct in6_addr));

			/*
			 * Check an IPv6 string will fit in scratch.
			 */
			size = INET6_ADDRSTRLEN;
			if (!DTRACE_INSCRATCH(mstate, size)) {
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
				regs[rd] = 0;
				break;
			}
			base = (char *)mstate->dtms_scratch_ptr;
			end = (char *)mstate->dtms_scratch_ptr + size - 1;
			*end-- = '\0';

			/*
			 * Find the longest run of 16 bit zero values
			 * for the single allowed zero compression - "::".
			 */
			firstzero = -1;
			tryzero = -1;
			numzero = 1;
			for (i = 0; i < sizeof (struct in6_addr); i++) {
#ifdef illumos
				if (ip6._S6_un._S6_u8[i] == 0 &&
#else
				if (ip6.__u6_addr.__u6_addr8[i] == 0 &&
#endif
				    tryzero == -1 && i % 2 == 0) {
					tryzero = i;
					continue;
				}

				if (tryzero != -1 &&
#ifdef illumos
				    (ip6._S6_un._S6_u8[i] != 0 ||
#else
				    (ip6.__u6_addr.__u6_addr8[i] != 0 ||
#endif
				    i == sizeof (struct in6_addr) - 1)) {

					if (i - tryzero <= numzero) {
						tryzero = -1;
						continue;
					}

					firstzero = tryzero;
					numzero = i - i % 2 - tryzero;
					tryzero = -1;

#ifdef illumos
					if (ip6._S6_un._S6_u8[i] == 0 &&
#else
					if (ip6.__u6_addr.__u6_addr8[i] == 0 &&
#endif
					    i == sizeof (struct in6_addr) - 1)
						numzero += 2;
				}
			}
			ASSERT(firstzero + numzero <= sizeof (struct in6_addr));

			/*
			 * Check for an IPv4 embedded address.
			 */
			v6end = sizeof (struct in6_addr) - 2;
			if (IN6_IS_ADDR_V4MAPPED(&ip6) ||
			    IN6_IS_ADDR_V4COMPAT(&ip6)) {
				for (i = sizeof (struct in6_addr) - 1;
				    i >= DTRACE_V4MAPPED_OFFSET; i--) {
					ASSERT(end >= base);

#ifdef illumos
					val = ip6._S6_un._S6_u8[i];
#else
					val = ip6.__u6_addr.__u6_addr8[i];
#endif

					if (val == 0) {
						*end-- = '0';
					} else {
						for (; val; val /= 10) {
							*end-- = '0' + val % 10;
						}
					}

					if (i > DTRACE_V4MAPPED_OFFSET)
						*end-- = '.';
				}

				if (subr == DIF_SUBR_INET_NTOA6)
					goto inetout;

				/*
				 * Set v6end to skip the IPv4 address that
				 * we have already stringified.
				 */
				v6end = 10;
			}

			/*
			 * Build the IPv6 string by working through the
			 * address in reverse.
			 */
			for (i = v6end; i >= 0; i -= 2) {
				ASSERT(end >= base);

				if (i == firstzero + numzero - 2) {
					*end-- = ':';
					*end-- = ':';
					i -= numzero - 2;
					continue;
				}

				if (i < 14 && i != firstzero - 2)
					*end-- = ':';

#ifdef illumos
				val = (ip6._S6_un._S6_u8[i] << 8) +
				    ip6._S6_un._S6_u8[i + 1];
#else
				val = (ip6.__u6_addr.__u6_addr8[i] << 8) +
				    ip6.__u6_addr.__u6_addr8[i + 1];
#endif

				if (val == 0) {
					*end-- = '0';
				} else {
					for (; val; val /= 16) {
						*end-- = digits[val % 16];
					}
				}
			}
			ASSERT(end + 1 >= base);

		} else {
			/*
			 * The user didn't use AF_INET or AF_INET6.
			 */
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			regs[rd] = 0;
			break;
		}

inetout:	regs[rd] = (uintptr_t)end + 1;
		mstate->dtms_scratch_ptr += size;
		break;
	}

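	/*
	 * memref() returns a pointer to an (address, length) pair built in
	 * scratch space; it exists to let the single-valued return of a DIF
	 * expression describe a whole memory region, which the printm()
	 * action then consumes.
	 */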
	case DIF_SUBR_MEMREF: {
		uintptr_t size = 2 * sizeof(uintptr_t);
		uintptr_t *memref = (uintptr_t *) P2ROUNDUP(mstate->dtms_scratch_ptr, sizeof(uintptr_t));
		size_t scratch_size = ((uintptr_t) memref - mstate->dtms_scratch_ptr) + size;

		/* address and length */
		memref[0] = tupregs[0].dttk_value;
		memref[1] = tupregs[1].dttk_value;

		regs[rd] = (uintptr_t) memref;
		mstate->dtms_scratch_ptr += scratch_size;
		break;
	}

#ifndef illumos
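	/*
	 * memstr() renders the memory region at tupregs[0] as a single
	 * string in scratch space, substituting the separator character in
	 * tupregs[1] for each embedded NUL; tupregs[2] gives the region
	 * size, bounded by dtrace_memstr_max below.
	 */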
	case DIF_SUBR_MEMSTR: {
		char *str = (char *)mstate->dtms_scratch_ptr;
		uintptr_t mem = tupregs[0].dttk_value;
		char c = tupregs[1].dttk_value;
		size_t size = tupregs[2].dttk_value;
		uint8_t n;
		int i;

		regs[rd] = 0;

		if (size == 0)
			break;

		if (!dtrace_canload(mem, size - 1, mstate, vstate))
			break;

		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			break;
		}

		if (dtrace_memstr_max != 0 && size > dtrace_memstr_max) {
			*flags |= CPU_DTRACE_ILLOP;
			break;
		}

		for (i = 0; i < size - 1; i++) {
			n = dtrace_load8(mem++);
			str[i] = (n == 0) ? c : n;
		}
		str[size - 1] = 0;

		regs[rd] = (uintptr_t)str;
		mstate->dtms_scratch_ptr += size;
		break;
	}
#endif
	}
}

/*
 * Emulate the execution of DTrace IR instructions specified by the given
 * DIF object.  This function is deliberately void of assertions as all of
 * the necessary checks are handled by a call to dtrace_difo_validate().
 */
static uint64_t
dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate, dtrace_state_t *state)
{
	const dif_instr_t *text = difo->dtdo_buf;
	const uint_t textlen = difo->dtdo_len;
	const char *strtab = difo->dtdo_strtab;
	const uint64_t *inttab = difo->dtdo_inttab;

	uint64_t rval = 0;
	dtrace_statvar_t *svar;
	dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
	dtrace_difv_t *v;
	volatile uint16_t *flags = &cpu_core[curcpu].cpuc_dtrace_flags;
	volatile uintptr_t *illval = &cpu_core[curcpu].cpuc_dtrace_illval;

	dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */
	uint64_t regs[DIF_DIR_NREGS];
	uint64_t *tmp;

	uint8_t cc_n = 0, cc_z = 0, cc_v = 0, cc_c = 0;
	int64_t cc_r;
	uint_t pc = 0, id, opc = 0;
	uint8_t ttop = 0;
	dif_instr_t instr;
	uint_t r1, r2, rd;

	/*
	 * We stash the current DIF object into the machine state: we need it
	 * for subsequent access checking.
	 */
	mstate->dtms_difo = difo;

	regs[DIF_REG_R0] = 0;		/* %r0 is fixed at zero */

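	/*
	 * The emulation loop below decodes one dif_instr_t per iteration:
	 * each instruction packs an opcode together with up to two source
	 * registers (r1, r2) and a destination register (rd).  A fault
	 * recorded in the per-CPU flags terminates the loop at the top of
	 * the next iteration; opc tracks the PC of the instruction being
	 * emulated so that a fault offset can be reported below.
	 */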
	while (pc < textlen && !(*flags & CPU_DTRACE_FAULT)) {
		opc = pc;

		instr = text[pc++];
		r1 = DIF_INSTR_R1(instr);
		r2 = DIF_INSTR_R2(instr);
		rd = DIF_INSTR_RD(instr);

		switch (DIF_INSTR_OP(instr)) {
		case DIF_OP_OR:
			regs[rd] = regs[r1] | regs[r2];
			break;
		case DIF_OP_XOR:
			regs[rd] = regs[r1] ^ regs[r2];
			break;
		case DIF_OP_AND:
			regs[rd] = regs[r1] & regs[r2];
			break;
		case DIF_OP_SLL:
			regs[rd] = regs[r1] << regs[r2];
			break;
		case DIF_OP_SRL:
			regs[rd] = regs[r1] >> regs[r2];
			break;
		case DIF_OP_SUB:
			regs[rd] = regs[r1] - regs[r2];
			break;
		case DIF_OP_ADD:
			regs[rd] = regs[r1] + regs[r2];
			break;
		case DIF_OP_MUL:
			regs[rd] = regs[r1] * regs[r2];
			break;
		case DIF_OP_SDIV:
			if (regs[r2] == 0) {
				regs[rd] = 0;
				*flags |= CPU_DTRACE_DIVZERO;
			} else {
				regs[rd] = (int64_t)regs[r1] /
				    (int64_t)regs[r2];
			}
			break;

		case DIF_OP_UDIV:
			if (regs[r2] == 0) {
				regs[rd] = 0;
				*flags |= CPU_DTRACE_DIVZERO;
			} else {
				regs[rd] = regs[r1] / regs[r2];
			}
			break;

		case DIF_OP_SREM:
			if (regs[r2] == 0) {
				regs[rd] = 0;
				*flags |= CPU_DTRACE_DIVZERO;
			} else {
				regs[rd] = (int64_t)regs[r1] %
				    (int64_t)regs[r2];
			}
			break;

		case DIF_OP_UREM:
			if (regs[r2] == 0) {
				regs[rd] = 0;
				*flags |= CPU_DTRACE_DIVZERO;
			} else {
				regs[rd] = regs[r1] % regs[r2];
			}
			break;

		case DIF_OP_NOT:
			regs[rd] = ~regs[r1];
			break;
		case DIF_OP_MOV:
			regs[rd] = regs[r1];
			break;
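		/*
		 * The comparison and test operations set condition codes in
		 * the manner of a classic RISC condition-code register
		 * (negative, zero, overflow, carry); the conditional-branch
		 * opcodes that follow decode against those bits.
		 */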
		case DIF_OP_CMP:
			cc_r = regs[r1] - regs[r2];
			cc_n = cc_r < 0;
			cc_z = cc_r == 0;
			cc_v = 0;
			cc_c = regs[r1] < regs[r2];
			break;
		case DIF_OP_TST:
			cc_n = cc_v = cc_c = 0;
			cc_z = regs[r1] == 0;
			break;
		case DIF_OP_BA:
			pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BE:
			if (cc_z)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BNE:
			if (cc_z == 0)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BG:
			if ((cc_z | (cc_n ^ cc_v)) == 0)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BGU:
			if ((cc_c | cc_z) == 0)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BGE:
			if ((cc_n ^ cc_v) == 0)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BGEU:
			if (cc_c == 0)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BL:
			if (cc_n ^ cc_v)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BLU:
			if (cc_c)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BLE:
			if (cc_z | (cc_n ^ cc_v))
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BLEU:
			if (cc_c | cc_z)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_RLDSB:
			if (!dtrace_canload(regs[r1], 1, mstate, vstate))
				break;
			/*FALLTHROUGH*/
		case DIF_OP_LDSB:
			regs[rd] = (int8_t)dtrace_load8(regs[r1]);
			break;
		case DIF_OP_RLDSH:
			if (!dtrace_canload(regs[r1], 2, mstate, vstate))
				break;
			/*FALLTHROUGH*/
		case DIF_OP_LDSH:
			regs[rd] = (int16_t)dtrace_load16(regs[r1]);
			break;
		case DIF_OP_RLDSW:
			if (!dtrace_canload(regs[r1], 4, mstate, vstate))
				break;
			/*FALLTHROUGH*/
		case DIF_OP_LDSW:
			regs[rd] = (int32_t)dtrace_load32(regs[r1]);
			break;
		case DIF_OP_RLDUB:
			if (!dtrace_canload(regs[r1], 1, mstate, vstate))
				break;
			/*FALLTHROUGH*/
		case DIF_OP_LDUB:
			regs[rd] = dtrace_load8(regs[r1]);
			break;
		case DIF_OP_RLDUH:
			if (!dtrace_canload(regs[r1], 2, mstate, vstate))
				break;
			/*FALLTHROUGH*/
		case DIF_OP_LDUH:
			regs[rd] = dtrace_load16(regs[r1]);
			break;
		case DIF_OP_RLDUW:
			if (!dtrace_canload(regs[r1], 4, mstate, vstate))
				break;
			/*FALLTHROUGH*/
		case DIF_OP_LDUW:
			regs[rd] = dtrace_load32(regs[r1]);
			break;
		case DIF_OP_RLDX:
			if (!dtrace_canload(regs[r1], 8, mstate, vstate))
				break;
			/*FALLTHROUGH*/
		case DIF_OP_LDX:
			regs[rd] = dtrace_load64(regs[r1]);
			break;
		case DIF_OP_ULDSB:
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
			regs[rd] = (int8_t)
			    dtrace_fuword8((void *)(uintptr_t)regs[r1]);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
			break;
		case DIF_OP_ULDSH:
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
			regs[rd] = (int16_t)
			    dtrace_fuword16((void *)(uintptr_t)regs[r1]);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
			break;
		case DIF_OP_ULDSW:
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
			regs[rd] = (int32_t)
			    dtrace_fuword32((void *)(uintptr_t)regs[r1]);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
			break;
		case DIF_OP_ULDUB:
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
			regs[rd] =
			    dtrace_fuword8((void *)(uintptr_t)regs[r1]);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
			break;
		case DIF_OP_ULDUH:
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
			regs[rd] =
			    dtrace_fuword16((void *)(uintptr_t)regs[r1]);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
			break;
		case DIF_OP_ULDUW:
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
			regs[rd] =
			    dtrace_fuword32((void *)(uintptr_t)regs[r1]);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
			break;
		case DIF_OP_ULDX:
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
			regs[rd] =
			    dtrace_fuword64((void *)(uintptr_t)regs[r1]);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
			break;
		case DIF_OP_RET:
			rval = regs[rd];
			pc = textlen;
			break;
		case DIF_OP_NOP:
			break;
		case DIF_OP_SETX:
			regs[rd] = inttab[DIF_INSTR_INTEGER(instr)];
			break;
		case DIF_OP_SETS:
			regs[rd] = (uint64_t)(uintptr_t)
			    (strtab + DIF_INSTR_STRING(instr));
			break;
		case DIF_OP_SCMP: {
			size_t sz = state->dts_options[DTRACEOPT_STRSIZE];
			uintptr_t s1 = regs[r1];
			uintptr_t s2 = regs[r2];
			size_t lim1, lim2;

			/*
			 * If one of the strings is NULL then the limit becomes
			 * 0 which compares 0 characters in dtrace_strncmp()
			 * resulting in a false positive.  dtrace_strncmp()
			 * treats a NULL as an empty 1-char string.
			 */
			lim1 = lim2 = 1;

			if (s1 != 0 &&
			    !dtrace_strcanload(s1, sz, &lim1, mstate, vstate))
				break;
			if (s2 != 0 &&
			    !dtrace_strcanload(s2, sz, &lim2, mstate, vstate))
				break;

			cc_r = dtrace_strncmp((char *)s1, (char *)s2,
			    MIN(lim1, lim2));

			cc_n = cc_r < 0;
			cc_z = cc_r == 0;
			cc_v = cc_c = 0;
			break;
		}
		case DIF_OP_LDGA:
			regs[rd] = dtrace_dif_variable(mstate, state,
			    r1, regs[r2]);
			break;
		case DIF_OP_LDGS:
			id = DIF_INSTR_VAR(instr);

			if (id >= DIF_VAR_OTHER_UBASE) {
				uintptr_t a;

				id -= DIF_VAR_OTHER_UBASE;
				svar = vstate->dtvs_globals[id];
				ASSERT(svar != NULL);
				v = &svar->dtsv_var;

				if (!(v->dtdv_type.dtdt_flags & DIF_TF_BYREF)) {
					regs[rd] = svar->dtsv_data;
					break;
				}

				a = (uintptr_t)svar->dtsv_data;

				if (*(uint8_t *)a == UINT8_MAX) {
					/*
					 * If the 0th byte is set to UINT8_MAX
					 * then this is to be treated as a
					 * reference to a NULL variable.
					 */
					regs[rd] = 0;
				} else {
					regs[rd] = a + sizeof (uint64_t);
				}

				break;
			}

			regs[rd] = dtrace_dif_variable(mstate, state, id, 0);
			break;

		case DIF_OP_STGS:
			id = DIF_INSTR_VAR(instr);

			ASSERT(id >= DIF_VAR_OTHER_UBASE);
			id -= DIF_VAR_OTHER_UBASE;

			VERIFY(id < vstate->dtvs_nglobals);
			svar = vstate->dtvs_globals[id];
			ASSERT(svar != NULL);
			v = &svar->dtsv_var;

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				uintptr_t a = (uintptr_t)svar->dtsv_data;
				size_t lim;

				ASSERT(a != 0);
				ASSERT(svar->dtsv_size != 0);

				if (regs[rd] == 0) {
					*(uint8_t *)a = UINT8_MAX;
					break;
				} else {
					*(uint8_t *)a = 0;
					a += sizeof (uint64_t);
				}
				if (!dtrace_vcanload(
				    (void *)(uintptr_t)regs[rd], &v->dtdv_type,
				    &lim, mstate, vstate))
					break;

				dtrace_vcopy((void *)(uintptr_t)regs[rd],
				    (void *)a, &v->dtdv_type, lim);
				break;
			}

			svar->dtsv_data = regs[rd];
			break;

		case DIF_OP_LDTA:
			/*
			 * There are no DTrace built-in thread-local arrays at
			 * present.  This opcode is saved for future work.
			 */
			*flags |= CPU_DTRACE_ILLOP;
			regs[rd] = 0;
			break;

		case DIF_OP_LDLS:
			id = DIF_INSTR_VAR(instr);

			if (id < DIF_VAR_OTHER_UBASE) {
				/*
				 * For now, this has no meaning.
				 */
				regs[rd] = 0;
				break;
			}

			id -= DIF_VAR_OTHER_UBASE;

			ASSERT(id < vstate->dtvs_nlocals);
			ASSERT(vstate->dtvs_locals != NULL);

			svar = vstate->dtvs_locals[id];
			ASSERT(svar != NULL);
			v = &svar->dtsv_var;

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				uintptr_t a = (uintptr_t)svar->dtsv_data;
				size_t sz = v->dtdv_type.dtdt_size;
				size_t lim;

				sz += sizeof (uint64_t);
				ASSERT(svar->dtsv_size == (mp_maxid + 1) * sz);
				a += curcpu * sz;

				if (*(uint8_t *)a == UINT8_MAX) {
					/*
					 * If the 0th byte is set to UINT8_MAX
					 * then this is to be treated as a
					 * reference to a NULL variable.
					 */
					regs[rd] = 0;
				} else {
					regs[rd] = a + sizeof (uint64_t);
				}

				break;
			}

			ASSERT(svar->dtsv_size ==
			    (mp_maxid + 1) * sizeof (uint64_t));
			tmp = (uint64_t *)(uintptr_t)svar->dtsv_data;
			regs[rd] = tmp[curcpu];
			break;

		case DIF_OP_STLS:
			id = DIF_INSTR_VAR(instr);

			ASSERT(id >= DIF_VAR_OTHER_UBASE);
			id -= DIF_VAR_OTHER_UBASE;
			VERIFY(id < vstate->dtvs_nlocals);

			ASSERT(vstate->dtvs_locals != NULL);
			svar = vstate->dtvs_locals[id];
			ASSERT(svar != NULL);
			v = &svar->dtsv_var;

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				uintptr_t a = (uintptr_t)svar->dtsv_data;
				size_t sz = v->dtdv_type.dtdt_size;
				size_t lim;

				sz += sizeof (uint64_t);
				ASSERT(svar->dtsv_size == (mp_maxid + 1) * sz);
				a += curcpu * sz;

				if (regs[rd] == 0) {
					*(uint8_t *)a = UINT8_MAX;
					break;
				} else {
					*(uint8_t *)a = 0;
					a += sizeof (uint64_t);
				}

				if (!dtrace_vcanload(
				    (void *)(uintptr_t)regs[rd], &v->dtdv_type,
				    &lim, mstate, vstate))
					break;

				dtrace_vcopy((void *)(uintptr_t)regs[rd],
				    (void *)a, &v->dtdv_type, lim);
				break;
			}

			ASSERT(svar->dtsv_size ==
			    (mp_maxid + 1) * sizeof (uint64_t));
			tmp = (uint64_t *)(uintptr_t)svar->dtsv_data;
			tmp[curcpu] = regs[rd];
			break;

		case DIF_OP_LDTS: {
			dtrace_dynvar_t *dvar;
			dtrace_key_t *key;

			id = DIF_INSTR_VAR(instr);
			ASSERT(id >= DIF_VAR_OTHER_UBASE);
			id -= DIF_VAR_OTHER_UBASE;
			v = &vstate->dtvs_tlocals[id];

			key = &tupregs[DIF_DTR_NREGS];
			key[0].dttk_value = (uint64_t)id;
			key[0].dttk_size = 0;
			DTRACE_TLS_THRKEY(key[1].dttk_value);
			key[1].dttk_size = 0;

			dvar = dtrace_dynvar(dstate, 2, key,
			    sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC,
			    mstate, vstate);

			if (dvar == NULL) {
				regs[rd] = 0;
				break;
			}

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data;
			} else {
				regs[rd] = *((uint64_t *)dvar->dtdv_data);
			}

			break;
		}

		case DIF_OP_STTS: {
			dtrace_dynvar_t *dvar;
			dtrace_key_t *key;

			id = DIF_INSTR_VAR(instr);
			ASSERT(id >= DIF_VAR_OTHER_UBASE);
			id -= DIF_VAR_OTHER_UBASE;
			VERIFY(id < vstate->dtvs_ntlocals);

			key = &tupregs[DIF_DTR_NREGS];
			key[0].dttk_value = (uint64_t)id;
			key[0].dttk_size = 0;
			DTRACE_TLS_THRKEY(key[1].dttk_value);
			key[1].dttk_size = 0;
			v = &vstate->dtvs_tlocals[id];

			dvar = dtrace_dynvar(dstate, 2, key,
			    v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
			    v->dtdv_type.dtdt_size : sizeof (uint64_t),
			    regs[rd] ? DTRACE_DYNVAR_ALLOC :
			    DTRACE_DYNVAR_DEALLOC, mstate, vstate);

			/*
			 * Given that we're storing to thread-local data,
			 * we need to flush our predicate cache.
			 */
			curthread->t_predcache = 0;

			if (dvar == NULL)
				break;

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				size_t lim;

				if (!dtrace_vcanload(
				    (void *)(uintptr_t)regs[rd],
				    &v->dtdv_type, &lim, mstate, vstate))
					break;

				dtrace_vcopy((void *)(uintptr_t)regs[rd],
				    dvar->dtdv_data, &v->dtdv_type, lim);
			} else {
				*((uint64_t *)dvar->dtdv_data) = regs[rd];
			}

			break;
		}

		case DIF_OP_SRA:
			regs[rd] = (int64_t)regs[r1] >> regs[r2];
			break;

		case DIF_OP_CALL:
			dtrace_dif_subr(DIF_INSTR_SUBR(instr), rd,
			    regs, tupregs, ttop, mstate, state);
			break;

		case DIF_OP_PUSHTR:
			if (ttop == DIF_DTR_NREGS) {
				*flags |= CPU_DTRACE_TUPOFLOW;
				break;
			}

			if (r1 == DIF_TYPE_STRING) {
				/*
				 * If this is a string type and the size is 0,
				 * we'll use the system-wide default string
				 * size.  Note that we are _not_ looking at
				 * the value of the DTRACEOPT_STRSIZE option;
				 * had this been set, we would expect to have
				 * a non-zero size value in the "pushtr".
				 */
				tupregs[ttop].dttk_size =
				    dtrace_strlen((char *)(uintptr_t)regs[rd],
				    regs[r2] ? regs[r2] :
				    dtrace_strsize_default) + 1;
			} else {
				if (regs[r2] > LONG_MAX) {
					*flags |= CPU_DTRACE_ILLOP;
					break;
				}

				tupregs[ttop].dttk_size = regs[r2];
			}

			tupregs[ttop++].dttk_value = regs[rd];
			break;

		case DIF_OP_PUSHTV:
			if (ttop == DIF_DTR_NREGS) {
				*flags |= CPU_DTRACE_TUPOFLOW;
				break;
			}

			tupregs[ttop].dttk_value = regs[rd];
			tupregs[ttop++].dttk_size = 0;
			break;

		case DIF_OP_POPTS:
			if (ttop != 0)
				ttop--;
			break;

		case DIF_OP_FLUSHTS:
			ttop = 0;
			break;

		case DIF_OP_LDGAA:
		case DIF_OP_LDTAA: {
			dtrace_dynvar_t *dvar;
			dtrace_key_t *key = tupregs;
			uint_t nkeys = ttop;

			id = DIF_INSTR_VAR(instr);
			ASSERT(id >= DIF_VAR_OTHER_UBASE);
			id -= DIF_VAR_OTHER_UBASE;

			key[nkeys].dttk_value = (uint64_t)id;
			key[nkeys++].dttk_size = 0;

			if (DIF_INSTR_OP(instr) == DIF_OP_LDTAA) {
				DTRACE_TLS_THRKEY(key[nkeys].dttk_value);
				key[nkeys++].dttk_size = 0;
				VERIFY(id < vstate->dtvs_ntlocals);
				v = &vstate->dtvs_tlocals[id];
			} else {
				VERIFY(id < vstate->dtvs_nglobals);
				v = &vstate->dtvs_globals[id]->dtsv_var;
			}

			dvar = dtrace_dynvar(dstate, nkeys, key,
			    v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
			    v->dtdv_type.dtdt_size : sizeof (uint64_t),
			    DTRACE_DYNVAR_NOALLOC, mstate, vstate);

			if (dvar == NULL) {
				regs[rd] = 0;
				break;
			}

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data;
			} else {
				regs[rd] = *((uint64_t *)dvar->dtdv_data);
			}

			break;
		}

		case DIF_OP_STGAA:
		case DIF_OP_STTAA: {
			dtrace_dynvar_t *dvar;
			dtrace_key_t *key = tupregs;
			uint_t nkeys = ttop;

			id = DIF_INSTR_VAR(instr);
			ASSERT(id >= DIF_VAR_OTHER_UBASE);
			id -= DIF_VAR_OTHER_UBASE;

			key[nkeys].dttk_value = (uint64_t)id;
			key[nkeys++].dttk_size = 0;

			if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) {
				DTRACE_TLS_THRKEY(key[nkeys].dttk_value);
				key[nkeys++].dttk_size = 0;
				VERIFY(id < vstate->dtvs_ntlocals);
				v = &vstate->dtvs_tlocals[id];
			} else {
				VERIFY(id < vstate->dtvs_nglobals);
				v = &vstate->dtvs_globals[id]->dtsv_var;
			}

			dvar = dtrace_dynvar(dstate, nkeys, key,
			    v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
			    v->dtdv_type.dtdt_size : sizeof (uint64_t),
			    regs[rd] ? DTRACE_DYNVAR_ALLOC :
			    DTRACE_DYNVAR_DEALLOC, mstate, vstate);

			if (dvar == NULL)
				break;

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				size_t lim;

				if (!dtrace_vcanload(
				    (void *)(uintptr_t)regs[rd], &v->dtdv_type,
				    &lim, mstate, vstate))
					break;

				dtrace_vcopy((void *)(uintptr_t)regs[rd],
				    dvar->dtdv_data, &v->dtdv_type, lim);
			} else {
				*((uint64_t *)dvar->dtdv_data) = regs[rd];
			}

			break;
		}

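		/*
		 * DIF_OP_ALLOCS backs the D alloca() subroutine: it carves
		 * regs[r1] bytes of zeroed, 8-byte-aligned storage out of
		 * the per-CPU scratch region.
		 */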
		case DIF_OP_ALLOCS: {
			uintptr_t ptr = P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
			size_t size = ptr - mstate->dtms_scratch_ptr + regs[r1];

			/*
			 * Rounding up the user allocation size could have
			 * overflowed large, bogus allocations (like -1ULL) to
			 * 0.
			 */
			if (size < regs[r1] ||
			    !DTRACE_INSCRATCH(mstate, size)) {
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
				regs[rd] = 0;
				break;
			}

			dtrace_bzero((void *) mstate->dtms_scratch_ptr, size);
			mstate->dtms_scratch_ptr += size;
			regs[rd] = ptr;
			break;
		}

		case DIF_OP_COPYS:
			if (!dtrace_canstore(regs[rd], regs[r2],
			    mstate, vstate)) {
				*flags |= CPU_DTRACE_BADADDR;
				*illval = regs[rd];
				break;
			}

			if (!dtrace_canload(regs[r1], regs[r2], mstate, vstate))
				break;

			dtrace_bcopy((void *)(uintptr_t)regs[r1],
			    (void *)(uintptr_t)regs[rd], (size_t)regs[r2]);
			break;

		case DIF_OP_STB:
			if (!dtrace_canstore(regs[rd], 1, mstate, vstate)) {
				*flags |= CPU_DTRACE_BADADDR;
				*illval = regs[rd];
				break;
			}
			*((uint8_t *)(uintptr_t)regs[rd]) = (uint8_t)regs[r1];
			break;

		case DIF_OP_STH:
			if (!dtrace_canstore(regs[rd], 2, mstate, vstate)) {
				*flags |= CPU_DTRACE_BADADDR;
				*illval = regs[rd];
				break;
			}
			if (regs[rd] & 1) {
				*flags |= CPU_DTRACE_BADALIGN;
				*illval = regs[rd];
				break;
			}
			*((uint16_t *)(uintptr_t)regs[rd]) = (uint16_t)regs[r1];
			break;

		case DIF_OP_STW:
			if (!dtrace_canstore(regs[rd], 4, mstate, vstate)) {
				*flags |= CPU_DTRACE_BADADDR;
				*illval = regs[rd];
				break;
			}
			if (regs[rd] & 3) {
				*flags |= CPU_DTRACE_BADALIGN;
				*illval = regs[rd];
				break;
			}
			*((uint32_t *)(uintptr_t)regs[rd]) = (uint32_t)regs[r1];
			break;

		case DIF_OP_STX:
			if (!dtrace_canstore(regs[rd], 8, mstate, vstate)) {
				*flags |= CPU_DTRACE_BADADDR;
				*illval = regs[rd];
				break;
			}
			if (regs[rd] & 7) {
				*flags |= CPU_DTRACE_BADALIGN;
				*illval = regs[rd];
				break;
			}
			*((uint64_t *)(uintptr_t)regs[rd]) = regs[r1];
			break;
		}
	}

	if (!(*flags & CPU_DTRACE_FAULT))
		return (rval);

	mstate->dtms_fltoffs = opc * sizeof (dif_instr_t);
	mstate->dtms_present |= DTRACE_MSTATE_FLTOFFS;

	return (0);
}

static void
dtrace_action_breakpoint(dtrace_ecb_t *ecb)
{
	dtrace_probe_t *probe = ecb->dte_probe;
	dtrace_provider_t *prov = probe->dtpr_provider;
	char c[DTRACE_FULLNAMELEN + 80], *str;
	char *msg = "dtrace: breakpoint action at probe ";
	char *ecbmsg = " (ecb ";
	uintptr_t val = (uintptr_t)ecb;
	int shift = (sizeof (uintptr_t) * NBBY) - 4, i = 0;

	if (dtrace_destructive_disallow)
		return;

	/*
	 * It's impossible to be taking action on the NULL probe.
	 */
	ASSERT(probe != NULL);

	/*
	 * This is a poor man's (destitute man's?) sprintf(): we want to
	 * print the provider name, module name, function name and name of
	 * the probe, along with the hex address of the ECB with the breakpoint
	 * action -- all of which we must place in the character buffer by
	 * hand.
	 */
	while (*msg != '\0')
		c[i++] = *msg++;

	for (str = prov->dtpv_name; *str != '\0'; str++)
		c[i++] = *str;
	c[i++] = ':';

	for (str = probe->dtpr_mod; *str != '\0'; str++)
		c[i++] = *str;
	c[i++] = ':';

	for (str = probe->dtpr_func; *str != '\0'; str++)
		c[i++] = *str;
	c[i++] = ':';

	for (str = probe->dtpr_name; *str != '\0'; str++)
		c[i++] = *str;

	while (*ecbmsg != '\0')
		c[i++] = *ecbmsg++;

	while (shift >= 0) {
		size_t mask = (size_t)0xf << shift;

		if (val >= ((size_t)1 << shift))
			c[i++] = "0123456789abcdef"[(val & mask) >> shift];
		shift -= 4;
	}

	c[i++] = ')';
	c[i] = '\0';

#ifdef illumos
	debug_enter(c);
#else
	kdb_enter(KDB_WHY_DTRACE, "breakpoint action");
#endif
}

static void
dtrace_action_panic(dtrace_ecb_t *ecb)
{
	dtrace_probe_t *probe = ecb->dte_probe;

	/*
	 * It's impossible to be taking action on the NULL probe.
	 */
	ASSERT(probe != NULL);

	if (dtrace_destructive_disallow)
		return;

	if (dtrace_panicked != NULL)
		return;

	if (dtrace_casptr(&dtrace_panicked, NULL, curthread) != NULL)
		return;

	/*
	 * We won the right to panic.  (We want to be sure that only one
	 * thread calls panic() from dtrace_probe(), and that panic() is
	 * called exactly once.)
	 */
	dtrace_panic("dtrace: panic action at probe %s:%s:%s:%s (ecb %p)",
	    probe->dtpr_provider->dtpv_name, probe->dtpr_mod,
	    probe->dtpr_func, probe->dtpr_name, (void *)ecb);
}

static void
dtrace_action_raise(uint64_t sig)
{
	if (dtrace_destructive_disallow)
		return;

	if (sig >= NSIG) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
		return;
	}

#ifdef illumos
	/*
	 * raise() has a queue depth of 1 -- we ignore all subsequent
	 * invocations of the raise() action.
	 */
	if (curthread->t_dtrace_sig == 0)
		curthread->t_dtrace_sig = (uint8_t)sig;

	curthread->t_sig_check = 1;
	aston(curthread);
#else
	struct proc *p = curproc;
	PROC_LOCK(p);
	kern_psignal(p, sig);
	PROC_UNLOCK(p);
#endif
}

static void
dtrace_action_stop(void)
{
	if (dtrace_destructive_disallow)
		return;

#ifdef illumos
	if (!curthread->t_dtrace_stop) {
		curthread->t_dtrace_stop = 1;
		curthread->t_sig_check = 1;
		aston(curthread);
	}
#else
	struct proc *p = curproc;
	PROC_LOCK(p);
	kern_psignal(p, SIGSTOP);
	PROC_UNLOCK(p);
#endif
}

static void
dtrace_action_chill(dtrace_mstate_t *mstate, hrtime_t val)
{
	hrtime_t now;
	volatile uint16_t *flags;
#ifdef illumos
	cpu_t *cpu = CPU;
#else
	cpu_t *cpu = &solaris_cpu[curcpu];
#endif

	if (dtrace_destructive_disallow)
		return;

	flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags;

	now = dtrace_gethrtime();

	if (now - cpu->cpu_dtrace_chillmark > dtrace_chill_interval) {
		/*
		 * We need to advance the mark to the current time.
		 */
		cpu->cpu_dtrace_chillmark = now;
		cpu->cpu_dtrace_chilled = 0;
	}

	/*
	 * Now check to see if the requested chill time would take us over
	 * the maximum amount of time allowed in the chill interval.  (Or
	 * worse, if the calculation itself induces overflow.)
	 */
	if (cpu->cpu_dtrace_chilled + val > dtrace_chill_max ||
	    cpu->cpu_dtrace_chilled + val < cpu->cpu_dtrace_chilled) {
		*flags |= CPU_DTRACE_ILLOP;
		return;
	}

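	/*
	 * Chill by spinning: this executes in probe context with
	 * interrupts disabled, which is precisely why chill() is gated
	 * both by the limits above and by dtrace_destructive_disallow.
	 */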
	while (dtrace_gethrtime() - now < val)
		continue;

	/*
	 * Normally, we assure that the value of the variable "timestamp" does
	 * not change within an ECB.  The presence of chill() represents an
	 * exception to this rule, however.
	 */
	mstate->dtms_present &= ~DTRACE_MSTATE_TIMESTAMP;
	cpu->cpu_dtrace_chilled += val;
}

static void
dtrace_action_ustack(dtrace_mstate_t *mstate, dtrace_state_t *state,
    uint64_t *buf, uint64_t arg)
{
	int nframes = DTRACE_USTACK_NFRAMES(arg);
	int strsize = DTRACE_USTACK_STRSIZE(arg);
	uint64_t *pcs = &buf[1], *fps;
	char *str = (char *)&pcs[nframes];
	int size, offs = 0, i, j;
	size_t rem;
	uintptr_t old = mstate->dtms_scratch_ptr, saved;
	uint16_t *flags = &cpu_core[curcpu].cpuc_dtrace_flags;
	char *sym;

	/*
	 * Should be taking a faster path if string space has not been
	 * allocated.
	 */
	ASSERT(strsize != 0);

	/*
	 * We will first allocate some temporary space for the frame pointers.
	 */
	fps = (uint64_t *)P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
	size = (uintptr_t)fps - mstate->dtms_scratch_ptr +
	    (nframes * sizeof (uint64_t));

	if (!DTRACE_INSCRATCH(mstate, size)) {
		/*
		 * Not enough room for our frame pointers -- need to indicate
		 * that we ran out of scratch space.
		 */
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
		return;
	}

	mstate->dtms_scratch_ptr += size;
	saved = mstate->dtms_scratch_ptr;

	/*
	 * Now get a stack with both program counters and frame pointers.
	 */
	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
	dtrace_getufpstack(buf, fps, nframes + 1);
	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

	/*
	 * If that faulted, we're cooked.
	 */
	if (*flags & CPU_DTRACE_FAULT)
		goto out;

	/*
	 * Now we want to walk up the stack, calling the USTACK helper.  For
	 * each iteration, we restore the scratch pointer.
	 */
	for (i = 0; i < nframes; i++) {
		mstate->dtms_scratch_ptr = saved;

		if (offs >= strsize)
			break;

		sym = (char *)(uintptr_t)dtrace_helper(
		    DTRACE_HELPER_ACTION_USTACK,
		    mstate, state, pcs[i], fps[i]);

		/*
		 * If we faulted while running the helper, we're going to
		 * clear the fault and null out the corresponding string.
		 */
		if (*flags & CPU_DTRACE_FAULT) {
			*flags &= ~CPU_DTRACE_FAULT;
			str[offs++] = '\0';
			continue;
		}

		if (sym == NULL) {
			str[offs++] = '\0';
			continue;
		}

		if (!dtrace_strcanload((uintptr_t)sym, strsize, &rem, mstate,
		    &(state->dts_vstate))) {
			str[offs++] = '\0';
			continue;
		}

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);

		/*
		 * Now copy in the string that the helper returned to us.
		 */
		for (j = 0; offs + j < strsize && j < rem; j++) {
			if ((str[offs + j] = sym[j]) == '\0')
				break;
		}

		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

		offs += j + 1;
	}

	if (offs >= strsize) {
		/*
		 * If we didn't have room for all of the strings, we don't
		 * abort processing -- this needn't be a fatal error -- but we
		 * still want to increment a counter (dts_stkstroverflows) to
		 * allow this condition to be warned about.  (If this is from
		 * a jstack() action, it is easily tuned via jstackstrsize.)
		 */
		dtrace_error(&state->dts_stkstroverflows);
	}

	while (offs < strsize)
		str[offs++] = '\0';

out:
	mstate->dtms_scratch_ptr = old;
}

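/*
 * Copy a by-reference (or by-user-reference) value into the principal
 * buffer at *valoffsp, one byte at a time.  String values stop loading
 * at the terminating NUL and are zero-filled thereafter; user loads are
 * performed with faults masked, so a bad user address terminates the
 * copy rather than faulting the kernel.
 */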
static void
dtrace_store_by_ref(dtrace_difo_t *dp, caddr_t tomax, size_t size,
    size_t *valoffsp, uint64_t *valp, uint64_t end, int intuple, int dtkind)
{
	volatile uint16_t *flags;
	uint64_t val = *valp;
	size_t valoffs = *valoffsp;

	flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags;
	ASSERT(dtkind == DIF_TF_BYREF || dtkind == DIF_TF_BYUREF);

	/*
	 * If this is a string, we're going to only load until we find the zero
	 * byte -- after which we'll store zero bytes.
	 */
	if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) {
		char c = '\0' + 1;
		size_t s;

		for (s = 0; s < size; s++) {
			if (c != '\0' && dtkind == DIF_TF_BYREF) {
				c = dtrace_load8(val++);
			} else if (c != '\0' && dtkind == DIF_TF_BYUREF) {
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
				c = dtrace_fuword8((void *)(uintptr_t)val++);
				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
				if (*flags & CPU_DTRACE_FAULT)
					break;
			}

			DTRACE_STORE(uint8_t, tomax, valoffs++, c);

			if (c == '\0' && intuple)
				break;
		}
	} else {
		uint8_t c;
		while (valoffs < end) {
			if (dtkind == DIF_TF_BYREF) {
				c = dtrace_load8(val++);
			} else if (dtkind == DIF_TF_BYUREF) {
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
				c = dtrace_fuword8((void *)(uintptr_t)val++);
				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
				if (*flags & CPU_DTRACE_FAULT)
					break;
			}

			DTRACE_STORE(uint8_t, tomax,
			    valoffs++, c);
		}
	}

	*valp = val;
	*valoffsp = valoffs;
}

/*
 * Disables interrupts and sets the per-thread inprobe flag.  When DEBUG is
 * defined, we also assert that we are not recursing unless the probe ID is an
 * error probe.
 */
static dtrace_icookie_t
dtrace_probe_enter(dtrace_id_t id)
{
	dtrace_icookie_t cookie;

	cookie = dtrace_interrupt_disable();

	/*
	 * Unless this is an ERROR probe, we are not allowed to recurse in
	 * dtrace_probe().  Recursing into a DTrace probe usually means that a
	 * function is instrumented that should not have been instrumented or
	 * that the ordering guarantee of the records will be violated,
	 * resulting in unexpected output.  If there is an exception to this
	 * assertion, a new case should be added.
	 */
7322
ASSERT(curthread->t_dtrace_inprobe == 0 ||
7323
id == dtrace_probeid_error);
7324
curthread->t_dtrace_inprobe = 1;
7325
7326
return (cookie);
7327
}
7328
7329
/*
7330
* Clears the per-thread inprobe flag and enables interrupts.
7331
*/
7332
static void
7333
dtrace_probe_exit(dtrace_icookie_t cookie)
7334
{
7335
7336
curthread->t_dtrace_inprobe = 0;
7337
dtrace_interrupt_enable(cookie);
7338
}
7339
7340
/*
7341
* If you're looking for the epicenter of DTrace, you just found it. This
7342
* is the function called by the provider to fire a probe -- from which all
7343
* subsequent probe-context DTrace activity emanates.
7344
*/
7345
void
7346
dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
7347
uintptr_t arg2, uintptr_t arg3, uintptr_t arg4)
7348
{
7349
processorid_t cpuid;
7350
dtrace_icookie_t cookie;
7351
dtrace_probe_t *probe;
7352
dtrace_mstate_t mstate;
7353
dtrace_ecb_t *ecb;
7354
dtrace_action_t *act;
7355
intptr_t offs;
7356
size_t size;
7357
int vtime, onintr;
7358
volatile uint16_t *flags;
7359
hrtime_t now;
7360
7361
if (KERNEL_PANICKED())
7362
return;
7363
7364
#ifdef illumos
7365
/*
7366
* Kick out immediately if this CPU is still being born (in which case
7367
* curthread will be set to -1) or the current thread can't allow
7368
* probes in its current context.
7369
*/
7370
if (((uintptr_t)curthread & 1) || (curthread->t_flag & T_DONTDTRACE))
7371
return;
7372
#endif
7373
7374
cookie = dtrace_probe_enter(id);
7375
probe = dtrace_probes[id - 1];
7376
cpuid = curcpu;
7377
onintr = CPU_ON_INTR(CPU);
7378
7379
if (!onintr && probe->dtpr_predcache != DTRACE_CACHEIDNONE &&
7380
probe->dtpr_predcache == curthread->t_predcache) {
7381
/*
7382
* We have hit in the predicate cache; we know that
7383
* this predicate would evaluate to be false.
7384
*/
7385
dtrace_probe_exit(cookie);
7386
return;
7387
}
7388
7389
#ifdef illumos
7390
if (panic_quiesce) {
7391
#else
7392
if (KERNEL_PANICKED()) {
7393
#endif
7394
/*
7395
* We don't trace anything if we're panicking.
7396
*/
7397
dtrace_probe_exit(cookie);
7398
return;
7399
}
7400
7401
now = mstate.dtms_timestamp = dtrace_gethrtime();
7402
mstate.dtms_present = DTRACE_MSTATE_TIMESTAMP;
7403
vtime = dtrace_vtime_references != 0;
7404
7405
if (vtime && curthread->t_dtrace_start)
7406
curthread->t_dtrace_vtime += now - curthread->t_dtrace_start;
7407
7408
mstate.dtms_difo = NULL;
7409
mstate.dtms_probe = probe;
7410
mstate.dtms_strtok = 0;
7411
mstate.dtms_arg[0] = arg0;
7412
mstate.dtms_arg[1] = arg1;
7413
mstate.dtms_arg[2] = arg2;
7414
mstate.dtms_arg[3] = arg3;
7415
mstate.dtms_arg[4] = arg4;
7416
7417
flags = (volatile uint16_t *)&cpu_core[cpuid].cpuc_dtrace_flags;
7418
7419
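	/*
	 * Process each ECB (enabling control block) on this probe in turn.
	 * Each ECB carries its own consumer state, predicate and action
	 * list; an error or drop in one ECB must not perturb the others,
	 * which is why the error flag is cleared per iteration below.
	 */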
	for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) {
		dtrace_predicate_t *pred = ecb->dte_predicate;
		dtrace_state_t *state = ecb->dte_state;
		dtrace_buffer_t *buf = &state->dts_buffer[cpuid];
		dtrace_buffer_t *aggbuf = &state->dts_aggbuffer[cpuid];
		dtrace_vstate_t *vstate = &state->dts_vstate;
		dtrace_provider_t *prov = probe->dtpr_provider;
		uint64_t tracememsize = 0;
		int committed = 0;
		caddr_t tomax;

		/*
		 * A little subtlety with the following (seemingly innocuous)
		 * declaration of the automatic 'val': by looking at the
		 * code, you might think that it could be declared in the
		 * action processing loop, below.  (That is, it's only used in
		 * the action processing loop.)  However, it must be declared
		 * out of that scope because in the case of DIF expression
		 * arguments to aggregating actions, one iteration of the
		 * action loop will use the last iteration's value.
		 */
		uint64_t val = 0;

		mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE;
		mstate.dtms_getf = NULL;

		*flags &= ~CPU_DTRACE_ERROR;

		if (prov == dtrace_provider) {
			/*
			 * If dtrace itself is the provider of this probe,
			 * we're only going to continue processing the ECB if
			 * arg0 (the dtrace_state_t) is equal to the ECB's
			 * creating state.  (This prevents disjoint consumers
			 * from seeing one another's metaprobes.)
			 */
			if (arg0 != (uint64_t)(uintptr_t)state)
				continue;
		}

		if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE) {
			/*
			 * We're not currently active.  If our provider isn't
			 * the dtrace pseudo provider, we're not interested.
			 */
			if (prov != dtrace_provider)
				continue;

			/*
			 * Now we must further check if we are in the BEGIN
			 * probe.  If we are, we will only continue processing
			 * if we're still in WARMUP -- if one BEGIN enabling
			 * has invoked the exit() action, we don't want to
			 * evaluate subsequent BEGIN enablings.
			 */
			if (probe->dtpr_id == dtrace_probeid_begin &&
			    state->dts_activity != DTRACE_ACTIVITY_WARMUP) {
				ASSERT(state->dts_activity ==
				    DTRACE_ACTIVITY_DRAINING);
				continue;
			}
		}

		if (ecb->dte_cond) {
			/*
			 * If the dte_cond bits indicate that this
			 * consumer is only allowed to see user-mode firings
			 * of this probe, call the provider's dtps_usermode()
			 * entry point to check that the probe was fired
			 * while in a user context.  Skip this ECB if that's
			 * not the case.
			 */
			if ((ecb->dte_cond & DTRACE_COND_USERMODE) &&
			    prov->dtpv_pops.dtps_usermode(prov->dtpv_arg,
			    probe->dtpr_id, probe->dtpr_arg) == 0)
				continue;

#ifdef illumos
			/*
			 * This is more subtle than it looks.  We have to be
			 * absolutely certain that CRED() isn't going to
			 * change out from under us so it's only legit to
			 * examine that structure if we're in constrained
			 * situations.  Currently, the only times we'll do this
			 * check is if a non-super-user has enabled the
			 * profile or syscall providers -- providers that
			 * allow visibility of all processes.  For the
			 * profile case, the check above will ensure that
			 * we're examining a user context.
			 */
			if (ecb->dte_cond & DTRACE_COND_OWNER) {
				cred_t *cr;
				cred_t *s_cr =
				    ecb->dte_state->dts_cred.dcr_cred;
				proc_t *proc;

				ASSERT(s_cr != NULL);

				if ((cr = CRED()) == NULL ||
				    s_cr->cr_uid != cr->cr_uid ||
				    s_cr->cr_uid != cr->cr_ruid ||
				    s_cr->cr_uid != cr->cr_suid ||
				    s_cr->cr_gid != cr->cr_gid ||
				    s_cr->cr_gid != cr->cr_rgid ||
				    s_cr->cr_gid != cr->cr_sgid ||
				    (proc = ttoproc(curthread)) == NULL ||
				    (proc->p_flag & SNOCD))
					continue;
			}

			if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) {
				cred_t *cr;
				cred_t *s_cr =
				    ecb->dte_state->dts_cred.dcr_cred;

				ASSERT(s_cr != NULL);

				if ((cr = CRED()) == NULL ||
				    s_cr->cr_zone->zone_id !=
				    cr->cr_zone->zone_id)
					continue;
			}
#endif
		}

		if (now - state->dts_alive > dtrace_deadman_timeout) {
			/*
			 * We seem to be dead.  Unless we (a) have kernel
			 * destructive permissions (b) have explicitly enabled
			 * destructive actions and (c) destructive actions have
			 * not been disabled, we're going to transition into
			 * the KILLED state, from which no further processing
			 * on this state will be performed.
			 */
			if (!dtrace_priv_kernel_destructive(state) ||
			    !state->dts_cred.dcr_destructive ||
			    dtrace_destructive_disallow) {
				void *activity = &state->dts_activity;
				dtrace_activity_t curstate;

				do {
					curstate = state->dts_activity;
				} while (dtrace_cas32(activity, curstate,
				    DTRACE_ACTIVITY_KILLED) != curstate);

				continue;
			}
		}

		if ((offs = dtrace_buffer_reserve(buf, ecb->dte_needed,
		    ecb->dte_alignment, state, &mstate)) < 0)
			continue;

		tomax = buf->dtb_tomax;
		ASSERT(tomax != NULL);

		if (ecb->dte_size != 0) {
			dtrace_rechdr_t dtrh;
			if (!(mstate.dtms_present & DTRACE_MSTATE_TIMESTAMP)) {
				mstate.dtms_timestamp = dtrace_gethrtime();
				mstate.dtms_present |= DTRACE_MSTATE_TIMESTAMP;
			}
			ASSERT3U(ecb->dte_size, >=, sizeof (dtrace_rechdr_t));
			dtrh.dtrh_epid = ecb->dte_epid;
			DTRACE_RECORD_STORE_TIMESTAMP(&dtrh,
			    mstate.dtms_timestamp);
			*((dtrace_rechdr_t *)(tomax + offs)) = dtrh;
		}

		mstate.dtms_epid = ecb->dte_epid;
		mstate.dtms_present |= DTRACE_MSTATE_EPID;

		if (state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)
			mstate.dtms_access = DTRACE_ACCESS_KERNEL;
		else
			mstate.dtms_access = 0;

		if (pred != NULL) {
			dtrace_difo_t *dp = pred->dtp_difo;
			uint64_t rval;

			rval = dtrace_dif_emulate(dp, &mstate, vstate, state);

			if (!(*flags & CPU_DTRACE_ERROR) && !rval) {
				dtrace_cacheid_t cid = probe->dtpr_predcache;

				if (cid != DTRACE_CACHEIDNONE && !onintr) {
					/*
					 * Update the predicate cache...
					 */
					ASSERT(cid == pred->dtp_cacheid);
					curthread->t_predcache = cid;
				}

				continue;
			}
		}

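		/*
		 * The predicate (if any) has passed; now walk the action
		 * list.  Aggregating actions are dispatched directly to
		 * dtrace_aggregate(); everything else either has dedicated
		 * handling here or evaluates its DIF and stores the result
		 * into the record at valoffs.
		 */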
		for (act = ecb->dte_action; !(*flags & CPU_DTRACE_ERROR) &&
		    act != NULL; act = act->dta_next) {
			size_t valoffs;
			dtrace_difo_t *dp;
			dtrace_recdesc_t *rec = &act->dta_rec;

			size = rec->dtrd_size;
			valoffs = offs + rec->dtrd_offset;

			if (DTRACEACT_ISAGG(act->dta_kind)) {
				uint64_t v = 0xbad;
				dtrace_aggregation_t *agg;

				agg = (dtrace_aggregation_t *)act;

				if ((dp = act->dta_difo) != NULL)
					v = dtrace_dif_emulate(dp,
					    &mstate, vstate, state);

				if (*flags & CPU_DTRACE_ERROR)
					continue;

				/*
				 * Note that we always pass the expression
				 * value from the previous iteration of the
				 * action loop.  This value will only be used
				 * if there is an expression argument to the
				 * aggregating action, denoted by the
				 * dtag_hasarg field.
				 */
				dtrace_aggregate(agg, buf,
				    offs, aggbuf, v, val);
				continue;
			}

			switch (act->dta_kind) {
			case DTRACEACT_STOP:
				if (dtrace_priv_proc_destructive(state))
					dtrace_action_stop();
				continue;

			case DTRACEACT_BREAKPOINT:
				if (dtrace_priv_kernel_destructive(state))
					dtrace_action_breakpoint(ecb);
				continue;

			case DTRACEACT_PANIC:
				if (dtrace_priv_kernel_destructive(state))
					dtrace_action_panic(ecb);
				continue;

			case DTRACEACT_STACK:
				if (!dtrace_priv_kernel(state))
					continue;

				dtrace_getpcstack((pc_t *)(tomax + valoffs),
				    size / sizeof (pc_t), probe->dtpr_aframes,
				    DTRACE_ANCHORED(probe) ? NULL :
				    (uint32_t *)arg0);
				continue;

			case DTRACEACT_JSTACK:
			case DTRACEACT_USTACK:
				if (!dtrace_priv_proc(state))
					continue;

				/*
				 * See comment in DIF_VAR_PID.
				 */
				if (DTRACE_ANCHORED(mstate.dtms_probe) &&
				    CPU_ON_INTR(CPU)) {
					int depth = DTRACE_USTACK_NFRAMES(
					    rec->dtrd_arg) + 1;

					dtrace_bzero((void *)(tomax + valoffs),
					    DTRACE_USTACK_STRSIZE(rec->dtrd_arg)
					    + depth * sizeof (uint64_t));

					continue;
				}

				if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0 &&
				    curproc->p_dtrace_helpers != NULL) {
					/*
					 * This is the slow path -- we have
					 * allocated string space, and we're
					 * getting the stack of a process that
					 * has helpers.  Call into a separate
					 * routine to perform this processing.
					 */
					dtrace_action_ustack(&mstate, state,
					    (uint64_t *)(tomax + valoffs),
					    rec->dtrd_arg);
					continue;
				}

				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
				dtrace_getupcstack((uint64_t *)
				    (tomax + valoffs),
				    DTRACE_USTACK_NFRAMES(rec->dtrd_arg) + 1);
				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
				continue;

			default:
				break;
			}

			dp = act->dta_difo;
			ASSERT(dp != NULL);

			val = dtrace_dif_emulate(dp, &mstate, vstate, state);

			if (*flags & CPU_DTRACE_ERROR)
				continue;

			switch (act->dta_kind) {
			case DTRACEACT_SPECULATE: {
				dtrace_rechdr_t *dtrh;

				ASSERT(buf == &state->dts_buffer[cpuid]);
				buf = dtrace_speculation_buffer(state,
				    cpuid, val);

				if (buf == NULL) {
					*flags |= CPU_DTRACE_DROP;
					continue;
				}

				offs = dtrace_buffer_reserve(buf,
				    ecb->dte_needed, ecb->dte_alignment,
				    state, NULL);

				if (offs < 0) {
					*flags |= CPU_DTRACE_DROP;
					continue;
				}

				tomax = buf->dtb_tomax;
				ASSERT(tomax != NULL);

				if (ecb->dte_size == 0)
					continue;

				ASSERT3U(ecb->dte_size, >=,
				    sizeof (dtrace_rechdr_t));
				dtrh = ((void *)(tomax + offs));
				dtrh->dtrh_epid = ecb->dte_epid;
				/*
				 * When the speculation is committed, all of
				 * the records in the speculative buffer will
				 * have their timestamps set to the commit
				 * time.  Until then, it is set to a sentinel
				 * value, for debuggability.
				 */
7771
DTRACE_RECORD_STORE_TIMESTAMP(dtrh, UINT64_MAX);
7772
continue;
7773
}
7774
7775
case DTRACEACT_PRINTM: {
7776
/*
7777
* printm() assumes that the DIF returns a
7778
* pointer returned by memref(). memref() is a
7779
* subroutine that is used to get around the
7780
* single-valued returns of DIF and is assumed
7781
* to always be allocated in the scratch space.
7782
* Therefore, we need to validate that the
7783
* pointer given to printm() is in the scratch
7784
* space in order to avoid a potential panic.
7785
*/
7786
uintptr_t *memref = (uintptr_t *)(uintptr_t) val;
7787
7788
if (!DTRACE_INSCRATCHPTR(&mstate,
7789
(uintptr_t) memref,
7790
sizeof (uintptr_t) + sizeof (size_t))) {
7791
*flags |= CPU_DTRACE_BADADDR;
7792
continue;
7793
}
7794
7795
/* Get the size from the memref. */
7796
size = memref[1];
7797
7798
/*
7799
* Check if the size exceeds the allocated
7800
* buffer size.
7801
*/
7802
if (size + sizeof (size_t) >
7803
dp->dtdo_rtype.dtdt_size) {
7804
/* Flag a drop! */
7805
*flags |= CPU_DTRACE_DROP;
7806
continue;
7807
}
7808
7809
/* Store the size in the buffer first. */
7810
DTRACE_STORE(size_t, tomax, valoffs, size);
7811
7812
/*
7813
* Offset the buffer address to the start
7814
* of the data.
7815
*/
7816
valoffs += sizeof(size_t);
7817
7818
/*
7819
* Reset to the memory address rather than
7820
* the memref array, then let the BYREF
7821
* code below do the work to store the
7822
* memory data in the buffer.
7823
*/
7824
val = memref[0];
7825
break;
7826
}
7827
7828
case DTRACEACT_CHILL:
7829
if (dtrace_priv_kernel_destructive(state))
7830
dtrace_action_chill(&mstate, val);
7831
continue;
7832
7833
case DTRACEACT_RAISE:
7834
if (dtrace_priv_proc_destructive(state))
7835
dtrace_action_raise(val);
7836
continue;
7837
7838
case DTRACEACT_COMMIT:
7839
ASSERT(!committed);
7840
7841
/*
7842
* We need to commit our buffer state.
7843
*/
7844
if (ecb->dte_size)
7845
buf->dtb_offset = offs + ecb->dte_size;
7846
buf = &state->dts_buffer[cpuid];
7847
dtrace_speculation_commit(state, cpuid, val);
7848
committed = 1;
7849
continue;
7850
7851
case DTRACEACT_DISCARD:
7852
dtrace_speculation_discard(state, cpuid, val);
7853
continue;
7854
7855
case DTRACEACT_DIFEXPR:
7856
case DTRACEACT_LIBACT:
7857
case DTRACEACT_PRINTF:
7858
case DTRACEACT_PRINTA:
7859
case DTRACEACT_SYSTEM:
7860
case DTRACEACT_FREOPEN:
7861
case DTRACEACT_TRACEMEM:
7862
break;
7863
7864
case DTRACEACT_TRACEMEM_DYNSIZE:
7865
tracememsize = val;
7866
break;
7867
7868
case DTRACEACT_SYM:
7869
case DTRACEACT_MOD:
7870
if (!dtrace_priv_kernel(state))
7871
continue;
7872
break;
7873
7874
case DTRACEACT_USYM:
7875
case DTRACEACT_UMOD:
7876
case DTRACEACT_UADDR: {
7877
#ifdef illumos
7878
struct pid *pid = curthread->t_procp->p_pidp;
7879
#endif
7880
7881
if (!dtrace_priv_proc(state))
7882
continue;
7883
7884
DTRACE_STORE(uint64_t, tomax,
7885
#ifdef illumos
7886
valoffs, (uint64_t)pid->pid_id);
7887
#else
7888
valoffs, (uint64_t) curproc->p_pid);
7889
#endif
7890
DTRACE_STORE(uint64_t, tomax,
7891
valoffs + sizeof (uint64_t), val);
7892
7893
continue;
7894
}
7895
7896
case DTRACEACT_EXIT: {
7897
/*
7898
* For the exit action, we are going to attempt
7899
* to atomically set our activity to be
7900
* draining. If this fails (either because
7901
* another CPU has beat us to the exit action,
7902
* or because our current activity is something
7903
* other than ACTIVE or WARMUP), we will
7904
* continue. This assures that the exit action
7905
* can be successfully recorded at most once
7906
* when we're in the ACTIVE state. If we're
7907
* encountering the exit() action while in
7908
* COOLDOWN, however, we want to honor the new
7909
* status code. (We know that we're the only
7910
* thread in COOLDOWN, so there is no race.)
7911
*/
7912
void *activity = &state->dts_activity;
7913
dtrace_activity_t curstate = state->dts_activity;
7914
7915
if (curstate == DTRACE_ACTIVITY_COOLDOWN)
7916
break;
7917
7918
if (curstate != DTRACE_ACTIVITY_WARMUP)
7919
curstate = DTRACE_ACTIVITY_ACTIVE;
7920
7921
if (dtrace_cas32(activity, curstate,
7922
DTRACE_ACTIVITY_DRAINING) != curstate) {
7923
*flags |= CPU_DTRACE_DROP;
7924
continue;
7925
}
7926
7927
break;
7928
}
7929
7930
default:
7931
ASSERT(0);
7932
}
7933
7934
if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF ||
7935
dp->dtdo_rtype.dtdt_flags & DIF_TF_BYUREF) {
7936
uintptr_t end = valoffs + size;
7937
7938
if (tracememsize != 0 &&
7939
valoffs + tracememsize < end) {
7940
end = valoffs + tracememsize;
7941
tracememsize = 0;
7942
}
7943
7944
if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF &&
7945
!dtrace_vcanload((void *)(uintptr_t)val,
7946
&dp->dtdo_rtype, NULL, &mstate, vstate))
7947
continue;
7948
7949
dtrace_store_by_ref(dp, tomax, size, &valoffs,
7950
&val, end, act->dta_intuple,
7951
dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF ?
7952
DIF_TF_BYREF: DIF_TF_BYUREF);
7953
continue;
7954
}
7955
7956
switch (size) {
7957
case 0:
7958
break;
7959
7960
case sizeof (uint8_t):
7961
DTRACE_STORE(uint8_t, tomax, valoffs, val);
7962
break;
7963
case sizeof (uint16_t):
7964
DTRACE_STORE(uint16_t, tomax, valoffs, val);
7965
break;
7966
case sizeof (uint32_t):
7967
DTRACE_STORE(uint32_t, tomax, valoffs, val);
7968
break;
7969
case sizeof (uint64_t):
7970
DTRACE_STORE(uint64_t, tomax, valoffs, val);
7971
break;
7972
default:
7973
/*
7974
* Any other size should have been returned by
7975
* reference, not by value.
7976
*/
7977
ASSERT(0);
7978
break;
7979
}
7980
}
7981
7982
if (*flags & CPU_DTRACE_DROP)
7983
continue;
7984
7985
if (*flags & CPU_DTRACE_FAULT) {
7986
int ndx;
7987
dtrace_action_t *err;
7988
7989
buf->dtb_errors++;
7990
7991
if (probe->dtpr_id == dtrace_probeid_error) {
7992
/*
7993
* There's nothing we can do -- we had an
7994
* error on the error probe. We bump an
7995
* error counter to at least indicate that
7996
* this condition happened.
7997
*/
7998
dtrace_error(&state->dts_dblerrors);
7999
continue;
8000
}
8001
8002
if (vtime) {
8003
/*
8004
* Before recursing on dtrace_probe(), we
8005
* need to explicitly clear out our start
8006
* time to prevent it from being accumulated
8007
* into t_dtrace_vtime.
8008
*/
8009
curthread->t_dtrace_start = 0;
8010
}
8011
8012
/*
8013
* Iterate over the actions to figure out which action
8014
* we were processing when we experienced the error.
8015
* Note that act points _past_ the faulting action; if
8016
* act is ecb->dte_action, the fault was in the
8017
* predicate, if it's ecb->dte_action->dta_next it's
8018
* in action #1, and so on.
8019
*/
8020
for (err = ecb->dte_action, ndx = 0;
8021
err != act; err = err->dta_next, ndx++)
8022
continue;
8023
8024
dtrace_probe_error(state, ecb->dte_epid, ndx,
8025
(mstate.dtms_present & DTRACE_MSTATE_FLTOFFS) ?
8026
mstate.dtms_fltoffs : -1, DTRACE_FLAGS2FLT(*flags),
8027
cpu_core[cpuid].cpuc_dtrace_illval);
8028
8029
continue;
8030
}
8031
8032
if (!committed)
8033
buf->dtb_offset = offs + ecb->dte_size;
8034
}
8035
8036
if (vtime)
8037
curthread->t_dtrace_start = dtrace_gethrtime();
8038
8039
dtrace_probe_exit(cookie);
8040
}
8041
8042
/*
8043
* DTrace Probe Hashing Functions
8044
*
8045
* The functions in this section (and indeed, the functions in remaining
8046
* sections) are not _called_ from probe context. (Any exceptions to this are
8047
* marked with a "Note:".) Rather, they are called from elsewhere in the
8048
* DTrace framework to look-up probes in, add probes to and remove probes from
8049
* the DTrace probe hashes. (Each probe is hashed by each element of the
8050
* probe tuple -- allowing for fast lookups, regardless of what was
8051
* specified.)
8052
*/
8053
static uint_t
8054
dtrace_hash_str(const char *p)
8055
{
8056
unsigned int g;
8057
uint_t hval = 0;
8058
8059
while (*p) {
8060
hval = (hval << 4) + *p++;
8061
if ((g = (hval & 0xf0000000)) != 0)
8062
hval ^= g >> 24;
8063
hval &= ~g;
8064
}
8065
return (hval);
8066
}
8067
8068
static dtrace_hash_t *
8069
dtrace_hash_create(size_t stroffs, size_t nextoffs, size_t prevoffs)
8070
{
8071
dtrace_hash_t *hash = kmem_zalloc(sizeof (dtrace_hash_t), KM_SLEEP);
8072
8073
hash->dth_stroffs = stroffs;
8074
hash->dth_nextoffs = nextoffs;
8075
hash->dth_prevoffs = prevoffs;
8076
8077
hash->dth_size = 1;
8078
hash->dth_mask = hash->dth_size - 1;
8079
8080
hash->dth_tab = kmem_zalloc(hash->dth_size *
8081
sizeof (dtrace_hashbucket_t *), KM_SLEEP);
8082
8083
return (hash);
8084
}
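
/*
 * A sketch of typical usage: the framework keys each of its probe hashes
 * off one element of the probe tuple by passing the offsets of that
 * element's string and chain pointers within dtrace_probe_t. The actual
 * call sites live elsewhere in this file (in the driver cookbook
 * functions); the module hash, for instance, is created along these lines:
 *
 *	dtrace_bymod = dtrace_hash_create(
 *	    offsetof(dtrace_probe_t, dtpr_mod),
 *	    offsetof(dtrace_probe_t, dtpr_nextmod),
 *	    offsetof(dtrace_probe_t, dtpr_prevmod));
 */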

static void
dtrace_hash_destroy(dtrace_hash_t *hash)
{
#ifdef DEBUG
	int i;

	for (i = 0; i < hash->dth_size; i++)
		ASSERT(hash->dth_tab[i] == NULL);
#endif

	kmem_free(hash->dth_tab,
	    hash->dth_size * sizeof (dtrace_hashbucket_t *));
	kmem_free(hash, sizeof (dtrace_hash_t));
}

static void
dtrace_hash_resize(dtrace_hash_t *hash)
{
	int size = hash->dth_size, i, ndx;
	int new_size = hash->dth_size << 1;
	int new_mask = new_size - 1;
	dtrace_hashbucket_t **new_tab, *bucket, *next;

	ASSERT((new_size & new_mask) == 0);

	new_tab = kmem_zalloc(new_size * sizeof (void *), KM_SLEEP);

	for (i = 0; i < size; i++) {
		for (bucket = hash->dth_tab[i]; bucket != NULL; bucket = next) {
			dtrace_probe_t *probe = bucket->dthb_chain;

			ASSERT(probe != NULL);
			ndx = DTRACE_HASHSTR(hash, probe) & new_mask;

			next = bucket->dthb_next;
			bucket->dthb_next = new_tab[ndx];
			new_tab[ndx] = bucket;
		}
	}

	kmem_free(hash->dth_tab, hash->dth_size * sizeof (void *));
	hash->dth_tab = new_tab;
	hash->dth_size = new_size;
	hash->dth_mask = new_mask;
}

static void
dtrace_hash_add(dtrace_hash_t *hash, dtrace_probe_t *new)
{
	int hashval = DTRACE_HASHSTR(hash, new);
	int ndx = hashval & hash->dth_mask;
	dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
	dtrace_probe_t **nextp, **prevp;

	for (; bucket != NULL; bucket = bucket->dthb_next) {
		if (DTRACE_HASHEQ(hash, bucket->dthb_chain, new))
			goto add;
	}

	if ((hash->dth_nbuckets >> 1) > hash->dth_size) {
		dtrace_hash_resize(hash);
		dtrace_hash_add(hash, new);
		return;
	}

	bucket = kmem_zalloc(sizeof (dtrace_hashbucket_t), KM_SLEEP);
	bucket->dthb_next = hash->dth_tab[ndx];
	hash->dth_tab[ndx] = bucket;
	hash->dth_nbuckets++;

add:
	nextp = DTRACE_HASHNEXT(hash, new);
	ASSERT(*nextp == NULL && *(DTRACE_HASHPREV(hash, new)) == NULL);
	*nextp = bucket->dthb_chain;

	if (bucket->dthb_chain != NULL) {
		prevp = DTRACE_HASHPREV(hash, bucket->dthb_chain);
		ASSERT(*prevp == NULL);
		*prevp = new;
	}

	bucket->dthb_chain = new;
	bucket->dthb_len++;
}

static dtrace_probe_t *
dtrace_hash_lookup(dtrace_hash_t *hash, dtrace_probe_t *template)
{
	int hashval = DTRACE_HASHSTR(hash, template);
	int ndx = hashval & hash->dth_mask;
	dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];

	for (; bucket != NULL; bucket = bucket->dthb_next) {
		if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
			return (bucket->dthb_chain);
	}

	return (NULL);
}

static int
dtrace_hash_collisions(dtrace_hash_t *hash, dtrace_probe_t *template)
{
	int hashval = DTRACE_HASHSTR(hash, template);
	int ndx = hashval & hash->dth_mask;
	dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];

	for (; bucket != NULL; bucket = bucket->dthb_next) {
		if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
			return (bucket->dthb_len);
	}

	return (0);
}

static void
dtrace_hash_remove(dtrace_hash_t *hash, dtrace_probe_t *probe)
{
	int ndx = DTRACE_HASHSTR(hash, probe) & hash->dth_mask;
	dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];

	dtrace_probe_t **prevp = DTRACE_HASHPREV(hash, probe);
	dtrace_probe_t **nextp = DTRACE_HASHNEXT(hash, probe);

	/*
	 * Find the bucket that we're removing this probe from.
	 */
	for (; bucket != NULL; bucket = bucket->dthb_next) {
		if (DTRACE_HASHEQ(hash, bucket->dthb_chain, probe))
			break;
	}

	ASSERT(bucket != NULL);

	if (*prevp == NULL) {
		if (*nextp == NULL) {
			/*
			 * The removed probe was the only probe on this
			 * bucket; we need to remove the bucket.
			 */
			dtrace_hashbucket_t *b = hash->dth_tab[ndx];

			ASSERT(bucket->dthb_chain == probe);
			ASSERT(b != NULL);

			if (b == bucket) {
				hash->dth_tab[ndx] = bucket->dthb_next;
			} else {
				while (b->dthb_next != bucket)
					b = b->dthb_next;
				b->dthb_next = bucket->dthb_next;
			}

			ASSERT(hash->dth_nbuckets > 0);
			hash->dth_nbuckets--;
			kmem_free(bucket, sizeof (dtrace_hashbucket_t));
			return;
		}

		bucket->dthb_chain = *nextp;
	} else {
		*(DTRACE_HASHNEXT(hash, *prevp)) = *nextp;
	}

	if (*nextp != NULL)
		*(DTRACE_HASHPREV(hash, *nextp)) = *prevp;
}

/*
 * DTrace Utility Functions
 *
 * These are random utility functions that are _not_ called from probe context.
 */
static int
dtrace_badattr(const dtrace_attribute_t *a)
{
	return (a->dtat_name > DTRACE_STABILITY_MAX ||
	    a->dtat_data > DTRACE_STABILITY_MAX ||
	    a->dtat_class > DTRACE_CLASS_MAX);
}

/*
 * Return a duplicate copy of a string. If the specified string is NULL,
 * this function returns a zero-length string.
 */
static char *
dtrace_strdup(const char *str)
{
	char *new = kmem_zalloc((str != NULL ? strlen(str) : 0) + 1, KM_SLEEP);

	if (str != NULL)
		(void) strcpy(new, str);

	return (new);
}

#define	DTRACE_ISALPHA(c)	\
	(((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))

static int
dtrace_badname(const char *s)
{
	char c;

	if (s == NULL || (c = *s++) == '\0')
		return (0);

	if (!DTRACE_ISALPHA(c) && c != '-' && c != '_' && c != '.')
		return (1);

	while ((c = *s++) != '\0') {
		if (!DTRACE_ISALPHA(c) && (c < '0' || c > '9') &&
		    c != '-' && c != '_' && c != '.' && c != '`')
			return (1);
	}

	return (0);
}
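
/*
 * A few illustrative cases of the rules above: dtrace_badname("fbt") and
 * dtrace_badname("unix`func") both return 0, while dtrace_badname("1abc")
 * (leading digit) and dtrace_badname("a b") (embedded space) return 1.
 */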

static void
dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp)
{
	uint32_t priv;

#ifdef illumos
	if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
		/*
		 * For DTRACE_PRIV_ALL, the uid and zoneid don't matter.
		 */
		priv = DTRACE_PRIV_ALL;
	} else {
		*uidp = crgetuid(cr);
		*zoneidp = crgetzoneid(cr);

		priv = 0;
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE))
			priv |= DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER;
		else if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE))
			priv |= DTRACE_PRIV_USER;
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE))
			priv |= DTRACE_PRIV_PROC;
		if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
			priv |= DTRACE_PRIV_OWNER;
		if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
			priv |= DTRACE_PRIV_ZONEOWNER;
	}
#else
	priv = DTRACE_PRIV_ALL;
#endif

	*privp = priv;
}

#ifdef DTRACE_ERRDEBUG
static void
dtrace_errdebug(const char *str)
{
	int hval = dtrace_hash_str(str) % DTRACE_ERRHASHSZ;
	int occupied = 0;

	mutex_enter(&dtrace_errlock);
	dtrace_errlast = str;
	dtrace_errthread = curthread;

	while (occupied++ < DTRACE_ERRHASHSZ) {
		if (dtrace_errhash[hval].dter_msg == str) {
			dtrace_errhash[hval].dter_count++;
			goto out;
		}

		if (dtrace_errhash[hval].dter_msg != NULL) {
			hval = (hval + 1) % DTRACE_ERRHASHSZ;
			continue;
		}

		dtrace_errhash[hval].dter_msg = str;
		dtrace_errhash[hval].dter_count = 1;
		goto out;
	}

	panic("dtrace: undersized error hash");
out:
	mutex_exit(&dtrace_errlock);
}
#endif

/*
 * DTrace Matching Functions
 *
 * These functions are used to match groups of probes, given some elements of
 * a probe tuple, or some globbed expressions for elements of a probe tuple.
 */
static int
dtrace_match_priv(const dtrace_probe_t *prp, uint32_t priv, uid_t uid,
    zoneid_t zoneid)
{
	if (priv != DTRACE_PRIV_ALL) {
		uint32_t ppriv = prp->dtpr_provider->dtpv_priv.dtpp_flags;
		uint32_t match = priv & ppriv;

		/*
		 * No PRIV_DTRACE_* privileges...
		 */
		if ((priv & (DTRACE_PRIV_PROC | DTRACE_PRIV_USER |
		    DTRACE_PRIV_KERNEL)) == 0)
			return (0);

		/*
		 * No matching bits, but there were bits to match...
		 */
		if (match == 0 && ppriv != 0)
			return (0);

		/*
		 * Need to have permissions to the process, but don't...
		 */
		if (((ppriv & ~match) & DTRACE_PRIV_OWNER) != 0 &&
		    uid != prp->dtpr_provider->dtpv_priv.dtpp_uid) {
			return (0);
		}

		/*
		 * Need to be in the same zone unless we possess the
		 * privilege to examine all zones.
		 */
		if (((ppriv & ~match) & DTRACE_PRIV_ZONEOWNER) != 0 &&
		    zoneid != prp->dtpr_provider->dtpv_priv.dtpp_zoneid) {
			return (0);
		}
	}

	return (1);
}

/*
 * dtrace_match_probe compares a dtrace_probe_t to a pre-compiled key, which
 * consists of input pattern strings and an ops-vector to evaluate them.
 * This function returns >0 for match, 0 for no match, and <0 for error.
 */
static int
dtrace_match_probe(const dtrace_probe_t *prp, const dtrace_probekey_t *pkp,
    uint32_t priv, uid_t uid, zoneid_t zoneid)
{
	dtrace_provider_t *pvp = prp->dtpr_provider;
	int rv;

	if (pvp->dtpv_defunct)
		return (0);

	if ((rv = pkp->dtpk_pmatch(pvp->dtpv_name, pkp->dtpk_prov, 0)) <= 0)
		return (rv);

	if ((rv = pkp->dtpk_mmatch(prp->dtpr_mod, pkp->dtpk_mod, 0)) <= 0)
		return (rv);

	if ((rv = pkp->dtpk_fmatch(prp->dtpr_func, pkp->dtpk_func, 0)) <= 0)
		return (rv);

	if ((rv = pkp->dtpk_nmatch(prp->dtpr_name, pkp->dtpk_name, 0)) <= 0)
		return (rv);

	if (dtrace_match_priv(prp, priv, uid, zoneid) == 0)
		return (0);

	return (rv);
}

/*
 * dtrace_match_glob() is a safe kernel implementation of the gmatch(3GEN)
 * interface for matching a glob pattern 'p' to an input string 's'. Unlike
 * libc's version, the kernel version only applies to 8-bit ASCII strings.
 * In addition, all of the recursion cases except for '*' matching have been
 * unwound. For '*', we still implement recursive evaluation, but a depth
 * counter is maintained and matching is aborted if we recurse too deep.
 * The function returns 0 if no match, >0 if match, and <0 if recursion error.
 */
static int
dtrace_match_glob(const char *s, const char *p, int depth)
{
	const char *olds;
	char s1, c;
	int gs;

	if (depth > DTRACE_PROBEKEY_MAXDEPTH)
		return (-1);

	if (s == NULL)
		s = "";	/* treat NULL as empty string */

top:
	olds = s;
	s1 = *s++;

	if (p == NULL)
		return (0);

	if ((c = *p++) == '\0')
		return (s1 == '\0');

	switch (c) {
	case '[': {
		int ok = 0, notflag = 0;
		char lc = '\0';

		if (s1 == '\0')
			return (0);

		if (*p == '!') {
			notflag = 1;
			p++;
		}

		if ((c = *p++) == '\0')
			return (0);

		do {
			if (c == '-' && lc != '\0' && *p != ']') {
				if ((c = *p++) == '\0')
					return (0);
				if (c == '\\' && (c = *p++) == '\0')
					return (0);

				if (notflag) {
					if (s1 < lc || s1 > c)
						ok++;
					else
						return (0);
				} else if (lc <= s1 && s1 <= c)
					ok++;

			} else if (c == '\\' && (c = *p++) == '\0')
				return (0);

			lc = c;	/* save left-hand 'c' for next iteration */

			if (notflag) {
				if (s1 != c)
					ok++;
				else
					return (0);
			} else if (s1 == c)
				ok++;

			if ((c = *p++) == '\0')
				return (0);

		} while (c != ']');

		if (ok)
			goto top;

		return (0);
	}

	case '\\':
		if ((c = *p++) == '\0')
			return (0);
		/*FALLTHRU*/

	default:
		if (c != s1)
			return (0);
		/*FALLTHRU*/

	case '?':
		if (s1 != '\0')
			goto top;
		return (0);

	case '*':
		while (*p == '*')
			p++;	/* consecutive *'s are identical to a single one */

		if (*p == '\0')
			return (1);

		for (s = olds; *s != '\0'; s++) {
			if ((gs = dtrace_match_glob(s, p, depth + 1)) != 0)
				return (gs);
		}

		return (0);
	}
}
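
/*
 * Some illustrative cases: dtrace_match_glob("read", "re*", 0) and
 * dtrace_match_glob("read", "[a-z]ead", 0) return 1;
 * dtrace_match_glob("read", "write", 0) returns 0; and a pattern whose
 * '*' expansion recurses past DTRACE_PROBEKEY_MAXDEPTH returns -1.
 */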

/*ARGSUSED*/
static int
dtrace_match_string(const char *s, const char *p, int depth)
{
	return (s != NULL && strcmp(s, p) == 0);
}

/*ARGSUSED*/
static int
dtrace_match_nul(const char *s, const char *p, int depth)
{
	return (1);	/* always match the empty pattern */
}

/*ARGSUSED*/
static int
dtrace_match_nonzero(const char *s, const char *p, int depth)
{
	return (s != NULL && s[0] != '\0');
}

static int
dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
    zoneid_t zoneid, int (*matched)(dtrace_probe_t *, void *), void *arg)
{
	dtrace_probe_t template, *probe;
	dtrace_hash_t *hash = NULL;
	int len, best = INT_MAX, nmatched = 0;
	dtrace_id_t i;

	ASSERT(MUTEX_HELD(&dtrace_lock));

	/*
	 * If the probe ID is specified in the key, just lookup by ID and
	 * invoke the match callback once if a matching probe is found.
	 */
	if (pkp->dtpk_id != DTRACE_IDNONE) {
		if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL &&
		    dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) {
			(void) (*matched)(probe, arg);
			nmatched++;
		}
		return (nmatched);
	}

	template.dtpr_mod = (char *)pkp->dtpk_mod;
	template.dtpr_func = (char *)pkp->dtpk_func;
	template.dtpr_name = (char *)pkp->dtpk_name;

	/*
	 * We want to find the most distinct of the module name, function
	 * name, and name. So for each one that is not a glob pattern or
	 * empty string, we perform a lookup in the corresponding hash and
	 * use the hash table with the fewest collisions to do our search.
	 */
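	/*
	 * For instance, for a key like fbt:kernel:vm_fault:entry, the
	 * function hash typically has far fewer collisions than the module
	 * hash (nearly every fbt probe lives in "kernel") or the name hash
	 * ("entry" matches roughly half of all fbt probes), so the search
	 * below would usually proceed over dtrace_byfunc.
	 */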
	if (pkp->dtpk_mmatch == &dtrace_match_string &&
	    (len = dtrace_hash_collisions(dtrace_bymod, &template)) < best) {
		best = len;
		hash = dtrace_bymod;
	}

	if (pkp->dtpk_fmatch == &dtrace_match_string &&
	    (len = dtrace_hash_collisions(dtrace_byfunc, &template)) < best) {
		best = len;
		hash = dtrace_byfunc;
	}

	if (pkp->dtpk_nmatch == &dtrace_match_string &&
	    (len = dtrace_hash_collisions(dtrace_byname, &template)) < best) {
		best = len;
		hash = dtrace_byname;
	}

	/*
	 * If we did not select a hash table, iterate over every probe and
	 * invoke our callback for each one that matches our input probe key.
	 */
	if (hash == NULL) {
		for (i = 0; i < dtrace_nprobes; i++) {
			if ((probe = dtrace_probes[i]) == NULL ||
			    dtrace_match_probe(probe, pkp, priv, uid,
			    zoneid) <= 0)
				continue;

			nmatched++;

			if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT)
				break;
		}

		return (nmatched);
	}

	/*
	 * If we selected a hash table, iterate over each probe of the same key
	 * name and invoke the callback for every probe that matches the other
	 * attributes of our input probe key.
	 */
	for (probe = dtrace_hash_lookup(hash, &template); probe != NULL;
	    probe = *(DTRACE_HASHNEXT(hash, probe))) {

		if (dtrace_match_probe(probe, pkp, priv, uid, zoneid) <= 0)
			continue;

		nmatched++;

		if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT)
			break;
	}

	return (nmatched);
}

/*
 * Return the function pointer dtrace_probecmp() should use to compare the
 * specified pattern with a string. For NULL or empty patterns, we select
 * dtrace_match_nul(). For glob pattern strings, we use dtrace_match_glob().
 * For non-empty non-glob strings, we use dtrace_match_string().
 */
static dtrace_probekey_f *
dtrace_probekey_func(const char *p)
{
	char c;

	if (p == NULL || *p == '\0')
		return (&dtrace_match_nul);

	while ((c = *p++) != '\0') {
		if (c == '[' || c == '?' || c == '*' || c == '\\')
			return (&dtrace_match_glob);
	}

	return (&dtrace_match_string);
}

/*
 * Build a probe comparison key for use with dtrace_match_probe() from the
 * given probe description. By convention, a null key only matches anchored
 * probes: if each field is the empty string, reset dtpk_fmatch to
 * dtrace_match_nonzero().
 */
static void
dtrace_probekey(dtrace_probedesc_t *pdp, dtrace_probekey_t *pkp)
{
	pkp->dtpk_prov = pdp->dtpd_provider;
	pkp->dtpk_pmatch = dtrace_probekey_func(pdp->dtpd_provider);

	pkp->dtpk_mod = pdp->dtpd_mod;
	pkp->dtpk_mmatch = dtrace_probekey_func(pdp->dtpd_mod);

	pkp->dtpk_func = pdp->dtpd_func;
	pkp->dtpk_fmatch = dtrace_probekey_func(pdp->dtpd_func);

	pkp->dtpk_name = pdp->dtpd_name;
	pkp->dtpk_nmatch = dtrace_probekey_func(pdp->dtpd_name);

	pkp->dtpk_id = pdp->dtpd_id;

	if (pkp->dtpk_id == DTRACE_IDNONE &&
	    pkp->dtpk_pmatch == &dtrace_match_nul &&
	    pkp->dtpk_mmatch == &dtrace_match_nul &&
	    pkp->dtpk_fmatch == &dtrace_match_nul &&
	    pkp->dtpk_nmatch == &dtrace_match_nul)
		pkp->dtpk_fmatch = &dtrace_match_nonzero;
}
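
/*
 * For example, a description like syscall::read:entry yields
 * dtpk_pmatch = dtpk_fmatch = dtpk_nmatch = dtrace_match_string and
 * dtpk_mmatch = dtrace_match_nul (empty module field), while the empty
 * description ::: leaves every matcher as dtrace_match_nul and therefore
 * has its dtpk_fmatch reset to dtrace_match_nonzero.
 */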

/*
 * DTrace Provider-to-Framework API Functions
 *
 * These functions implement much of the Provider-to-Framework API, as
 * described in <sys/dtrace.h>. The parts of the API not in this section are
 * the functions in the API for probe management (found below), and
 * dtrace_probe() itself (found above).
 */

/*
 * Register the calling provider with the DTrace framework. This should
 * generally be called by DTrace providers in their attach(9E) entry point.
 */
int
dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv,
    cred_t *cr, const dtrace_pops_t *pops, void *arg, dtrace_provider_id_t *idp)
{
	dtrace_provider_t *provider;

	if (name == NULL || pap == NULL || pops == NULL || idp == NULL) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "arguments", name ? name : "<NULL>");
		return (EINVAL);
	}

	if (name[0] == '\0' || dtrace_badname(name)) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "provider name", name);
		return (EINVAL);
	}

	if ((pops->dtps_provide == NULL && pops->dtps_provide_module == NULL) ||
	    pops->dtps_enable == NULL || pops->dtps_disable == NULL ||
	    pops->dtps_destroy == NULL ||
	    ((pops->dtps_resume == NULL) != (pops->dtps_suspend == NULL))) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "provider ops", name);
		return (EINVAL);
	}

	if (dtrace_badattr(&pap->dtpa_provider) ||
	    dtrace_badattr(&pap->dtpa_mod) ||
	    dtrace_badattr(&pap->dtpa_func) ||
	    dtrace_badattr(&pap->dtpa_name) ||
	    dtrace_badattr(&pap->dtpa_args)) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "provider attributes", name);
		return (EINVAL);
	}

	if (priv & ~DTRACE_PRIV_ALL) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "privilege attributes", name);
		return (EINVAL);
	}

	if ((priv & DTRACE_PRIV_KERNEL) &&
	    (priv & (DTRACE_PRIV_USER | DTRACE_PRIV_OWNER)) &&
	    pops->dtps_usermode == NULL) {
		cmn_err(CE_WARN, "failed to register provider '%s': need "
		    "dtps_usermode() op for given privilege attributes", name);
		return (EINVAL);
	}

	provider = kmem_zalloc(sizeof (dtrace_provider_t), KM_SLEEP);
	provider->dtpv_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
	(void) strcpy(provider->dtpv_name, name);

	provider->dtpv_attr = *pap;
	provider->dtpv_priv.dtpp_flags = priv;
	if (cr != NULL) {
		provider->dtpv_priv.dtpp_uid = crgetuid(cr);
		provider->dtpv_priv.dtpp_zoneid = crgetzoneid(cr);
	}
	provider->dtpv_pops = *pops;

	if (pops->dtps_provide == NULL) {
		ASSERT(pops->dtps_provide_module != NULL);
		provider->dtpv_pops.dtps_provide =
		    (void (*)(void *, dtrace_probedesc_t *))dtrace_nullop;
	}

	if (pops->dtps_provide_module == NULL) {
		ASSERT(pops->dtps_provide != NULL);
		provider->dtpv_pops.dtps_provide_module =
		    (void (*)(void *, modctl_t *))dtrace_nullop;
	}

	if (pops->dtps_suspend == NULL) {
		ASSERT(pops->dtps_resume == NULL);
		provider->dtpv_pops.dtps_suspend =
		    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop;
		provider->dtpv_pops.dtps_resume =
		    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop;
	}

	provider->dtpv_arg = arg;
	*idp = (dtrace_provider_id_t)provider;

	if (pops == &dtrace_provider_ops) {
		ASSERT(MUTEX_HELD(&dtrace_provider_lock));
		ASSERT(MUTEX_HELD(&dtrace_lock));
		ASSERT(dtrace_anon.dta_enabling == NULL);

		/*
		 * We make sure that the DTrace provider is at the head of
		 * the provider chain.
		 */
		provider->dtpv_next = dtrace_provider;
		dtrace_provider = provider;
		return (0);
	}

	mutex_enter(&dtrace_provider_lock);
	mutex_enter(&dtrace_lock);

	/*
	 * If there is at least one provider registered, we'll add this
	 * provider after the first provider.
	 */
	if (dtrace_provider != NULL) {
		provider->dtpv_next = dtrace_provider->dtpv_next;
		dtrace_provider->dtpv_next = provider;
	} else {
		dtrace_provider = provider;
	}

	if (dtrace_retained != NULL) {
		dtrace_enabling_provide(provider);

		/*
		 * Now we need to call dtrace_enabling_matchall() -- which
		 * will acquire cpu_lock and dtrace_lock. We therefore need
		 * to drop all of our locks before calling into it...
		 */
		mutex_exit(&dtrace_lock);
		mutex_exit(&dtrace_provider_lock);
		dtrace_enabling_matchall();

		return (0);
	}

	mutex_exit(&dtrace_lock);
	mutex_exit(&dtrace_provider_lock);

	return (0);
}
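
/*
 * A minimal sketch of such a registration, as it might appear in a
 * hypothetical provider's attach routine (the "foo" names and attributes
 * below are illustrative only, not part of this file):
 *
 *	static dtrace_pops_t foo_pops = {
 *		.dtps_provide = foo_provide,
 *		.dtps_enable = foo_enable,
 *		.dtps_disable = foo_disable,
 *		.dtps_destroy = foo_destroy,
 *	};
 *	static dtrace_provider_id_t foo_id;
 *
 *	error = dtrace_register("foo", &foo_attr, DTRACE_PRIV_USER,
 *	    NULL, &foo_pops, NULL, &foo_id);
 *
 * where foo_attr is the provider's dtrace_pattr_t stability attributes.
 */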

/*
 * Unregister the specified provider from the DTrace framework. This should
 * generally be called by DTrace providers in their detach(9E) entry point.
 */
int
dtrace_unregister(dtrace_provider_id_t id)
{
	dtrace_provider_t *old = (dtrace_provider_t *)id;
	dtrace_provider_t *prev = NULL;
	int i, self = 0, noreap = 0;
	dtrace_probe_t *probe, *first = NULL;

	if (old->dtpv_pops.dtps_enable ==
	    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop) {
		/*
		 * If DTrace itself is the provider, we're called with locks
		 * already held.
		 */
		ASSERT(old == dtrace_provider);
#ifdef illumos
		ASSERT(dtrace_devi != NULL);
#endif
		ASSERT(MUTEX_HELD(&dtrace_provider_lock));
		ASSERT(MUTEX_HELD(&dtrace_lock));
		self = 1;

		if (dtrace_provider->dtpv_next != NULL) {
			/*
			 * There's another provider here; return failure.
			 */
			return (EBUSY);
		}
	} else {
		mutex_enter(&dtrace_provider_lock);
#ifdef illumos
		mutex_enter(&mod_lock);
#endif
		mutex_enter(&dtrace_lock);
	}

	/*
	 * If anyone has /dev/dtrace open, or if there are anonymous enabled
	 * probes, we refuse to let providers slither away, unless this
	 * provider has already been explicitly invalidated.
	 */
	if (!old->dtpv_defunct &&
	    (dtrace_opens || (dtrace_anon.dta_state != NULL &&
	    dtrace_anon.dta_state->dts_necbs > 0))) {
		if (!self) {
			mutex_exit(&dtrace_lock);
#ifdef illumos
			mutex_exit(&mod_lock);
#endif
			mutex_exit(&dtrace_provider_lock);
		}
		return (EBUSY);
	}

	/*
	 * Attempt to destroy the probes associated with this provider.
	 */
	for (i = 0; i < dtrace_nprobes; i++) {
		if ((probe = dtrace_probes[i]) == NULL)
			continue;

		if (probe->dtpr_provider != old)
			continue;

		if (probe->dtpr_ecb == NULL)
			continue;

		/*
		 * If we are trying to unregister a defunct provider, and the
		 * provider was made defunct within the interval dictated by
		 * dtrace_unregister_defunct_reap, we'll (asynchronously)
		 * attempt to reap our enablings. To denote that the provider
		 * should reattempt to unregister itself at some point in the
		 * future, we will return a differentiable error code (EAGAIN
		 * instead of EBUSY) in this case.
		 */
		if (dtrace_gethrtime() - old->dtpv_defunct >
		    dtrace_unregister_defunct_reap)
			noreap = 1;

		if (!self) {
			mutex_exit(&dtrace_lock);
#ifdef illumos
			mutex_exit(&mod_lock);
#endif
			mutex_exit(&dtrace_provider_lock);
		}

		if (noreap)
			return (EBUSY);

		(void) taskq_dispatch(dtrace_taskq,
		    (task_func_t *)dtrace_enabling_reap, NULL, TQ_SLEEP);

		return (EAGAIN);
	}

	/*
	 * All of the probes for this provider are disabled; we can safely
	 * remove all of them from their hash chains and from the probe array.
	 */
	for (i = 0; i < dtrace_nprobes; i++) {
		if ((probe = dtrace_probes[i]) == NULL)
			continue;

		if (probe->dtpr_provider != old)
			continue;

		dtrace_probes[i] = NULL;

		dtrace_hash_remove(dtrace_bymod, probe);
		dtrace_hash_remove(dtrace_byfunc, probe);
		dtrace_hash_remove(dtrace_byname, probe);

		if (first == NULL) {
			first = probe;
			probe->dtpr_nextmod = NULL;
		} else {
			probe->dtpr_nextmod = first;
			first = probe;
		}
	}

	/*
	 * The provider's probes have been removed from the hash chains and
	 * from the probe array. Now issue a dtrace_sync() to be sure that
	 * everyone has cleared out from any probe array processing.
	 */
	dtrace_sync();

	for (probe = first; probe != NULL; probe = first) {
		first = probe->dtpr_nextmod;

		old->dtpv_pops.dtps_destroy(old->dtpv_arg, probe->dtpr_id,
		    probe->dtpr_arg);
		kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
		kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
		kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
#ifdef illumos
		vmem_free(dtrace_arena, (void *)(uintptr_t)(probe->dtpr_id), 1);
#else
		free_unr(dtrace_arena, probe->dtpr_id);
#endif
		kmem_free(probe, sizeof (dtrace_probe_t));
	}

	if ((prev = dtrace_provider) == old) {
#ifdef illumos
		ASSERT(self || dtrace_devi == NULL);
		ASSERT(old->dtpv_next == NULL || dtrace_devi == NULL);
#endif
		dtrace_provider = old->dtpv_next;
	} else {
		while (prev != NULL && prev->dtpv_next != old)
			prev = prev->dtpv_next;

		if (prev == NULL) {
			panic("attempt to unregister non-existent "
			    "dtrace provider %p\n", (void *)id);
		}

		prev->dtpv_next = old->dtpv_next;
	}

	if (!self) {
		mutex_exit(&dtrace_lock);
#ifdef illumos
		mutex_exit(&mod_lock);
#endif
		mutex_exit(&dtrace_provider_lock);
	}

	kmem_free(old->dtpv_name, strlen(old->dtpv_name) + 1);
	kmem_free(old, sizeof (dtrace_provider_t));

	return (0);
}
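
/*
 * Correspondingly, a provider's detach routine would do something like
 * the following (a sketch; EBUSY and EAGAIN must be handled by failing
 * or deferring the detach):
 *
 *	if ((error = dtrace_unregister(foo_id)) != 0)
 *		return (error);
 */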

/*
 * Invalidate the specified provider. All subsequent probe lookups for the
 * specified provider will fail, but its probes will not be removed.
 */
void
dtrace_invalidate(dtrace_provider_id_t id)
{
	dtrace_provider_t *pvp = (dtrace_provider_t *)id;

	ASSERT(pvp->dtpv_pops.dtps_enable !=
	    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop);

	mutex_enter(&dtrace_provider_lock);
	mutex_enter(&dtrace_lock);

	pvp->dtpv_defunct = dtrace_gethrtime();

	mutex_exit(&dtrace_lock);
	mutex_exit(&dtrace_provider_lock);
}

/*
 * Indicate whether or not DTrace has attached.
 */
int
dtrace_attached(void)
{
	/*
	 * dtrace_provider will be non-NULL iff the DTrace driver has
	 * attached. (It's non-NULL because DTrace is always itself a
	 * provider.)
	 */
	return (dtrace_provider != NULL);
}

/*
 * Remove all the unenabled probes for the given provider. This function is
 * not unlike dtrace_unregister(), except that it doesn't remove the provider
 * -- just as many of its associated probes as it can.
 */
int
dtrace_condense(dtrace_provider_id_t id)
{
	dtrace_provider_t *prov = (dtrace_provider_t *)id;
	int i;
	dtrace_probe_t *probe;

	/*
	 * Make sure this isn't the dtrace provider itself.
	 */
	ASSERT(prov->dtpv_pops.dtps_enable !=
	    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop);

	mutex_enter(&dtrace_provider_lock);
	mutex_enter(&dtrace_lock);

	/*
	 * Attempt to destroy the probes associated with this provider.
	 */
	for (i = 0; i < dtrace_nprobes; i++) {
		if ((probe = dtrace_probes[i]) == NULL)
			continue;

		if (probe->dtpr_provider != prov)
			continue;

		if (probe->dtpr_ecb != NULL)
			continue;

		dtrace_probes[i] = NULL;

		dtrace_hash_remove(dtrace_bymod, probe);
		dtrace_hash_remove(dtrace_byfunc, probe);
		dtrace_hash_remove(dtrace_byname, probe);

		prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, i + 1,
		    probe->dtpr_arg);
		kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
		kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
		kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
		kmem_free(probe, sizeof (dtrace_probe_t));
#ifdef illumos
		vmem_free(dtrace_arena, (void *)((uintptr_t)i + 1), 1);
#else
		free_unr(dtrace_arena, i + 1);
#endif
	}

	mutex_exit(&dtrace_lock);
	mutex_exit(&dtrace_provider_lock);

	return (0);
}

/*
 * DTrace Probe Management Functions
 *
 * The functions in this section perform the DTrace probe management,
 * including functions to create probes, look up probes, and call into the
 * providers to request that probes be provided. Some of these functions are
 * in the Provider-to-Framework API; these functions can be identified by the
 * fact that they are not declared "static".
 */

/*
 * Create a probe with the specified module name, function name, and name.
 */
dtrace_id_t
dtrace_probe_create(dtrace_provider_id_t prov, const char *mod,
    const char *func, const char *name, int aframes, void *arg)
{
	dtrace_probe_t *probe, **probes;
	dtrace_provider_t *provider = (dtrace_provider_t *)prov;
	dtrace_id_t id;

	if (provider == dtrace_provider) {
		ASSERT(MUTEX_HELD(&dtrace_lock));
	} else {
		mutex_enter(&dtrace_lock);
	}

#ifdef illumos
	id = (dtrace_id_t)(uintptr_t)vmem_alloc(dtrace_arena, 1,
	    VM_BESTFIT | VM_SLEEP);
#else
	id = alloc_unr(dtrace_arena);
#endif
	probe = kmem_zalloc(sizeof (dtrace_probe_t), KM_SLEEP);

	probe->dtpr_id = id;
	probe->dtpr_gen = dtrace_probegen++;
	probe->dtpr_mod = dtrace_strdup(mod);
	probe->dtpr_func = dtrace_strdup(func);
	probe->dtpr_name = dtrace_strdup(name);
	probe->dtpr_arg = arg;
	probe->dtpr_aframes = aframes;
	probe->dtpr_provider = provider;

	dtrace_hash_add(dtrace_bymod, probe);
	dtrace_hash_add(dtrace_byfunc, probe);
	dtrace_hash_add(dtrace_byname, probe);

	if (id - 1 >= dtrace_nprobes) {
		size_t osize = dtrace_nprobes * sizeof (dtrace_probe_t *);
		size_t nsize = osize << 1;

		if (nsize == 0) {
			ASSERT(osize == 0);
			ASSERT(dtrace_probes == NULL);
			nsize = sizeof (dtrace_probe_t *);
		}

		probes = kmem_zalloc(nsize, KM_SLEEP);

		if (dtrace_probes == NULL) {
			ASSERT(osize == 0);
			dtrace_probes = probes;
			dtrace_nprobes = 1;
		} else {
			dtrace_probe_t **oprobes = dtrace_probes;

			bcopy(oprobes, probes, osize);
			dtrace_membar_producer();
			dtrace_probes = probes;

			dtrace_sync();

			/*
			 * All CPUs are now seeing the new probes array; we can
			 * safely free the old array.
			 */
			kmem_free(oprobes, osize);
			dtrace_nprobes <<= 1;
		}

		ASSERT(id - 1 < dtrace_nprobes);
	}

	ASSERT(dtrace_probes[id - 1] == NULL);
	dtrace_probes[id - 1] = probe;

	if (provider != dtrace_provider)
		mutex_exit(&dtrace_lock);

	return (id);
}
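
/*
 * A sketch of typical use from a provider's dtps_provide() entry point
 * (names illustrative): each probe is created one tuple at a time, with
 * the provider's per-probe cookie passed as 'arg':
 *
 *	id = dtrace_probe_create(foo_id, "mod", "func", "entry",
 *	    0, probe_cookie);
 */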

static dtrace_probe_t *
dtrace_probe_lookup_id(dtrace_id_t id)
{
	ASSERT(MUTEX_HELD(&dtrace_lock));

	if (id == 0 || id > dtrace_nprobes)
		return (NULL);

	return (dtrace_probes[id - 1]);
}

static int
dtrace_probe_lookup_match(dtrace_probe_t *probe, void *arg)
{
	*((dtrace_id_t *)arg) = probe->dtpr_id;

	return (DTRACE_MATCH_DONE);
}

/*
 * Look up a probe based on provider and one or more of module name, function
 * name and probe name.
 */
dtrace_id_t
dtrace_probe_lookup(dtrace_provider_id_t prid, char *mod,
    char *func, char *name)
{
	dtrace_probekey_t pkey;
	dtrace_id_t id;
	int match;

	pkey.dtpk_prov = ((dtrace_provider_t *)prid)->dtpv_name;
	pkey.dtpk_pmatch = &dtrace_match_string;
	pkey.dtpk_mod = mod;
	pkey.dtpk_mmatch = mod ? &dtrace_match_string : &dtrace_match_nul;
	pkey.dtpk_func = func;
	pkey.dtpk_fmatch = func ? &dtrace_match_string : &dtrace_match_nul;
	pkey.dtpk_name = name;
	pkey.dtpk_nmatch = name ? &dtrace_match_string : &dtrace_match_nul;
	pkey.dtpk_id = DTRACE_IDNONE;

	mutex_enter(&dtrace_lock);
	match = dtrace_match(&pkey, DTRACE_PRIV_ALL, 0, 0,
	    dtrace_probe_lookup_match, &id);
	mutex_exit(&dtrace_lock);

	ASSERT(match == 1 || match == 0);
	return (match ? id : 0);
}
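
/*
 * Providers commonly use this to keep dtps_provide() idempotent -- a
 * sketch, bailing out if the probe already exists before creating it:
 *
 *	if (dtrace_probe_lookup(foo_id, "mod", "func", "entry") != 0)
 *		return;
 */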

/*
 * Returns the probe argument associated with the specified probe.
 */
void *
dtrace_probe_arg(dtrace_provider_id_t id, dtrace_id_t pid)
{
	dtrace_probe_t *probe;
	void *rval = NULL;

	mutex_enter(&dtrace_lock);

	if ((probe = dtrace_probe_lookup_id(pid)) != NULL &&
	    probe->dtpr_provider == (dtrace_provider_t *)id)
		rval = probe->dtpr_arg;

	mutex_exit(&dtrace_lock);

	return (rval);
}

/*
 * Copy a probe into a probe description.
 */
static void
dtrace_probe_description(const dtrace_probe_t *prp, dtrace_probedesc_t *pdp)
{
	bzero(pdp, sizeof (dtrace_probedesc_t));
	pdp->dtpd_id = prp->dtpr_id;

	(void) strncpy(pdp->dtpd_provider,
	    prp->dtpr_provider->dtpv_name, DTRACE_PROVNAMELEN - 1);

	(void) strncpy(pdp->dtpd_mod, prp->dtpr_mod, DTRACE_MODNAMELEN - 1);
	(void) strncpy(pdp->dtpd_func, prp->dtpr_func, DTRACE_FUNCNAMELEN - 1);
	(void) strncpy(pdp->dtpd_name, prp->dtpr_name, DTRACE_NAMELEN - 1);
}

/*
 * Called to indicate that a probe -- or probes -- should be provided by a
 * specified provider. If the specified description is NULL, the provider will
 * be told to provide all of its probes. (This is done whenever a new
 * consumer comes along, or whenever a retained enabling is to be matched.) If
 * the specified description is non-NULL, the provider is given the
 * opportunity to dynamically provide the specified probe, allowing providers
 * to support the creation of probes on-the-fly. (So-called _autocreated_
 * probes.) If the provider is NULL, the operations will be applied to all
 * providers; if the provider is non-NULL the operations will only be applied
 * to the specified provider. The dtrace_provider_lock must be held, and the
 * dtrace_lock must _not_ be held -- the provider's dtps_provide() operation
 * will need to grab the dtrace_lock when it reenters the framework through
 * dtrace_probe_lookup(), dtrace_probe_create(), etc.
 */
static void
dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv)
{
#ifdef illumos
	modctl_t *ctl;
#endif
	int all = 0;

	ASSERT(MUTEX_HELD(&dtrace_provider_lock));

	if (prv == NULL) {
		all = 1;
		prv = dtrace_provider;
	}

	do {
		/*
		 * First, call the blanket provide operation.
		 */
		prv->dtpv_pops.dtps_provide(prv->dtpv_arg, desc);

#ifdef illumos
		/*
		 * Now call the per-module provide operation. We will grab
		 * mod_lock to prevent the list from being modified. Note
		 * that this also prevents the mod_busy bits from changing.
		 * (mod_busy can only be changed with mod_lock held.)
		 */
		mutex_enter(&mod_lock);

		ctl = &modules;
		do {
			if (ctl->mod_busy || ctl->mod_mp == NULL)
				continue;

			prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);

		} while ((ctl = ctl->mod_next) != &modules);

		mutex_exit(&mod_lock);
#endif
	} while (all && (prv = prv->dtpv_next) != NULL);
}

#ifdef illumos
/*
 * Iterate over each probe, and call the Framework-to-Provider API function
 * denoted by offs.
 */
static void
dtrace_probe_foreach(uintptr_t offs)
{
	dtrace_provider_t *prov;
	void (*func)(void *, dtrace_id_t, void *);
	dtrace_probe_t *probe;
	dtrace_icookie_t cookie;
	int i;

	/*
	 * We disable interrupts to walk through the probe array. This is
	 * safe -- the dtrace_sync() in dtrace_unregister() assures that we
	 * won't see stale data.
	 */
	cookie = dtrace_interrupt_disable();

	for (i = 0; i < dtrace_nprobes; i++) {
		if ((probe = dtrace_probes[i]) == NULL)
			continue;

		if (probe->dtpr_ecb == NULL) {
			/*
			 * This probe isn't enabled -- don't call the function.
			 */
			continue;
		}

		prov = probe->dtpr_provider;
		func = *((void(**)(void *, dtrace_id_t, void *))
		    ((uintptr_t)&prov->dtpv_pops + offs));

		func(prov->dtpv_arg, i + 1, probe->dtpr_arg);
	}

	dtrace_interrupt_enable(cookie);
}
#endif

static int
dtrace_probe_enable(dtrace_probedesc_t *desc, dtrace_enabling_t *enab)
{
	dtrace_probekey_t pkey;
	uint32_t priv;
	uid_t uid;
	zoneid_t zoneid;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	dtrace_ecb_create_cache = NULL;

	if (desc == NULL) {
		/*
		 * If we're passed a NULL description, we're being asked to
		 * create an ECB with a NULL probe.
		 */
		(void) dtrace_ecb_create_enable(NULL, enab);
		return (0);
	}

	dtrace_probekey(desc, &pkey);
	dtrace_cred2priv(enab->dten_vstate->dtvs_state->dts_cred.dcr_cred,
	    &priv, &uid, &zoneid);

	return (dtrace_match(&pkey, priv, uid, zoneid, dtrace_ecb_create_enable,
	    enab));
}

/*
 * DTrace Helper Provider Functions
 */
static void
dtrace_dofattr2attr(dtrace_attribute_t *attr, const dof_attr_t dofattr)
{
	attr->dtat_name = DOF_ATTR_NAME(dofattr);
	attr->dtat_data = DOF_ATTR_DATA(dofattr);
	attr->dtat_class = DOF_ATTR_CLASS(dofattr);
}

static void
dtrace_dofprov2hprov(dtrace_helper_provdesc_t *hprov,
    const dof_provider_t *dofprov, char *strtab)
{
	hprov->dthpv_provname = strtab + dofprov->dofpv_name;
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_provider,
	    dofprov->dofpv_provattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_mod,
	    dofprov->dofpv_modattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_func,
	    dofprov->dofpv_funcattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_name,
	    dofprov->dofpv_nameattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_args,
	    dofprov->dofpv_argsattr);
}

static void
dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid)
{
	uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
	dof_hdr_t *dof = (dof_hdr_t *)daddr;
	dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec;
	dof_provider_t *provider;
	dof_probe_t *probe;
	uint32_t *off, *enoff;
	uint8_t *arg;
	char *strtab;
	uint_t i, nprobes;
	dtrace_helper_provdesc_t dhpv;
	dtrace_helper_probedesc_t dhpb;
	dtrace_meta_t *meta = dtrace_meta_pid;
	dtrace_mops_t *mops = &meta->dtm_mops;
	void *parg;

	provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
	str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_strtab * dof->dofh_secsize);
	prb_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_probes * dof->dofh_secsize);
	arg_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_prargs * dof->dofh_secsize);
	off_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_proffs * dof->dofh_secsize);

	strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);
	off = (uint32_t *)(uintptr_t)(daddr + off_sec->dofs_offset);
	arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset);
	enoff = NULL;

	/*
	 * See dtrace_helper_provider_validate().
	 */
	if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
	    provider->dofpv_prenoffs != DOF_SECT_NONE) {
		enoff_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
		    provider->dofpv_prenoffs * dof->dofh_secsize);
		enoff = (uint32_t *)(uintptr_t)(daddr + enoff_sec->dofs_offset);
	}

	nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize;

	/*
	 * Create the provider.
	 */
	dtrace_dofprov2hprov(&dhpv, provider, strtab);

	if ((parg = mops->dtms_provide_pid(meta->dtm_arg, &dhpv, pid)) == NULL)
		return;

	meta->dtm_count++;

	/*
	 * Create the probes.
	 */
	for (i = 0; i < nprobes; i++) {
		probe = (dof_probe_t *)(uintptr_t)(daddr +
		    prb_sec->dofs_offset + i * prb_sec->dofs_entsize);

		/* See the check in dtrace_helper_provider_validate(). */
		if (strlen(strtab + probe->dofpr_func) >= DTRACE_FUNCNAMELEN)
			continue;

		dhpb.dthpb_mod = dhp->dofhp_mod;
		dhpb.dthpb_func = strtab + probe->dofpr_func;
		dhpb.dthpb_name = strtab + probe->dofpr_name;
		dhpb.dthpb_base = probe->dofpr_addr;
		dhpb.dthpb_offs = off + probe->dofpr_offidx;
		dhpb.dthpb_noffs = probe->dofpr_noffs;
		if (enoff != NULL) {
			dhpb.dthpb_enoffs = enoff + probe->dofpr_enoffidx;
			dhpb.dthpb_nenoffs = probe->dofpr_nenoffs;
		} else {
			dhpb.dthpb_enoffs = NULL;
			dhpb.dthpb_nenoffs = 0;
		}
		dhpb.dthpb_args = arg + probe->dofpr_argidx;
		dhpb.dthpb_nargc = probe->dofpr_nargc;
		dhpb.dthpb_xargc = probe->dofpr_xargc;
		dhpb.dthpb_ntypes = strtab + probe->dofpr_nargv;
		dhpb.dthpb_xtypes = strtab + probe->dofpr_xargv;

		mops->dtms_create_probe(meta->dtm_arg, parg, &dhpb);
	}
}

static void
dtrace_helper_provide(dof_helper_t *dhp, pid_t pid)
{
	uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
	dof_hdr_t *dof = (dof_hdr_t *)daddr;
	int i;

	ASSERT(MUTEX_HELD(&dtrace_meta_lock));

	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
		    dof->dofh_secoff + i * dof->dofh_secsize);

		if (sec->dofs_type != DOF_SECT_PROVIDER)
			continue;

		dtrace_helper_provide_one(dhp, sec, pid);
	}

	/*
	 * We may have just created probes, so we must now rematch against
	 * any retained enablings. Note that this call will acquire both
	 * cpu_lock and dtrace_lock; the fact that we are holding
	 * dtrace_meta_lock now is what defines the ordering with respect to
	 * these three locks.
	 */
	dtrace_enabling_matchall();
}

static void
dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid)
{
	uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
	dof_hdr_t *dof = (dof_hdr_t *)daddr;
	dof_sec_t *str_sec;
	dof_provider_t *provider;
	char *strtab;
	dtrace_helper_provdesc_t dhpv;
	dtrace_meta_t *meta = dtrace_meta_pid;
	dtrace_mops_t *mops = &meta->dtm_mops;

	provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
	str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_strtab * dof->dofh_secsize);

	strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);

	/*
	 * Create the provider.
	 */
	dtrace_dofprov2hprov(&dhpv, provider, strtab);

	mops->dtms_remove_pid(meta->dtm_arg, &dhpv, pid);

	meta->dtm_count--;
}

static void
dtrace_helper_provider_remove(dof_helper_t *dhp, pid_t pid)
{
	uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
	dof_hdr_t *dof = (dof_hdr_t *)daddr;
	int i;

	ASSERT(MUTEX_HELD(&dtrace_meta_lock));

	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
		    dof->dofh_secoff + i * dof->dofh_secsize);

		if (sec->dofs_type != DOF_SECT_PROVIDER)
			continue;

		dtrace_helper_provider_remove_one(dhp, sec, pid);
	}
}

/*
 * DTrace Meta Provider-to-Framework API Functions
 *
 * These functions implement the Meta Provider-to-Framework API, as described
 * in <sys/dtrace.h>.
 */
int
dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg,
    dtrace_meta_provider_id_t *idp)
{
	dtrace_meta_t *meta;
	dtrace_helpers_t *help, *next;
	int i;

	*idp = DTRACE_METAPROVNONE;

	/*
	 * We strictly don't need the name, but we hold onto it for
	 * debuggability. All hail error queues!
	 */
	if (name == NULL) {
		cmn_err(CE_WARN, "failed to register meta-provider: "
		    "invalid name");
		return (EINVAL);
	}

	if (mops == NULL ||
	    mops->dtms_create_probe == NULL ||
	    mops->dtms_provide_pid == NULL ||
	    mops->dtms_remove_pid == NULL) {
		cmn_err(CE_WARN, "failed to register meta-provider %s: "
		    "invalid ops", name);
		return (EINVAL);
	}

	meta = kmem_zalloc(sizeof (dtrace_meta_t), KM_SLEEP);
	meta->dtm_mops = *mops;
	meta->dtm_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
	(void) strcpy(meta->dtm_name, name);
	meta->dtm_arg = arg;

	mutex_enter(&dtrace_meta_lock);
	mutex_enter(&dtrace_lock);

	if (dtrace_meta_pid != NULL) {
		mutex_exit(&dtrace_lock);
		mutex_exit(&dtrace_meta_lock);
		cmn_err(CE_WARN, "failed to register meta-provider %s: "
		    "user-land meta-provider exists", name);
		kmem_free(meta->dtm_name, strlen(meta->dtm_name) + 1);
		kmem_free(meta, sizeof (dtrace_meta_t));
		return (EINVAL);
	}

	dtrace_meta_pid = meta;
	*idp = (dtrace_meta_provider_id_t)meta;

	/*
	 * If there are providers and probes ready to go, pass them
	 * off to the new meta provider now.
	 */

	help = dtrace_deferred_pid;
	dtrace_deferred_pid = NULL;

	mutex_exit(&dtrace_lock);

	while (help != NULL) {
		for (i = 0; i < help->dthps_nprovs; i++) {
			dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov,
			    help->dthps_pid);
		}

		next = help->dthps_next;
		help->dthps_next = NULL;
		help->dthps_prev = NULL;
		help->dthps_deferred = 0;
		help = next;
	}

	mutex_exit(&dtrace_meta_lock);

	return (0);
}

int
dtrace_meta_unregister(dtrace_meta_provider_id_t id)
{
	dtrace_meta_t **pp, *old = (dtrace_meta_t *)id;

	mutex_enter(&dtrace_meta_lock);
	mutex_enter(&dtrace_lock);

	if (old == dtrace_meta_pid) {
		pp = &dtrace_meta_pid;
	} else {
		panic("attempt to unregister non-existent "
		    "dtrace meta-provider %p\n", (void *)old);
	}

	if (old->dtm_count != 0) {
		mutex_exit(&dtrace_lock);
		mutex_exit(&dtrace_meta_lock);
		return (EBUSY);
	}

	*pp = NULL;

	mutex_exit(&dtrace_lock);
	mutex_exit(&dtrace_meta_lock);

	kmem_free(old->dtm_name, strlen(old->dtm_name) + 1);
	kmem_free(old, sizeof (dtrace_meta_t));

	return (0);
}


/*
 * DTrace DIF Object Functions
 */
static int
dtrace_difo_err(uint_t pc, const char *format, ...)
{
	if (dtrace_err_verbose) {
		va_list alist;

		(void) uprintf("dtrace DIF object error: [%u]: ", pc);
		va_start(alist, format);
		(void) vuprintf(format, alist);
		va_end(alist);
	}

#ifdef DTRACE_ERRDEBUG
	dtrace_errdebug(format);
#endif
	return (1);
}

/*
 * Validate a DTrace DIF object by checking the IR instructions. The following
 * rules are currently enforced by dtrace_difo_validate():
 *
 * 1. Each instruction must have a valid opcode
 * 2. Each register, string, variable, or subroutine reference must be valid
 * 3. No instruction can modify register %r0 (must be zero)
 * 4. All instruction reserved bits must be set to zero
 * 5. The last instruction must be a "ret" instruction
 * 6. All branch targets must reference a valid instruction _after_ the branch
 */
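/*
 * Note that rule 6 is what guarantees that DIF programs terminate: since
 * every branch must target a strictly later instruction, no loop can be
 * expressed, and evaluation executes at most dtdo_len instructions.
 */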
9815
static int
dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs,
    cred_t *cr)
{
	int err = 0, i;
	int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err;
	int kcheckload;
	uint_t pc;
	int maxglobal = -1, maxlocal = -1, maxtlocal = -1;

	kcheckload = cr == NULL ||
	    (vstate->dtvs_state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) == 0;

	dp->dtdo_destructive = 0;

	for (pc = 0; pc < dp->dtdo_len && err == 0; pc++) {
		dif_instr_t instr = dp->dtdo_buf[pc];

		uint_t r1 = DIF_INSTR_R1(instr);
		uint_t r2 = DIF_INSTR_R2(instr);
		uint_t rd = DIF_INSTR_RD(instr);
		uint_t rs = DIF_INSTR_RS(instr);
		uint_t label = DIF_INSTR_LABEL(instr);
		uint_t v = DIF_INSTR_VAR(instr);
		uint_t subr = DIF_INSTR_SUBR(instr);
		uint_t type = DIF_INSTR_TYPE(instr);
		uint_t op = DIF_INSTR_OP(instr);

		switch (op) {
		case DIF_OP_OR:
		case DIF_OP_XOR:
		case DIF_OP_AND:
		case DIF_OP_SLL:
		case DIF_OP_SRL:
		case DIF_OP_SRA:
		case DIF_OP_SUB:
		case DIF_OP_ADD:
		case DIF_OP_MUL:
		case DIF_OP_SDIV:
		case DIF_OP_UDIV:
		case DIF_OP_SREM:
		case DIF_OP_UREM:
		case DIF_OP_COPYS:
			if (r1 >= nregs)
				err += efunc(pc, "invalid register %u\n", r1);
			if (r2 >= nregs)
				err += efunc(pc, "invalid register %u\n", r2);
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			if (rd == 0)
				err += efunc(pc, "cannot write to %%r0\n");
			break;
		case DIF_OP_NOT:
		case DIF_OP_MOV:
		case DIF_OP_ALLOCS:
			if (r1 >= nregs)
				err += efunc(pc, "invalid register %u\n", r1);
			if (r2 != 0)
				err += efunc(pc, "non-zero reserved bits\n");
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			if (rd == 0)
				err += efunc(pc, "cannot write to %%r0\n");
			break;
		case DIF_OP_LDSB:
		case DIF_OP_LDSH:
		case DIF_OP_LDSW:
		case DIF_OP_LDUB:
		case DIF_OP_LDUH:
		case DIF_OP_LDUW:
		case DIF_OP_LDX:
			if (r1 >= nregs)
				err += efunc(pc, "invalid register %u\n", r1);
			if (r2 != 0)
				err += efunc(pc, "non-zero reserved bits\n");
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			if (rd == 0)
				err += efunc(pc, "cannot write to %%r0\n");
			if (kcheckload)
				dp->dtdo_buf[pc] = DIF_INSTR_LOAD(op +
				    DIF_OP_RLDSB - DIF_OP_LDSB, r1, rd);
			break;
		case DIF_OP_RLDSB:
		case DIF_OP_RLDSH:
		case DIF_OP_RLDSW:
		case DIF_OP_RLDUB:
		case DIF_OP_RLDUH:
		case DIF_OP_RLDUW:
		case DIF_OP_RLDX:
			if (r1 >= nregs)
				err += efunc(pc, "invalid register %u\n", r1);
			if (r2 != 0)
				err += efunc(pc, "non-zero reserved bits\n");
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			if (rd == 0)
				err += efunc(pc, "cannot write to %%r0\n");
			break;
		case DIF_OP_ULDSB:
		case DIF_OP_ULDSH:
		case DIF_OP_ULDSW:
		case DIF_OP_ULDUB:
		case DIF_OP_ULDUH:
		case DIF_OP_ULDUW:
		case DIF_OP_ULDX:
			if (r1 >= nregs)
				err += efunc(pc, "invalid register %u\n", r1);
			if (r2 != 0)
				err += efunc(pc, "non-zero reserved bits\n");
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			if (rd == 0)
				err += efunc(pc, "cannot write to %%r0\n");
			break;
		case DIF_OP_STB:
		case DIF_OP_STH:
		case DIF_OP_STW:
		case DIF_OP_STX:
			if (r1 >= nregs)
				err += efunc(pc, "invalid register %u\n", r1);
			if (r2 != 0)
				err += efunc(pc, "non-zero reserved bits\n");
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			if (rd == 0)
				err += efunc(pc, "cannot write to 0 address\n");
			break;
		case DIF_OP_CMP:
		case DIF_OP_SCMP:
			if (r1 >= nregs)
				err += efunc(pc, "invalid register %u\n", r1);
			if (r2 >= nregs)
				err += efunc(pc, "invalid register %u\n", r2);
			if (rd != 0)
				err += efunc(pc, "non-zero reserved bits\n");
			break;
		case DIF_OP_TST:
			if (r1 >= nregs)
				err += efunc(pc, "invalid register %u\n", r1);
			if (r2 != 0 || rd != 0)
				err += efunc(pc, "non-zero reserved bits\n");
			break;
		case DIF_OP_BA:
		case DIF_OP_BE:
		case DIF_OP_BNE:
		case DIF_OP_BG:
		case DIF_OP_BGU:
		case DIF_OP_BGE:
		case DIF_OP_BGEU:
		case DIF_OP_BL:
		case DIF_OP_BLU:
		case DIF_OP_BLE:
		case DIF_OP_BLEU:
			if (label >= dp->dtdo_len) {
				err += efunc(pc, "invalid branch target %u\n",
				    label);
			}
			if (label <= pc) {
				err += efunc(pc, "backward branch to %u\n",
				    label);
			}
			break;
		case DIF_OP_RET:
			if (r1 != 0 || r2 != 0)
				err += efunc(pc, "non-zero reserved bits\n");
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			break;
		case DIF_OP_NOP:
		case DIF_OP_POPTS:
		case DIF_OP_FLUSHTS:
			if (r1 != 0 || r2 != 0 || rd != 0)
				err += efunc(pc, "non-zero reserved bits\n");
			break;
		case DIF_OP_SETX:
			if (DIF_INSTR_INTEGER(instr) >= dp->dtdo_intlen) {
				err += efunc(pc, "invalid integer ref %u\n",
				    DIF_INSTR_INTEGER(instr));
			}
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			if (rd == 0)
				err += efunc(pc, "cannot write to %%r0\n");
			break;
		case DIF_OP_SETS:
			if (DIF_INSTR_STRING(instr) >= dp->dtdo_strlen) {
				err += efunc(pc, "invalid string ref %u\n",
				    DIF_INSTR_STRING(instr));
			}
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			if (rd == 0)
				err += efunc(pc, "cannot write to %%r0\n");
			break;
		case DIF_OP_LDGA:
		case DIF_OP_LDTA:
			if (r1 > DIF_VAR_ARRAY_MAX)
				err += efunc(pc, "invalid array %u\n", r1);
			if (r2 >= nregs)
				err += efunc(pc, "invalid register %u\n", r2);
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			if (rd == 0)
				err += efunc(pc, "cannot write to %%r0\n");
			break;
		case DIF_OP_LDGS:
		case DIF_OP_LDTS:
		case DIF_OP_LDLS:
		case DIF_OP_LDGAA:
		case DIF_OP_LDTAA:
			if (v < DIF_VAR_OTHER_MIN || v > DIF_VAR_OTHER_MAX)
				err += efunc(pc, "invalid variable %u\n", v);
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			if (rd == 0)
				err += efunc(pc, "cannot write to %%r0\n");
			break;
		case DIF_OP_STGS:
		case DIF_OP_STTS:
		case DIF_OP_STLS:
		case DIF_OP_STGAA:
		case DIF_OP_STTAA:
			if (v < DIF_VAR_OTHER_UBASE || v > DIF_VAR_OTHER_MAX)
				err += efunc(pc, "invalid variable %u\n", v);
			if (rs >= nregs)
				err += efunc(pc, "invalid register %u\n", rs);
			break;
		case DIF_OP_CALL:
			if (subr > DIF_SUBR_MAX)
				err += efunc(pc, "invalid subr %u\n", subr);
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			if (rd == 0)
				err += efunc(pc, "cannot write to %%r0\n");

			if (subr == DIF_SUBR_COPYOUT ||
			    subr == DIF_SUBR_COPYOUTSTR) {
				dp->dtdo_destructive = 1;
			}

			if (subr == DIF_SUBR_GETF) {
#ifdef __FreeBSD__
				err += efunc(pc, "getf() not supported");
#else
				/*
				 * If we have a getf() we need to record that
				 * in our state.  Note that our state can be
				 * NULL if this is a helper -- but in that
				 * case, the call to getf() is itself illegal,
				 * and will be caught (slightly later) when
				 * the helper is validated.
				 */
				if (vstate->dtvs_state != NULL)
					vstate->dtvs_state->dts_getf++;
#endif
			}

			break;
		case DIF_OP_PUSHTR:
			if (type != DIF_TYPE_STRING && type != DIF_TYPE_CTF)
				err += efunc(pc, "invalid ref type %u\n", type);
			if (r2 >= nregs)
				err += efunc(pc, "invalid register %u\n", r2);
			if (rs >= nregs)
				err += efunc(pc, "invalid register %u\n", rs);
			break;
		case DIF_OP_PUSHTV:
			if (type != DIF_TYPE_CTF)
				err += efunc(pc, "invalid val type %u\n", type);
			if (r2 >= nregs)
				err += efunc(pc, "invalid register %u\n", r2);
			if (rs >= nregs)
				err += efunc(pc, "invalid register %u\n", rs);
			break;
		default:
			err += efunc(pc, "invalid opcode %u\n",
			    DIF_INSTR_OP(instr));
		}
	}

	if (dp->dtdo_len != 0 &&
	    DIF_INSTR_OP(dp->dtdo_buf[dp->dtdo_len - 1]) != DIF_OP_RET) {
		err += efunc(dp->dtdo_len - 1,
		    "expected 'ret' as last DIF instruction\n");
	}

	if (!(dp->dtdo_rtype.dtdt_flags & (DIF_TF_BYREF | DIF_TF_BYUREF))) {
		/*
		 * If we're not returning by reference, the size must be either
		 * 0 or the size of one of the base types.
		 */
		switch (dp->dtdo_rtype.dtdt_size) {
		case 0:
		case sizeof (uint8_t):
		case sizeof (uint16_t):
		case sizeof (uint32_t):
		case sizeof (uint64_t):
			break;

		default:
			err += efunc(dp->dtdo_len - 1, "bad return size\n");
		}
	}

	for (i = 0; i < dp->dtdo_varlen && err == 0; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i], *existing = NULL;
		dtrace_diftype_t *vt, *et;
		uint_t id, ndx;

		if (v->dtdv_scope != DIFV_SCOPE_GLOBAL &&
		    v->dtdv_scope != DIFV_SCOPE_THREAD &&
		    v->dtdv_scope != DIFV_SCOPE_LOCAL) {
			err += efunc(i, "unrecognized variable scope %d\n",
			    v->dtdv_scope);
			break;
		}

		if (v->dtdv_kind != DIFV_KIND_ARRAY &&
		    v->dtdv_kind != DIFV_KIND_SCALAR) {
			err += efunc(i, "unrecognized variable type %d\n",
			    v->dtdv_kind);
			break;
		}

		if ((id = v->dtdv_id) > DIF_VARIABLE_MAX) {
			err += efunc(i, "%d exceeds variable id limit\n", id);
			break;
		}

		if (id < DIF_VAR_OTHER_UBASE)
			continue;

		/*
		 * For user-defined variables, we need to check that this
		 * definition is identical to any previous definition that we
		 * encountered.
		 */
		ndx = id - DIF_VAR_OTHER_UBASE;

		switch (v->dtdv_scope) {
		case DIFV_SCOPE_GLOBAL:
			if (maxglobal == -1 || ndx > maxglobal)
				maxglobal = ndx;

			if (ndx < vstate->dtvs_nglobals) {
				dtrace_statvar_t *svar;

				if ((svar = vstate->dtvs_globals[ndx]) != NULL)
					existing = &svar->dtsv_var;
			}

			break;

		case DIFV_SCOPE_THREAD:
			if (maxtlocal == -1 || ndx > maxtlocal)
				maxtlocal = ndx;

			if (ndx < vstate->dtvs_ntlocals)
				existing = &vstate->dtvs_tlocals[ndx];
			break;

		case DIFV_SCOPE_LOCAL:
			if (maxlocal == -1 || ndx > maxlocal)
				maxlocal = ndx;

			if (ndx < vstate->dtvs_nlocals) {
				dtrace_statvar_t *svar;

				if ((svar = vstate->dtvs_locals[ndx]) != NULL)
					existing = &svar->dtsv_var;
			}

			break;
		}

		vt = &v->dtdv_type;

		if (vt->dtdt_flags & DIF_TF_BYREF) {
			if (vt->dtdt_size == 0) {
				err += efunc(i, "zero-sized variable\n");
				break;
			}

			if ((v->dtdv_scope == DIFV_SCOPE_GLOBAL ||
			    v->dtdv_scope == DIFV_SCOPE_LOCAL) &&
			    vt->dtdt_size > dtrace_statvar_maxsize) {
				err += efunc(i, "oversized by-ref static\n");
				break;
			}
		}

		if (existing == NULL || existing->dtdv_id == 0)
			continue;

		ASSERT(existing->dtdv_id == v->dtdv_id);
		ASSERT(existing->dtdv_scope == v->dtdv_scope);

		if (existing->dtdv_kind != v->dtdv_kind)
			err += efunc(i, "%d changed variable kind\n", id);

		et = &existing->dtdv_type;

		if (vt->dtdt_flags != et->dtdt_flags) {
			err += efunc(i, "%d changed variable type flags\n", id);
			break;
		}

		if (vt->dtdt_size != 0 && vt->dtdt_size != et->dtdt_size) {
			err += efunc(i, "%d changed variable type size\n", id);
			break;
		}
	}

	for (pc = 0; pc < dp->dtdo_len && err == 0; pc++) {
		dif_instr_t instr = dp->dtdo_buf[pc];

		uint_t v = DIF_INSTR_VAR(instr);
		uint_t op = DIF_INSTR_OP(instr);

		switch (op) {
		case DIF_OP_LDGS:
		case DIF_OP_LDGAA:
		case DIF_OP_STGS:
		case DIF_OP_STGAA:
			if (v > DIF_VAR_OTHER_UBASE + maxglobal)
				err += efunc(pc, "invalid variable %u\n", v);
			break;
		case DIF_OP_LDTS:
		case DIF_OP_LDTAA:
		case DIF_OP_STTS:
		case DIF_OP_STTAA:
			if (v > DIF_VAR_OTHER_UBASE + maxtlocal)
				err += efunc(pc, "invalid variable %u\n", v);
			break;
		case DIF_OP_LDLS:
		case DIF_OP_STLS:
			if (v > DIF_VAR_OTHER_UBASE + maxlocal)
				err += efunc(pc, "invalid variable %u\n", v);
			break;
		default:
			break;
		}
	}

	return (err);
}

/*
 * Validate a DTrace DIF object that is to be used as a helper.  Helpers
 * are much more constrained than normal DIFOs.  Specifically, they may
 * not:
 *
 * 1. Make calls to subroutines other than copyin(), copyinstr() or
 *    miscellaneous string routines
 * 2. Access DTrace variables other than the args[] array, and the
 *    curthread, pid, ppid, tid, execname, zonename, uid and gid variables.
 * 3. Have thread-local variables.
 * 4. Have dynamic variables.
 */
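/*
 * For example (illustrative only), a ustack() annotation helper of the
 * form
 *
 *	dtrace:helper:ustack:
 *	{
 *		this->str = copyinstr(arg0);
 *	}
 *
 * stays within these constraints:  it reads an args[] member, calls one
 * of the permitted copying subroutines, and stores only to a
 * clause-local variable.  Storing to a thread-local (self->) variable or
 * to an associative array instead would be rejected by the checks below.
 */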
static int
dtrace_difo_validate_helper(dtrace_difo_t *dp)
{
	int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err;
	int err = 0;
	uint_t pc;

	for (pc = 0; pc < dp->dtdo_len; pc++) {
		dif_instr_t instr = dp->dtdo_buf[pc];

		uint_t v = DIF_INSTR_VAR(instr);
		uint_t subr = DIF_INSTR_SUBR(instr);
		uint_t op = DIF_INSTR_OP(instr);

		switch (op) {
		case DIF_OP_OR:
		case DIF_OP_XOR:
		case DIF_OP_AND:
		case DIF_OP_SLL:
		case DIF_OP_SRL:
		case DIF_OP_SRA:
		case DIF_OP_SUB:
		case DIF_OP_ADD:
		case DIF_OP_MUL:
		case DIF_OP_SDIV:
		case DIF_OP_UDIV:
		case DIF_OP_SREM:
		case DIF_OP_UREM:
		case DIF_OP_COPYS:
		case DIF_OP_NOT:
		case DIF_OP_MOV:
		case DIF_OP_RLDSB:
		case DIF_OP_RLDSH:
		case DIF_OP_RLDSW:
		case DIF_OP_RLDUB:
		case DIF_OP_RLDUH:
		case DIF_OP_RLDUW:
		case DIF_OP_RLDX:
		case DIF_OP_ULDSB:
		case DIF_OP_ULDSH:
		case DIF_OP_ULDSW:
		case DIF_OP_ULDUB:
		case DIF_OP_ULDUH:
		case DIF_OP_ULDUW:
		case DIF_OP_ULDX:
		case DIF_OP_STB:
		case DIF_OP_STH:
		case DIF_OP_STW:
		case DIF_OP_STX:
		case DIF_OP_ALLOCS:
		case DIF_OP_CMP:
		case DIF_OP_SCMP:
		case DIF_OP_TST:
		case DIF_OP_BA:
		case DIF_OP_BE:
		case DIF_OP_BNE:
		case DIF_OP_BG:
		case DIF_OP_BGU:
		case DIF_OP_BGE:
		case DIF_OP_BGEU:
		case DIF_OP_BL:
		case DIF_OP_BLU:
		case DIF_OP_BLE:
		case DIF_OP_BLEU:
		case DIF_OP_RET:
		case DIF_OP_NOP:
		case DIF_OP_POPTS:
		case DIF_OP_FLUSHTS:
		case DIF_OP_SETX:
		case DIF_OP_SETS:
		case DIF_OP_LDGA:
		case DIF_OP_LDLS:
		case DIF_OP_STGS:
		case DIF_OP_STLS:
		case DIF_OP_PUSHTR:
		case DIF_OP_PUSHTV:
			break;

		case DIF_OP_LDGS:
			if (v >= DIF_VAR_OTHER_UBASE)
				break;

			if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9)
				break;

			if (v == DIF_VAR_CURTHREAD || v == DIF_VAR_PID ||
			    v == DIF_VAR_PPID || v == DIF_VAR_TID ||
			    v == DIF_VAR_EXECARGS ||
			    v == DIF_VAR_EXECNAME || v == DIF_VAR_ZONENAME ||
			    v == DIF_VAR_UID || v == DIF_VAR_GID)
				break;

			err += efunc(pc, "illegal variable %u\n", v);
			break;

		case DIF_OP_LDTA:
		case DIF_OP_LDTS:
		case DIF_OP_LDGAA:
		case DIF_OP_LDTAA:
			err += efunc(pc, "illegal dynamic variable load\n");
			break;

		case DIF_OP_STTS:
		case DIF_OP_STGAA:
		case DIF_OP_STTAA:
			err += efunc(pc, "illegal dynamic variable store\n");
			break;

		case DIF_OP_CALL:
			if (subr == DIF_SUBR_ALLOCA ||
			    subr == DIF_SUBR_BCOPY ||
			    subr == DIF_SUBR_COPYIN ||
			    subr == DIF_SUBR_COPYINTO ||
			    subr == DIF_SUBR_COPYINSTR ||
			    subr == DIF_SUBR_INDEX ||
			    subr == DIF_SUBR_INET_NTOA ||
			    subr == DIF_SUBR_INET_NTOA6 ||
			    subr == DIF_SUBR_INET_NTOP ||
			    subr == DIF_SUBR_JSON ||
			    subr == DIF_SUBR_LLTOSTR ||
			    subr == DIF_SUBR_STRTOLL ||
			    subr == DIF_SUBR_RINDEX ||
			    subr == DIF_SUBR_STRCHR ||
			    subr == DIF_SUBR_STRJOIN ||
			    subr == DIF_SUBR_STRRCHR ||
			    subr == DIF_SUBR_STRSTR ||
			    subr == DIF_SUBR_HTONS ||
			    subr == DIF_SUBR_HTONL ||
			    subr == DIF_SUBR_HTONLL ||
			    subr == DIF_SUBR_NTOHS ||
			    subr == DIF_SUBR_NTOHL ||
			    subr == DIF_SUBR_NTOHLL ||
			    subr == DIF_SUBR_MEMREF)
				break;
#ifdef __FreeBSD__
			if (subr == DIF_SUBR_MEMSTR)
				break;
#endif

			err += efunc(pc, "invalid subr %u\n", subr);
			break;

		default:
			err += efunc(pc, "invalid opcode %u\n",
			    DIF_INSTR_OP(instr));
		}
	}

	return (err);
}

/*
 * Returns 1 if the expression in the DIF object can be cached on a per-thread
 * basis; 0 if not.
 */
static int
dtrace_difo_cacheable(dtrace_difo_t *dp)
{
	int i;

	if (dp == NULL)
		return (0);

	for (i = 0; i < dp->dtdo_varlen; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i];

		if (v->dtdv_scope != DIFV_SCOPE_GLOBAL)
			continue;

		switch (v->dtdv_id) {
		case DIF_VAR_CURTHREAD:
		case DIF_VAR_PID:
		case DIF_VAR_TID:
		case DIF_VAR_EXECARGS:
		case DIF_VAR_EXECNAME:
		case DIF_VAR_ZONENAME:
			break;

		default:
			return (0);
		}
	}

	/*
	 * This DIF object may be cacheable.  Now we need to look for any
	 * array loading instructions, any memory loading instructions, or
	 * any stores to thread-local variables.
	 */
	for (i = 0; i < dp->dtdo_len; i++) {
		uint_t op = DIF_INSTR_OP(dp->dtdo_buf[i]);

		if ((op >= DIF_OP_LDSB && op <= DIF_OP_LDX) ||
		    (op >= DIF_OP_ULDSB && op <= DIF_OP_ULDX) ||
		    (op >= DIF_OP_RLDSB && op <= DIF_OP_RLDX) ||
		    op == DIF_OP_LDGA || op == DIF_OP_STTS)
			return (0);
	}

	return (1);
}

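/*
 * For example, a predicate such as /pid == 42/ references only the pid
 * variable and performs no loads, so it is cacheable by the criteria
 * above; a predicate that chases pointers (say, /curthread->t_pri > 60/,
 * to pick an illustrative field) compiles to load instructions and is
 * therefore not cacheable.
 */
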
static void
dtrace_difo_hold(dtrace_difo_t *dp)
{
	int i;

	ASSERT(MUTEX_HELD(&dtrace_lock));

	dp->dtdo_refcnt++;
	ASSERT(dp->dtdo_refcnt != 0);

	/*
	 * We need to check this DIF object for references to the variable
	 * DIF_VAR_VTIMESTAMP.
	 */
	for (i = 0; i < dp->dtdo_varlen; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i];

		if (v->dtdv_id != DIF_VAR_VTIMESTAMP)
			continue;

		if (dtrace_vtime_references++ == 0)
			dtrace_vtime_enable();
	}
}

/*
 * This routine calculates the dynamic variable chunksize for a given DIF
 * object.  The calculation is not fool-proof, and can probably be tricked by
 * malicious DIF -- but it works for all compiler-generated DIF.  Because this
 * calculation is likely imperfect, dtrace_dynvar() is able to gracefully fail
 * if a dynamic variable size exceeds the chunksize.
 */
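/*
 * For example, a store to a thread-local self->x of type uint64_t is
 * seen here as a "stts" with nkeys = 2 and zero-sized keys:  the
 * computed size is sizeof (dtrace_dynvar_t), plus one dtrace_key_t
 * beyond the one embedded in dtrace_dynvar_t, plus the eight bytes of
 * stored data, rounded up to a multiple of sizeof (uint64_t).
 */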
static void
dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
{
	uint64_t sval = 0;
	dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */
	const dif_instr_t *text = dp->dtdo_buf;
	uint_t pc, srd = 0;
	uint_t ttop = 0;
	size_t size, ksize;
	uint_t id, i;

	for (pc = 0; pc < dp->dtdo_len; pc++) {
		dif_instr_t instr = text[pc];
		uint_t op = DIF_INSTR_OP(instr);
		uint_t rd = DIF_INSTR_RD(instr);
		uint_t r1 = DIF_INSTR_R1(instr);
		uint_t nkeys = 0;
		uchar_t scope = 0;

		dtrace_key_t *key = tupregs;

		switch (op) {
		case DIF_OP_SETX:
			sval = dp->dtdo_inttab[DIF_INSTR_INTEGER(instr)];
			srd = rd;
			continue;

		case DIF_OP_STTS:
			key = &tupregs[DIF_DTR_NREGS];
			key[0].dttk_size = 0;
			key[1].dttk_size = 0;
			nkeys = 2;
			scope = DIFV_SCOPE_THREAD;
			break;

		case DIF_OP_STGAA:
		case DIF_OP_STTAA:
			nkeys = ttop;

			if (DIF_INSTR_OP(instr) == DIF_OP_STTAA)
				key[nkeys++].dttk_size = 0;

			key[nkeys++].dttk_size = 0;

			if (op == DIF_OP_STTAA) {
				scope = DIFV_SCOPE_THREAD;
			} else {
				scope = DIFV_SCOPE_GLOBAL;
			}

			break;

		case DIF_OP_PUSHTR:
			if (ttop == DIF_DTR_NREGS)
				return;

			if ((srd == 0 || sval == 0) && r1 == DIF_TYPE_STRING) {
				/*
				 * If the register for the size of the "pushtr"
				 * is %r0 (or the value is 0) and the type is
				 * a string, we'll use the system-wide default
				 * string size.
				 */
				tupregs[ttop++].dttk_size =
				    dtrace_strsize_default;
			} else {
				if (srd == 0)
					return;

				if (sval > LONG_MAX)
					return;

				tupregs[ttop++].dttk_size = sval;
			}

			break;

		case DIF_OP_PUSHTV:
			if (ttop == DIF_DTR_NREGS)
				return;

			tupregs[ttop++].dttk_size = 0;
			break;

		case DIF_OP_FLUSHTS:
			ttop = 0;
			break;

		case DIF_OP_POPTS:
			if (ttop != 0)
				ttop--;
			break;
		}

		sval = 0;
		srd = 0;

		if (nkeys == 0)
			continue;

		/*
		 * We have a dynamic variable allocation; calculate its size.
		 */
		for (ksize = 0, i = 0; i < nkeys; i++)
			ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t));

		size = sizeof (dtrace_dynvar_t);
		size += sizeof (dtrace_key_t) * (nkeys - 1);
		size += ksize;

		/*
		 * Now we need to determine the size of the stored data.
		 */
		id = DIF_INSTR_VAR(instr);

		for (i = 0; i < dp->dtdo_varlen; i++) {
			dtrace_difv_t *v = &dp->dtdo_vartab[i];

			if (v->dtdv_id == id && v->dtdv_scope == scope) {
				size += v->dtdv_type.dtdt_size;
				break;
			}
		}

		if (i == dp->dtdo_varlen)
			return;

		/*
		 * We have the size.  If this is larger than the chunk size
		 * for our dynamic variable state, reset the chunk size.
		 */
		size = P2ROUNDUP(size, sizeof (uint64_t));

		/*
		 * Before setting the chunk size, check that we're not going
		 * to set it to a negative value...
		 */
		if (size > LONG_MAX)
			return;

		/*
		 * ...and make certain that we didn't badly overflow.
		 */
		if (size < ksize || size < sizeof (dtrace_dynvar_t))
			return;

		if (size > vstate->dtvs_dynvars.dtds_chunksize)
			vstate->dtvs_dynvars.dtds_chunksize = size;
	}
}

static void
dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
{
	int i, oldsvars, osz, nsz, otlocals, ntlocals;
	uint_t id;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(dp->dtdo_buf != NULL && dp->dtdo_len != 0);

	for (i = 0; i < dp->dtdo_varlen; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i];
		dtrace_statvar_t *svar, ***svarp = NULL;
		size_t dsize = 0;
		uint8_t scope = v->dtdv_scope;
		int *np = NULL;

		if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE)
			continue;

		id -= DIF_VAR_OTHER_UBASE;

		switch (scope) {
		case DIFV_SCOPE_THREAD:
			while (id >= (otlocals = vstate->dtvs_ntlocals)) {
				dtrace_difv_t *tlocals;

				if ((ntlocals = (otlocals << 1)) == 0)
					ntlocals = 1;

				osz = otlocals * sizeof (dtrace_difv_t);
				nsz = ntlocals * sizeof (dtrace_difv_t);

				tlocals = kmem_zalloc(nsz, KM_SLEEP);

				if (osz != 0) {
					bcopy(vstate->dtvs_tlocals,
					    tlocals, osz);
					kmem_free(vstate->dtvs_tlocals, osz);
				}

				vstate->dtvs_tlocals = tlocals;
				vstate->dtvs_ntlocals = ntlocals;
			}

			vstate->dtvs_tlocals[id] = *v;
			continue;

		case DIFV_SCOPE_LOCAL:
			np = &vstate->dtvs_nlocals;
			svarp = &vstate->dtvs_locals;

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF)
				dsize = (mp_maxid + 1) *
				    (v->dtdv_type.dtdt_size +
				    sizeof (uint64_t));
			else
				dsize = (mp_maxid + 1) * sizeof (uint64_t);

			break;

		case DIFV_SCOPE_GLOBAL:
			np = &vstate->dtvs_nglobals;
			svarp = &vstate->dtvs_globals;

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF)
				dsize = v->dtdv_type.dtdt_size +
				    sizeof (uint64_t);

			break;

		default:
			ASSERT(0);
		}

		while (id >= (oldsvars = *np)) {
			dtrace_statvar_t **statics;
			int newsvars, oldsize, newsize;

			if ((newsvars = (oldsvars << 1)) == 0)
				newsvars = 1;

			oldsize = oldsvars * sizeof (dtrace_statvar_t *);
			newsize = newsvars * sizeof (dtrace_statvar_t *);

			statics = kmem_zalloc(newsize, KM_SLEEP);

			if (oldsize != 0) {
				bcopy(*svarp, statics, oldsize);
				kmem_free(*svarp, oldsize);
			}

			*svarp = statics;
			*np = newsvars;
		}

		if ((svar = (*svarp)[id]) == NULL) {
			svar = kmem_zalloc(sizeof (dtrace_statvar_t), KM_SLEEP);
			svar->dtsv_var = *v;

			if ((svar->dtsv_size = dsize) != 0) {
				svar->dtsv_data = (uint64_t)(uintptr_t)
				    kmem_zalloc(dsize, KM_SLEEP);
			}

			(*svarp)[id] = svar;
		}

		svar->dtsv_refcnt++;
	}

	dtrace_difo_chunksize(dp, vstate);
	dtrace_difo_hold(dp);
}

static dtrace_difo_t *
dtrace_difo_duplicate(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
{
	dtrace_difo_t *new;
	size_t sz;

	ASSERT(dp->dtdo_buf != NULL);
	ASSERT(dp->dtdo_refcnt != 0);

	new = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP);

	ASSERT(dp->dtdo_buf != NULL);
	sz = dp->dtdo_len * sizeof (dif_instr_t);
	new->dtdo_buf = kmem_alloc(sz, KM_SLEEP);
	bcopy(dp->dtdo_buf, new->dtdo_buf, sz);
	new->dtdo_len = dp->dtdo_len;

	if (dp->dtdo_strtab != NULL) {
		ASSERT(dp->dtdo_strlen != 0);
		new->dtdo_strtab = kmem_alloc(dp->dtdo_strlen, KM_SLEEP);
		bcopy(dp->dtdo_strtab, new->dtdo_strtab, dp->dtdo_strlen);
		new->dtdo_strlen = dp->dtdo_strlen;
	}

	if (dp->dtdo_inttab != NULL) {
		ASSERT(dp->dtdo_intlen != 0);
		sz = dp->dtdo_intlen * sizeof (uint64_t);
		new->dtdo_inttab = kmem_alloc(sz, KM_SLEEP);
		bcopy(dp->dtdo_inttab, new->dtdo_inttab, sz);
		new->dtdo_intlen = dp->dtdo_intlen;
	}

	if (dp->dtdo_vartab != NULL) {
		ASSERT(dp->dtdo_varlen != 0);
		sz = dp->dtdo_varlen * sizeof (dtrace_difv_t);
		new->dtdo_vartab = kmem_alloc(sz, KM_SLEEP);
		bcopy(dp->dtdo_vartab, new->dtdo_vartab, sz);
		new->dtdo_varlen = dp->dtdo_varlen;
	}

	dtrace_difo_init(new, vstate);
	return (new);
}

static void
dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
{
	int i;

	ASSERT(dp->dtdo_refcnt == 0);

	for (i = 0; i < dp->dtdo_varlen; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i];
		dtrace_statvar_t *svar, **svarp = NULL;
		uint_t id;
		uint8_t scope = v->dtdv_scope;
		int *np = NULL;

		switch (scope) {
		case DIFV_SCOPE_THREAD:
			continue;

		case DIFV_SCOPE_LOCAL:
			np = &vstate->dtvs_nlocals;
			svarp = vstate->dtvs_locals;
			break;

		case DIFV_SCOPE_GLOBAL:
			np = &vstate->dtvs_nglobals;
			svarp = vstate->dtvs_globals;
			break;

		default:
			ASSERT(0);
		}

		if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE)
			continue;

		id -= DIF_VAR_OTHER_UBASE;
		ASSERT(id < *np);

		svar = svarp[id];
		ASSERT(svar != NULL);
		ASSERT(svar->dtsv_refcnt > 0);

		if (--svar->dtsv_refcnt > 0)
			continue;

		if (svar->dtsv_size != 0) {
			ASSERT(svar->dtsv_data != 0);
			kmem_free((void *)(uintptr_t)svar->dtsv_data,
			    svar->dtsv_size);
		}

		kmem_free(svar, sizeof (dtrace_statvar_t));
		svarp[id] = NULL;
	}

	if (dp->dtdo_buf != NULL)
		kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t));
	if (dp->dtdo_inttab != NULL)
		kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t));
	if (dp->dtdo_strtab != NULL)
		kmem_free(dp->dtdo_strtab, dp->dtdo_strlen);
	if (dp->dtdo_vartab != NULL)
		kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t));

	kmem_free(dp, sizeof (dtrace_difo_t));
}

static void
dtrace_difo_release(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
{
	int i;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(dp->dtdo_refcnt != 0);

	for (i = 0; i < dp->dtdo_varlen; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i];

		if (v->dtdv_id != DIF_VAR_VTIMESTAMP)
			continue;

		ASSERT(dtrace_vtime_references > 0);
		if (--dtrace_vtime_references == 0)
			dtrace_vtime_disable();
	}

	if (--dp->dtdo_refcnt == 0)
		dtrace_difo_destroy(dp, vstate);
}

/*
 * DTrace Format Functions
 */
static uint16_t
dtrace_format_add(dtrace_state_t *state, char *str)
{
	char *fmt, **new;
	uint16_t ndx, len = strlen(str) + 1;

	fmt = kmem_zalloc(len, KM_SLEEP);
	bcopy(str, fmt, len);

	for (ndx = 0; ndx < state->dts_nformats; ndx++) {
		if (state->dts_formats[ndx] == NULL) {
			state->dts_formats[ndx] = fmt;
			return (ndx + 1);
		}
	}

	if (state->dts_nformats == USHRT_MAX) {
		/*
		 * This is only likely if a denial-of-service attack is being
		 * attempted.  As such, it's okay to fail silently here.
		 */
		kmem_free(fmt, len);
		return (0);
	}

	/*
	 * For simplicity, we always resize the formats array to be exactly the
	 * number of formats.
	 */
	ndx = state->dts_nformats++;
	new = kmem_alloc((ndx + 1) * sizeof (char *), KM_SLEEP);

	if (state->dts_formats != NULL) {
		ASSERT(ndx != 0);
		bcopy(state->dts_formats, new, ndx * sizeof (char *));
		kmem_free(state->dts_formats, ndx * sizeof (char *));
	}

	state->dts_formats = new;
	state->dts_formats[ndx] = fmt;

	return (ndx + 1);
}

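/*
 * Note that the value returned above (and consumed below) is one-based:
 * zero is reserved to denote "no format" in a record's dtrd_format
 * member, which is why dtrace_format_add() returns ndx + 1 and its
 * callers subtract one to index dts_formats.
 */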
static void
dtrace_format_remove(dtrace_state_t *state, uint16_t format)
{
	char *fmt;

	ASSERT(state->dts_formats != NULL);
	ASSERT(format <= state->dts_nformats);
	ASSERT(state->dts_formats[format - 1] != NULL);

	fmt = state->dts_formats[format - 1];
	kmem_free(fmt, strlen(fmt) + 1);
	state->dts_formats[format - 1] = NULL;
}

static void
dtrace_format_destroy(dtrace_state_t *state)
{
	int i;

	if (state->dts_nformats == 0) {
		ASSERT(state->dts_formats == NULL);
		return;
	}

	ASSERT(state->dts_formats != NULL);

	for (i = 0; i < state->dts_nformats; i++) {
		char *fmt = state->dts_formats[i];

		if (fmt == NULL)
			continue;

		kmem_free(fmt, strlen(fmt) + 1);
	}

	kmem_free(state->dts_formats, state->dts_nformats * sizeof (char *));
	state->dts_nformats = 0;
	state->dts_formats = NULL;
}

/*
 * DTrace Predicate Functions
 */
static dtrace_predicate_t *
dtrace_predicate_create(dtrace_difo_t *dp)
{
	dtrace_predicate_t *pred;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(dp->dtdo_refcnt != 0);

	pred = kmem_zalloc(sizeof (dtrace_predicate_t), KM_SLEEP);
	pred->dtp_difo = dp;
	pred->dtp_refcnt = 1;

	if (!dtrace_difo_cacheable(dp))
		return (pred);

	if (dtrace_predcache_id == DTRACE_CACHEIDNONE) {
		/*
		 * This is only theoretically possible -- we have had 2^32
		 * cacheable predicates on this machine.  We cannot allow any
		 * more predicates to become cacheable:  as unlikely as it is,
		 * there may be a thread caching a (now stale) predicate cache
		 * ID.  (N.B.:  the temptation is being successfully resisted to
		 * have this cmn_err() "Holy shit -- we executed this code!")
		 */
		return (pred);
	}

	pred->dtp_cacheid = dtrace_predcache_id++;

	return (pred);
}

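/*
 * The cache ID assigned above enables a fast path at probe time:  a
 * thread that evaluates a cacheable predicate to false records the
 * predicate's cache ID in its t_predcache, allowing subsequent firings
 * of a probe with a matching predicate cache ID to be dismissed on that
 * thread without re-executing the DIF.
 */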
static void
dtrace_predicate_hold(dtrace_predicate_t *pred)
{
	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(pred->dtp_difo != NULL && pred->dtp_difo->dtdo_refcnt != 0);
	ASSERT(pred->dtp_refcnt > 0);

	pred->dtp_refcnt++;
}

static void
dtrace_predicate_release(dtrace_predicate_t *pred, dtrace_vstate_t *vstate)
{
	dtrace_difo_t *dp = pred->dtp_difo;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(dp != NULL && dp->dtdo_refcnt != 0);
	ASSERT(pred->dtp_refcnt > 0);

	if (--pred->dtp_refcnt == 0) {
		dtrace_difo_release(pred->dtp_difo, vstate);
		kmem_free(pred, sizeof (dtrace_predicate_t));
	}
}

/*
 * DTrace Action Description Functions
 */
static dtrace_actdesc_t *
dtrace_actdesc_create(dtrace_actkind_t kind, uint32_t ntuple,
    uint64_t uarg, uint64_t arg)
{
	dtrace_actdesc_t *act;

#ifdef illumos
	ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || (arg != NULL &&
	    arg >= KERNELBASE) || (arg == NULL && kind == DTRACEACT_PRINTA));
#endif

	act = kmem_zalloc(sizeof (dtrace_actdesc_t), KM_SLEEP);
	act->dtad_kind = kind;
	act->dtad_ntuple = ntuple;
	act->dtad_uarg = uarg;
	act->dtad_arg = arg;
	act->dtad_refcnt = 1;

	return (act);
}

static void
dtrace_actdesc_hold(dtrace_actdesc_t *act)
{
	ASSERT(act->dtad_refcnt >= 1);
	act->dtad_refcnt++;
}

static void
dtrace_actdesc_release(dtrace_actdesc_t *act, dtrace_vstate_t *vstate)
{
	dtrace_actkind_t kind = act->dtad_kind;
	dtrace_difo_t *dp;

	ASSERT(act->dtad_refcnt >= 1);

	if (--act->dtad_refcnt != 0)
		return;

	if ((dp = act->dtad_difo) != NULL)
		dtrace_difo_release(dp, vstate);

	if (DTRACEACT_ISPRINTFLIKE(kind)) {
		char *str = (char *)(uintptr_t)act->dtad_arg;

#ifdef illumos
		ASSERT((str != NULL && (uintptr_t)str >= KERNELBASE) ||
		    (str == NULL && act->dtad_kind == DTRACEACT_PRINTA));
#endif

		if (str != NULL)
			kmem_free(str, strlen(str) + 1);
	}

	kmem_free(act, sizeof (dtrace_actdesc_t));
}

/*
 * DTrace ECB Functions
 */
static dtrace_ecb_t *
dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe)
{
	dtrace_ecb_t *ecb;
	dtrace_epid_t epid;

	ASSERT(MUTEX_HELD(&dtrace_lock));

	ecb = kmem_zalloc(sizeof (dtrace_ecb_t), KM_SLEEP);
	ecb->dte_predicate = NULL;
	ecb->dte_probe = probe;

	/*
	 * The default size is the size of the default action: recording
	 * the header.
	 */
	ecb->dte_size = ecb->dte_needed = sizeof (dtrace_rechdr_t);
	ecb->dte_alignment = sizeof (dtrace_epid_t);

	epid = state->dts_epid++;

	if (epid - 1 >= state->dts_necbs) {
		dtrace_ecb_t **oecbs = state->dts_ecbs, **ecbs;
		int necbs = state->dts_necbs << 1;

		ASSERT(epid == state->dts_necbs + 1);

		if (necbs == 0) {
			ASSERT(oecbs == NULL);
			necbs = 1;
		}

		ecbs = kmem_zalloc(necbs * sizeof (*ecbs), KM_SLEEP);

		if (oecbs != NULL)
			bcopy(oecbs, ecbs, state->dts_necbs * sizeof (*ecbs));

		dtrace_membar_producer();
		state->dts_ecbs = ecbs;

		if (oecbs != NULL) {
			/*
			 * If this state is active, we must dtrace_sync()
			 * before we can free the old dts_ecbs array:  we're
			 * coming in hot, and there may be active ring
			 * buffer processing (which indexes into the dts_ecbs
			 * array) on another CPU.
			 */
			if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
				dtrace_sync();

			kmem_free(oecbs, state->dts_necbs * sizeof (*ecbs));
		}

		dtrace_membar_producer();
		state->dts_necbs = necbs;
	}

	ecb->dte_state = state;

	ASSERT(state->dts_ecbs[epid - 1] == NULL);
	dtrace_membar_producer();
	state->dts_ecbs[(ecb->dte_epid = epid) - 1] = ecb;

	return (ecb);
}

static void
dtrace_ecb_enable(dtrace_ecb_t *ecb)
{
	dtrace_probe_t *probe = ecb->dte_probe;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(ecb->dte_next == NULL);

	if (probe == NULL) {
		/*
		 * This is the NULL probe -- there's nothing to do.
		 */
		return;
	}

	if (probe->dtpr_ecb == NULL) {
		dtrace_provider_t *prov = probe->dtpr_provider;

		/*
		 * We're the first ECB on this probe.
		 */
		probe->dtpr_ecb = probe->dtpr_ecb_last = ecb;

		if (ecb->dte_predicate != NULL)
			probe->dtpr_predcache = ecb->dte_predicate->dtp_cacheid;

		prov->dtpv_pops.dtps_enable(prov->dtpv_arg,
		    probe->dtpr_id, probe->dtpr_arg);
	} else {
		/*
		 * This probe is already active.  Swing the last pointer to
		 * point to the new ECB, and issue a dtrace_sync() to assure
		 * that all CPUs have seen the change.
		 */
		ASSERT(probe->dtpr_ecb_last != NULL);
		probe->dtpr_ecb_last->dte_next = ecb;
		probe->dtpr_ecb_last = ecb;
		probe->dtpr_predcache = 0;

		dtrace_sync();
	}
}

static int
dtrace_ecb_resize(dtrace_ecb_t *ecb)
{
	dtrace_action_t *act;
	uint32_t curneeded = UINT32_MAX;
	uint32_t aggbase = UINT32_MAX;

	/*
	 * If we record anything, we always record the dtrace_rechdr_t.  (And
	 * we always record it first.)
	 */
	ecb->dte_size = sizeof (dtrace_rechdr_t);
	ecb->dte_alignment = sizeof (dtrace_epid_t);

	for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
		dtrace_recdesc_t *rec = &act->dta_rec;
		ASSERT(rec->dtrd_size > 0 || rec->dtrd_alignment == 1);

		ecb->dte_alignment = MAX(ecb->dte_alignment,
		    rec->dtrd_alignment);

		if (DTRACEACT_ISAGG(act->dta_kind)) {
			dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act;

			ASSERT(rec->dtrd_size != 0);
			ASSERT(agg->dtag_first != NULL);
			ASSERT(act->dta_prev->dta_intuple);
			ASSERT(aggbase != UINT32_MAX);
			ASSERT(curneeded != UINT32_MAX);

			agg->dtag_base = aggbase;

			curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment);
			rec->dtrd_offset = curneeded;
			if (curneeded + rec->dtrd_size < curneeded)
				return (EINVAL);
			curneeded += rec->dtrd_size;
			ecb->dte_needed = MAX(ecb->dte_needed, curneeded);

			aggbase = UINT32_MAX;
			curneeded = UINT32_MAX;
		} else if (act->dta_intuple) {
			if (curneeded == UINT32_MAX) {
				/*
				 * This is the first record in a tuple.  Align
				 * curneeded to be at offset 4 in an 8-byte
				 * aligned block.
				 */
				ASSERT(act->dta_prev == NULL ||
				    !act->dta_prev->dta_intuple);
				ASSERT3U(aggbase, ==, UINT32_MAX);
				curneeded = P2PHASEUP(ecb->dte_size,
				    sizeof (uint64_t), sizeof (dtrace_aggid_t));

				aggbase = curneeded - sizeof (dtrace_aggid_t);
				ASSERT(IS_P2ALIGNED(aggbase,
				    sizeof (uint64_t)));
			}
			curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment);
			rec->dtrd_offset = curneeded;
			if (curneeded + rec->dtrd_size < curneeded)
				return (EINVAL);
			curneeded += rec->dtrd_size;
		} else {
			/* tuples must be followed by an aggregation */
			ASSERT(act->dta_prev == NULL ||
			    !act->dta_prev->dta_intuple);

			ecb->dte_size = P2ROUNDUP(ecb->dte_size,
			    rec->dtrd_alignment);
			rec->dtrd_offset = ecb->dte_size;
			if (ecb->dte_size + rec->dtrd_size < ecb->dte_size)
				return (EINVAL);
			ecb->dte_size += rec->dtrd_size;
			ecb->dte_needed = MAX(ecb->dte_needed, ecb->dte_size);
		}
	}

	if ((act = ecb->dte_action) != NULL &&
	    !(act->dta_kind == DTRACEACT_SPECULATE && act->dta_next == NULL) &&
	    ecb->dte_size == sizeof (dtrace_rechdr_t)) {
		/*
		 * If the size is still sizeof (dtrace_rechdr_t), then all
		 * actions store no data; set the size to 0.
		 */
		ecb->dte_size = 0;
	}

	ecb->dte_size = P2ROUNDUP(ecb->dte_size, sizeof (dtrace_epid_t));
	ecb->dte_needed = P2ROUNDUP(ecb->dte_needed, (sizeof (dtrace_epid_t)));
	ecb->dte_state->dts_needed = MAX(ecb->dte_state->dts_needed,
	    ecb->dte_needed);
	return (0);
}

static dtrace_action_t *
dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
{
	dtrace_aggregation_t *agg;
	size_t size = sizeof (uint64_t);
	int ntuple = desc->dtad_ntuple;
	dtrace_action_t *act;
	dtrace_recdesc_t *frec;
	dtrace_aggid_t aggid;
	dtrace_state_t *state = ecb->dte_state;

	agg = kmem_zalloc(sizeof (dtrace_aggregation_t), KM_SLEEP);
	agg->dtag_ecb = ecb;

	ASSERT(DTRACEACT_ISAGG(desc->dtad_kind));

	switch (desc->dtad_kind) {
	case DTRACEAGG_MIN:
		agg->dtag_initial = INT64_MAX;
		agg->dtag_aggregate = dtrace_aggregate_min;
		break;

	case DTRACEAGG_MAX:
		agg->dtag_initial = INT64_MIN;
		agg->dtag_aggregate = dtrace_aggregate_max;
		break;

	case DTRACEAGG_COUNT:
		agg->dtag_aggregate = dtrace_aggregate_count;
		break;

	case DTRACEAGG_QUANTIZE:
		agg->dtag_aggregate = dtrace_aggregate_quantize;
		size = (((sizeof (uint64_t) * NBBY) - 1) * 2 + 1) *
		    sizeof (uint64_t);
		break;

	case DTRACEAGG_LQUANTIZE: {
		uint16_t step = DTRACE_LQUANTIZE_STEP(desc->dtad_arg);
		uint16_t levels = DTRACE_LQUANTIZE_LEVELS(desc->dtad_arg);

		agg->dtag_initial = desc->dtad_arg;
		agg->dtag_aggregate = dtrace_aggregate_lquantize;

		if (step == 0 || levels == 0)
			goto err;

		size = levels * sizeof (uint64_t) + 3 * sizeof (uint64_t);
		break;
	}

	case DTRACEAGG_LLQUANTIZE: {
		uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(desc->dtad_arg);
		uint16_t low = DTRACE_LLQUANTIZE_LOW(desc->dtad_arg);
		uint16_t high = DTRACE_LLQUANTIZE_HIGH(desc->dtad_arg);
		uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(desc->dtad_arg);
		int64_t v;

		agg->dtag_initial = desc->dtad_arg;
		agg->dtag_aggregate = dtrace_aggregate_llquantize;

		if (factor < 2 || low >= high || nsteps < factor)
			goto err;

		/*
		 * Now check that the number of steps evenly divides a power
		 * of the factor.  (This assures both integer bucket size and
		 * linearity within each magnitude.)
		 */
		for (v = factor; v < nsteps; v *= factor)
			continue;

		if ((v % nsteps) || (nsteps % factor))
			goto err;

		size = (dtrace_aggregate_llquantize_bucket(factor,
		    low, high, nsteps, INT64_MAX) + 2) * sizeof (uint64_t);
		break;
	}

	case DTRACEAGG_AVG:
		agg->dtag_aggregate = dtrace_aggregate_avg;
		size = sizeof (uint64_t) * 2;
		break;

	case DTRACEAGG_STDDEV:
		agg->dtag_aggregate = dtrace_aggregate_stddev;
		size = sizeof (uint64_t) * 4;
		break;

	case DTRACEAGG_SUM:
		agg->dtag_aggregate = dtrace_aggregate_sum;
		break;

	default:
		goto err;
	}

	agg->dtag_action.dta_rec.dtrd_size = size;

	if (ntuple == 0)
		goto err;

	/*
	 * We must make sure that we have enough actions for the n-tuple.
	 */
	for (act = ecb->dte_action_last; act != NULL; act = act->dta_prev) {
		if (DTRACEACT_ISAGG(act->dta_kind))
			break;

		if (--ntuple == 0) {
			/*
			 * This is the action with which our n-tuple begins.
			 */
			agg->dtag_first = act;
			goto success;
		}
	}

	/*
	 * This n-tuple is short by ntuple elements.  Return failure.
	 */
	ASSERT(ntuple != 0);
err:
	kmem_free(agg, sizeof (dtrace_aggregation_t));
	return (NULL);

success:
	/*
	 * If the last action in the tuple has a size of zero, it's actually
	 * an expression argument for the aggregating action.
	 */
	ASSERT(ecb->dte_action_last != NULL);
	act = ecb->dte_action_last;

	if (act->dta_kind == DTRACEACT_DIFEXPR) {
		ASSERT(act->dta_difo != NULL);

		if (act->dta_difo->dtdo_rtype.dtdt_size == 0)
			agg->dtag_hasarg = 1;
	}

	/*
	 * We need to allocate an id for this aggregation.
	 */
#ifdef illumos
	aggid = (dtrace_aggid_t)(uintptr_t)vmem_alloc(state->dts_aggid_arena, 1,
	    VM_BESTFIT | VM_SLEEP);
#else
	aggid = alloc_unr(state->dts_aggid_arena);
#endif

	if (aggid - 1 >= state->dts_naggregations) {
		dtrace_aggregation_t **oaggs = state->dts_aggregations;
		dtrace_aggregation_t **aggs;
		int naggs = state->dts_naggregations << 1;
		int onaggs = state->dts_naggregations;

		ASSERT(aggid == state->dts_naggregations + 1);

		if (naggs == 0) {
			ASSERT(oaggs == NULL);
			naggs = 1;
		}

		aggs = kmem_zalloc(naggs * sizeof (*aggs), KM_SLEEP);

		if (oaggs != NULL) {
			bcopy(oaggs, aggs, onaggs * sizeof (*aggs));
			kmem_free(oaggs, onaggs * sizeof (*aggs));
		}

		state->dts_aggregations = aggs;
		state->dts_naggregations = naggs;
	}

	ASSERT(state->dts_aggregations[aggid - 1] == NULL);
	state->dts_aggregations[(agg->dtag_id = aggid) - 1] = agg;

	frec = &agg->dtag_first->dta_rec;
	if (frec->dtrd_alignment < sizeof (dtrace_aggid_t))
		frec->dtrd_alignment = sizeof (dtrace_aggid_t);

	for (act = agg->dtag_first; act != NULL; act = act->dta_next) {
		ASSERT(!act->dta_intuple);
		act->dta_intuple = 1;
	}

	return (&agg->dtag_action);
}

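/*
 * To illustrate the sizing arithmetic above with lquantize():  its
 * buffer holds one uint64_t per level plus three more uint64_t's --
 * one for the encoded argument word (base, step and level count) and
 * two for the underflow and overflow buckets.
 */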
static void
dtrace_ecb_aggregation_destroy(dtrace_ecb_t *ecb, dtrace_action_t *act)
{
	dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act;
	dtrace_state_t *state = ecb->dte_state;
	dtrace_aggid_t aggid = agg->dtag_id;

	ASSERT(DTRACEACT_ISAGG(act->dta_kind));
#ifdef illumos
	vmem_free(state->dts_aggid_arena, (void *)(uintptr_t)aggid, 1);
#else
	free_unr(state->dts_aggid_arena, aggid);
#endif

	ASSERT(state->dts_aggregations[aggid - 1] == agg);
	state->dts_aggregations[aggid - 1] = NULL;

	kmem_free(agg, sizeof (dtrace_aggregation_t));
}

static int
dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
{
	dtrace_action_t *action, *last;
	dtrace_difo_t *dp = desc->dtad_difo;
	uint32_t size = 0, align = sizeof (uint8_t), mask;
	uint16_t format = 0;
	dtrace_recdesc_t *rec;
	dtrace_state_t *state = ecb->dte_state;
	dtrace_optval_t *opt = state->dts_options, nframes = 0, strsize;
	uint64_t arg = desc->dtad_arg;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(ecb->dte_action == NULL || ecb->dte_action->dta_refcnt == 1);

	if (DTRACEACT_ISAGG(desc->dtad_kind)) {
		/*
		 * If this is an aggregating action, there must be neither
		 * a speculate nor a commit on the action chain.
		 */
		dtrace_action_t *act;

		for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
			if (act->dta_kind == DTRACEACT_COMMIT)
				return (EINVAL);

			if (act->dta_kind == DTRACEACT_SPECULATE)
				return (EINVAL);
		}

		action = dtrace_ecb_aggregation_create(ecb, desc);

		if (action == NULL)
			return (EINVAL);
	} else {
		if (DTRACEACT_ISDESTRUCTIVE(desc->dtad_kind) ||
		    (desc->dtad_kind == DTRACEACT_DIFEXPR &&
		    dp != NULL && dp->dtdo_destructive)) {
			state->dts_destructive = 1;
		}

		switch (desc->dtad_kind) {
		case DTRACEACT_PRINTF:
		case DTRACEACT_PRINTA:
		case DTRACEACT_SYSTEM:
		case DTRACEACT_FREOPEN:
		case DTRACEACT_DIFEXPR:
			/*
			 * We know that our arg is a string -- turn it into a
			 * format.
			 */
			if (arg == 0) {
				ASSERT(desc->dtad_kind == DTRACEACT_PRINTA ||
				    desc->dtad_kind == DTRACEACT_DIFEXPR);
				format = 0;
			} else {
				ASSERT(arg != 0);
#ifdef illumos
				ASSERT(arg > KERNELBASE);
#endif
				format = dtrace_format_add(state,
				    (char *)(uintptr_t)arg);
			}

			/*FALLTHROUGH*/
		case DTRACEACT_LIBACT:
		case DTRACEACT_TRACEMEM:
		case DTRACEACT_TRACEMEM_DYNSIZE:
			if (dp == NULL)
				return (EINVAL);

			if ((size = dp->dtdo_rtype.dtdt_size) != 0)
				break;

			if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) {
				if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
					return (EINVAL);

				size = opt[DTRACEOPT_STRSIZE];
			}

			break;

		case DTRACEACT_STACK:
			if ((nframes = arg) == 0) {
				nframes = opt[DTRACEOPT_STACKFRAMES];
				ASSERT(nframes > 0);
				arg = nframes;
			}

			size = nframes * sizeof (pc_t);
			break;

		case DTRACEACT_JSTACK:
			if ((strsize = DTRACE_USTACK_STRSIZE(arg)) == 0)
				strsize = opt[DTRACEOPT_JSTACKSTRSIZE];

			if ((nframes = DTRACE_USTACK_NFRAMES(arg)) == 0)
				nframes = opt[DTRACEOPT_JSTACKFRAMES];

			arg = DTRACE_USTACK_ARG(nframes, strsize);

			/*FALLTHROUGH*/
		case DTRACEACT_USTACK:
			if (desc->dtad_kind != DTRACEACT_JSTACK &&
			    (nframes = DTRACE_USTACK_NFRAMES(arg)) == 0) {
				strsize = DTRACE_USTACK_STRSIZE(arg);
				nframes = opt[DTRACEOPT_USTACKFRAMES];
				ASSERT(nframes > 0);
				arg = DTRACE_USTACK_ARG(nframes, strsize);
			}

			/*
			 * Save a slot for the pid.
			 */
			size = (nframes + 1) * sizeof (uint64_t);
			size += DTRACE_USTACK_STRSIZE(arg);
			size = P2ROUNDUP(size, (uint32_t)(sizeof (uintptr_t)));

			break;

		case DTRACEACT_SYM:
		case DTRACEACT_MOD:
			if (dp == NULL || ((size = dp->dtdo_rtype.dtdt_size) !=
			    sizeof (uint64_t)) ||
			    (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
				return (EINVAL);
			break;

		case DTRACEACT_USYM:
		case DTRACEACT_UMOD:
		case DTRACEACT_UADDR:
			if (dp == NULL ||
			    (dp->dtdo_rtype.dtdt_size != sizeof (uint64_t)) ||
			    (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
				return (EINVAL);

			/*
			 * We have a slot for the pid, plus a slot for the
			 * argument.  To keep things simple (aligned with
			 * bitness-neutral sizing), we store each as a 64-bit
			 * quantity.
			 */
			size = 2 * sizeof (uint64_t);
			break;

		case DTRACEACT_STOP:
		case DTRACEACT_BREAKPOINT:
		case DTRACEACT_PANIC:
			break;

		case DTRACEACT_CHILL:
		case DTRACEACT_DISCARD:
		case DTRACEACT_RAISE:
			if (dp == NULL)
				return (EINVAL);
			break;

		case DTRACEACT_EXIT:
			if (dp == NULL ||
			    (size = dp->dtdo_rtype.dtdt_size) != sizeof (int) ||
			    (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
				return (EINVAL);
			break;

		case DTRACEACT_SPECULATE:
			if (ecb->dte_size > sizeof (dtrace_rechdr_t))
				return (EINVAL);

			if (dp == NULL)
				return (EINVAL);

			state->dts_speculates = 1;
			break;

		case DTRACEACT_PRINTM:
			size = dp->dtdo_rtype.dtdt_size;
			break;

		case DTRACEACT_COMMIT: {
			dtrace_action_t *act = ecb->dte_action;

			for (; act != NULL; act = act->dta_next) {
				if (act->dta_kind == DTRACEACT_COMMIT)
					return (EINVAL);
			}

			if (dp == NULL)
				return (EINVAL);
			break;
		}

		default:
			return (EINVAL);
		}

		if (size != 0 || desc->dtad_kind == DTRACEACT_SPECULATE) {
			/*
			 * If this is a data-storing action or a speculate,
			 * we must be sure that there isn't a commit on the
			 * action chain.
			 */
			dtrace_action_t *act = ecb->dte_action;

			for (; act != NULL; act = act->dta_next) {
				if (act->dta_kind == DTRACEACT_COMMIT)
					return (EINVAL);
			}
		}

		action = kmem_zalloc(sizeof (dtrace_action_t), KM_SLEEP);
		action->dta_rec.dtrd_size = size;
	}

	action->dta_refcnt = 1;
	rec = &action->dta_rec;
	size = rec->dtrd_size;

	for (mask = sizeof (uint64_t) - 1; size != 0 && mask > 0; mask >>= 1) {
		if (!(size & mask)) {
			align = mask + 1;
			break;
		}
	}

	action->dta_kind = desc->dtad_kind;

	if ((action->dta_difo = dp) != NULL)
		dtrace_difo_hold(dp);

	rec->dtrd_action = action->dta_kind;
	rec->dtrd_arg = arg;
	rec->dtrd_uarg = desc->dtad_uarg;
	rec->dtrd_alignment = (uint16_t)align;
	rec->dtrd_format = format;

	if ((last = ecb->dte_action_last) != NULL) {
		ASSERT(ecb->dte_action != NULL);
		action->dta_prev = last;
		last->dta_next = action;
	} else {
		ASSERT(ecb->dte_action == NULL);
		ecb->dte_action = action;
	}

	ecb->dte_action_last = action;

	return (0);
}

static void
dtrace_ecb_action_remove(dtrace_ecb_t *ecb)
{
	dtrace_action_t *act = ecb->dte_action, *next;
	dtrace_vstate_t *vstate = &ecb->dte_state->dts_vstate;
	dtrace_difo_t *dp;
	uint16_t format;

	if (act != NULL && act->dta_refcnt > 1) {
		ASSERT(act->dta_next == NULL || act->dta_next->dta_refcnt == 1);
		act->dta_refcnt--;
	} else {
		for (; act != NULL; act = next) {
			next = act->dta_next;
			ASSERT(next != NULL || act == ecb->dte_action_last);
			ASSERT(act->dta_refcnt == 1);

			if ((format = act->dta_rec.dtrd_format) != 0)
				dtrace_format_remove(ecb->dte_state, format);

			if ((dp = act->dta_difo) != NULL)
				dtrace_difo_release(dp, vstate);

			if (DTRACEACT_ISAGG(act->dta_kind)) {
				dtrace_ecb_aggregation_destroy(ecb, act);
			} else {
				kmem_free(act, sizeof (dtrace_action_t));
			}
		}
	}

	ecb->dte_action = NULL;
	ecb->dte_action_last = NULL;
	ecb->dte_size = 0;
}

static void
dtrace_ecb_disable(dtrace_ecb_t *ecb)
{
	/*
	 * We disable the ECB by removing it from its probe.
	 */
	dtrace_ecb_t *pecb, *prev = NULL;
	dtrace_probe_t *probe = ecb->dte_probe;

	ASSERT(MUTEX_HELD(&dtrace_lock));

	if (probe == NULL) {
		/*
		 * This is the NULL probe; there is nothing to disable.
		 */
		return;
	}

	for (pecb = probe->dtpr_ecb; pecb != NULL; pecb = pecb->dte_next) {
		if (pecb == ecb)
			break;
		prev = pecb;
	}

	ASSERT(pecb != NULL);

	if (prev == NULL) {
		probe->dtpr_ecb = ecb->dte_next;
	} else {
		prev->dte_next = ecb->dte_next;
	}

	if (ecb == probe->dtpr_ecb_last) {
		ASSERT(ecb->dte_next == NULL);
		probe->dtpr_ecb_last = prev;
	}

	/*
	 * The ECB has been disconnected from the probe; now sync to assure
	 * that all CPUs have seen the change before returning.
	 */
	dtrace_sync();

	if (probe->dtpr_ecb == NULL) {
		/*
		 * That was the last ECB on the probe; clear the predicate
		 * cache ID for the probe, disable it and sync one more time
		 * to assure that we'll never hit it again.
		 */
		dtrace_provider_t *prov = probe->dtpr_provider;

		ASSERT(ecb->dte_next == NULL);
		ASSERT(probe->dtpr_ecb_last == NULL);
		probe->dtpr_predcache = DTRACE_CACHEIDNONE;
		prov->dtpv_pops.dtps_disable(prov->dtpv_arg,
		    probe->dtpr_id, probe->dtpr_arg);
		dtrace_sync();
	} else {
		/*
		 * There is at least one ECB remaining on the probe.  If there
		 * is _exactly_ one, set the probe's predicate cache ID to be
		 * the predicate cache ID of the remaining ECB.
		 */
		ASSERT(probe->dtpr_ecb_last != NULL);
		ASSERT(probe->dtpr_predcache == DTRACE_CACHEIDNONE);

		if (probe->dtpr_ecb == probe->dtpr_ecb_last) {
			dtrace_predicate_t *p = probe->dtpr_ecb->dte_predicate;

			ASSERT(probe->dtpr_ecb->dte_next == NULL);

			if (p != NULL)
				probe->dtpr_predcache = p->dtp_cacheid;
		}

		ecb->dte_next = NULL;
	}
}

static void
dtrace_ecb_destroy(dtrace_ecb_t *ecb)
{
	dtrace_state_t *state = ecb->dte_state;
	dtrace_vstate_t *vstate = &state->dts_vstate;
	dtrace_predicate_t *pred;
	dtrace_epid_t epid = ecb->dte_epid;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(ecb->dte_next == NULL);
	ASSERT(ecb->dte_probe == NULL || ecb->dte_probe->dtpr_ecb != ecb);

	if ((pred = ecb->dte_predicate) != NULL)
		dtrace_predicate_release(pred, vstate);

	dtrace_ecb_action_remove(ecb);

	ASSERT(state->dts_ecbs[epid - 1] == ecb);
	state->dts_ecbs[epid - 1] = NULL;

	kmem_free(ecb, sizeof (dtrace_ecb_t));
}

static dtrace_ecb_t *
11923
dtrace_ecb_create(dtrace_state_t *state, dtrace_probe_t *probe,
11924
dtrace_enabling_t *enab)
11925
{
11926
dtrace_ecb_t *ecb;
11927
dtrace_predicate_t *pred;
11928
dtrace_actdesc_t *act;
11929
dtrace_provider_t *prov;
11930
dtrace_ecbdesc_t *desc = enab->dten_current;
11931
11932
ASSERT(MUTEX_HELD(&dtrace_lock));
11933
ASSERT(state != NULL);
11934
11935
ecb = dtrace_ecb_add(state, probe);
11936
ecb->dte_uarg = desc->dted_uarg;
11937
11938
if ((pred = desc->dted_pred.dtpdd_predicate) != NULL) {
11939
dtrace_predicate_hold(pred);
11940
ecb->dte_predicate = pred;
11941
}
11942
11943
if (probe != NULL) {
11944
/*
11945
* If the provider shows more leg than the consumer is old
11946
* enough to see, we need to enable the appropriate implicit
11947
* predicate bits to prevent the ecb from activating at
11948
* revealing times.
11949
*
11950
* Providers specifying DTRACE_PRIV_USER at register time
11951
* are stating that they need the /proc-style privilege
11952
* model to be enforced, and this is what DTRACE_COND_OWNER
11953
* and DTRACE_COND_ZONEOWNER will then do at probe time.
11954
*/
11955
prov = probe->dtpr_provider;
11956
if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLPROC) &&
11957
(prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
11958
ecb->dte_cond |= DTRACE_COND_OWNER;
11959
11960
if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLZONE) &&
11961
(prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
11962
ecb->dte_cond |= DTRACE_COND_ZONEOWNER;
11963
11964
/*
11965
* If the provider shows us kernel innards and the user
11966
* is lacking sufficient privilege, enable the
11967
* DTRACE_COND_USERMODE implicit predicate.
11968
*/
11969
if (!(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) &&
11970
(prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_KERNEL))
11971
ecb->dte_cond |= DTRACE_COND_USERMODE;
11972
}
11973
11974
if (dtrace_ecb_create_cache != NULL) {
11975
/*
11976
* If we have a cached ecb, we'll use its action list instead
11977
* of creating our own (saving both time and space).
11978
*/
11979
dtrace_ecb_t *cached = dtrace_ecb_create_cache;
11980
dtrace_action_t *act = cached->dte_action;
11981
11982
if (act != NULL) {
11983
ASSERT(act->dta_refcnt > 0);
11984
act->dta_refcnt++;
11985
ecb->dte_action = act;
11986
ecb->dte_action_last = cached->dte_action_last;
11987
ecb->dte_needed = cached->dte_needed;
11988
ecb->dte_size = cached->dte_size;
11989
ecb->dte_alignment = cached->dte_alignment;
11990
}
11991
11992
return (ecb);
11993
}
11994
11995
for (act = desc->dted_action; act != NULL; act = act->dtad_next) {
11996
if ((enab->dten_error = dtrace_ecb_action_add(ecb, act)) != 0) {
11997
dtrace_ecb_destroy(ecb);
11998
return (NULL);
11999
}
12000
}
12001
12002
if ((enab->dten_error = dtrace_ecb_resize(ecb)) != 0) {
12003
dtrace_ecb_destroy(ecb);
12004
return (NULL);
12005
}
12006
12007
return (dtrace_ecb_create_cache = ecb);
12008
}
12009
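
/*
 * Note: dtrace_ecb_create_cache allows the ECBs created for successive probes
 * matched by a single enabling to share one reference-counted action list
 * rather than rebuilding it once per probe; the cache is assumed to be
 * cleared before each new enabling description is matched (see
 * dtrace_probe_enable()).
 */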

static int
dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg)
{
	dtrace_ecb_t *ecb;
	dtrace_enabling_t *enab = arg;
	dtrace_state_t *state = enab->dten_vstate->dtvs_state;

	ASSERT(state != NULL);

	if (probe != NULL && probe->dtpr_gen < enab->dten_probegen) {
		/*
		 * This probe was created in a generation for which this
		 * enabling has previously created ECBs; we don't want to
		 * enable it again, so just kick out.
		 */
		return (DTRACE_MATCH_NEXT);
	}

	if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL)
		return (DTRACE_MATCH_DONE);

	dtrace_ecb_enable(ecb);
	return (DTRACE_MATCH_NEXT);
}

static dtrace_ecb_t *
dtrace_epid2ecb(dtrace_state_t *state, dtrace_epid_t id)
{
	dtrace_ecb_t *ecb;

	ASSERT(MUTEX_HELD(&dtrace_lock));

	if (id == 0 || id > state->dts_necbs)
		return (NULL);

	ASSERT(state->dts_necbs > 0 && state->dts_ecbs != NULL);
	ASSERT((ecb = state->dts_ecbs[id - 1]) == NULL || ecb->dte_epid == id);

	return (state->dts_ecbs[id - 1]);
}
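
/*
 * Note: both this lookup and dtrace_aggid2agg() below rely on IDs being
 * allocated starting at 1; 0 is reserved (DTRACE_EPIDNONE in the EPID case,
 * which also tags alignment padding in principal buffers), hence the
 * "id - 1" indexing into the state's lookup arrays.
 */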

static dtrace_aggregation_t *
dtrace_aggid2agg(dtrace_state_t *state, dtrace_aggid_t id)
{
	dtrace_aggregation_t *agg;

	ASSERT(MUTEX_HELD(&dtrace_lock));

	if (id == 0 || id > state->dts_naggregations)
		return (NULL);

	ASSERT(state->dts_naggregations > 0 && state->dts_aggregations != NULL);
	ASSERT((agg = state->dts_aggregations[id - 1]) == NULL ||
	    agg->dtag_id == id);

	return (state->dts_aggregations[id - 1]);
}

/*
 * DTrace Buffer Functions
 *
 * The following functions manipulate DTrace buffers.  Most of these functions
 * are called in the context of establishing or processing consumer state;
 * exceptions are explicitly noted.
 */

/*
 * Note:  called from cross call context.  This function switches the two
 * buffers on a given CPU.  The atomicity of this operation is assured by
 * disabling interrupts while the actual switch takes place; the disabling of
 * interrupts serializes the execution with any execution of dtrace_probe() on
 * the same CPU.
 */
static void
dtrace_buffer_switch(dtrace_buffer_t *buf)
{
	caddr_t tomax = buf->dtb_tomax;
	caddr_t xamot = buf->dtb_xamot;
	dtrace_icookie_t cookie;
	hrtime_t now;

	ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
	ASSERT(!(buf->dtb_flags & DTRACEBUF_RING));

	cookie = dtrace_interrupt_disable();
	now = dtrace_gethrtime();
	buf->dtb_tomax = xamot;
	buf->dtb_xamot = tomax;
	buf->dtb_xamot_drops = buf->dtb_drops;
	buf->dtb_xamot_offset = buf->dtb_offset;
	buf->dtb_xamot_errors = buf->dtb_errors;
	buf->dtb_xamot_flags = buf->dtb_flags;
	buf->dtb_offset = 0;
	buf->dtb_drops = 0;
	buf->dtb_errors = 0;
	buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED);
	buf->dtb_interval = now - buf->dtb_switched;
	buf->dtb_switched = now;
	dtrace_interrupt_enable(cookie);
}
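
/*
 * Note: dtb_tomax is the buffer currently written from probe context, and
 * dtb_xamot ("tomax" reversed) is its inactive twin.  The switch above only
 * exchanges the two pointers and snapshots the counters into their
 * dtb_xamot_* equivalents, so the consumer can copy out the now-inactive
 * buffer while probes continue to fire into the other one.
 */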

/*
 * Note:  called from cross call context.  This function activates a buffer
 * on a CPU.  As with dtrace_buffer_switch(), the atomicity of the operation
 * is guaranteed by the disabling of interrupts.
 */
static void
dtrace_buffer_activate(dtrace_state_t *state)
{
	dtrace_buffer_t *buf;
	dtrace_icookie_t cookie = dtrace_interrupt_disable();

	buf = &state->dts_buffer[curcpu];

	if (buf->dtb_tomax != NULL) {
		/*
		 * We might like to assert that the buffer is marked inactive,
		 * but this isn't necessarily true: the CPU that processes
		 * the BEGIN probe has its buffer activated manually.  In
		 * this case, we take the (harmless) action of re-clearing
		 * the INACTIVE bit.
		 */
		buf->dtb_flags &= ~DTRACEBUF_INACTIVE;
	}

	dtrace_interrupt_enable(cookie);
}

#ifdef __FreeBSD__
/*
 * Activate the specified per-CPU buffer.  This is used instead of
 * dtrace_buffer_activate() when APs have not yet started, i.e. when
 * activating anonymous state.
 */
static void
dtrace_buffer_activate_cpu(dtrace_state_t *state, int cpu)
{

	if (state->dts_buffer[cpu].dtb_tomax != NULL)
		state->dts_buffer[cpu].dtb_flags &= ~DTRACEBUF_INACTIVE;
}
#endif

static int
dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags,
    processorid_t cpu, int *factor)
{
#ifdef illumos
	cpu_t *cp;
#endif
	dtrace_buffer_t *buf;
	int allocated = 0, desired = 0;

#ifdef illumos
	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(MUTEX_HELD(&dtrace_lock));

	*factor = 1;

	if (size > dtrace_nonroot_maxsize &&
	    !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE))
		return (EFBIG);

	cp = cpu_list;

	do {
		if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id)
			continue;

		buf = &bufs[cp->cpu_id];

		/*
		 * If there is already a buffer allocated for this CPU, it
		 * is only possible that this is a DR event.  In this case,
		 * the buffer size must match our specified size.
		 */
		if (buf->dtb_tomax != NULL) {
			ASSERT(buf->dtb_size == size);
			continue;
		}

		ASSERT(buf->dtb_xamot == NULL);

		if ((buf->dtb_tomax = kmem_zalloc(size,
		    KM_NOSLEEP | KM_NORMALPRI)) == NULL)
			goto err;

		buf->dtb_size = size;
		buf->dtb_flags = flags;
		buf->dtb_offset = 0;
		buf->dtb_drops = 0;

		if (flags & DTRACEBUF_NOSWITCH)
			continue;

		if ((buf->dtb_xamot = kmem_zalloc(size,
		    KM_NOSLEEP | KM_NORMALPRI)) == NULL)
			goto err;
	} while ((cp = cp->cpu_next) != cpu_list);

	return (0);

err:
	cp = cpu_list;

	do {
		if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id)
			continue;

		buf = &bufs[cp->cpu_id];
		desired += 2;

		if (buf->dtb_xamot != NULL) {
			ASSERT(buf->dtb_tomax != NULL);
			ASSERT(buf->dtb_size == size);
			kmem_free(buf->dtb_xamot, size);
			allocated++;
		}

		if (buf->dtb_tomax != NULL) {
			ASSERT(buf->dtb_size == size);
			kmem_free(buf->dtb_tomax, size);
			allocated++;
		}

		buf->dtb_tomax = NULL;
		buf->dtb_xamot = NULL;
		buf->dtb_size = 0;
	} while ((cp = cp->cpu_next) != cpu_list);
#else
	int i;

	*factor = 1;
#if defined(__aarch64__) || defined(__amd64__) || defined(__arm__) || \
    defined(__mips__) || defined(__powerpc__) || defined(__riscv)
	/*
	 * FreeBSD isn't good at limiting the amount of memory we
	 * ask to malloc, so let's place a limit here before trying
	 * to do something that might well end in tears at bedtime.
	 */
	int bufsize_percpu_frac = dtrace_bufsize_max_frac * mp_ncpus;
	if (size > physmem * PAGE_SIZE / bufsize_percpu_frac)
		return (ENOMEM);
#endif
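
	/*
	 * Note: the check above caps the requested per-CPU size so that the
	 * total across all CPUs (size * mp_ncpus) cannot exceed
	 * physmem * PAGE_SIZE / dtrace_bufsize_max_frac, i.e. a fixed
	 * fraction of physical memory.
	 */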

	ASSERT(MUTEX_HELD(&dtrace_lock));
	CPU_FOREACH(i) {
		if (cpu != DTRACE_CPUALL && cpu != i)
			continue;

		buf = &bufs[i];

		/*
		 * If there is already a buffer allocated for this CPU, it
		 * is only possible that this is a DR event.  In this case,
		 * the buffer size must match our specified size.
		 */
		if (buf->dtb_tomax != NULL) {
			ASSERT(buf->dtb_size == size);
			continue;
		}

		ASSERT(buf->dtb_xamot == NULL);

		if ((buf->dtb_tomax = kmem_zalloc(size,
		    KM_NOSLEEP | KM_NORMALPRI)) == NULL)
			goto err;

		buf->dtb_size = size;
		buf->dtb_flags = flags;
		buf->dtb_offset = 0;
		buf->dtb_drops = 0;

		if (flags & DTRACEBUF_NOSWITCH)
			continue;

		if ((buf->dtb_xamot = kmem_zalloc(size,
		    KM_NOSLEEP | KM_NORMALPRI)) == NULL)
			goto err;
	}

	return (0);

err:
	/*
	 * Error allocating memory, so free the buffers that were
	 * allocated before the failed allocation.
	 */
	CPU_FOREACH(i) {
		if (cpu != DTRACE_CPUALL && cpu != i)
			continue;

		buf = &bufs[i];
		desired += 2;

		if (buf->dtb_xamot != NULL) {
			ASSERT(buf->dtb_tomax != NULL);
			ASSERT(buf->dtb_size == size);
			kmem_free(buf->dtb_xamot, size);
			allocated++;
		}

		if (buf->dtb_tomax != NULL) {
			ASSERT(buf->dtb_size == size);
			kmem_free(buf->dtb_tomax, size);
			allocated++;
		}

		buf->dtb_tomax = NULL;
		buf->dtb_xamot = NULL;
		buf->dtb_size = 0;
	}
#endif
	*factor = desired / (allocated > 0 ? allocated : 1);

	return (ENOMEM);
}

/*
 * Note:  called from probe context.  This function just increments the drop
 * count on a buffer.  It has been made a function to allow for the
 * possibility of understanding the source of mysterious drop counts.  (A
 * problem for which one may be particularly disappointed that DTrace cannot
 * be used to understand DTrace.)
 */
static void
dtrace_buffer_drop(dtrace_buffer_t *buf)
{
	buf->dtb_drops++;
}

/*
 * Note:  called from probe context.  This function is called to reserve space
 * in a buffer.  If mstate is non-NULL, sets the scratch base and size in the
 * mstate.  Returns the new offset in the buffer, or a negative value if an
 * error has occurred.
 */
static ssize_t
dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align,
    dtrace_state_t *state, dtrace_mstate_t *mstate)
{
	ssize_t offs = buf->dtb_offset, soffs;
	intptr_t woffs;
	caddr_t tomax;
	size_t total;

	if (buf->dtb_flags & DTRACEBUF_INACTIVE)
		return (-1);

	if ((tomax = buf->dtb_tomax) == NULL) {
		dtrace_buffer_drop(buf);
		return (-1);
	}

	if (!(buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL))) {
		while (offs & (align - 1)) {
			/*
			 * Assert that our alignment is off by a number which
			 * is itself sizeof (uint32_t) aligned.
			 */
			ASSERT(!((align - (offs & (align - 1))) &
			    (sizeof (uint32_t) - 1)));
			DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE);
			offs += sizeof (uint32_t);
		}

		if ((soffs = offs + needed) > buf->dtb_size) {
			dtrace_buffer_drop(buf);
			return (-1);
		}

		if (mstate == NULL)
			return (offs);

		mstate->dtms_scratch_base = (uintptr_t)tomax + soffs;
		mstate->dtms_scratch_size = buf->dtb_size - soffs;
		mstate->dtms_scratch_ptr = mstate->dtms_scratch_base;

		return (offs);
	}

	if (buf->dtb_flags & DTRACEBUF_FILL) {
		if (state->dts_activity != DTRACE_ACTIVITY_COOLDOWN &&
		    (buf->dtb_flags & DTRACEBUF_FULL))
			return (-1);
		goto out;
	}

	total = needed + (offs & (align - 1));

	/*
	 * For a ring buffer, life is quite a bit more complicated.  Before
	 * we can store any padding, we need to adjust our wrapping offset.
	 * (If we've never before wrapped or we're not about to, no adjustment
	 * is required.)
	 */
	if ((buf->dtb_flags & DTRACEBUF_WRAPPED) ||
	    offs + total > buf->dtb_size) {
		woffs = buf->dtb_xamot_offset;

		if (offs + total > buf->dtb_size) {
			/*
			 * We can't fit in the end of the buffer.  First, a
			 * sanity check that we can fit in the buffer at all.
			 */
			if (total > buf->dtb_size) {
				dtrace_buffer_drop(buf);
				return (-1);
			}

			/*
			 * We're going to be storing at the top of the buffer,
			 * so now we need to deal with the wrapped offset.  We
			 * only reset our wrapped offset to 0 if it is
			 * currently greater than the current offset.  If it
			 * is less than the current offset, it is because a
			 * previous allocation induced a wrap -- but the
			 * allocation didn't subsequently take the space due
			 * to an error or false predicate evaluation.  In this
			 * case, we'll just leave the wrapped offset alone: if
			 * the wrapped offset hasn't been advanced far enough
			 * for this allocation, it will be adjusted in the
			 * lower loop.
			 */
			if (buf->dtb_flags & DTRACEBUF_WRAPPED) {
				if (woffs >= offs)
					woffs = 0;
			} else {
				woffs = 0;
			}

			/*
			 * Now we know that we're going to be storing to the
			 * top of the buffer and that there is room for us
			 * there.  We need to clear the buffer from the current
			 * offset to the end (there may be old gunk there).
			 */
			while (offs < buf->dtb_size)
				tomax[offs++] = 0;

			/*
			 * We need to set our offset to zero.  And because we
			 * are wrapping, we need to set the bit indicating as
			 * much.  We can also adjust our needed space back
			 * down to the space required by the ECB -- we know
			 * that the top of the buffer is aligned.
			 */
			offs = 0;
			total = needed;
			buf->dtb_flags |= DTRACEBUF_WRAPPED;
		} else {
			/*
			 * There is room for us in the buffer, so we simply
			 * need to check the wrapped offset.
			 */
			if (woffs < offs) {
				/*
				 * The wrapped offset is less than the offset.
				 * This can happen if we allocated buffer space
				 * that induced a wrap, but then we didn't
				 * subsequently take the space due to an error
				 * or false predicate evaluation.  This is
				 * okay; we know that _this_ allocation isn't
				 * going to induce a wrap.  We still can't
				 * reset the wrapped offset to be zero,
				 * however: the space may have been trashed in
				 * the previous failed probe attempt.  But at
				 * least the wrapped offset doesn't need to
				 * be adjusted at all...
				 */
				goto out;
			}
		}

		while (offs + total > woffs) {
			dtrace_epid_t epid = *(uint32_t *)(tomax + woffs);
			size_t size;

			if (epid == DTRACE_EPIDNONE) {
				size = sizeof (uint32_t);
			} else {
				ASSERT3U(epid, <=, state->dts_necbs);
				ASSERT(state->dts_ecbs[epid - 1] != NULL);

				size = state->dts_ecbs[epid - 1]->dte_size;
			}

			ASSERT(woffs + size <= buf->dtb_size);
			ASSERT(size != 0);

			if (woffs + size == buf->dtb_size) {
				/*
				 * We've reached the end of the buffer; we want
				 * to set the wrapped offset to 0 and break
				 * out.  However, if the offs is 0, then we're
				 * in a strange edge-condition:  the amount of
				 * space that we want to reserve plus the size
				 * of the record that we're overwriting is
				 * greater than the size of the buffer.  This
				 * is problematic because if we reserve the
				 * space but subsequently don't consume it (due
				 * to a failed predicate or error) the wrapped
				 * offset will be 0 -- yet the EPID at offset 0
				 * will not be committed.  This situation is
				 * relatively easy to deal with:  if we're in
				 * this case, the buffer is indistinguishable
				 * from one that hasn't wrapped; we need only
				 * finish the job by clearing the wrapped bit,
				 * explicitly setting the offset to be 0, and
				 * zero'ing out the old data in the buffer.
				 */
				if (offs == 0) {
					buf->dtb_flags &= ~DTRACEBUF_WRAPPED;
					buf->dtb_offset = 0;
					woffs = total;

					while (woffs < buf->dtb_size)
						tomax[woffs++] = 0;
				}

				woffs = 0;
				break;
			}

			woffs += size;
		}

		/*
		 * We have a wrapped offset.  It may be that the wrapped offset
		 * has become zero -- that's okay.
		 */
		buf->dtb_xamot_offset = woffs;
	}

out:
	/*
	 * Now we can plow the buffer with any necessary padding.
	 */
	while (offs & (align - 1)) {
		/*
		 * Assert that our alignment is off by a number which
		 * is itself sizeof (uint32_t) aligned.
		 */
		ASSERT(!((align - (offs & (align - 1))) &
		    (sizeof (uint32_t) - 1)));
		DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE);
		offs += sizeof (uint32_t);
	}

	if (buf->dtb_flags & DTRACEBUF_FILL) {
		if (offs + needed > buf->dtb_size - state->dts_reserve) {
			buf->dtb_flags |= DTRACEBUF_FULL;
			return (-1);
		}
	}

	if (mstate == NULL)
		return (offs);

	/*
	 * For ring buffers and fill buffers, the scratch space is always
	 * the inactive buffer.
	 */
	mstate->dtms_scratch_base = (uintptr_t)buf->dtb_xamot;
	mstate->dtms_scratch_size = buf->dtb_size;
	mstate->dtms_scratch_ptr = mstate->dtms_scratch_base;

	return (offs);
}
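
/*
 * Note: padding in a principal buffer is written as 32-bit DTRACE_EPIDNONE
 * words, which is why alignment in dtrace_buffer_reserve() must always
 * advance in sizeof (uint32_t) steps.  For example, reserving 8-byte-aligned
 * space at offset 4 stores one DTRACE_EPIDNONE word at offset 4 and returns
 * offset 8; the consumer skips such words when walking the buffer.
 */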

static void
dtrace_buffer_polish(dtrace_buffer_t *buf)
{
	ASSERT(buf->dtb_flags & DTRACEBUF_RING);
	ASSERT(MUTEX_HELD(&dtrace_lock));

	if (!(buf->dtb_flags & DTRACEBUF_WRAPPED))
		return;

	/*
	 * We need to polish the ring buffer.  There are three cases:
	 *
	 * - The first (and presumably most common) is that there is no gap
	 *   between the buffer offset and the wrapped offset.  In this case,
	 *   there is nothing in the buffer that isn't valid data; we can
	 *   mark the buffer as polished and return.
	 *
	 * - The second (less common than the first but still more common
	 *   than the third) is that there is a gap between the buffer offset
	 *   and the wrapped offset, and the wrapped offset is larger than the
	 *   buffer offset.  This can happen because of an alignment issue, or
	 *   can happen because of a call to dtrace_buffer_reserve() that
	 *   didn't subsequently consume the buffer space.  In this case,
	 *   we need to zero the data from the buffer offset to the wrapped
	 *   offset.
	 *
	 * - The third (and least common) is that there is a gap between the
	 *   buffer offset and the wrapped offset, but the wrapped offset is
	 *   _less_ than the buffer offset.  This can only happen because a
	 *   call to dtrace_buffer_reserve() induced a wrap, but the space
	 *   was not subsequently consumed.  In this case, we need to zero the
	 *   space from the offset to the end of the buffer _and_ from the
	 *   top of the buffer to the wrapped offset.
	 */
	if (buf->dtb_offset < buf->dtb_xamot_offset) {
		bzero(buf->dtb_tomax + buf->dtb_offset,
		    buf->dtb_xamot_offset - buf->dtb_offset);
	}

	if (buf->dtb_offset > buf->dtb_xamot_offset) {
		bzero(buf->dtb_tomax + buf->dtb_offset,
		    buf->dtb_size - buf->dtb_offset);
		bzero(buf->dtb_tomax, buf->dtb_xamot_offset);
	}
}

/*
 * This routine determines if data generated at the specified time has likely
 * been entirely consumed at user-level.  This routine is called to determine
 * if an ECB on a defunct probe (but for an active enabling) can be safely
 * disabled and destroyed.
 */
static int
dtrace_buffer_consumed(dtrace_buffer_t *bufs, hrtime_t when)
{
	int i;

	CPU_FOREACH(i) {
		dtrace_buffer_t *buf = &bufs[i];

		if (buf->dtb_size == 0)
			continue;

		if (buf->dtb_flags & DTRACEBUF_RING)
			return (0);

		if (!buf->dtb_switched && buf->dtb_offset != 0)
			return (0);

		if (buf->dtb_switched - buf->dtb_interval < when)
			return (0);
	}

	return (1);
}

static void
dtrace_buffer_free(dtrace_buffer_t *bufs)
{
	int i;

	CPU_FOREACH(i) {
		dtrace_buffer_t *buf = &bufs[i];

		if (buf->dtb_tomax == NULL) {
			ASSERT(buf->dtb_xamot == NULL);
			ASSERT(buf->dtb_size == 0);
			continue;
		}

		if (buf->dtb_xamot != NULL) {
			ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
			kmem_free(buf->dtb_xamot, buf->dtb_size);
		}

		kmem_free(buf->dtb_tomax, buf->dtb_size);
		buf->dtb_size = 0;
		buf->dtb_tomax = NULL;
		buf->dtb_xamot = NULL;
	}
}

/*
 * DTrace Enabling Functions
 */
static dtrace_enabling_t *
dtrace_enabling_create(dtrace_vstate_t *vstate)
{
	dtrace_enabling_t *enab;

	enab = kmem_zalloc(sizeof (dtrace_enabling_t), KM_SLEEP);
	enab->dten_vstate = vstate;

	return (enab);
}

static void
dtrace_enabling_add(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb)
{
	dtrace_ecbdesc_t **ndesc;
	size_t osize, nsize;

	/*
	 * We can't add to enablings after we've enabled them, or after we've
	 * retained them.
	 */
	ASSERT(enab->dten_probegen == 0);
	ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL);

	if (enab->dten_ndesc < enab->dten_maxdesc) {
		enab->dten_desc[enab->dten_ndesc++] = ecb;
		return;
	}

	osize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *);

	if (enab->dten_maxdesc == 0) {
		enab->dten_maxdesc = 1;
	} else {
		enab->dten_maxdesc <<= 1;
	}

	ASSERT(enab->dten_ndesc < enab->dten_maxdesc);

	nsize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *);
	ndesc = kmem_zalloc(nsize, KM_SLEEP);
	bcopy(enab->dten_desc, ndesc, osize);
	if (enab->dten_desc != NULL)
		kmem_free(enab->dten_desc, osize);

	enab->dten_desc = ndesc;
	enab->dten_desc[enab->dten_ndesc++] = ecb;
}
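
/*
 * Note: dten_maxdesc grows geometrically (1, 2, 4, ...), so adding n ECB
 * descriptions to an enabling costs O(n) copied pointers in total, and the
 * descriptor array is reallocated only O(log n) times.
 */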

static void
dtrace_enabling_addlike(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb,
    dtrace_probedesc_t *pd)
{
	dtrace_ecbdesc_t *new;
	dtrace_predicate_t *pred;
	dtrace_actdesc_t *act;

	/*
	 * We're going to create a new ECB description that matches the
	 * specified ECB in every way, but has the specified probe description.
	 */
	new = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP);

	if ((pred = ecb->dted_pred.dtpdd_predicate) != NULL)
		dtrace_predicate_hold(pred);

	for (act = ecb->dted_action; act != NULL; act = act->dtad_next)
		dtrace_actdesc_hold(act);

	new->dted_action = ecb->dted_action;
	new->dted_pred = ecb->dted_pred;
	new->dted_probe = *pd;
	new->dted_uarg = ecb->dted_uarg;

	dtrace_enabling_add(enab, new);
}

static void
dtrace_enabling_dump(dtrace_enabling_t *enab)
{
	int i;

	for (i = 0; i < enab->dten_ndesc; i++) {
		dtrace_probedesc_t *desc = &enab->dten_desc[i]->dted_probe;

#ifdef __FreeBSD__
		printf("dtrace: enabling probe %d (%s:%s:%s:%s)\n", i,
		    desc->dtpd_provider, desc->dtpd_mod,
		    desc->dtpd_func, desc->dtpd_name);
#else
		cmn_err(CE_NOTE, "enabling probe %d (%s:%s:%s:%s)", i,
		    desc->dtpd_provider, desc->dtpd_mod,
		    desc->dtpd_func, desc->dtpd_name);
#endif
	}
}

static void
dtrace_enabling_destroy(dtrace_enabling_t *enab)
{
	int i;
	dtrace_ecbdesc_t *ep;
	dtrace_vstate_t *vstate = enab->dten_vstate;

	ASSERT(MUTEX_HELD(&dtrace_lock));

	for (i = 0; i < enab->dten_ndesc; i++) {
		dtrace_actdesc_t *act, *next;
		dtrace_predicate_t *pred;

		ep = enab->dten_desc[i];

		if ((pred = ep->dted_pred.dtpdd_predicate) != NULL)
			dtrace_predicate_release(pred, vstate);

		for (act = ep->dted_action; act != NULL; act = next) {
			next = act->dtad_next;
			dtrace_actdesc_release(act, vstate);
		}

		kmem_free(ep, sizeof (dtrace_ecbdesc_t));
	}

	if (enab->dten_desc != NULL)
		kmem_free(enab->dten_desc,
		    enab->dten_maxdesc * sizeof (dtrace_enabling_t *));

	/*
	 * If this was a retained enabling, decrement the dts_nretained count
	 * and take it off of the dtrace_retained list.
	 */
	if (enab->dten_prev != NULL || enab->dten_next != NULL ||
	    dtrace_retained == enab) {
		ASSERT(enab->dten_vstate->dtvs_state != NULL);
		ASSERT(enab->dten_vstate->dtvs_state->dts_nretained > 0);
		enab->dten_vstate->dtvs_state->dts_nretained--;
		dtrace_retained_gen++;
	}

	if (enab->dten_prev == NULL) {
		if (dtrace_retained == enab) {
			dtrace_retained = enab->dten_next;

			if (dtrace_retained != NULL)
				dtrace_retained->dten_prev = NULL;
		}
	} else {
		ASSERT(enab != dtrace_retained);
		ASSERT(dtrace_retained != NULL);
		enab->dten_prev->dten_next = enab->dten_next;
	}

	if (enab->dten_next != NULL) {
		ASSERT(dtrace_retained != NULL);
		enab->dten_next->dten_prev = enab->dten_prev;
	}

	kmem_free(enab, sizeof (dtrace_enabling_t));
}

static int
dtrace_enabling_retain(dtrace_enabling_t *enab)
{
	dtrace_state_t *state;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL);
	ASSERT(enab->dten_vstate != NULL);

	state = enab->dten_vstate->dtvs_state;
	ASSERT(state != NULL);

	/*
	 * We only allow each state to retain dtrace_retain_max enablings.
	 */
	if (state->dts_nretained >= dtrace_retain_max)
		return (ENOSPC);

	state->dts_nretained++;
	dtrace_retained_gen++;

	if (dtrace_retained == NULL) {
		dtrace_retained = enab;
		return (0);
	}

	enab->dten_next = dtrace_retained;
	dtrace_retained->dten_prev = enab;
	dtrace_retained = enab;

	return (0);
}

static int
dtrace_enabling_replicate(dtrace_state_t *state, dtrace_probedesc_t *match,
    dtrace_probedesc_t *create)
{
	dtrace_enabling_t *new, *enab;
	int found = 0, err = ENOENT;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(strlen(match->dtpd_provider) < DTRACE_PROVNAMELEN);
	ASSERT(strlen(match->dtpd_mod) < DTRACE_MODNAMELEN);
	ASSERT(strlen(match->dtpd_func) < DTRACE_FUNCNAMELEN);
	ASSERT(strlen(match->dtpd_name) < DTRACE_NAMELEN);

	new = dtrace_enabling_create(&state->dts_vstate);

	/*
	 * Iterate over all retained enablings, looking for enablings that
	 * match the specified state.
	 */
	for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
		int i;

		/*
		 * dtvs_state can only be NULL for helper enablings -- and
		 * helper enablings can't be retained.
		 */
		ASSERT(enab->dten_vstate->dtvs_state != NULL);

		if (enab->dten_vstate->dtvs_state != state)
			continue;

		/*
		 * Now iterate over each probe description; we're looking for
		 * an exact match to the specified probe description.
		 */
		for (i = 0; i < enab->dten_ndesc; i++) {
			dtrace_ecbdesc_t *ep = enab->dten_desc[i];
			dtrace_probedesc_t *pd = &ep->dted_probe;

			if (strcmp(pd->dtpd_provider, match->dtpd_provider))
				continue;

			if (strcmp(pd->dtpd_mod, match->dtpd_mod))
				continue;

			if (strcmp(pd->dtpd_func, match->dtpd_func))
				continue;

			if (strcmp(pd->dtpd_name, match->dtpd_name))
				continue;

			/*
			 * We have a winning probe!  Add it to our growing
			 * enabling.
			 */
			found = 1;
			dtrace_enabling_addlike(new, ep, create);
		}
	}

	if (!found || (err = dtrace_enabling_retain(new)) != 0) {
		dtrace_enabling_destroy(new);
		return (err);
	}

	return (0);
}

static void
dtrace_enabling_retract(dtrace_state_t *state)
{
	dtrace_enabling_t *enab, *next;

	ASSERT(MUTEX_HELD(&dtrace_lock));

	/*
	 * Iterate over all retained enablings, destroying the enablings
	 * retained for the specified state.
	 */
	for (enab = dtrace_retained; enab != NULL; enab = next) {
		next = enab->dten_next;

		/*
		 * dtvs_state can only be NULL for helper enablings -- and
		 * helper enablings can't be retained.
		 */
		ASSERT(enab->dten_vstate->dtvs_state != NULL);

		if (enab->dten_vstate->dtvs_state == state) {
			ASSERT(state->dts_nretained > 0);
			dtrace_enabling_destroy(enab);
		}
	}

	ASSERT(state->dts_nretained == 0);
}

static int
dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched)
{
	int i = 0;
	int matched = 0;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(MUTEX_HELD(&dtrace_lock));

	for (i = 0; i < enab->dten_ndesc; i++) {
		dtrace_ecbdesc_t *ep = enab->dten_desc[i];

		enab->dten_current = ep;
		enab->dten_error = 0;

		matched += dtrace_probe_enable(&ep->dted_probe, enab);

		if (enab->dten_error != 0) {
			/*
			 * If we get an error half-way through enabling the
			 * probes, we kick out -- perhaps with some number of
			 * them enabled.  Leaving enabled probes enabled may
			 * be slightly confusing for user-level, but we expect
			 * that no one will attempt to actually drive on in
			 * the face of such errors.  If this is an anonymous
			 * enabling (indicated with a NULL nmatched pointer),
			 * we cmn_err() a message.  We aren't expecting to
			 * get such an error -- to the extent that one can
			 * occur at all, it would be the result of corrupted
			 * DOF in the driver properties.
			 */
			if (nmatched == NULL) {
				cmn_err(CE_WARN, "dtrace_enabling_match() "
				    "error on %p: %d", (void *)ep,
				    enab->dten_error);
			}

			return (enab->dten_error);
		}
	}

	enab->dten_probegen = dtrace_probegen;
	if (nmatched != NULL)
		*nmatched = matched;

	return (0);
}

static void
dtrace_enabling_matchall_task(void *args __unused)
{
	dtrace_enabling_matchall();
}

static void
dtrace_enabling_matchall(void)
{
	dtrace_enabling_t *enab;

	mutex_enter(&cpu_lock);
	mutex_enter(&dtrace_lock);

	/*
	 * Iterate over all retained enablings to see if any probes match
	 * against them.  We only perform this operation on enablings for which
	 * we have sufficient permissions by virtue of being in the global zone
	 * or in the same zone as the DTrace client.  Because we can be called
	 * after dtrace_detach() has been called, we cannot assert that there
	 * are retained enablings.  We can safely load from dtrace_retained,
	 * however:  the taskq_destroy() at the end of dtrace_detach() will
	 * block pending our completion.
	 */
	for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
#ifdef illumos
		cred_t *cr = enab->dten_vstate->dtvs_state->dts_cred.dcr_cred;

		if (INGLOBALZONE(curproc) ||
		    cr != NULL && getzoneid() == crgetzoneid(cr))
#endif
			(void) dtrace_enabling_match(enab, NULL);
	}

	mutex_exit(&dtrace_lock);
	mutex_exit(&cpu_lock);
}

/*
 * If an enabling is to be enabled without having matched probes (that is, if
 * dtrace_state_go() is to be called on the underlying dtrace_state_t), the
 * enabling must be _primed_ by creating an ECB for every ECB description.
 * This must be done to assure that we know the number of speculations, the
 * number of aggregations, the minimum buffer size needed, etc. before we
 * transition out of DTRACE_ACTIVITY_INACTIVE.  To do this without actually
 * enabling any probes, we create ECBs for every ECB description, but with a
 * NULL probe -- which is exactly what this function does.
 */
static void
dtrace_enabling_prime(dtrace_state_t *state)
{
	dtrace_enabling_t *enab;
	int i;

	for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
		ASSERT(enab->dten_vstate->dtvs_state != NULL);

		if (enab->dten_vstate->dtvs_state != state)
			continue;

		/*
		 * We don't want to prime an enabling more than once, lest
		 * we allow a malicious user to induce resource exhaustion.
		 * (The ECBs that result from priming an enabling aren't
		 * leaked -- but they also aren't deallocated until the
		 * consumer state is destroyed.)
		 */
		if (enab->dten_primed)
			continue;

		for (i = 0; i < enab->dten_ndesc; i++) {
			enab->dten_current = enab->dten_desc[i];
			(void) dtrace_probe_enable(NULL, enab);
		}

		enab->dten_primed = 1;
	}
}

/*
 * Called to indicate that probes should be provided due to retained
 * enablings.  This is implemented in terms of dtrace_probe_provide(), but it
 * must take an initial lap through the enabling calling the dtps_provide()
 * entry point explicitly to allow for autocreated probes.
 */
static void
dtrace_enabling_provide(dtrace_provider_t *prv)
{
	int i, all = 0;
	dtrace_probedesc_t desc;
	dtrace_genid_t gen;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(MUTEX_HELD(&dtrace_provider_lock));

	if (prv == NULL) {
		all = 1;
		prv = dtrace_provider;
	}

	do {
		dtrace_enabling_t *enab;
		void *parg = prv->dtpv_arg;

retry:
		gen = dtrace_retained_gen;
		for (enab = dtrace_retained; enab != NULL;
		    enab = enab->dten_next) {
			for (i = 0; i < enab->dten_ndesc; i++) {
				desc = enab->dten_desc[i]->dted_probe;
				mutex_exit(&dtrace_lock);
				prv->dtpv_pops.dtps_provide(parg, &desc);
				mutex_enter(&dtrace_lock);
				/*
				 * Process the retained enablings again if
				 * they have changed while we weren't holding
				 * dtrace_lock.
				 */
				if (gen != dtrace_retained_gen)
					goto retry;
			}
		}
	} while (all && (prv = prv->dtpv_next) != NULL);

	mutex_exit(&dtrace_lock);
	dtrace_probe_provide(NULL, all ? NULL : prv);
	mutex_enter(&dtrace_lock);
}

/*
 * Called to reap ECBs that are attached to probes from defunct providers.
 */
static void
dtrace_enabling_reap(void *args __unused)
{
	dtrace_provider_t *prov;
	dtrace_probe_t *probe;
	dtrace_ecb_t *ecb;
	hrtime_t when;
	int i;

	mutex_enter(&cpu_lock);
	mutex_enter(&dtrace_lock);

	for (i = 0; i < dtrace_nprobes; i++) {
		if ((probe = dtrace_probes[i]) == NULL)
			continue;

		if (probe->dtpr_ecb == NULL)
			continue;

		prov = probe->dtpr_provider;

		if ((when = prov->dtpv_defunct) == 0)
			continue;

		/*
		 * We have ECBs on a defunct provider:  we want to reap these
		 * ECBs to allow the provider to unregister.  The destruction
		 * of these ECBs must be done carefully:  if we destroy the ECB
		 * and the consumer later wishes to consume an EPID that
		 * corresponds to the destroyed ECB (and if the EPID metadata
		 * has not been previously consumed), the consumer will abort
		 * processing on the unknown EPID.  To reduce (but not, sadly,
		 * eliminate) the possibility of this, we will only destroy an
		 * ECB for a defunct provider if, for the state that
		 * corresponds to the ECB:
		 *
		 * (a)	There is no speculative tracing (which can effectively
		 *	cache an EPID for an arbitrary amount of time).
		 *
		 * (b)	The principal buffers have been switched twice since
		 *	the provider became defunct.
		 *
		 * (c)	The aggregation buffers are of zero size or have been
		 *	switched twice since the provider became defunct.
		 *
		 * We use dts_speculates to determine (a) and call a function
		 * (dtrace_buffer_consumed()) to determine (b) and (c).  Note
		 * that as soon as we've been unable to destroy one of the ECBs
		 * associated with the probe, we quit trying -- reaping is only
		 * fruitful in as much as we can destroy all ECBs associated
		 * with the defunct provider's probes.
		 */
		while ((ecb = probe->dtpr_ecb) != NULL) {
			dtrace_state_t *state = ecb->dte_state;
			dtrace_buffer_t *buf = state->dts_buffer;
			dtrace_buffer_t *aggbuf = state->dts_aggbuffer;

			if (state->dts_speculates)
				break;

			if (!dtrace_buffer_consumed(buf, when))
				break;

			if (!dtrace_buffer_consumed(aggbuf, when))
				break;

			dtrace_ecb_disable(ecb);
			ASSERT(probe->dtpr_ecb != ecb);
			dtrace_ecb_destroy(ecb);
		}
	}

	mutex_exit(&dtrace_lock);
	mutex_exit(&cpu_lock);
}

/*
 * DTrace DOF Functions
 */
/*ARGSUSED*/
static void
dtrace_dof_error(dof_hdr_t *dof, const char *str)
{
	if (dtrace_err_verbose)
		cmn_err(CE_WARN, "failed to process DOF: %s", str);

#ifdef DTRACE_ERRDEBUG
	dtrace_errdebug(str);
#endif
}

/*
 * Create DOF out of a currently enabled state.  Right now, we only create
 * DOF containing the run-time options -- but this could be expanded to create
 * complete DOF representing the enabled state.
 */
static dof_hdr_t *
dtrace_dof_create(dtrace_state_t *state)
{
	dof_hdr_t *dof;
	dof_sec_t *sec;
	dof_optdesc_t *opt;
	int i, len = sizeof (dof_hdr_t) +
	    roundup(sizeof (dof_sec_t), sizeof (uint64_t)) +
	    sizeof (dof_optdesc_t) * DTRACEOPT_MAX;

	ASSERT(MUTEX_HELD(&dtrace_lock));

	dof = kmem_zalloc(len, KM_SLEEP);
	dof->dofh_ident[DOF_ID_MAG0] = DOF_MAG_MAG0;
	dof->dofh_ident[DOF_ID_MAG1] = DOF_MAG_MAG1;
	dof->dofh_ident[DOF_ID_MAG2] = DOF_MAG_MAG2;
	dof->dofh_ident[DOF_ID_MAG3] = DOF_MAG_MAG3;

	dof->dofh_ident[DOF_ID_MODEL] = DOF_MODEL_NATIVE;
	dof->dofh_ident[DOF_ID_ENCODING] = DOF_ENCODE_NATIVE;
	dof->dofh_ident[DOF_ID_VERSION] = DOF_VERSION;
	dof->dofh_ident[DOF_ID_DIFVERS] = DIF_VERSION;
	dof->dofh_ident[DOF_ID_DIFIREG] = DIF_DIR_NREGS;
	dof->dofh_ident[DOF_ID_DIFTREG] = DIF_DTR_NREGS;

	dof->dofh_flags = 0;
	dof->dofh_hdrsize = sizeof (dof_hdr_t);
	dof->dofh_secsize = sizeof (dof_sec_t);
	dof->dofh_secnum = 1;	/* only DOF_SECT_OPTDESC */
	dof->dofh_secoff = sizeof (dof_hdr_t);
	dof->dofh_loadsz = len;
	dof->dofh_filesz = len;
	dof->dofh_pad = 0;

	/*
	 * Fill in the option section header...
	 */
	sec = (dof_sec_t *)((uintptr_t)dof + sizeof (dof_hdr_t));
	sec->dofs_type = DOF_SECT_OPTDESC;
	sec->dofs_align = sizeof (uint64_t);
	sec->dofs_flags = DOF_SECF_LOAD;
	sec->dofs_entsize = sizeof (dof_optdesc_t);

	opt = (dof_optdesc_t *)((uintptr_t)sec +
	    roundup(sizeof (dof_sec_t), sizeof (uint64_t)));

	sec->dofs_offset = (uintptr_t)opt - (uintptr_t)dof;
	sec->dofs_size = sizeof (dof_optdesc_t) * DTRACEOPT_MAX;

	for (i = 0; i < DTRACEOPT_MAX; i++) {
		opt[i].dofo_option = i;
		opt[i].dofo_strtab = DOF_SECIDX_NONE;
		opt[i].dofo_value = state->dts_options[i];
	}

	return (dof);
}
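
/*
 * Note: the DOF produced above is laid out as a dof_hdr_t, followed by a
 * single dof_sec_t describing a DOF_SECT_OPTDESC section, followed (after
 * rounding up to 8-byte alignment) by DTRACEOPT_MAX dof_optdesc_t entries;
 * dofh_loadsz and dofh_filesz both cover the entire allocation.
 */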

static dof_hdr_t *
dtrace_dof_copyin(uintptr_t uarg, int *errp)
{
	dof_hdr_t hdr, *dof;

	ASSERT(!MUTEX_HELD(&dtrace_lock));

	/*
	 * First, we're going to copyin() the sizeof (dof_hdr_t).
	 */
	if (copyin((void *)uarg, &hdr, sizeof (hdr)) != 0) {
		dtrace_dof_error(NULL, "failed to copyin DOF header");
		*errp = EFAULT;
		return (NULL);
	}

	/*
	 * Now we'll allocate the entire DOF and copy it in -- provided
	 * that the length isn't outrageous.
	 */
	if (hdr.dofh_loadsz >= dtrace_dof_maxsize) {
		dtrace_dof_error(&hdr, "load size exceeds maximum");
		*errp = E2BIG;
		return (NULL);
	}

	if (hdr.dofh_loadsz < sizeof (hdr)) {
		dtrace_dof_error(&hdr, "invalid load size");
		*errp = EINVAL;
		return (NULL);
	}

	dof = kmem_alloc(hdr.dofh_loadsz, KM_SLEEP);

	if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0 ||
	    dof->dofh_loadsz != hdr.dofh_loadsz) {
		kmem_free(dof, hdr.dofh_loadsz);
		*errp = EFAULT;
		return (NULL);
	}

	return (dof);
}

#ifdef __FreeBSD__
static dof_hdr_t *
dtrace_dof_copyin_proc(struct proc *p, uintptr_t uarg, int *errp)
{
	dof_hdr_t hdr, *dof;
	struct thread *td;
	size_t loadsz;

	ASSERT(!MUTEX_HELD(&dtrace_lock));

	td = curthread;

	/*
	 * First, we're going to copyin() the sizeof (dof_hdr_t).
	 */
	if (proc_readmem(td, p, uarg, &hdr, sizeof(hdr)) != sizeof(hdr)) {
		dtrace_dof_error(NULL, "failed to copyin DOF header");
		*errp = EFAULT;
		return (NULL);
	}

	/*
	 * Now we'll allocate the entire DOF and copy it in -- provided
	 * that the length isn't outrageous.
	 */
	if (hdr.dofh_loadsz >= dtrace_dof_maxsize) {
		dtrace_dof_error(&hdr, "load size exceeds maximum");
		*errp = E2BIG;
		return (NULL);
	}
	loadsz = (size_t)hdr.dofh_loadsz;

	if (loadsz < sizeof (hdr)) {
		dtrace_dof_error(&hdr, "invalid load size");
		*errp = EINVAL;
		return (NULL);
	}

	dof = kmem_alloc(loadsz, KM_SLEEP);

	if (proc_readmem(td, p, uarg, dof, loadsz) != loadsz ||
	    dof->dofh_loadsz != loadsz) {
		kmem_free(dof, hdr.dofh_loadsz);
		*errp = EFAULT;
		return (NULL);
	}

	return (dof);
}

static __inline uchar_t
dtrace_dof_char(char c)
{

	switch (c) {
	case '0':
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
	case '8':
	case '9':
		return (c - '0');
	case 'A':
	case 'B':
	case 'C':
	case 'D':
	case 'E':
	case 'F':
		return (c - 'A' + 10);
	case 'a':
	case 'b':
	case 'c':
	case 'd':
	case 'e':
	case 'f':
		return (c - 'a' + 10);
	}
	/* Should not reach here. */
	return (UCHAR_MAX);
}
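
/*
 * Note: dtrace_dof_char() decodes a single ASCII hex digit; UCHAR_MAX serves
 * as the "not a hex digit" sentinel, which dtrace_dof_property() checks for
 * when decoding preloaded DOF below.
 */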
#endif /* __FreeBSD__ */

static dof_hdr_t *
dtrace_dof_property(const char *name)
{
#ifdef __FreeBSD__
	uint8_t *dofbuf;
	u_char *data, *eol;
	caddr_t doffile;
	size_t bytes, len, i;
	dof_hdr_t *dof;
	u_char c1, c2;

	dof = NULL;

	doffile = preload_search_by_type("dtrace_dof");
	if (doffile == NULL)
		return (NULL);

	data = preload_fetch_addr(doffile);
	len = preload_fetch_size(doffile);
	for (;;) {
		/* Look for the end of the line. All lines end in a newline. */
		eol = memchr(data, '\n', len);
		if (eol == NULL)
			return (NULL);

		if (strncmp(name, data, strlen(name)) == 0)
			break;

		eol++; /* skip past the newline */
		len -= eol - data;
		data = eol;
	}

	/* We've found the data corresponding to the specified key. */

	data += strlen(name) + 1; /* skip past the '=' */
	len = eol - data;
	if (len % 2 != 0) {
		dtrace_dof_error(NULL, "invalid DOF encoding length");
		goto doferr;
	}
	bytes = len / 2;
	if (bytes < sizeof(dof_hdr_t)) {
		dtrace_dof_error(NULL, "truncated header");
		goto doferr;
	}

	/*
	 * Each byte is represented by the two ASCII characters in its hex
	 * representation.  Assign dof now so that the doferr path frees
	 * dofbuf if the decode loop fails part-way through.
	 */
	dofbuf = malloc(bytes, M_SOLARIS, M_WAITOK);
	dof = (dof_hdr_t *)dofbuf;
	for (i = 0; i < bytes; i++) {
		c1 = dtrace_dof_char(data[i * 2]);
		c2 = dtrace_dof_char(data[i * 2 + 1]);
		if (c1 == UCHAR_MAX || c2 == UCHAR_MAX) {
			dtrace_dof_error(NULL, "invalid hex char in DOF");
			goto doferr;
		}
		dofbuf[i] = c1 * 16 + c2;
	}

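	/*
	 * Note: each pair of characters above decodes to one byte; e.g. the
	 * characters "4f" yield 4 * 16 + 15 == 0x4f.
	 */
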
	if (bytes < dof->dofh_loadsz) {
		dtrace_dof_error(NULL, "truncated DOF");
		goto doferr;
	}

	if (dof->dofh_loadsz >= dtrace_dof_maxsize) {
		dtrace_dof_error(NULL, "oversized DOF");
		goto doferr;
	}

	return (dof);

doferr:
	free(dof, M_SOLARIS);
	return (NULL);
#else /* __FreeBSD__ */
	uchar_t *buf;
	uint64_t loadsz;
	unsigned int len, i;
	dof_hdr_t *dof;

	/*
	 * Unfortunately, arrays of values in .conf files are always (and
	 * only) interpreted to be integer arrays.  We must read our DOF
	 * as an integer array, and then squeeze it into a byte array.
	 */
	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dtrace_devi, 0,
	    (char *)name, (int **)&buf, &len) != DDI_PROP_SUCCESS)
		return (NULL);

	for (i = 0; i < len; i++)
		buf[i] = (uchar_t)(((int *)buf)[i]);

	if (len < sizeof (dof_hdr_t)) {
		ddi_prop_free(buf);
		dtrace_dof_error(NULL, "truncated header");
		return (NULL);
	}

	if (len < (loadsz = ((dof_hdr_t *)buf)->dofh_loadsz)) {
		ddi_prop_free(buf);
		dtrace_dof_error(NULL, "truncated DOF");
		return (NULL);
	}

	if (loadsz >= dtrace_dof_maxsize) {
		ddi_prop_free(buf);
		dtrace_dof_error(NULL, "oversized DOF");
		return (NULL);
	}

	dof = kmem_alloc(loadsz, KM_SLEEP);
	bcopy(buf, dof, loadsz);
	ddi_prop_free(buf);

	return (dof);
#endif /* !__FreeBSD__ */
}

static void
dtrace_dof_destroy(dof_hdr_t *dof)
{
	kmem_free(dof, dof->dofh_loadsz);
}

/*
 * Return the dof_sec_t pointer corresponding to a given section index.  If the
 * index is not valid, dtrace_dof_error() is called and NULL is returned.  If
 * a type other than DOF_SECT_NONE is specified, the header is checked against
 * this type and NULL is returned if the types do not match.
 */
static dof_sec_t *
dtrace_dof_sect(dof_hdr_t *dof, uint32_t type, dof_secidx_t i)
{
	dof_sec_t *sec = (dof_sec_t *)(uintptr_t)
	    ((uintptr_t)dof + dof->dofh_secoff + i * dof->dofh_secsize);

	if (i >= dof->dofh_secnum) {
		dtrace_dof_error(dof, "referenced section index is invalid");
		return (NULL);
	}

	if (!(sec->dofs_flags & DOF_SECF_LOAD)) {
		dtrace_dof_error(dof, "referenced section is not loadable");
		return (NULL);
	}

	if (type != DOF_SECT_NONE && type != sec->dofs_type) {
		dtrace_dof_error(dof, "referenced section is the wrong type");
		return (NULL);
	}

	return (sec);
}
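
/*
 * Note: the section pointer above is computed before the index is validated;
 * this is safe since it is mere pointer arithmetic, and nothing is
 * dereferenced until the dofh_secnum bounds check passes.
 */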
13597
static dtrace_probedesc_t *
13598
dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc)
13599
{
13600
dof_probedesc_t *probe;
13601
dof_sec_t *strtab;
13602
uintptr_t daddr = (uintptr_t)dof;
13603
uintptr_t str;
13604
size_t size;
13605
13606
if (sec->dofs_type != DOF_SECT_PROBEDESC) {
13607
dtrace_dof_error(dof, "invalid probe section");
13608
return (NULL);
13609
}
13610
13611
if (sec->dofs_align != sizeof (dof_secidx_t)) {
13612
dtrace_dof_error(dof, "bad alignment in probe description");
13613
return (NULL);
13614
}
13615
13616
if (sec->dofs_offset + sizeof (dof_probedesc_t) > dof->dofh_loadsz) {
13617
dtrace_dof_error(dof, "truncated probe description");
13618
return (NULL);
13619
}
13620
13621
probe = (dof_probedesc_t *)(uintptr_t)(daddr + sec->dofs_offset);
13622
strtab = dtrace_dof_sect(dof, DOF_SECT_STRTAB, probe->dofp_strtab);
13623
13624
if (strtab == NULL)
13625
return (NULL);
13626
13627
str = daddr + strtab->dofs_offset;
13628
size = strtab->dofs_size;
13629
13630
if (probe->dofp_provider >= strtab->dofs_size) {
13631
dtrace_dof_error(dof, "corrupt probe provider");
13632
return (NULL);
13633
}
13634
13635
(void) strncpy(desc->dtpd_provider,
13636
(char *)(str + probe->dofp_provider),
13637
MIN(DTRACE_PROVNAMELEN - 1, size - probe->dofp_provider));
13638
13639
if (probe->dofp_mod >= strtab->dofs_size) {
13640
dtrace_dof_error(dof, "corrupt probe module");
13641
return (NULL);
13642
}
13643
13644
(void) strncpy(desc->dtpd_mod, (char *)(str + probe->dofp_mod),
13645
MIN(DTRACE_MODNAMELEN - 1, size - probe->dofp_mod));
13646
13647
if (probe->dofp_func >= strtab->dofs_size) {
13648
dtrace_dof_error(dof, "corrupt probe function");
13649
return (NULL);
13650
}
13651
13652
(void) strncpy(desc->dtpd_func, (char *)(str + probe->dofp_func),
13653
MIN(DTRACE_FUNCNAMELEN - 1, size - probe->dofp_func));
13654
13655
if (probe->dofp_name >= strtab->dofs_size) {
13656
dtrace_dof_error(dof, "corrupt probe name");
13657
return (NULL);
13658
}
13659
13660
(void) strncpy(desc->dtpd_name, (char *)(str + probe->dofp_name),
13661
MIN(DTRACE_NAMELEN - 1, size - probe->dofp_name));
13662
13663
return (desc);
13664
}
13665
13666
static dtrace_difo_t *
13667
dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
13668
cred_t *cr)
13669
{
13670
dtrace_difo_t *dp;
13671
size_t ttl = 0;
13672
dof_difohdr_t *dofd;
13673
uintptr_t daddr = (uintptr_t)dof;
13674
size_t max = dtrace_difo_maxsize;
13675
int i, l, n;
13676
13677
static const struct {
13678
int section;
13679
int bufoffs;
13680
int lenoffs;
13681
int entsize;
13682
int align;
13683
const char *msg;
13684
} difo[] = {
13685
{ DOF_SECT_DIF, offsetof(dtrace_difo_t, dtdo_buf),
13686
offsetof(dtrace_difo_t, dtdo_len), sizeof (dif_instr_t),
13687
sizeof (dif_instr_t), "multiple DIF sections" },
13688
13689
{ DOF_SECT_INTTAB, offsetof(dtrace_difo_t, dtdo_inttab),
13690
offsetof(dtrace_difo_t, dtdo_intlen), sizeof (uint64_t),
13691
sizeof (uint64_t), "multiple integer tables" },
13692
13693
{ DOF_SECT_STRTAB, offsetof(dtrace_difo_t, dtdo_strtab),
13694
offsetof(dtrace_difo_t, dtdo_strlen), 0,
13695
sizeof (char), "multiple string tables" },
13696
13697
{ DOF_SECT_VARTAB, offsetof(dtrace_difo_t, dtdo_vartab),
13698
offsetof(dtrace_difo_t, dtdo_varlen), sizeof (dtrace_difv_t),
13699
sizeof (uint_t), "multiple variable tables" },
13700
13701
{ DOF_SECT_NONE, 0, 0, 0, 0, NULL }
13702
};
13703
13704
if (sec->dofs_type != DOF_SECT_DIFOHDR) {
13705
dtrace_dof_error(dof, "invalid DIFO header section");
13706
return (NULL);
13707
}
13708
13709
if (sec->dofs_align != sizeof (dof_secidx_t)) {
13710
dtrace_dof_error(dof, "bad alignment in DIFO header");
13711
return (NULL);
13712
}
13713
13714
if (sec->dofs_size < sizeof (dof_difohdr_t) ||
13715
sec->dofs_size % sizeof (dof_secidx_t)) {
13716
dtrace_dof_error(dof, "bad size in DIFO header");
13717
return (NULL);
13718
}
13719
13720
dofd = (dof_difohdr_t *)(uintptr_t)(daddr + sec->dofs_offset);
13721
n = (sec->dofs_size - sizeof (*dofd)) / sizeof (dof_secidx_t) + 1;

	dp = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP);
	dp->dtdo_rtype = dofd->dofd_rtype;

	for (l = 0; l < n; l++) {
		dof_sec_t *subsec;
		void **bufp;
		uint32_t *lenp;

		if ((subsec = dtrace_dof_sect(dof, DOF_SECT_NONE,
		    dofd->dofd_links[l])) == NULL)
			goto err; /* invalid section link */

		if (ttl + subsec->dofs_size > max) {
			dtrace_dof_error(dof, "exceeds maximum size");
			goto err;
		}

		ttl += subsec->dofs_size;

		for (i = 0; difo[i].section != DOF_SECT_NONE; i++) {
			if (subsec->dofs_type != difo[i].section)
				continue;

			if (!(subsec->dofs_flags & DOF_SECF_LOAD)) {
				dtrace_dof_error(dof, "section not loaded");
				goto err;
			}

			if (subsec->dofs_align != difo[i].align) {
				dtrace_dof_error(dof, "bad alignment");
				goto err;
			}

			bufp = (void **)((uintptr_t)dp + difo[i].bufoffs);
			lenp = (uint32_t *)((uintptr_t)dp + difo[i].lenoffs);

			if (*bufp != NULL) {
				dtrace_dof_error(dof, difo[i].msg);
				goto err;
			}

			if (difo[i].entsize != subsec->dofs_entsize) {
				dtrace_dof_error(dof, "entry size mismatch");
				goto err;
			}

			if (subsec->dofs_entsize != 0 &&
			    (subsec->dofs_size % subsec->dofs_entsize) != 0) {
				dtrace_dof_error(dof, "corrupt entry size");
				goto err;
			}

			*lenp = subsec->dofs_size;
			*bufp = kmem_alloc(subsec->dofs_size, KM_SLEEP);
			bcopy((char *)(uintptr_t)(daddr + subsec->dofs_offset),
			    *bufp, subsec->dofs_size);

			if (subsec->dofs_entsize != 0)
				*lenp /= subsec->dofs_entsize;

			break;
		}

		/*
		 * If we encounter a loadable DIFO sub-section that is not
		 * known to us, assume this is a broken program and fail.
		 */
		if (difo[i].section == DOF_SECT_NONE &&
		    (subsec->dofs_flags & DOF_SECF_LOAD)) {
			dtrace_dof_error(dof, "unrecognized DIFO subsection");
			goto err;
		}
	}

	if (dp->dtdo_buf == NULL) {
		/*
		 * We can't have a DIF object without DIF text.
		 */
		dtrace_dof_error(dof, "missing DIF text");
		goto err;
	}

	/*
	 * Before we validate the DIF object, run through the variable table
	 * looking for the strings -- if any of their sizes are zero, we'll
	 * set them to the system-wide default string size.  Note that this
	 * should _not_ happen if the "strsize" option has been set -- in
	 * this case, the compiler should have set the size to reflect the
	 * setting of the option.
	 */
	for (i = 0; i < dp->dtdo_varlen; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i];
		dtrace_diftype_t *t = &v->dtdv_type;

		if (v->dtdv_id < DIF_VAR_OTHER_UBASE)
			continue;

		if (t->dtdt_kind == DIF_TYPE_STRING && t->dtdt_size == 0)
			t->dtdt_size = dtrace_strsize_default;
	}

	if (dtrace_difo_validate(dp, vstate, DIF_DIR_NREGS, cr) != 0)
		goto err;

	dtrace_difo_init(dp, vstate);
	return (dp);

err:
	kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t));
	kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t));
	kmem_free(dp->dtdo_strtab, dp->dtdo_strlen);
	kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t));

	kmem_free(dp, sizeof (dtrace_difo_t));
	return (NULL);
}

static dtrace_predicate_t *
dtrace_dof_predicate(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
    cred_t *cr)
{
	dtrace_difo_t *dp;

	if ((dp = dtrace_dof_difo(dof, sec, vstate, cr)) == NULL)
		return (NULL);

	return (dtrace_predicate_create(dp));
}

static dtrace_actdesc_t *
dtrace_dof_actdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
    cred_t *cr)
{
	dtrace_actdesc_t *act, *first = NULL, *last = NULL, *next;
	dof_actdesc_t *desc;
	dof_sec_t *difosec;
	size_t offs;
	uintptr_t daddr = (uintptr_t)dof;
	uint64_t arg;
	dtrace_actkind_t kind;

	if (sec->dofs_type != DOF_SECT_ACTDESC) {
		dtrace_dof_error(dof, "invalid action section");
		return (NULL);
	}

	if (sec->dofs_offset + sizeof (dof_actdesc_t) > dof->dofh_loadsz) {
		dtrace_dof_error(dof, "truncated action description");
		return (NULL);
	}

	if (sec->dofs_align != sizeof (uint64_t)) {
		dtrace_dof_error(dof, "bad alignment in action description");
		return (NULL);
	}

	if (sec->dofs_size < sec->dofs_entsize) {
		dtrace_dof_error(dof, "section entry size exceeds total size");
		return (NULL);
	}

	if (sec->dofs_entsize != sizeof (dof_actdesc_t)) {
		dtrace_dof_error(dof, "bad entry size in action description");
		return (NULL);
	}

	if (sec->dofs_size / sec->dofs_entsize > dtrace_actions_max) {
		dtrace_dof_error(dof, "actions exceed dtrace_actions_max");
		return (NULL);
	}

	for (offs = 0; offs < sec->dofs_size; offs += sec->dofs_entsize) {
		desc = (dof_actdesc_t *)(daddr +
		    (uintptr_t)sec->dofs_offset + offs);
		kind = (dtrace_actkind_t)desc->dofa_kind;

		if ((DTRACEACT_ISPRINTFLIKE(kind) &&
		    (kind != DTRACEACT_PRINTA ||
		    desc->dofa_strtab != DOF_SECIDX_NONE)) ||
		    (kind == DTRACEACT_DIFEXPR &&
		    desc->dofa_strtab != DOF_SECIDX_NONE)) {
			dof_sec_t *strtab;
			char *str, *fmt;
			uint64_t i;

			/*
			 * The argument to these actions is an index into the
			 * DOF string table.  For printf()-like actions, this
			 * is the format string.  For print(), this is the
			 * CTF type of the expression result.
			 */
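			/*
			 * For illustration (hypothetical layout): with a
			 * string table containing "\0%d bytes\0", an action
			 * whose dofa_arg is 1 names the format string
			 * "%d bytes"; the scan below merely verifies that a
			 * NUL terminator exists before the end of the table.
			 */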
			if ((strtab = dtrace_dof_sect(dof,
			    DOF_SECT_STRTAB, desc->dofa_strtab)) == NULL)
				goto err;

			str = (char *)((uintptr_t)dof +
			    (uintptr_t)strtab->dofs_offset);

			for (i = desc->dofa_arg; i < strtab->dofs_size; i++) {
				if (str[i] == '\0')
					break;
			}

			if (i >= strtab->dofs_size) {
				dtrace_dof_error(dof, "bogus format string");
				goto err;
			}

			if (i == desc->dofa_arg) {
				dtrace_dof_error(dof, "empty format string");
				goto err;
			}

			i -= desc->dofa_arg;
			fmt = kmem_alloc(i + 1, KM_SLEEP);
			bcopy(&str[desc->dofa_arg], fmt, i + 1);
			arg = (uint64_t)(uintptr_t)fmt;
		} else {
			if (kind == DTRACEACT_PRINTA) {
				ASSERT(desc->dofa_strtab == DOF_SECIDX_NONE);
				arg = 0;
			} else {
				arg = desc->dofa_arg;
			}
		}

		act = dtrace_actdesc_create(kind, desc->dofa_ntuple,
		    desc->dofa_uarg, arg);

		if (last != NULL) {
			last->dtad_next = act;
		} else {
			first = act;
		}

		last = act;

		if (desc->dofa_difo == DOF_SECIDX_NONE)
			continue;

		if ((difosec = dtrace_dof_sect(dof,
		    DOF_SECT_DIFOHDR, desc->dofa_difo)) == NULL)
			goto err;

		act->dtad_difo = dtrace_dof_difo(dof, difosec, vstate, cr);

		if (act->dtad_difo == NULL)
			goto err;
	}

	ASSERT(first != NULL);
	return (first);

err:
	for (act = first; act != NULL; act = next) {
		next = act->dtad_next;
		dtrace_actdesc_release(act, vstate);
	}

	return (NULL);
}

static dtrace_ecbdesc_t *
dtrace_dof_ecbdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
    cred_t *cr)
{
	dtrace_ecbdesc_t *ep;
	dof_ecbdesc_t *ecb;
	dtrace_probedesc_t *desc;
	dtrace_predicate_t *pred = NULL;

	if (sec->dofs_size < sizeof (dof_ecbdesc_t)) {
		dtrace_dof_error(dof, "truncated ECB description");
		return (NULL);
	}

	if (sec->dofs_align != sizeof (uint64_t)) {
		dtrace_dof_error(dof, "bad alignment in ECB description");
		return (NULL);
	}

	ecb = (dof_ecbdesc_t *)((uintptr_t)dof + (uintptr_t)sec->dofs_offset);
	sec = dtrace_dof_sect(dof, DOF_SECT_PROBEDESC, ecb->dofe_probes);

	if (sec == NULL)
		return (NULL);

	ep = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP);
	ep->dted_uarg = ecb->dofe_uarg;
	desc = &ep->dted_probe;

	if (dtrace_dof_probedesc(dof, sec, desc) == NULL)
		goto err;

	if (ecb->dofe_pred != DOF_SECIDX_NONE) {
		if ((sec = dtrace_dof_sect(dof,
		    DOF_SECT_DIFOHDR, ecb->dofe_pred)) == NULL)
			goto err;

		if ((pred = dtrace_dof_predicate(dof, sec, vstate, cr)) == NULL)
			goto err;

		ep->dted_pred.dtpdd_predicate = pred;
	}

	if (ecb->dofe_actions != DOF_SECIDX_NONE) {
		if ((sec = dtrace_dof_sect(dof,
		    DOF_SECT_ACTDESC, ecb->dofe_actions)) == NULL)
			goto err;

		ep->dted_action = dtrace_dof_actdesc(dof, sec, vstate, cr);

		if (ep->dted_action == NULL)
			goto err;
	}

	return (ep);

err:
	if (pred != NULL)
		dtrace_predicate_release(pred, vstate);
	kmem_free(ep, sizeof (dtrace_ecbdesc_t));
	return (NULL);
}

/*
 * Apply the relocations from the specified 'sec' (a DOF_SECT_URELHDR) to the
 * specified DOF.  SETX relocations are computed using 'ubase', the base load
 * address of the object containing the DOF, and DOFREL relocations are
 * relative to the relocation offset within the DOF.
 */
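/*
 * As an illustrative sketch (hypothetical numbers): for a SETX relocation, a
 * target word holding 0x100 with ubase 0x400000 becomes 0x400100 -- an
 * absolute user address.  For a DOFREL relocation, the same word instead has
 * udaddr + dofs_offset + dofr_offset added, i.e. it ends up relative to where
 * the word itself lives within the loaded DOF.
 */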
static int
dtrace_dof_relocate(dof_hdr_t *dof, dof_sec_t *sec, uint64_t ubase,
    uint64_t udaddr)
{
	uintptr_t daddr = (uintptr_t)dof;
	uintptr_t ts_end;
	dof_relohdr_t *dofr =
	    (dof_relohdr_t *)(uintptr_t)(daddr + sec->dofs_offset);
	dof_sec_t *ss, *rs, *ts;
	dof_relodesc_t *r;
	uint_t i, n;

	if (sec->dofs_size < sizeof (dof_relohdr_t) ||
	    sec->dofs_align != sizeof (dof_secidx_t)) {
		dtrace_dof_error(dof, "invalid relocation header");
		return (-1);
	}

	ss = dtrace_dof_sect(dof, DOF_SECT_STRTAB, dofr->dofr_strtab);
	rs = dtrace_dof_sect(dof, DOF_SECT_RELTAB, dofr->dofr_relsec);
	ts = dtrace_dof_sect(dof, DOF_SECT_NONE, dofr->dofr_tgtsec);
	ts_end = (uintptr_t)ts + sizeof (dof_sec_t);

	if (ss == NULL || rs == NULL || ts == NULL)
		return (-1); /* dtrace_dof_error() has been called already */

	if (rs->dofs_entsize < sizeof (dof_relodesc_t) ||
	    rs->dofs_align != sizeof (uint64_t)) {
		dtrace_dof_error(dof, "invalid relocation section");
		return (-1);
	}

	r = (dof_relodesc_t *)(uintptr_t)(daddr + rs->dofs_offset);
	n = rs->dofs_size / rs->dofs_entsize;

	for (i = 0; i < n; i++) {
		uintptr_t taddr = daddr + ts->dofs_offset + r->dofr_offset;

		switch (r->dofr_type) {
		case DOF_RELO_NONE:
			break;
		case DOF_RELO_SETX:
		case DOF_RELO_DOFREL:
			if (r->dofr_offset >= ts->dofs_size || r->dofr_offset +
			    sizeof (uint64_t) > ts->dofs_size) {
				dtrace_dof_error(dof, "bad relocation offset");
				return (-1);
			}

			if (taddr >= (uintptr_t)ts && taddr < ts_end) {
				dtrace_dof_error(dof, "bad relocation offset");
				return (-1);
			}

			if (!IS_P2ALIGNED(taddr, sizeof (uint64_t))) {
				dtrace_dof_error(dof, "misaligned setx relo");
				return (-1);
			}

			if (r->dofr_type == DOF_RELO_SETX)
				*(uint64_t *)taddr += ubase;
			else
				*(uint64_t *)taddr +=
				    udaddr + ts->dofs_offset + r->dofr_offset;
			break;
		default:
			dtrace_dof_error(dof, "invalid relocation type");
			return (-1);
		}

		r = (dof_relodesc_t *)((uintptr_t)r + rs->dofs_entsize);
	}

	return (0);
}

/*
 * The dof_hdr_t passed to dtrace_dof_slurp() should be a partially validated
 * header:  it should be at the front of a memory region that is at least
 * sizeof (dof_hdr_t) in size -- and then at least dof_hdr.dofh_loadsz in
 * size.  It need not be validated in any other way.
 */
static int
dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr,
    dtrace_enabling_t **enabp, uint64_t ubase, uint64_t udaddr, int noprobes)
{
	uint64_t len = dof->dofh_loadsz, seclen;
	uintptr_t daddr = (uintptr_t)dof;
	dtrace_ecbdesc_t *ep;
	dtrace_enabling_t *enab;
	uint_t i;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(dof->dofh_loadsz >= sizeof (dof_hdr_t));

	/*
	 * Check the DOF header identification bytes.  In addition to checking
	 * valid settings, we also verify that unused bits/bytes are zeroed so
	 * we can use them later without fear of regressing existing binaries.
	 */
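	/*
	 * For example, a well-formed native DOF begins with the magic bytes
	 * "\177DOF", followed by model, encoding and version bytes; every
	 * ident byte from DOF_ID_PAD onward must be zero, which is exactly
	 * what the checks below enforce.
	 */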
	if (bcmp(&dof->dofh_ident[DOF_ID_MAG0],
	    DOF_MAG_STRING, DOF_MAG_STRLEN) != 0) {
		dtrace_dof_error(dof, "DOF magic string mismatch");
		return (-1);
	}

	if (dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_ILP32 &&
	    dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_LP64) {
		dtrace_dof_error(dof, "DOF has invalid data model");
		return (-1);
	}

	if (dof->dofh_ident[DOF_ID_ENCODING] != DOF_ENCODE_NATIVE) {
		dtrace_dof_error(dof, "DOF encoding mismatch");
		return (-1);
	}

	if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
	    dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_2) {
		dtrace_dof_error(dof, "DOF version mismatch");
		return (-1);
	}

	if (dof->dofh_ident[DOF_ID_DIFVERS] != DIF_VERSION_2) {
		dtrace_dof_error(dof, "DOF uses unsupported instruction set");
		return (-1);
	}

	if (dof->dofh_ident[DOF_ID_DIFIREG] > DIF_DIR_NREGS) {
		dtrace_dof_error(dof, "DOF uses too many integer registers");
		return (-1);
	}

	if (dof->dofh_ident[DOF_ID_DIFTREG] > DIF_DTR_NREGS) {
		dtrace_dof_error(dof, "DOF uses too many tuple registers");
		return (-1);
	}

	for (i = DOF_ID_PAD; i < DOF_ID_SIZE; i++) {
		if (dof->dofh_ident[i] != 0) {
			dtrace_dof_error(dof, "DOF has invalid ident byte set");
			return (-1);
		}
	}

	if (dof->dofh_flags & ~DOF_FL_VALID) {
		dtrace_dof_error(dof, "DOF has invalid flag bits set");
		return (-1);
	}

	if (dof->dofh_secsize == 0) {
		dtrace_dof_error(dof, "zero section header size");
		return (-1);
	}

	/*
	 * Check that the section headers don't exceed the amount of DOF
	 * data.  Note that we cast the section size and number of sections
	 * to uint64_t's to prevent possible overflow in the multiplication.
	 */
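	/*
	 * Without the widening casts, a corrupt header could overflow the
	 * product: e.g. (hypothetically) dofh_secnum = 0x2000000 and
	 * dofh_secsize = 0x80 multiply to 0x100000000, which wraps to 0 in
	 * 32-bit arithmetic and would defeat the bounds check below.
	 */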
	seclen = (uint64_t)dof->dofh_secnum * (uint64_t)dof->dofh_secsize;

	if (dof->dofh_secoff > len || seclen > len ||
	    dof->dofh_secoff + seclen > len) {
		dtrace_dof_error(dof, "truncated section headers");
		return (-1);
	}

	if (!IS_P2ALIGNED(dof->dofh_secoff, sizeof (uint64_t))) {
		dtrace_dof_error(dof, "misaligned section headers");
		return (-1);
	}

	if (!IS_P2ALIGNED(dof->dofh_secsize, sizeof (uint64_t))) {
		dtrace_dof_error(dof, "misaligned section size");
		return (-1);
	}

	/*
	 * Take an initial pass through the section headers to be sure that
	 * the headers don't have stray offsets.  If the 'noprobes' flag is
	 * set, do not permit sections relating to providers, probes, or args.
	 */
	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)(daddr +
		    (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);

		if (noprobes) {
			switch (sec->dofs_type) {
			case DOF_SECT_PROVIDER:
			case DOF_SECT_PROBES:
			case DOF_SECT_PRARGS:
			case DOF_SECT_PROFFS:
				dtrace_dof_error(dof, "illegal sections "
				    "for enabling");
				return (-1);
			}
		}

		if (DOF_SEC_ISLOADABLE(sec->dofs_type) &&
		    !(sec->dofs_flags & DOF_SECF_LOAD)) {
			dtrace_dof_error(dof, "loadable section with load "
			    "flag unset");
			return (-1);
		}

		if (!(sec->dofs_flags & DOF_SECF_LOAD))
			continue; /* just ignore non-loadable sections */

		if (!ISP2(sec->dofs_align)) {
			dtrace_dof_error(dof, "bad section alignment");
			return (-1);
		}

		if (sec->dofs_offset & (sec->dofs_align - 1)) {
			dtrace_dof_error(dof, "misaligned section");
			return (-1);
		}

		if (sec->dofs_offset > len || sec->dofs_size > len ||
		    sec->dofs_offset + sec->dofs_size > len) {
			dtrace_dof_error(dof, "corrupt section header");
			return (-1);
		}

		if (sec->dofs_type == DOF_SECT_STRTAB && *((char *)daddr +
		    sec->dofs_offset + sec->dofs_size - 1) != '\0') {
			dtrace_dof_error(dof, "non-terminating string table");
			return (-1);
		}
	}

	/*
	 * Take a second pass through the sections and locate and perform any
	 * relocations that are present.  We do this after the first pass to
	 * be sure that all sections have had their headers validated.
	 */
	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)(daddr +
		    (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);

		if (!(sec->dofs_flags & DOF_SECF_LOAD))
			continue; /* skip sections that are not loadable */

		switch (sec->dofs_type) {
		case DOF_SECT_URELHDR:
			if (dtrace_dof_relocate(dof, sec, ubase, udaddr) != 0)
				return (-1);
			break;
		}
	}

	if ((enab = *enabp) == NULL)
		enab = *enabp = dtrace_enabling_create(vstate);

	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)(daddr +
		    (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);

		if (sec->dofs_type != DOF_SECT_ECBDESC)
			continue;

		if ((ep = dtrace_dof_ecbdesc(dof, sec, vstate, cr)) == NULL) {
			dtrace_enabling_destroy(enab);
			*enabp = NULL;
			return (-1);
		}

		dtrace_enabling_add(enab, ep);
	}

	return (0);
}

/*
 * Process DOF for any options.  This routine assumes that the DOF has been
 * at least processed by dtrace_dof_slurp().
 */
static int
dtrace_dof_options(dof_hdr_t *dof, dtrace_state_t *state)
{
	int i, rval;
	uint32_t entsize;
	size_t offs;
	dof_optdesc_t *desc;

	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)((uintptr_t)dof +
		    (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);

		if (sec->dofs_type != DOF_SECT_OPTDESC)
			continue;

		if (sec->dofs_align != sizeof (uint64_t)) {
			dtrace_dof_error(dof, "bad alignment in "
			    "option description");
			return (EINVAL);
		}

		if ((entsize = sec->dofs_entsize) == 0) {
			dtrace_dof_error(dof, "zeroed option entry size");
			return (EINVAL);
		}

		if (entsize < sizeof (dof_optdesc_t)) {
			dtrace_dof_error(dof, "bad option entry size");
			return (EINVAL);
		}

		for (offs = 0; offs < sec->dofs_size; offs += entsize) {
			desc = (dof_optdesc_t *)((uintptr_t)dof +
			    (uintptr_t)sec->dofs_offset + offs);

			if (desc->dofo_strtab != DOF_SECIDX_NONE) {
				dtrace_dof_error(dof, "non-zero option string");
				return (EINVAL);
			}

			if (desc->dofo_value == DTRACEOPT_UNSET) {
				dtrace_dof_error(dof, "unset option");
				return (EINVAL);
			}

			if ((rval = dtrace_state_option(state,
			    desc->dofo_option, desc->dofo_value)) != 0) {
				dtrace_dof_error(dof, "rejected option");
				return (rval);
			}
		}
	}

	return (0);
}

/*
 * DTrace Consumer State Functions
 */
static int
dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size)
{
	size_t hashsize, maxper, min, chunksize = dstate->dtds_chunksize;
	void *base;
	uintptr_t limit;
	dtrace_dynvar_t *dvar, *next, *start;
	int i;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(dstate->dtds_base == NULL && dstate->dtds_percpu == NULL);

	bzero(dstate, sizeof (dtrace_dstate_t));

	if ((dstate->dtds_chunksize = chunksize) == 0)
		dstate->dtds_chunksize = DTRACE_DYNVAR_CHUNKSIZE;

	VERIFY(dstate->dtds_chunksize < LONG_MAX);

	if (size < (min = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t)))
		size = min;

	if ((base = kmem_zalloc(size, KM_NOSLEEP | KM_NORMALPRI)) == NULL)
		return (ENOMEM);

	dstate->dtds_size = size;
	dstate->dtds_base = base;
	dstate->dtds_percpu = kmem_cache_alloc(dtrace_state_cache, KM_SLEEP);
	bzero(dstate->dtds_percpu,
	    (mp_maxid + 1) * sizeof (dtrace_dstate_percpu_t));

	hashsize = size / (dstate->dtds_chunksize + sizeof (dtrace_dynhash_t));

	if (hashsize != 1 && (hashsize & 1))
		hashsize--;

	dstate->dtds_hashsize = hashsize;
	dstate->dtds_hash = dstate->dtds_base;

	/*
	 * Set all of our hash buckets to point to the single sink, and (if
	 * it hasn't already been set), set the sink's hash value to be the
	 * sink sentinel value.  The sink is needed for dynamic variable
	 * lookups to know that they have iterated over an entire, valid hash
	 * chain.
	 */
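	/*
	 * A sketch of the resulting layout: every bucket's chain initially
	 * ends (and begins) at the shared sink element, whose hash value is
	 * the DTRACE_DYNHASH_SINK sentinel.  A lookup that walks a chain and
	 * reaches the sentinel therefore knows that it has seen every live
	 * entry rather than having raced with a torn chain.
	 */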
	for (i = 0; i < hashsize; i++)
		dstate->dtds_hash[i].dtdh_chain = &dtrace_dynhash_sink;

	if (dtrace_dynhash_sink.dtdv_hashval != DTRACE_DYNHASH_SINK)
		dtrace_dynhash_sink.dtdv_hashval = DTRACE_DYNHASH_SINK;

	/*
	 * Determine number of active CPUs.  Divide free list evenly among
	 * active CPUs.
	 */
	start = (dtrace_dynvar_t *)
	    ((uintptr_t)base + hashsize * sizeof (dtrace_dynhash_t));
	limit = (uintptr_t)base + size;

	VERIFY((uintptr_t)start < limit);
	VERIFY((uintptr_t)start >= (uintptr_t)base);

	maxper = (limit - (uintptr_t)start) / (mp_maxid + 1);
	maxper = (maxper / dstate->dtds_chunksize) * dstate->dtds_chunksize;
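
	/*
	 * The divide-then-multiply above rounds each CPU's share down to a
	 * whole number of chunks.  As a made-up example: 10000 bytes across
	 * four CPUs with a 256-byte chunksize gives maxper = 2500, rounded
	 * down to 2304 (nine whole chunks) per CPU.
	 */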

	CPU_FOREACH(i) {
		dstate->dtds_percpu[i].dtdsc_free = dvar = start;

		/*
		 * If we don't even have enough chunks to make it once through
		 * NCPUs, we're just going to allocate everything to the first
		 * CPU.  And if we're on the last CPU, we're going to allocate
		 * whatever is left over.  In either case, we set the limit to
		 * be the limit of the dynamic variable space.
		 */
		if (maxper == 0 || i == mp_maxid) {
			limit = (uintptr_t)base + size;
			start = NULL;
		} else {
			limit = (uintptr_t)start + maxper;
			start = (dtrace_dynvar_t *)limit;
		}

		VERIFY(limit <= (uintptr_t)base + size);

		for (;;) {
			next = (dtrace_dynvar_t *)((uintptr_t)dvar +
			    dstate->dtds_chunksize);

			if ((uintptr_t)next + dstate->dtds_chunksize >= limit)
				break;

			VERIFY((uintptr_t)dvar >= (uintptr_t)base &&
			    (uintptr_t)dvar <= (uintptr_t)base + size);
			dvar->dtdv_next = next;
			dvar = next;
		}

		if (maxper == 0)
			break;
	}

	return (0);
}

static void
dtrace_dstate_fini(dtrace_dstate_t *dstate)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	if (dstate->dtds_base == NULL)
		return;

	kmem_free(dstate->dtds_base, dstate->dtds_size);
	kmem_cache_free(dtrace_state_cache, dstate->dtds_percpu);
}

static void
dtrace_vstate_fini(dtrace_vstate_t *vstate)
{
	/*
	 * Logical XOR, where are you?
	 */
	ASSERT((vstate->dtvs_nglobals == 0) ^ (vstate->dtvs_globals != NULL));

	if (vstate->dtvs_nglobals > 0) {
		kmem_free(vstate->dtvs_globals, vstate->dtvs_nglobals *
		    sizeof (dtrace_statvar_t *));
	}

	if (vstate->dtvs_ntlocals > 0) {
		kmem_free(vstate->dtvs_tlocals, vstate->dtvs_ntlocals *
		    sizeof (dtrace_difv_t));
	}

	ASSERT((vstate->dtvs_nlocals == 0) ^ (vstate->dtvs_locals != NULL));

	if (vstate->dtvs_nlocals > 0) {
		kmem_free(vstate->dtvs_locals, vstate->dtvs_nlocals *
		    sizeof (dtrace_statvar_t *));
	}
}

#ifdef illumos
static void
dtrace_state_clean(dtrace_state_t *state)
{
	if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE)
		return;

	dtrace_dynvar_clean(&state->dts_vstate.dtvs_dynvars);
	dtrace_speculation_clean(state);
}

static void
dtrace_state_deadman(dtrace_state_t *state)
{
	hrtime_t now;

	dtrace_sync();

	now = dtrace_gethrtime();

	if (state != dtrace_anon.dta_state &&
	    now - state->dts_laststatus >= dtrace_deadman_user)
		return;

	/*
	 * We must be sure that dts_alive never appears to be less than the
	 * value upon entry to dtrace_state_deadman(), and because we lack a
	 * dtrace_cas64(), we cannot store to it atomically.  We thus instead
	 * store INT64_MAX to it, followed by a memory barrier, followed by
	 * the new value.  This assures that dts_alive never appears to be
	 * less than its true value, regardless of the order in which the
	 * stores to the underlying storage are issued.
	 */
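	/*
	 * Concretely (illustrative): on a 32-bit platform the 64-bit store
	 * may be issued as two word stores.  Writing INT64_MAX first means
	 * that any observer seeing a half-written value sees one at least as
	 * large as the old dts_alive, so the deadman can never falsely
	 * conclude that the consumer has wedged.
	 */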
	state->dts_alive = INT64_MAX;
	dtrace_membar_producer();
	state->dts_alive = now;
}
#else	/* !illumos */
static void
dtrace_state_clean(void *arg)
{
	dtrace_state_t *state = arg;
	dtrace_optval_t *opt = state->dts_options;

	if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE)
		return;

	dtrace_dynvar_clean(&state->dts_vstate.dtvs_dynvars);
	dtrace_speculation_clean(state);

	callout_reset(&state->dts_cleaner, hz * opt[DTRACEOPT_CLEANRATE] / NANOSEC,
	    dtrace_state_clean, state);
}

static void
dtrace_state_deadman(void *arg)
{
	dtrace_state_t *state = arg;
	hrtime_t now;

	dtrace_sync();

	dtrace_debug_output();

	now = dtrace_gethrtime();

	if (state != dtrace_anon.dta_state &&
	    now - state->dts_laststatus >= dtrace_deadman_user)
		return;

	/*
	 * We must be sure that dts_alive never appears to be less than the
	 * value upon entry to dtrace_state_deadman(), and because we lack a
	 * dtrace_cas64(), we cannot store to it atomically.  We thus instead
	 * store INT64_MAX to it, followed by a memory barrier, followed by
	 * the new value.  This assures that dts_alive never appears to be
	 * less than its true value, regardless of the order in which the
	 * stores to the underlying storage are issued.
	 */
	state->dts_alive = INT64_MAX;
	dtrace_membar_producer();
	state->dts_alive = now;

	callout_reset(&state->dts_deadman, hz * dtrace_deadman_interval / NANOSEC,
	    dtrace_state_deadman, state);
}
#endif	/* illumos */

static dtrace_state_t *
#ifdef illumos
dtrace_state_create(dev_t *devp, cred_t *cr)
#else
dtrace_state_create(struct cdev *dev, struct ucred *cred __unused)
#endif
{
#ifdef illumos
	minor_t minor;
	major_t major;
#else
	cred_t *cr = NULL;
	int m = 0;
#endif
	char c[30];
	dtrace_state_t *state;
	dtrace_optval_t *opt;
	int bufsize = (mp_maxid + 1) * sizeof (dtrace_buffer_t), i;
	int cpu_it;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(MUTEX_HELD(&cpu_lock));

#ifdef illumos
	minor = (minor_t)(uintptr_t)vmem_alloc(dtrace_minor, 1,
	    VM_BESTFIT | VM_SLEEP);

	if (ddi_soft_state_zalloc(dtrace_softstate, minor) != DDI_SUCCESS) {
		vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1);
		return (NULL);
	}

	state = ddi_get_soft_state(dtrace_softstate, minor);
#else
	if (dev != NULL) {
		cr = dev->si_cred;
		m = dev2unit(dev);
	}

	/* Allocate memory for the state. */
	state = kmem_zalloc(sizeof(dtrace_state_t), KM_SLEEP);
#endif

	state->dts_epid = DTRACE_EPIDNONE + 1;

	(void) snprintf(c, sizeof (c), "dtrace_aggid_%d", m);
#ifdef illumos
	state->dts_aggid_arena = vmem_create(c, (void *)1, UINT32_MAX, 1,
	    NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);

	if (devp != NULL) {
		major = getemajor(*devp);
	} else {
		major = ddi_driver_major(dtrace_devi);
	}

	state->dts_dev = makedevice(major, minor);

	if (devp != NULL)
		*devp = state->dts_dev;
#else
	state->dts_aggid_arena = new_unrhdr(1, INT_MAX, &dtrace_unr_mtx);
	state->dts_dev = dev;
#endif

	state->dts_buffer = kmem_zalloc(bufsize, KM_SLEEP);
	state->dts_aggbuffer = kmem_zalloc(bufsize, KM_SLEEP);

	/*
	 * Allocate and initialise the per-process per-CPU random state.
	 * SI_SUB_RANDOM < SI_SUB_DTRACE_ANON therefore entropy device is
	 * assumed to be seeded at this point (if from Fortuna seed file).
	 */
	arc4random_buf(&state->dts_rstate[0], 2 * sizeof(uint64_t));
	for (cpu_it = 1; cpu_it <= mp_maxid; cpu_it++) {
		/*
		 * Each CPU is assigned a 2^64 period, non-overlapping
		 * subsequence.
		 */
		dtrace_xoroshiro128_plus_jump(state->dts_rstate[cpu_it - 1],
		    state->dts_rstate[cpu_it]);
	}
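	/*
	 * Sketch of the intent: xoroshiro128+ has a period of 2^128 - 1 and
	 * its jump function advances a state by 2^64 steps.  Seeding CPU 0
	 * from arc4random and deriving each subsequent CPU's state by
	 * jumping from its predecessor gives every CPU its own 2^64-long,
	 * non-overlapping slice of the same stream.
	 */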
14705
14706
#ifdef illumos
14707
state->dts_cleaner = CYCLIC_NONE;
14708
state->dts_deadman = CYCLIC_NONE;
14709
#else
14710
callout_init(&state->dts_cleaner, 1);
14711
callout_init(&state->dts_deadman, 1);
14712
#endif
14713
state->dts_vstate.dtvs_state = state;
14714
14715
for (i = 0; i < DTRACEOPT_MAX; i++)
14716
state->dts_options[i] = DTRACEOPT_UNSET;
14717
14718
/*
14719
* Set the default options.
14720
*/
14721
opt = state->dts_options;
14722
opt[DTRACEOPT_BUFPOLICY] = DTRACEOPT_BUFPOLICY_SWITCH;
14723
opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_AUTO;
14724
opt[DTRACEOPT_NSPEC] = dtrace_nspec_default;
14725
opt[DTRACEOPT_SPECSIZE] = dtrace_specsize_default;
14726
opt[DTRACEOPT_CPU] = (dtrace_optval_t)DTRACE_CPUALL;
14727
opt[DTRACEOPT_STRSIZE] = dtrace_strsize_default;
14728
opt[DTRACEOPT_STACKFRAMES] = dtrace_stackframes_default;
14729
opt[DTRACEOPT_USTACKFRAMES] = dtrace_ustackframes_default;
14730
opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_default;
14731
opt[DTRACEOPT_AGGRATE] = dtrace_aggrate_default;
14732
opt[DTRACEOPT_SWITCHRATE] = dtrace_switchrate_default;
14733
opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_default;
14734
opt[DTRACEOPT_JSTACKFRAMES] = dtrace_jstackframes_default;
14735
opt[DTRACEOPT_JSTACKSTRSIZE] = dtrace_jstackstrsize_default;
14736
14737
state->dts_activity = DTRACE_ACTIVITY_INACTIVE;
14738
14739
/*
14740
* Depending on the user credentials, we set flag bits which alter probe
14741
* visibility or the amount of destructiveness allowed. In the case of
14742
* actual anonymous tracing, or the possession of all privileges, all of
14743
* the normal checks are bypassed.
14744
*/
14745
if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
14746
state->dts_cred.dcr_visible = DTRACE_CRV_ALL;
14747
state->dts_cred.dcr_action = DTRACE_CRA_ALL;
14748
} else {
14749
/*
14750
* Set up the credentials for this instantiation. We take a
14751
* hold on the credential to prevent it from disappearing on
14752
* us; this in turn prevents the zone_t referenced by this
14753
* credential from disappearing. This means that we can
14754
* examine the credential and the zone from probe context.
14755
*/
14756
crhold(cr);
14757
state->dts_cred.dcr_cred = cr;
14758
14759
/*
14760
* CRA_PROC means "we have *some* privilege for dtrace" and
14761
* unlocks the use of variables like pid, zonename, etc.
14762
*/
14763
if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE) ||
14764
PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) {
14765
state->dts_cred.dcr_action |= DTRACE_CRA_PROC;
14766
}
14767
14768
/*
14769
* dtrace_user allows use of syscall and profile providers.
14770
* If the user also has proc_owner and/or proc_zone, we
14771
* extend the scope to include additional visibility and
14772
* destructive power.
14773
*/
14774
if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE)) {
14775
if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) {
14776
state->dts_cred.dcr_visible |=
14777
DTRACE_CRV_ALLPROC;
14778
14779
state->dts_cred.dcr_action |=
14780
DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
14781
}
14782
14783
if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) {
14784
state->dts_cred.dcr_visible |=
14785
DTRACE_CRV_ALLZONE;
14786
14787
state->dts_cred.dcr_action |=
14788
DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
14789
}
14790
14791
/*
14792
* If we have all privs in whatever zone this is,
14793
* we can do destructive things to processes which
14794
* have altered credentials.
14795
*/
14796
#ifdef illumos
14797
if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE),
14798
cr->cr_zone->zone_privset)) {
14799
state->dts_cred.dcr_action |=
14800
DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
14801
}
14802
#endif
14803
}
14804
14805
/*
14806
* Holding the dtrace_kernel privilege also implies that
14807
* the user has the dtrace_user privilege from a visibility
14808
* perspective. But without further privileges, some
14809
* destructive actions are not available.
14810
*/
14811
if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE)) {
14812
/*
14813
* Make all probes in all zones visible. However,
14814
* this doesn't mean that all actions become available
14815
* to all zones.
14816
*/
14817
state->dts_cred.dcr_visible |= DTRACE_CRV_KERNEL |
14818
DTRACE_CRV_ALLPROC | DTRACE_CRV_ALLZONE;
14819
14820
state->dts_cred.dcr_action |= DTRACE_CRA_KERNEL |
14821
DTRACE_CRA_PROC;
14822
/*
14823
* Holding proc_owner means that destructive actions
14824
* for *this* zone are allowed.
14825
*/
14826
if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
14827
state->dts_cred.dcr_action |=
14828
DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
14829
14830
/*
14831
* Holding proc_zone means that destructive actions
14832
* for this user/group ID in all zones is allowed.
14833
*/
14834
if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
14835
state->dts_cred.dcr_action |=
14836
DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
14837
14838
#ifdef illumos
14839
/*
14840
* If we have all privs in whatever zone this is,
14841
* we can do destructive things to processes which
14842
* have altered credentials.
14843
*/
14844
if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE),
14845
cr->cr_zone->zone_privset)) {
14846
state->dts_cred.dcr_action |=
14847
DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
14848
}
14849
#endif
14850
}
14851
14852
/*
14853
* Holding the dtrace_proc privilege gives control over fasttrap
14854
* and pid providers. We need to grant wider destructive
14855
* privileges in the event that the user has proc_owner and/or
14856
* proc_zone.
14857
*/
14858
if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) {
14859
if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
14860
state->dts_cred.dcr_action |=
14861
DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
14862
14863
if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
14864
state->dts_cred.dcr_action |=
14865
DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
14866
}
14867
}
14868
14869
return (state);
14870
}
14871
14872
static int
14873
dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which)
14874
{
14875
dtrace_optval_t *opt = state->dts_options, size;
14876
processorid_t cpu = 0;
14877
int flags = 0, rval, factor, divisor = 1;
14878
14879
ASSERT(MUTEX_HELD(&dtrace_lock));
14880
ASSERT(MUTEX_HELD(&cpu_lock));
14881
ASSERT(which < DTRACEOPT_MAX);
14882
ASSERT(state->dts_activity == DTRACE_ACTIVITY_INACTIVE ||
14883
(state == dtrace_anon.dta_state &&
14884
state->dts_activity == DTRACE_ACTIVITY_ACTIVE));
14885
14886
if (opt[which] == DTRACEOPT_UNSET || opt[which] == 0)
14887
return (0);
14888
14889
if (opt[DTRACEOPT_CPU] != DTRACEOPT_UNSET)
14890
cpu = opt[DTRACEOPT_CPU];
14891
14892
if (which == DTRACEOPT_SPECSIZE)
14893
flags |= DTRACEBUF_NOSWITCH;
14894
14895
if (which == DTRACEOPT_BUFSIZE) {
14896
if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_RING)
14897
flags |= DTRACEBUF_RING;
14898
14899
if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_FILL)
14900
flags |= DTRACEBUF_FILL;
14901
14902
if (state != dtrace_anon.dta_state ||
14903
state->dts_activity != DTRACE_ACTIVITY_ACTIVE)
14904
flags |= DTRACEBUF_INACTIVE;
14905
}
14906
14907
for (size = opt[which]; size >= sizeof (uint64_t); size /= divisor) {
14908
/*
14909
* The size must be 8-byte aligned. If the size is not 8-byte
14910
* aligned, drop it down by the difference.
14911
*/
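		/*
		 * For example, a requested size of 1001 has 1001 & 7 == 1,
		 * so it is dropped to 1000 (8 * 125) before any allocation
		 * is attempted.
		 */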
		if (size & (sizeof (uint64_t) - 1))
			size -= size & (sizeof (uint64_t) - 1);

		if (size < state->dts_reserve) {
			/*
			 * Buffers always must be large enough to accommodate
			 * their prereserved space.  We return E2BIG instead
			 * of ENOMEM in this case to allow for user-level
			 * software to differentiate the cases.
			 */
			return (E2BIG);
		}

		rval = dtrace_buffer_alloc(buf, size, flags, cpu, &factor);

		if (rval != ENOMEM) {
			opt[which] = size;
			return (rval);
		}

		if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)
			return (rval);

		for (divisor = 2; divisor < factor; divisor <<= 1)
			continue;
	}

	return (ENOMEM);
}

static int
dtrace_state_buffers(dtrace_state_t *state)
{
	dtrace_speculation_t *spec = state->dts_speculations;
	int rval, i;

	if ((rval = dtrace_state_buffer(state, state->dts_buffer,
	    DTRACEOPT_BUFSIZE)) != 0)
		return (rval);

	if ((rval = dtrace_state_buffer(state, state->dts_aggbuffer,
	    DTRACEOPT_AGGSIZE)) != 0)
		return (rval);

	for (i = 0; i < state->dts_nspeculations; i++) {
		if ((rval = dtrace_state_buffer(state,
		    spec[i].dtsp_buffer, DTRACEOPT_SPECSIZE)) != 0)
			return (rval);
	}

	return (0);
}

static void
dtrace_state_prereserve(dtrace_state_t *state)
{
	dtrace_ecb_t *ecb;
	dtrace_probe_t *probe;

	state->dts_reserve = 0;

	if (state->dts_options[DTRACEOPT_BUFPOLICY] != DTRACEOPT_BUFPOLICY_FILL)
		return;

	/*
	 * If our buffer policy is a "fill" buffer policy, we need to set the
	 * prereserved space to be the space required by the END probes.
	 */
	probe = dtrace_probes[dtrace_probeid_end - 1];
	ASSERT(probe != NULL);

	for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) {
		if (ecb->dte_state != state)
			continue;

		state->dts_reserve += ecb->dte_needed + ecb->dte_alignment;
	}
}
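
/*
 * Illustrative arithmetic for the loop above (made-up numbers): with two
 * END-probe ECBs on this state needing 64 and 32 bytes with 8 bytes of
 * alignment slop each, dts_reserve becomes (64 + 8) + (32 + 8) = 112 bytes
 * that every "fill" buffer must hold back for final END processing.
 */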

static int
dtrace_state_go(dtrace_state_t *state, processorid_t *cpu)
{
	dtrace_optval_t *opt = state->dts_options, sz, nspec;
	dtrace_speculation_t *spec;
	dtrace_buffer_t *buf;
#ifdef illumos
	cyc_handler_t hdlr;
	cyc_time_t when;
#endif
	int rval = 0, i, bufsize = (mp_maxid + 1) * sizeof (dtrace_buffer_t);
	dtrace_icookie_t cookie;

	mutex_enter(&cpu_lock);
	mutex_enter(&dtrace_lock);

	if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) {
		rval = EBUSY;
		goto out;
	}

	/*
	 * Before we can perform any checks, we must prime all of the
	 * retained enablings that correspond to this state.
	 */
	dtrace_enabling_prime(state);

	if (state->dts_destructive && !state->dts_cred.dcr_destructive) {
		rval = EACCES;
		goto out;
	}

	dtrace_state_prereserve(state);

	/*
	 * What we want to do now is try to allocate our speculations.
	 * We do not automatically resize the number of speculations; if
	 * this fails, we will fail the operation.
	 */
	nspec = opt[DTRACEOPT_NSPEC];
	ASSERT(nspec != DTRACEOPT_UNSET);

	if (nspec > INT_MAX) {
		rval = ENOMEM;
		goto out;
	}

	spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t),
	    KM_NOSLEEP | KM_NORMALPRI);

	if (spec == NULL) {
		rval = ENOMEM;
		goto out;
	}

	state->dts_speculations = spec;
	state->dts_nspeculations = (int)nspec;

	for (i = 0; i < nspec; i++) {
		if ((buf = kmem_zalloc(bufsize,
		    KM_NOSLEEP | KM_NORMALPRI)) == NULL) {
			rval = ENOMEM;
			goto err;
		}

		spec[i].dtsp_buffer = buf;
	}

	if (opt[DTRACEOPT_GRABANON] != DTRACEOPT_UNSET) {
		if (dtrace_anon.dta_state == NULL) {
			rval = ENOENT;
			goto out;
		}

		if (state->dts_necbs != 0) {
			rval = EALREADY;
			goto out;
		}

		state->dts_anon = dtrace_anon_grab();
		ASSERT(state->dts_anon != NULL);
		state = state->dts_anon;

		/*
		 * We want "grabanon" to be set in the grabbed state, so we'll
		 * copy that option value from the grabbing state into the
		 * grabbed state.
		 */
		state->dts_options[DTRACEOPT_GRABANON] =
		    opt[DTRACEOPT_GRABANON];

		*cpu = dtrace_anon.dta_beganon;

		/*
		 * If the anonymous state is active (as it almost certainly
		 * is if the anonymous enabling ultimately matched anything),
		 * we don't allow any further option processing -- but we
		 * don't return failure.
		 */
		if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
			goto out;
	}

	if (opt[DTRACEOPT_AGGSIZE] != DTRACEOPT_UNSET &&
	    opt[DTRACEOPT_AGGSIZE] != 0) {
		if (state->dts_aggregations == NULL) {
			/*
			 * We're not going to create an aggregation buffer
			 * because we don't have any ECBs that contain
			 * aggregations -- set this option to 0.
			 */
			opt[DTRACEOPT_AGGSIZE] = 0;
		} else {
			/*
			 * If we have an aggregation buffer, we must also have
			 * a buffer to use as scratch.
			 */
			if (opt[DTRACEOPT_BUFSIZE] == DTRACEOPT_UNSET ||
			    opt[DTRACEOPT_BUFSIZE] < state->dts_needed) {
				opt[DTRACEOPT_BUFSIZE] = state->dts_needed;
			}
		}
	}

	if (opt[DTRACEOPT_SPECSIZE] != DTRACEOPT_UNSET &&
	    opt[DTRACEOPT_SPECSIZE] != 0) {
		if (!state->dts_speculates) {
			/*
			 * We're not going to create speculation buffers
			 * because we don't have any ECBs that actually
			 * speculate -- set the speculation size to 0.
			 */
			opt[DTRACEOPT_SPECSIZE] = 0;
		}
	}

	/*
	 * The bare minimum size for any buffer that we're actually going to
	 * do anything to is sizeof (uint64_t).
	 */
	sz = sizeof (uint64_t);

	if ((state->dts_needed != 0 && opt[DTRACEOPT_BUFSIZE] < sz) ||
	    (state->dts_speculates && opt[DTRACEOPT_SPECSIZE] < sz) ||
	    (state->dts_aggregations != NULL && opt[DTRACEOPT_AGGSIZE] < sz)) {
		/*
		 * A buffer size has been explicitly set to 0 (or to a size
		 * that will be adjusted to 0) and we need the space -- we
		 * need to return failure.  We return ENOSPC to differentiate
		 * it from failing to allocate a buffer due to failure to meet
		 * the reserve (for which we return E2BIG).
		 */
		rval = ENOSPC;
		goto out;
	}

	if ((rval = dtrace_state_buffers(state)) != 0)
		goto err;

	if ((sz = opt[DTRACEOPT_DYNVARSIZE]) == DTRACEOPT_UNSET)
		sz = dtrace_dstate_defsize;

	do {
		rval = dtrace_dstate_init(&state->dts_vstate.dtvs_dynvars, sz);

		if (rval == 0)
			break;

		if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)
			goto err;
	} while (sz >>= 1);

	opt[DTRACEOPT_DYNVARSIZE] = sz;

	if (rval != 0)
		goto err;

	if (opt[DTRACEOPT_STATUSRATE] > dtrace_statusrate_max)
		opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_max;

	if (opt[DTRACEOPT_CLEANRATE] == 0)
		opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max;

	if (opt[DTRACEOPT_CLEANRATE] < dtrace_cleanrate_min)
		opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_min;

	if (opt[DTRACEOPT_CLEANRATE] > dtrace_cleanrate_max)
		opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max;

	state->dts_alive = state->dts_laststatus = dtrace_gethrtime();
#ifdef illumos
	hdlr.cyh_func = (cyc_func_t)dtrace_state_clean;
	hdlr.cyh_arg = state;
	hdlr.cyh_level = CY_LOW_LEVEL;

	when.cyt_when = 0;
	when.cyt_interval = opt[DTRACEOPT_CLEANRATE];

	state->dts_cleaner = cyclic_add(&hdlr, &when);

	hdlr.cyh_func = (cyc_func_t)dtrace_state_deadman;
	hdlr.cyh_arg = state;
	hdlr.cyh_level = CY_LOW_LEVEL;

	when.cyt_when = 0;
	when.cyt_interval = dtrace_deadman_interval;

	state->dts_deadman = cyclic_add(&hdlr, &when);
#else
	callout_reset(&state->dts_cleaner, hz * opt[DTRACEOPT_CLEANRATE] / NANOSEC,
	    dtrace_state_clean, state);
	callout_reset(&state->dts_deadman, hz * dtrace_deadman_interval / NANOSEC,
	    dtrace_state_deadman, state);
#endif

	state->dts_activity = DTRACE_ACTIVITY_WARMUP;

#ifdef illumos
	if (state->dts_getf != 0 &&
	    !(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) {
		/*
		 * We don't have kernel privs but we have at least one call
		 * to getf(); we need to bump our zone's count, and (if
		 * this is the first enabling to have an unprivileged call
		 * to getf()) we need to hook into closef().
		 */
		state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf++;

		if (dtrace_getf++ == 0) {
			ASSERT(dtrace_closef == NULL);
			dtrace_closef = dtrace_getf_barrier;
		}
	}
#endif

	/*
	 * Now it's time to actually fire the BEGIN probe.  We need to disable
	 * interrupts here both to record the CPU on which we fired the BEGIN
	 * probe (the data from this CPU will be processed first at user
	 * level) and to manually activate the buffer for this CPU.
	 */
	cookie = dtrace_interrupt_disable();
	*cpu = curcpu;
	ASSERT(state->dts_buffer[*cpu].dtb_flags & DTRACEBUF_INACTIVE);
	state->dts_buffer[*cpu].dtb_flags &= ~DTRACEBUF_INACTIVE;

	dtrace_probe(dtrace_probeid_begin,
	    (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
	dtrace_interrupt_enable(cookie);

	/*
	 * We may have had an exit action from a BEGIN probe; only change our
	 * state to ACTIVE if we're still in WARMUP.
	 */
	ASSERT(state->dts_activity == DTRACE_ACTIVITY_WARMUP ||
	    state->dts_activity == DTRACE_ACTIVITY_DRAINING);

	if (state->dts_activity == DTRACE_ACTIVITY_WARMUP)
		state->dts_activity = DTRACE_ACTIVITY_ACTIVE;

#ifdef __FreeBSD__
	/*
	 * We enable anonymous tracing before APs are started, so we must
	 * activate buffers using the current CPU.
	 */
	if (state == dtrace_anon.dta_state) {
		CPU_FOREACH(i)
			dtrace_buffer_activate_cpu(state, i);
	} else
		dtrace_xcall(DTRACE_CPUALL,
		    (dtrace_xcall_t)dtrace_buffer_activate, state);
#else
	/*
	 * Regardless of whether we're now in ACTIVE or DRAINING, we want
	 * each CPU to transition its principal buffer out of the INACTIVE
	 * state.  Doing this assures that no CPU will suddenly begin
	 * processing an ECB halfway down a probe's ECB chain; all CPUs will
	 * atomically transition from processing none of a state's ECBs to
	 * processing all of them.
	 */
	dtrace_xcall(DTRACE_CPUALL,
	    (dtrace_xcall_t)dtrace_buffer_activate, state);
#endif
	goto out;

err:
	dtrace_buffer_free(state->dts_buffer);
	dtrace_buffer_free(state->dts_aggbuffer);

	if ((nspec = state->dts_nspeculations) == 0) {
		ASSERT(state->dts_speculations == NULL);
		goto out;
	}

	spec = state->dts_speculations;
	ASSERT(spec != NULL);

	for (i = 0; i < state->dts_nspeculations; i++) {
		if ((buf = spec[i].dtsp_buffer) == NULL)
			break;

		dtrace_buffer_free(buf);
		kmem_free(buf, bufsize);
	}

	kmem_free(spec, nspec * sizeof (dtrace_speculation_t));
	state->dts_nspeculations = 0;
	state->dts_speculations = NULL;

out:
	mutex_exit(&dtrace_lock);
	mutex_exit(&cpu_lock);

	return (rval);
}

static int
dtrace_state_stop(dtrace_state_t *state, processorid_t *cpu)
{
	dtrace_icookie_t cookie;

	ASSERT(MUTEX_HELD(&dtrace_lock));

	if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE &&
	    state->dts_activity != DTRACE_ACTIVITY_DRAINING)
		return (EINVAL);

	/*
	 * We'll set the activity to DTRACE_ACTIVITY_DRAINING, and issue a sync
	 * to be sure that every CPU has seen it.  See below for the details
	 * on why this is done.
	 */
	state->dts_activity = DTRACE_ACTIVITY_DRAINING;
	dtrace_sync();

	/*
	 * By this point, it is impossible for any CPU to be still processing
	 * with DTRACE_ACTIVITY_ACTIVE.  We can thus set our activity to
	 * DTRACE_ACTIVITY_COOLDOWN and know that we're not racing with any
	 * other CPU in dtrace_buffer_reserve().  This allows dtrace_probe()
	 * and callees to know that the activity is DTRACE_ACTIVITY_COOLDOWN
	 * iff we're in the END probe.
	 */
	state->dts_activity = DTRACE_ACTIVITY_COOLDOWN;
	dtrace_sync();
	ASSERT(state->dts_activity == DTRACE_ACTIVITY_COOLDOWN);

	/*
	 * Finally, we can release the reserve and call the END probe.  We
	 * disable interrupts across calling the END probe to allow us to
	 * return the CPU on which we actually called the END probe.  This
	 * allows user-land to be sure that this CPU's principal buffer is
	 * processed last.
	 */
	state->dts_reserve = 0;

	cookie = dtrace_interrupt_disable();
	*cpu = curcpu;
	dtrace_probe(dtrace_probeid_end,
	    (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
	dtrace_interrupt_enable(cookie);

	state->dts_activity = DTRACE_ACTIVITY_STOPPED;
	dtrace_sync();

#ifdef illumos
	if (state->dts_getf != 0 &&
	    !(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) {
		/*
		 * We don't have kernel privs but we have at least one call
		 * to getf(); we need to lower our zone's count, and (if
		 * this is the last enabling to have an unprivileged call
		 * to getf()) we need to clear the closef() hook.
		 */
		ASSERT(state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf > 0);
		ASSERT(dtrace_closef == dtrace_getf_barrier);
		ASSERT(dtrace_getf > 0);

		state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf--;

		if (--dtrace_getf == 0)
			dtrace_closef = NULL;
	}
#endif

	return (0);
}

static int
dtrace_state_option(dtrace_state_t *state, dtrace_optid_t option,
    dtrace_optval_t val)
{
	ASSERT(MUTEX_HELD(&dtrace_lock));

	if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
		return (EBUSY);

	if (option >= DTRACEOPT_MAX)
		return (EINVAL);

	if (option != DTRACEOPT_CPU && val < 0)
		return (EINVAL);

	switch (option) {
	case DTRACEOPT_DESTRUCTIVE:
		if (dtrace_destructive_disallow)
			return (EACCES);

		state->dts_cred.dcr_destructive = 1;
		break;

	case DTRACEOPT_BUFSIZE:
	case DTRACEOPT_DYNVARSIZE:
	case DTRACEOPT_AGGSIZE:
	case DTRACEOPT_SPECSIZE:
	case DTRACEOPT_STRSIZE:
		if (val < 0)
			return (EINVAL);

		if (val >= LONG_MAX) {
			/*
			 * If this is an otherwise negative value, set it to
			 * the highest multiple of 128m less than LONG_MAX.
			 * Technically, we're adjusting the size without
			 * regard to the buffer resizing policy, but in fact,
			 * this has no effect -- if we set the buffer size to
			 * ~LONG_MAX and the buffer policy is ultimately set to
			 * be "manual", the buffer allocation is guaranteed to
			 * fail, if only because the allocation requires two
			 * buffers.  (We set the size to the highest multiple
			 * of 128m because it ensures that the size will
			 * remain a multiple of a megabyte when repeatedly
			 * halved -- all the way down to 15m.)
			 */
			val = LONG_MAX - (1 << 27) + 1;
		}
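		/*
		 * Worked out on LP64 (illustrative): LONG_MAX - (1 << 27) + 1
		 * is 2^63 - 2^27, i.e. (2^36 - 1) * 128m -- the highest
		 * multiple of 128m below LONG_MAX, and still a whole number
		 * of megabytes after each of the first several halvings.
		 */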
15426
}
15427
15428
state->dts_options[option] = val;
15429
15430
return (0);
15431
}
15432
15433
static void
15434
dtrace_state_destroy(dtrace_state_t *state)
15435
{
15436
dtrace_ecb_t *ecb;
15437
dtrace_vstate_t *vstate = &state->dts_vstate;
15438
#ifdef illumos
15439
minor_t minor = getminor(state->dts_dev);
15440
#endif
15441
int i, bufsize = (mp_maxid + 1) * sizeof (dtrace_buffer_t);
15442
dtrace_speculation_t *spec = state->dts_speculations;
15443
int nspec = state->dts_nspeculations;
15444
uint32_t match;
15445
15446
ASSERT(MUTEX_HELD(&dtrace_lock));
15447
ASSERT(MUTEX_HELD(&cpu_lock));
15448
15449
/*
15450
* First, retract any retained enablings for this state.
15451
*/
15452
dtrace_enabling_retract(state);
15453
ASSERT(state->dts_nretained == 0);
15454
15455
if (state->dts_activity == DTRACE_ACTIVITY_ACTIVE ||
15456
state->dts_activity == DTRACE_ACTIVITY_DRAINING) {
15457
/*
15458
* We have managed to come into dtrace_state_destroy() on a
15459
* hot enabling -- almost certainly because of a disorderly
15460
* shutdown of a consumer. (That is, a consumer that is
15461
* exiting without having called dtrace_stop().) In this case,
15462
* we're going to set our activity to be KILLED, and then
15463
* issue a sync to be sure that everyone is out of probe
15464
* context before we start blowing away ECBs.
15465
*/
15466
state->dts_activity = DTRACE_ACTIVITY_KILLED;
15467
dtrace_sync();
15468
}
15469
15470
/*
15471
* Release the credential hold we took in dtrace_state_create().
15472
*/
15473
if (state->dts_cred.dcr_cred != NULL)
15474
crfree(state->dts_cred.dcr_cred);
15475
15476
/*
15477
* Now we can safely disable and destroy any enabled probes. Because
15478
* any DTRACE_PRIV_KERNEL probes may actually be slowing our progress
15479
* (especially if they're all enabled), we take two passes through the
15480
* ECBs: in the first, we disable just DTRACE_PRIV_KERNEL probes, and
15481
* in the second we disable whatever is left over.
15482
*/
15483
for (match = DTRACE_PRIV_KERNEL; ; match = 0) {
15484
for (i = 0; i < state->dts_necbs; i++) {
15485
if ((ecb = state->dts_ecbs[i]) == NULL)
15486
continue;
15487
15488
if (match && ecb->dte_probe != NULL) {
15489
dtrace_probe_t *probe = ecb->dte_probe;
15490
dtrace_provider_t *prov = probe->dtpr_provider;
15491
15492
if (!(prov->dtpv_priv.dtpp_flags & match))
15493
continue;
15494
}
15495
15496
dtrace_ecb_disable(ecb);
15497
dtrace_ecb_destroy(ecb);
15498
}
15499
15500
if (!match)
15501
break;
15502
}
15503
15504
/*
15505
* Before we free the buffers, perform one more sync to assure that
15506
* every CPU is out of probe context.
15507
*/
15508
dtrace_sync();
15509
15510
dtrace_buffer_free(state->dts_buffer);
15511
dtrace_buffer_free(state->dts_aggbuffer);
15512
15513
for (i = 0; i < nspec; i++)
15514
dtrace_buffer_free(spec[i].dtsp_buffer);
15515
15516
#ifdef illumos
15517
if (state->dts_cleaner != CYCLIC_NONE)
15518
cyclic_remove(state->dts_cleaner);
15519
15520
if (state->dts_deadman != CYCLIC_NONE)
15521
cyclic_remove(state->dts_deadman);
15522
#else
15523
callout_stop(&state->dts_cleaner);
15524
callout_drain(&state->dts_cleaner);
15525
callout_stop(&state->dts_deadman);
15526
callout_drain(&state->dts_deadman);
15527
#endif
15528
15529
dtrace_dstate_fini(&vstate->dtvs_dynvars);
15530
dtrace_vstate_fini(vstate);
15531
if (state->dts_ecbs != NULL)
15532
kmem_free(state->dts_ecbs, state->dts_necbs * sizeof (dtrace_ecb_t *));
15533
15534
if (state->dts_aggregations != NULL) {
15535
#ifdef DEBUG
15536
for (i = 0; i < state->dts_naggregations; i++)
15537
ASSERT(state->dts_aggregations[i] == NULL);
15538
#endif
15539
ASSERT(state->dts_naggregations > 0);
15540
kmem_free(state->dts_aggregations,
15541
state->dts_naggregations * sizeof (dtrace_aggregation_t *));
15542
}
15543
15544
kmem_free(state->dts_buffer, bufsize);
15545
kmem_free(state->dts_aggbuffer, bufsize);
15546
15547
for (i = 0; i < nspec; i++)
15548
kmem_free(spec[i].dtsp_buffer, bufsize);
15549
15550
if (spec != NULL)
15551
kmem_free(spec, nspec * sizeof (dtrace_speculation_t));
15552
15553
dtrace_format_destroy(state);
15554
15555
if (state->dts_aggid_arena != NULL) {
15556
#ifdef illumos
15557
vmem_destroy(state->dts_aggid_arena);
15558
#else
15559
delete_unrhdr(state->dts_aggid_arena);
15560
#endif
15561
state->dts_aggid_arena = NULL;
15562
}
15563
#ifdef illumos
15564
ddi_soft_state_free(dtrace_softstate, minor);
15565
vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1);
15566
#endif
15567
}
15568
15569
/*
15570
* DTrace Anonymous Enabling Functions
15571
*/
15572
static dtrace_state_t *
15573
dtrace_anon_grab(void)
15574
{
15575
dtrace_state_t *state;
15576
15577
ASSERT(MUTEX_HELD(&dtrace_lock));
15578
15579
if ((state = dtrace_anon.dta_state) == NULL) {
15580
ASSERT(dtrace_anon.dta_enabling == NULL);
15581
return (NULL);
15582
}
15583
15584
ASSERT(dtrace_anon.dta_enabling != NULL);
15585
ASSERT(dtrace_retained != NULL);
15586
15587
dtrace_enabling_destroy(dtrace_anon.dta_enabling);
15588
dtrace_anon.dta_enabling = NULL;
15589
dtrace_anon.dta_state = NULL;
15590
15591
return (state);
15592
}
15593
15594
static void
15595
dtrace_anon_property(void)
15596
{
15597
int i, rv;
15598
dtrace_state_t *state;
15599
dof_hdr_t *dof;
15600
char c[32]; /* enough for "dof-data-" + digits */
15601
15602
ASSERT(MUTEX_HELD(&dtrace_lock));
15603
ASSERT(MUTEX_HELD(&cpu_lock));
15604
15605
for (i = 0; ; i++) {
(void) snprintf(c, sizeof (c), "dof-data-%d", i);

dtrace_err_verbose = 1;

if ((dof = dtrace_dof_property(c)) == NULL) {
dtrace_err_verbose = 0;
break;
}

#ifdef illumos
/*
* We want to create anonymous state, so we need to transition
* the kernel debugger to indicate that DTrace is active. If
* this fails (e.g. because the debugger has modified text in
* some way), we won't continue with the processing.
*/
if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) {
cmn_err(CE_NOTE, "kernel debugger active; anonymous "
"enabling ignored.");
dtrace_dof_destroy(dof);
break;
}
#endif

/*
* If we haven't allocated an anonymous state, we'll do so now.
*/
if ((state = dtrace_anon.dta_state) == NULL) {
state = dtrace_state_create(NULL, NULL);
dtrace_anon.dta_state = state;

if (state == NULL) {
/*
* This basically shouldn't happen: the only
* failure mode from dtrace_state_create() is a
* failure of ddi_soft_state_zalloc() that
* itself should never happen. Still, the
* interface allows for a failure mode, and
* we want to fail as gracefully as possible:
* we'll emit an error message and cease
* processing anonymous state in this case.
*/
cmn_err(CE_WARN, "failed to create "
"anonymous state");
dtrace_dof_destroy(dof);
break;
}
}

rv = dtrace_dof_slurp(dof, &state->dts_vstate, CRED(),
&dtrace_anon.dta_enabling, 0, 0, B_TRUE);

if (rv == 0)
rv = dtrace_dof_options(dof, state);

dtrace_err_verbose = 0;
dtrace_dof_destroy(dof);

if (rv != 0) {
/*
* This is malformed DOF; chuck any anonymous state
* that we created.
*/
ASSERT(dtrace_anon.dta_enabling == NULL);
dtrace_state_destroy(state);
dtrace_anon.dta_state = NULL;
break;
}

ASSERT(dtrace_anon.dta_enabling != NULL);
}

if (dtrace_anon.dta_enabling != NULL) {
int rval;

/*
* dtrace_enabling_retain() can only fail because we are
* trying to retain more enablings than are allowed -- but
* we only have one anonymous enabling, and we are guaranteed
* to be allowed at least one retained enabling; we assert
* that dtrace_enabling_retain() returns success.
*/
rval = dtrace_enabling_retain(dtrace_anon.dta_enabling);
ASSERT(rval == 0);

dtrace_enabling_dump(dtrace_anon.dta_enabling);
}
}

/*
* DTrace Helper Functions
*/
static void
dtrace_helper_trace(dtrace_helper_action_t *helper,
dtrace_mstate_t *mstate, dtrace_vstate_t *vstate, int where)
{
uint32_t size, next, nnext, i;
dtrace_helptrace_t *ent, *buffer;
uint16_t flags = cpu_core[curcpu].cpuc_dtrace_flags;

if ((buffer = dtrace_helptrace_buffer) == NULL)
return;

ASSERT(vstate->dtvs_nlocals <= dtrace_helptrace_nlocals);

/*
* What would a tracing framework be without its own tracing
* framework? (Well, a hell of a lot simpler, for starters...)
*/
size = sizeof (dtrace_helptrace_t) + dtrace_helptrace_nlocals *
sizeof (uint64_t) - sizeof (uint64_t);
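/*
* Each record carries a snapshot of every helper local: we add one
* uint64_t per local and subtract back out the single element that
* the structure's dtht_locals array already accounts for.
*/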

/*
* Iterate until we can allocate a slot in the trace buffer.
*/
do {
next = dtrace_helptrace_next;

if (next + size < dtrace_helptrace_bufsize) {
nnext = next + size;
} else {
nnext = size;
}
} while (dtrace_cas32(&dtrace_helptrace_next, next, nnext) != next);
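/*
* Note the sentinel above: on wrap, nnext is set to exactly "size"
* rather than to next + size, which is how the check below detects
* that this record must be written at offset 0.
*/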

/*
* We have our slot; fill it in.
*/
if (nnext == size) {
dtrace_helptrace_wrapped++;
next = 0;
}

ent = (dtrace_helptrace_t *)((uintptr_t)buffer + next);
ent->dtht_helper = helper;
ent->dtht_where = where;
ent->dtht_nlocals = vstate->dtvs_nlocals;

ent->dtht_fltoffs = (mstate->dtms_present & DTRACE_MSTATE_FLTOFFS) ?
mstate->dtms_fltoffs : -1;
ent->dtht_fault = DTRACE_FLAGS2FLT(flags);
ent->dtht_illval = cpu_core[curcpu].cpuc_dtrace_illval;

for (i = 0; i < vstate->dtvs_nlocals; i++) {
dtrace_statvar_t *svar;

if ((svar = vstate->dtvs_locals[i]) == NULL)
continue;

ASSERT(svar->dtsv_size >= (mp_maxid + 1) * sizeof (uint64_t));
ent->dtht_locals[i] =
((uint64_t *)(uintptr_t)svar->dtsv_data)[curcpu];
}
}

static uint64_t
dtrace_helper(int which, dtrace_mstate_t *mstate,
dtrace_state_t *state, uint64_t arg0, uint64_t arg1)
{
uint16_t *flags = &cpu_core[curcpu].cpuc_dtrace_flags;
uint64_t sarg0 = mstate->dtms_arg[0];
uint64_t sarg1 = mstate->dtms_arg[1];
uint64_t rval = 0;
dtrace_helpers_t *helpers = curproc->p_dtrace_helpers;
dtrace_helper_action_t *helper;
dtrace_vstate_t *vstate;
dtrace_difo_t *pred;
int i, trace = dtrace_helptrace_buffer != NULL;

ASSERT(which >= 0 && which < DTRACE_NHELPER_ACTIONS);

if (helpers == NULL)
return (0);

if ((helper = helpers->dthps_actions[which]) == NULL)
return (0);

vstate = &helpers->dthps_vstate;
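/*
* Expose the arguments to the helper's DIF as arg0/arg1; the caller's
* values (saved above as sarg0/sarg1) are restored on the way out.
*/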
mstate->dtms_arg[0] = arg0;
mstate->dtms_arg[1] = arg1;

/*
* Now iterate over each helper. If its predicate evaluates to 'true',
* we'll call the corresponding actions. Note that the below calls
* to dtrace_dif_emulate() may set faults in machine state. This is
* okay: our caller (the outer dtrace_dif_emulate()) will simply plow
* the stored DIF offset with its own (which is the desired behavior).
* Also, note the calls to dtrace_dif_emulate() may allocate scratch
* from machine state; this is okay, too.
*/
for (; helper != NULL; helper = helper->dtha_next) {
if ((pred = helper->dtha_predicate) != NULL) {
if (trace)
dtrace_helper_trace(helper, mstate, vstate, 0);

if (!dtrace_dif_emulate(pred, mstate, vstate, state))
goto next;

if (*flags & CPU_DTRACE_FAULT)
goto err;
}

for (i = 0; i < helper->dtha_nactions; i++) {
if (trace)
dtrace_helper_trace(helper,
mstate, vstate, i + 1);

rval = dtrace_dif_emulate(helper->dtha_actions[i],
mstate, vstate, state);
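/*
* Note that only the most recent result is retained: the value
* of the final action emulated becomes the helper's return
* value.
*/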

if (*flags & CPU_DTRACE_FAULT)
goto err;
}

next:
if (trace)
dtrace_helper_trace(helper, mstate, vstate,
DTRACE_HELPTRACE_NEXT);
}

if (trace)
dtrace_helper_trace(helper, mstate, vstate,
DTRACE_HELPTRACE_DONE);

/*
* Restore the arg0 and arg1 that we saved upon entry.
*/
mstate->dtms_arg[0] = sarg0;
mstate->dtms_arg[1] = sarg1;

return (rval);

err:
if (trace)
dtrace_helper_trace(helper, mstate, vstate,
DTRACE_HELPTRACE_ERR);

/*
* Restore the arg0 and arg1 that we saved upon entry.
*/
mstate->dtms_arg[0] = sarg0;
mstate->dtms_arg[1] = sarg1;

return (0);
}

static void
dtrace_helper_action_destroy(dtrace_helper_action_t *helper,
dtrace_vstate_t *vstate)
{
int i;

if (helper->dtha_predicate != NULL)
dtrace_difo_release(helper->dtha_predicate, vstate);

for (i = 0; i < helper->dtha_nactions; i++) {
ASSERT(helper->dtha_actions[i] != NULL);
dtrace_difo_release(helper->dtha_actions[i], vstate);
}

kmem_free(helper->dtha_actions,
helper->dtha_nactions * sizeof (dtrace_difo_t *));
kmem_free(helper, sizeof (dtrace_helper_action_t));
}

static int
dtrace_helper_destroygen(dtrace_helpers_t *help, int gen)
{
proc_t *p = curproc;
dtrace_vstate_t *vstate;
int i;

if (help == NULL)
help = p->p_dtrace_helpers;

ASSERT(MUTEX_HELD(&dtrace_lock));

if (help == NULL || gen > help->dthps_generation)
return (EINVAL);

vstate = &help->dthps_vstate;

for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
dtrace_helper_action_t *last = NULL, *h, *next;

for (h = help->dthps_actions[i]; h != NULL; h = next) {
next = h->dtha_next;

if (h->dtha_generation == gen) {
if (last != NULL) {
last->dtha_next = next;
} else {
help->dthps_actions[i] = next;
}

dtrace_helper_action_destroy(h, vstate);
} else {
last = h;
}
}
}

/*
* Iterate until we've cleared out all helper providers with the
* given generation number.
*/
for (;;) {
dtrace_helper_provider_t *prov;

/*
* Look for a helper provider with the right generation. We
* have to start back at the beginning of the list each time
* because we drop dtrace_lock. It's unlikely that we'll make
* more than two passes.
*/
for (i = 0; i < help->dthps_nprovs; i++) {
prov = help->dthps_provs[i];

if (prov->dthp_generation == gen)
break;
}

/*
* If there were no matches, we're done.
*/
if (i == help->dthps_nprovs)
break;

/*
* Move the last helper provider into this slot.
*/
help->dthps_nprovs--;
help->dthps_provs[i] = help->dthps_provs[help->dthps_nprovs];
help->dthps_provs[help->dthps_nprovs] = NULL;

mutex_exit(&dtrace_lock);

/*
* If we have a meta provider, remove this helper provider.
*/
mutex_enter(&dtrace_meta_lock);
if (dtrace_meta_pid != NULL) {
ASSERT(dtrace_deferred_pid == NULL);
dtrace_helper_provider_remove(&prov->dthp_prov,
p->p_pid);
}
mutex_exit(&dtrace_meta_lock);

dtrace_helper_provider_destroy(prov);

mutex_enter(&dtrace_lock);
}

return (0);
}

static int
dtrace_helper_validate(dtrace_helper_action_t *helper)
{
int err = 0, i;
dtrace_difo_t *dp;

if ((dp = helper->dtha_predicate) != NULL)
err += dtrace_difo_validate_helper(dp);

for (i = 0; i < helper->dtha_nactions; i++)
err += dtrace_difo_validate_helper(helper->dtha_actions[i]);

return (err == 0);
}

static int
dtrace_helper_action_add(int which, dtrace_ecbdesc_t *ep,
dtrace_helpers_t *help)
{
dtrace_helper_action_t *helper, *last;
dtrace_actdesc_t *act;
dtrace_vstate_t *vstate;
dtrace_predicate_t *pred;
int count = 0, nactions = 0, i;

if (which < 0 || which >= DTRACE_NHELPER_ACTIONS)
return (EINVAL);

last = help->dthps_actions[which];
vstate = &help->dthps_vstate;

for (count = 0; last != NULL; last = last->dtha_next) {
count++;
if (last->dtha_next == NULL)
break;
}

/*
* If we already have dtrace_helper_actions_max helper actions for this
* helper action type, we'll refuse to add a new one.
*/
if (count >= dtrace_helper_actions_max)
return (ENOSPC);

helper = kmem_zalloc(sizeof (dtrace_helper_action_t), KM_SLEEP);
helper->dtha_generation = help->dthps_generation;

if ((pred = ep->dted_pred.dtpdd_predicate) != NULL) {
ASSERT(pred->dtp_difo != NULL);
dtrace_difo_hold(pred->dtp_difo);
helper->dtha_predicate = pred->dtp_difo;
}

for (act = ep->dted_action; act != NULL; act = act->dtad_next) {
if (act->dtad_kind != DTRACEACT_DIFEXPR)
goto err;

if (act->dtad_difo == NULL)
goto err;

nactions++;
}

helper->dtha_actions = kmem_zalloc(sizeof (dtrace_difo_t *) *
(helper->dtha_nactions = nactions), KM_SLEEP);

for (act = ep->dted_action, i = 0; act != NULL; act = act->dtad_next) {
dtrace_difo_hold(act->dtad_difo);
helper->dtha_actions[i++] = act->dtad_difo;
}

if (!dtrace_helper_validate(helper))
goto err;

if (last == NULL) {
help->dthps_actions[which] = helper;
} else {
last->dtha_next = helper;
}

if (vstate->dtvs_nlocals > dtrace_helptrace_nlocals) {
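/*
* dtrace_helptrace_nlocals feeds the record-size computation in
* dtrace_helper_trace(); grow it to cover this helper's locals,
* and reset the cursor, presumably so that tracing resumes at the
* buffer base with records of the new, larger size.
*/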
dtrace_helptrace_nlocals = vstate->dtvs_nlocals;
dtrace_helptrace_next = 0;
}

return (0);
err:
dtrace_helper_action_destroy(helper, vstate);
return (EINVAL);
}

static void
dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help,
dof_helper_t *dofhp)
{
ASSERT(MUTEX_NOT_HELD(&dtrace_lock));

mutex_enter(&dtrace_meta_lock);
mutex_enter(&dtrace_lock);

if (!dtrace_attached() || dtrace_meta_pid == NULL) {
/*
* If the dtrace module is loaded but not attached, or if
* there isn't a meta provider registered to deal with
* these provider descriptions, we need to postpone creating
* the actual providers until later.
*/

if (help->dthps_next == NULL && help->dthps_prev == NULL &&
dtrace_deferred_pid != help) {
help->dthps_deferred = 1;
help->dthps_pid = p->p_pid;
help->dthps_next = dtrace_deferred_pid;
help->dthps_prev = NULL;
if (dtrace_deferred_pid != NULL)
dtrace_deferred_pid->dthps_prev = help;
dtrace_deferred_pid = help;
}

mutex_exit(&dtrace_lock);

} else if (dofhp != NULL) {
/*
* If the dtrace module is loaded and we have a particular
* helper provider description, pass that off to the
* meta provider.
*/

mutex_exit(&dtrace_lock);

dtrace_helper_provide(dofhp, p->p_pid);

} else {
/*
* Otherwise, just pass all the helper provider descriptions
* off to the meta provider.
*/

int i;
mutex_exit(&dtrace_lock);

for (i = 0; i < help->dthps_nprovs; i++) {
dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov,
p->p_pid);
}
}

mutex_exit(&dtrace_meta_lock);
}

static int
dtrace_helper_provider_add(dof_helper_t *dofhp, dtrace_helpers_t *help, int gen)
{
dtrace_helper_provider_t *hprov, **tmp_provs;
uint_t tmp_maxprovs, i;

ASSERT(MUTEX_HELD(&dtrace_lock));
ASSERT(help != NULL);

/*
* If we already have dtrace_helper_providers_max helper providers,
* we'll refuse to add a new one.
*/
if (help->dthps_nprovs >= dtrace_helper_providers_max)
return (ENOSPC);

/*
* Check to make sure this isn't a duplicate.
*/
for (i = 0; i < help->dthps_nprovs; i++) {
if (dofhp->dofhp_addr ==
help->dthps_provs[i]->dthp_prov.dofhp_addr)
return (EALREADY);
}

hprov = kmem_zalloc(sizeof (dtrace_helper_provider_t), KM_SLEEP);
hprov->dthp_prov = *dofhp;
hprov->dthp_ref = 1;
hprov->dthp_generation = gen;

/*
* Allocate a bigger table for helper providers if it's already full.
*/
if (help->dthps_maxprovs == help->dthps_nprovs) {
tmp_maxprovs = help->dthps_maxprovs;
tmp_provs = help->dthps_provs;

if (help->dthps_maxprovs == 0)
help->dthps_maxprovs = 2;
else
help->dthps_maxprovs *= 2;
if (help->dthps_maxprovs > dtrace_helper_providers_max)
help->dthps_maxprovs = dtrace_helper_providers_max;
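/*
* The table thus grows geometrically (2, 4, 8, ...) up to the
* dtrace_helper_providers_max cap, so repeated additions copy
* the table only a logarithmic number of times.
*/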
16155
16156
ASSERT(tmp_maxprovs < help->dthps_maxprovs);
16157
16158
help->dthps_provs = kmem_zalloc(help->dthps_maxprovs *
16159
sizeof (dtrace_helper_provider_t *), KM_SLEEP);
16160
16161
if (tmp_provs != NULL) {
16162
bcopy(tmp_provs, help->dthps_provs, tmp_maxprovs *
16163
sizeof (dtrace_helper_provider_t *));
16164
kmem_free(tmp_provs, tmp_maxprovs *
16165
sizeof (dtrace_helper_provider_t *));
16166
}
16167
}
16168
16169
help->dthps_provs[help->dthps_nprovs] = hprov;
16170
help->dthps_nprovs++;
16171
16172
return (0);
16173
}
16174
16175
static void
16176
dtrace_helper_provider_destroy(dtrace_helper_provider_t *hprov)
16177
{
16178
mutex_enter(&dtrace_lock);
16179
16180
if (--hprov->dthp_ref == 0) {
16181
dof_hdr_t *dof;
16182
mutex_exit(&dtrace_lock);
16183
dof = (dof_hdr_t *)(uintptr_t)hprov->dthp_prov.dofhp_dof;
16184
dtrace_dof_destroy(dof);
16185
kmem_free(hprov, sizeof (dtrace_helper_provider_t));
16186
} else {
16187
mutex_exit(&dtrace_lock);
16188
}
16189
}
16190
16191
static int
16192
dtrace_helper_provider_validate(dof_hdr_t *dof, dof_sec_t *sec)
16193
{
16194
uintptr_t daddr = (uintptr_t)dof;
16195
dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec;
16196
dof_provider_t *provider;
16197
dof_probe_t *probe;
16198
uint8_t *arg;
16199
char *strtab, *typestr;
16200
dof_stridx_t typeidx;
16201
size_t typesz;
16202
uint_t nprobes, j, k;
16203
16204
ASSERT(sec->dofs_type == DOF_SECT_PROVIDER);
16205
16206
if (sec->dofs_offset & (sizeof (uint_t) - 1)) {
16207
dtrace_dof_error(dof, "misaligned section offset");
16208
return (-1);
16209
}
16210
16211
/*
16212
* The section needs to be large enough to contain the DOF provider
16213
* structure appropriate for the given version.
16214
*/
16215
if (sec->dofs_size <
16216
((dof->dofh_ident[DOF_ID_VERSION] == DOF_VERSION_1) ?
16217
offsetof(dof_provider_t, dofpv_prenoffs) :
16218
sizeof (dof_provider_t))) {
16219
dtrace_dof_error(dof, "provider section too small");
16220
return (-1);
16221
}
16222
16223
provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
16224
str_sec = dtrace_dof_sect(dof, DOF_SECT_STRTAB, provider->dofpv_strtab);
16225
prb_sec = dtrace_dof_sect(dof, DOF_SECT_PROBES, provider->dofpv_probes);
16226
arg_sec = dtrace_dof_sect(dof, DOF_SECT_PRARGS, provider->dofpv_prargs);
16227
off_sec = dtrace_dof_sect(dof, DOF_SECT_PROFFS, provider->dofpv_proffs);
16228
16229
if (str_sec == NULL || prb_sec == NULL ||
16230
arg_sec == NULL || off_sec == NULL)
16231
return (-1);
16232
16233
enoff_sec = NULL;
16234
16235
if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
16236
provider->dofpv_prenoffs != DOF_SECT_NONE &&
16237
(enoff_sec = dtrace_dof_sect(dof, DOF_SECT_PRENOFFS,
16238
provider->dofpv_prenoffs)) == NULL)
16239
return (-1);
16240
16241
strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);
16242
16243
if (provider->dofpv_name >= str_sec->dofs_size ||
16244
strlen(strtab + provider->dofpv_name) >= DTRACE_PROVNAMELEN) {
16245
dtrace_dof_error(dof, "invalid provider name");
16246
return (-1);
16247
}
16248
16249
if (prb_sec->dofs_entsize == 0 ||
16250
prb_sec->dofs_entsize > prb_sec->dofs_size) {
16251
dtrace_dof_error(dof, "invalid entry size");
16252
return (-1);
16253
}
16254
16255
if (prb_sec->dofs_entsize & (sizeof (uintptr_t) - 1)) {
16256
dtrace_dof_error(dof, "misaligned entry size");
16257
return (-1);
16258
}
16259
16260
if (off_sec->dofs_entsize != sizeof (uint32_t)) {
16261
dtrace_dof_error(dof, "invalid entry size");
16262
return (-1);
16263
}
16264
16265
if (off_sec->dofs_offset & (sizeof (uint32_t) - 1)) {
16266
dtrace_dof_error(dof, "misaligned section offset");
16267
return (-1);
16268
}
16269
16270
if (arg_sec->dofs_entsize != sizeof (uint8_t)) {
16271
dtrace_dof_error(dof, "invalid entry size");
16272
return (-1);
16273
}
16274
16275
arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset);
16276
16277
nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize;
16278
16279
/*
16280
* Take a pass through the probes to check for errors.
16281
*/
16282
for (j = 0; j < nprobes; j++) {
16283
probe = (dof_probe_t *)(uintptr_t)(daddr +
16284
prb_sec->dofs_offset + j * prb_sec->dofs_entsize);
16285
16286
if (probe->dofpr_func >= str_sec->dofs_size) {
16287
dtrace_dof_error(dof, "invalid function name");
16288
return (-1);
16289
}
16290
16291
if (strlen(strtab + probe->dofpr_func) >= DTRACE_FUNCNAMELEN) {
16292
dtrace_dof_error(dof, "function name too long");
16293
/*
16294
* Keep going if the function name is too long.
16295
* Unlike provider and probe names, we cannot reasonably
16296
* impose restrictions on function names, since they're
16297
* a property of the code being instrumented. We will
16298
* skip this probe in dtrace_helper_provide_one().
16299
*/
16300
}
16301
16302
if (probe->dofpr_name >= str_sec->dofs_size ||
16303
strlen(strtab + probe->dofpr_name) >= DTRACE_NAMELEN) {
16304
dtrace_dof_error(dof, "invalid probe name");
16305
return (-1);
16306
}
16307
16308
/*
16309
* The offset count must not wrap the index, and the offsets
16310
* must also not overflow the section's data.
16311
*/
16312
if (probe->dofpr_offidx + probe->dofpr_noffs <
16313
probe->dofpr_offidx ||
16314
(probe->dofpr_offidx + probe->dofpr_noffs) *
16315
off_sec->dofs_entsize > off_sec->dofs_size) {
16316
dtrace_dof_error(dof, "invalid probe offset");
16317
return (-1);
16318
}
16319
16320
if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1) {
16321
/*
16322
* If there's no is-enabled offset section, make sure
16323
* there aren't any is-enabled offsets. Otherwise
16324
* perform the same checks as for probe offsets
16325
* (immediately above).
16326
*/
16327
if (enoff_sec == NULL) {
16328
if (probe->dofpr_enoffidx != 0 ||
16329
probe->dofpr_nenoffs != 0) {
16330
dtrace_dof_error(dof, "is-enabled "
16331
"offsets with null section");
16332
return (-1);
16333
}
16334
} else if (probe->dofpr_enoffidx +
16335
probe->dofpr_nenoffs < probe->dofpr_enoffidx ||
16336
(probe->dofpr_enoffidx + probe->dofpr_nenoffs) *
16337
enoff_sec->dofs_entsize > enoff_sec->dofs_size) {
16338
dtrace_dof_error(dof, "invalid is-enabled "
16339
"offset");
16340
return (-1);
16341
}
16342
16343
if (probe->dofpr_noffs + probe->dofpr_nenoffs == 0) {
16344
dtrace_dof_error(dof, "zero probe and "
16345
"is-enabled offsets");
16346
return (-1);
16347
}
16348
} else if (probe->dofpr_noffs == 0) {
16349
dtrace_dof_error(dof, "zero probe offsets");
16350
return (-1);
16351
}
16352
16353
if (probe->dofpr_argidx + probe->dofpr_xargc <
16354
probe->dofpr_argidx ||
16355
(probe->dofpr_argidx + probe->dofpr_xargc) *
16356
arg_sec->dofs_entsize > arg_sec->dofs_size) {
16357
dtrace_dof_error(dof, "invalid args");
16358
return (-1);
16359
}
16360
16361
typeidx = probe->dofpr_nargv;
16362
typestr = strtab + probe->dofpr_nargv;
16363
for (k = 0; k < probe->dofpr_nargc; k++) {
16364
if (typeidx >= str_sec->dofs_size) {
16365
dtrace_dof_error(dof, "bad "
16366
"native argument type");
16367
return (-1);
16368
}
16369
16370
typesz = strlen(typestr) + 1;
16371
if (typesz > DTRACE_ARGTYPELEN) {
16372
dtrace_dof_error(dof, "native "
16373
"argument type too long");
16374
return (-1);
16375
}
16376
typeidx += typesz;
16377
typestr += typesz;
16378
}
16379
16380
typeidx = probe->dofpr_xargv;
16381
typestr = strtab + probe->dofpr_xargv;
16382
for (k = 0; k < probe->dofpr_xargc; k++) {
16383
if (arg[probe->dofpr_argidx + k] > probe->dofpr_nargc) {
16384
dtrace_dof_error(dof, "bad "
16385
"native argument index");
16386
return (-1);
16387
}
16388
16389
if (typeidx >= str_sec->dofs_size) {
16390
dtrace_dof_error(dof, "bad "
16391
"translated argument type");
16392
return (-1);
16393
}
16394
16395
typesz = strlen(typestr) + 1;
16396
if (typesz > DTRACE_ARGTYPELEN) {
16397
dtrace_dof_error(dof, "translated argument "
16398
"type too long");
16399
return (-1);
16400
}
16401
16402
typeidx += typesz;
16403
typestr += typesz;
16404
}
16405
}
16406
16407
return (0);
16408
}
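
/*
* Ingest helper DOF for the given process. On success, the return value
* is the generation number assigned to the new helpers, which may later
* be passed to dtrace_helper_destroygen() (see the DTRACEHIOC_REMOVE
* handling below); on failure, -1 is returned. Either way, this
* function takes responsibility for the DOF: it is either freed here or
* saved to be freed later.
*/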
static int
dtrace_helper_slurp(dof_hdr_t *dof, dof_helper_t *dhp, struct proc *p)
{
dtrace_helpers_t *help;
dtrace_vstate_t *vstate;
dtrace_enabling_t *enab = NULL;
int i, gen, rv, nhelpers = 0, nprovs = 0, destroy = 1;
uintptr_t daddr = (uintptr_t)dof;

ASSERT(MUTEX_HELD(&dtrace_lock));

if ((help = p->p_dtrace_helpers) == NULL)
help = dtrace_helpers_create(p);

vstate = &help->dthps_vstate;

if ((rv = dtrace_dof_slurp(dof, vstate, NULL, &enab, dhp->dofhp_addr,
dhp->dofhp_dof, B_FALSE)) != 0) {
dtrace_dof_destroy(dof);
return (rv);
}

/*
* Look for helper providers and validate their descriptions.
*/
for (i = 0; i < dof->dofh_secnum; i++) {
dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
dof->dofh_secoff + i * dof->dofh_secsize);

if (sec->dofs_type != DOF_SECT_PROVIDER)
continue;

if (dtrace_helper_provider_validate(dof, sec) != 0) {
dtrace_enabling_destroy(enab);
dtrace_dof_destroy(dof);
return (-1);
}

nprovs++;
}

/*
* Now we need to walk through the ECB descriptions in the enabling.
*/
for (i = 0; i < enab->dten_ndesc; i++) {
dtrace_ecbdesc_t *ep = enab->dten_desc[i];
dtrace_probedesc_t *desc = &ep->dted_probe;

if (strcmp(desc->dtpd_provider, "dtrace") != 0)
continue;

if (strcmp(desc->dtpd_mod, "helper") != 0)
continue;

if (strcmp(desc->dtpd_func, "ustack") != 0)
continue;

if ((rv = dtrace_helper_action_add(DTRACE_HELPER_ACTION_USTACK,
ep, help)) != 0) {
/*
* Adding this helper action failed -- we are now going
* to rip out the entire generation and return failure.
*/
(void) dtrace_helper_destroygen(help,
help->dthps_generation);
dtrace_enabling_destroy(enab);
dtrace_dof_destroy(dof);
return (-1);
}

nhelpers++;
}

if (nhelpers < enab->dten_ndesc)
dtrace_dof_error(dof, "unmatched helpers");

gen = help->dthps_generation++;
dtrace_enabling_destroy(enab);

if (nprovs > 0) {
/*
* Now that this is in-kernel, we change the sense of the
* members: dofhp_dof denotes the in-kernel copy of the DOF
* and dofhp_addr denotes the address at user-level.
*/
dhp->dofhp_addr = dhp->dofhp_dof;
dhp->dofhp_dof = (uint64_t)(uintptr_t)dof;

if (dtrace_helper_provider_add(dhp, help, gen) == 0) {
mutex_exit(&dtrace_lock);
dtrace_helper_provider_register(p, help, dhp);
mutex_enter(&dtrace_lock);

destroy = 0;
}
}

if (destroy)
dtrace_dof_destroy(dof);

return (gen);
}

static dtrace_helpers_t *
dtrace_helpers_create(proc_t *p)
{
dtrace_helpers_t *help;

ASSERT(MUTEX_HELD(&dtrace_lock));
ASSERT(p->p_dtrace_helpers == NULL);

help = kmem_zalloc(sizeof (dtrace_helpers_t), KM_SLEEP);
help->dthps_actions = kmem_zalloc(sizeof (dtrace_helper_action_t *) *
DTRACE_NHELPER_ACTIONS, KM_SLEEP);

p->p_dtrace_helpers = help;
dtrace_helpers++;

return (help);
}

#ifdef illumos
static
#endif
void
dtrace_helpers_destroy(proc_t *p)
{
dtrace_helpers_t *help;
dtrace_vstate_t *vstate;
#ifdef illumos
proc_t *p = curproc;
#endif
int i;

mutex_enter(&dtrace_lock);

ASSERT(p->p_dtrace_helpers != NULL);
ASSERT(dtrace_helpers > 0);

help = p->p_dtrace_helpers;
vstate = &help->dthps_vstate;

/*
* We're now going to lose the help from this process.
*/
p->p_dtrace_helpers = NULL;
dtrace_sync();

/*
* Destroy the helper actions.
*/
for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
dtrace_helper_action_t *h, *next;

for (h = help->dthps_actions[i]; h != NULL; h = next) {
next = h->dtha_next;
dtrace_helper_action_destroy(h, vstate);
h = next;
}
}

mutex_exit(&dtrace_lock);

/*
* Destroy the helper providers.
*/
if (help->dthps_maxprovs > 0) {
mutex_enter(&dtrace_meta_lock);
if (dtrace_meta_pid != NULL) {
ASSERT(dtrace_deferred_pid == NULL);

for (i = 0; i < help->dthps_nprovs; i++) {
dtrace_helper_provider_remove(
&help->dthps_provs[i]->dthp_prov, p->p_pid);
}
} else {
mutex_enter(&dtrace_lock);
ASSERT(help->dthps_deferred == 0 ||
help->dthps_next != NULL ||
help->dthps_prev != NULL ||
help == dtrace_deferred_pid);

/*
* Remove the helper from the deferred list.
*/
if (help->dthps_next != NULL)
help->dthps_next->dthps_prev = help->dthps_prev;
if (help->dthps_prev != NULL)
help->dthps_prev->dthps_next = help->dthps_next;
if (dtrace_deferred_pid == help) {
dtrace_deferred_pid = help->dthps_next;
ASSERT(help->dthps_prev == NULL);
}

mutex_exit(&dtrace_lock);
}

mutex_exit(&dtrace_meta_lock);

for (i = 0; i < help->dthps_nprovs; i++) {
dtrace_helper_provider_destroy(help->dthps_provs[i]);
}

kmem_free(help->dthps_provs, help->dthps_maxprovs *
sizeof (dtrace_helper_provider_t *));
}

mutex_enter(&dtrace_lock);

dtrace_vstate_fini(&help->dthps_vstate);
kmem_free(help->dthps_actions,
sizeof (dtrace_helper_action_t *) * DTRACE_NHELPER_ACTIONS);
kmem_free(help, sizeof (dtrace_helpers_t));

--dtrace_helpers;
mutex_exit(&dtrace_lock);
}

#ifdef illumos
static
#endif
void
dtrace_helpers_duplicate(proc_t *from, proc_t *to)
{
dtrace_helpers_t *help, *newhelp;
dtrace_helper_action_t *helper, *new, *last;
dtrace_difo_t *dp;
dtrace_vstate_t *vstate;
int i, j, sz, hasprovs = 0;

mutex_enter(&dtrace_lock);
ASSERT(from->p_dtrace_helpers != NULL);
ASSERT(dtrace_helpers > 0);

help = from->p_dtrace_helpers;
newhelp = dtrace_helpers_create(to);
ASSERT(to->p_dtrace_helpers != NULL);

newhelp->dthps_generation = help->dthps_generation;
vstate = &newhelp->dthps_vstate;

/*
* Duplicate the helper actions.
*/
for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
if ((helper = help->dthps_actions[i]) == NULL)
continue;

for (last = NULL; helper != NULL; helper = helper->dtha_next) {
new = kmem_zalloc(sizeof (dtrace_helper_action_t),
KM_SLEEP);
new->dtha_generation = helper->dtha_generation;

if ((dp = helper->dtha_predicate) != NULL) {
dp = dtrace_difo_duplicate(dp, vstate);
new->dtha_predicate = dp;
}

new->dtha_nactions = helper->dtha_nactions;
sz = sizeof (dtrace_difo_t *) * new->dtha_nactions;
new->dtha_actions = kmem_alloc(sz, KM_SLEEP);

for (j = 0; j < new->dtha_nactions; j++) {
dtrace_difo_t *dp = helper->dtha_actions[j];

ASSERT(dp != NULL);
dp = dtrace_difo_duplicate(dp, vstate);
new->dtha_actions[j] = dp;
}

if (last != NULL) {
last->dtha_next = new;
} else {
newhelp->dthps_actions[i] = new;
}

last = new;
}
}

/*
* Duplicate the helper providers and register them with the
* DTrace framework.
*/
if (help->dthps_nprovs > 0) {
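/*
* Helper providers are shared rather than copied: the child takes
* a reference on each of the parent's providers (dthp_ref), and
* dtrace_helper_provider_destroy() only frees a provider once its
* last reference is dropped.
*/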
newhelp->dthps_nprovs = help->dthps_nprovs;
newhelp->dthps_maxprovs = help->dthps_nprovs;
newhelp->dthps_provs = kmem_alloc(newhelp->dthps_nprovs *
sizeof (dtrace_helper_provider_t *), KM_SLEEP);
for (i = 0; i < newhelp->dthps_nprovs; i++) {
newhelp->dthps_provs[i] = help->dthps_provs[i];
newhelp->dthps_provs[i]->dthp_ref++;
}

hasprovs = 1;
}

mutex_exit(&dtrace_lock);

if (hasprovs)
dtrace_helper_provider_register(to, newhelp, NULL);
}

/*
* DTrace Hook Functions
*/
static void
dtrace_module_loaded(modctl_t *ctl)
{
dtrace_provider_t *prv;

mutex_enter(&dtrace_provider_lock);
#ifdef illumos
mutex_enter(&mod_lock);
#endif

#ifdef illumos
ASSERT(ctl->mod_busy);
#endif

/*
* We're going to call each provider's per-module provide operation
* specifying only this module.
*/
for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next)
prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);

#ifdef illumos
mutex_exit(&mod_lock);
#endif
mutex_exit(&dtrace_provider_lock);

/*
* If we have any retained enablings, we need to match against them.
* Enabling probes requires that cpu_lock be held, and we cannot hold
* cpu_lock here -- it is legal for cpu_lock to be held when loading a
* module. (In particular, this happens when loading scheduling
* classes.) So if we have any retained enablings, we need to dispatch
* our task queue to do the match for us.
*/
mutex_enter(&dtrace_lock);

if (dtrace_retained == NULL) {
mutex_exit(&dtrace_lock);
return;
}

(void)taskq_dispatch(dtrace_taskq,
(task_func_t *)dtrace_enabling_matchall_task, NULL, TQ_SLEEP);

mutex_exit(&dtrace_lock);

/*
* And now, for a little heuristic sleaze: in general, we want to
* match modules as soon as they load. However, we cannot guarantee
* this, because it would lead us to the lock ordering violation
* outlined above. The common case, of course, is that cpu_lock is
* _not_ held -- so we delay here for a clock tick, hoping that that's
* long enough for the task queue to do its work. If it's not, it's
* not a serious problem -- it just means that the module that we
* just loaded may not be immediately instrumentable.
*/
delay(1);
}

static void
#ifdef illumos
dtrace_module_unloaded(modctl_t *ctl)
#else
dtrace_module_unloaded(modctl_t *ctl, int *error)
#endif
{
dtrace_probe_t template, *probe, *first, *next;
dtrace_provider_t *prov;
#ifndef illumos
char modname[DTRACE_MODNAMELEN];
size_t len;
#endif

#ifdef illumos
template.dtpr_mod = ctl->mod_modname;
#else
/* Handle the fact that ctl->filename may end in ".ko". */
strlcpy(modname, ctl->filename, sizeof(modname));
len = strlen(ctl->filename);
if (len > 3 && strcmp(modname + len - 3, ".ko") == 0)
modname[len - 3] = '\0';
template.dtpr_mod = modname;
#endif

mutex_enter(&dtrace_provider_lock);
#ifdef illumos
mutex_enter(&mod_lock);
#endif
mutex_enter(&dtrace_lock);

#ifndef illumos
if (ctl->nenabled > 0) {
/* Don't allow unloads if a probe is enabled. */
mutex_exit(&dtrace_provider_lock);
mutex_exit(&dtrace_lock);
*error = -1;
printf(
"kldunload: attempt to unload module that has DTrace probes enabled\n");
return;
}
#endif

if (dtrace_bymod == NULL) {
/*
* The DTrace module is loaded (obviously) but not attached;
* we don't have any work to do.
*/
mutex_exit(&dtrace_provider_lock);
#ifdef illumos
mutex_exit(&mod_lock);
#endif
mutex_exit(&dtrace_lock);
return;
}

for (probe = first = dtrace_hash_lookup(dtrace_bymod, &template);
probe != NULL; probe = probe->dtpr_nextmod) {
if (probe->dtpr_ecb != NULL) {
mutex_exit(&dtrace_provider_lock);
#ifdef illumos
mutex_exit(&mod_lock);
#endif
mutex_exit(&dtrace_lock);

/*
* This shouldn't _actually_ be possible -- we're
* unloading a module that has an enabled probe in it.
* (It's normally up to the provider to make sure that
* this can't happen.) However, because dtps_enable()
* doesn't have a failure mode, there can be an
* enable/unload race. Upshot: we don't want to
* assert, but we're not going to disable the
* probe, either.
*/
if (dtrace_err_verbose) {
#ifdef illumos
cmn_err(CE_WARN, "unloaded module '%s' had "
"enabled probes", ctl->mod_modname);
#else
cmn_err(CE_WARN, "unloaded module '%s' had "
"enabled probes", modname);
#endif
}

return;
}
}

probe = first;

for (first = NULL; probe != NULL; probe = next) {
ASSERT(dtrace_probes[probe->dtpr_id - 1] == probe);

dtrace_probes[probe->dtpr_id - 1] = NULL;

next = probe->dtpr_nextmod;
dtrace_hash_remove(dtrace_bymod, probe);
dtrace_hash_remove(dtrace_byfunc, probe);
dtrace_hash_remove(dtrace_byname, probe);

if (first == NULL) {
first = probe;
probe->dtpr_nextmod = NULL;
} else {
probe->dtpr_nextmod = first;
first = probe;
}
}

/*
* We've removed all of the module's probes from the hash chains and
* from the probe array. Now issue a dtrace_sync() to be sure that
* everyone has cleared out from any probe array processing.
*/
dtrace_sync();

for (probe = first; probe != NULL; probe = first) {
first = probe->dtpr_nextmod;
prov = probe->dtpr_provider;
prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe->dtpr_id,
probe->dtpr_arg);
kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
#ifdef illumos
vmem_free(dtrace_arena, (void *)(uintptr_t)probe->dtpr_id, 1);
#else
free_unr(dtrace_arena, probe->dtpr_id);
#endif
kmem_free(probe, sizeof (dtrace_probe_t));
}

mutex_exit(&dtrace_lock);
#ifdef illumos
mutex_exit(&mod_lock);
#endif
mutex_exit(&dtrace_provider_lock);
}

#ifndef illumos
static void
dtrace_kld_load(void *arg __unused, linker_file_t lf)
{

dtrace_module_loaded(lf);
}

static void
dtrace_kld_unload_try(void *arg __unused, linker_file_t lf, int *error)
{

if (*error != 0)
/* We already have an error, so don't do anything. */
return;
dtrace_module_unloaded(lf, error);
}
#endif

#ifdef illumos
static void
dtrace_suspend(void)
{
dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_suspend));
}

static void
dtrace_resume(void)
{
dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_resume));
}
#endif

static int
dtrace_cpu_setup(cpu_setup_t what, processorid_t cpu)
{
ASSERT(MUTEX_HELD(&cpu_lock));
mutex_enter(&dtrace_lock);

switch (what) {
case CPU_CONFIG: {
dtrace_state_t *state;
dtrace_optval_t *opt, rs, c;

/*
* For now, we only allocate a new buffer for anonymous state.
*/
if ((state = dtrace_anon.dta_state) == NULL)
break;

if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE)
break;

opt = state->dts_options;
c = opt[DTRACEOPT_CPU];

if (c != DTRACE_CPUALL && c != DTRACEOPT_UNSET && c != cpu)
break;

/*
* Regardless of what the actual policy is, we're going to
* temporarily set our resize policy to be manual. We're
* also going to temporarily set our CPU option to denote
* the newly configured CPU.
*/
rs = opt[DTRACEOPT_BUFRESIZE];
opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_MANUAL;
opt[DTRACEOPT_CPU] = (dtrace_optval_t)cpu;

(void) dtrace_state_buffers(state);

opt[DTRACEOPT_BUFRESIZE] = rs;
opt[DTRACEOPT_CPU] = c;

break;
}

case CPU_UNCONFIG:
/*
* We don't free the buffer in the CPU_UNCONFIG case. (The
* buffer will be freed when the consumer exits.)
*/
break;

default:
break;
}

mutex_exit(&dtrace_lock);
return (0);
}

#ifdef illumos
static void
dtrace_cpu_setup_initial(processorid_t cpu)
{
(void) dtrace_cpu_setup(CPU_CONFIG, cpu);
}
#endif

static void
dtrace_toxrange_add(uintptr_t base, uintptr_t limit)
{
if (dtrace_toxranges >= dtrace_toxranges_max) {
int osize, nsize;
dtrace_toxrange_t *range;

osize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);

if (osize == 0) {
ASSERT(dtrace_toxrange == NULL);
ASSERT(dtrace_toxranges_max == 0);
dtrace_toxranges_max = 1;
} else {
dtrace_toxranges_max <<= 1;
}
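/*
* The range table grows geometrically (1, 2, 4, ...), so existing
* entries are recopied at most a logarithmic number of times.
*/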
nsize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);
range = kmem_zalloc(nsize, KM_SLEEP);

if (dtrace_toxrange != NULL) {
ASSERT(osize != 0);
bcopy(dtrace_toxrange, range, osize);
kmem_free(dtrace_toxrange, osize);
}

dtrace_toxrange = range;
}

ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == 0);
ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == 0);

dtrace_toxrange[dtrace_toxranges].dtt_base = base;
dtrace_toxrange[dtrace_toxranges].dtt_limit = limit;
dtrace_toxranges++;
}

static void
dtrace_getf_barrier(void)
{
#ifdef illumos
/*
* When we have unprivileged (that is, non-DTRACE_CRV_KERNEL) enablings
* that contain calls to getf(), this routine will be called on every
* closef() before either the underlying vnode is released or the
* file_t itself is freed. By the time we are here, it is essential
* that the file_t can no longer be accessed from a call to getf()
* in probe context -- that assures that a dtrace_sync() can be used
* to clear out any enablings referring to the old structures.
*/
if (curthread->t_procp->p_zone->zone_dtrace_getf != 0 ||
kcred->cr_zone->zone_dtrace_getf != 0)
dtrace_sync();
#endif
}

/*
* DTrace Driver Cookbook Functions
*/
#ifdef illumos
/*ARGSUSED*/
static int
dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
dtrace_provider_id_t id;
dtrace_state_t *state = NULL;
dtrace_enabling_t *enab;

mutex_enter(&cpu_lock);
mutex_enter(&dtrace_provider_lock);
mutex_enter(&dtrace_lock);

if (ddi_soft_state_init(&dtrace_softstate,
sizeof (dtrace_state_t), 0) != 0) {
cmn_err(CE_NOTE, "/dev/dtrace failed to initialize soft state");
mutex_exit(&cpu_lock);
mutex_exit(&dtrace_provider_lock);
mutex_exit(&dtrace_lock);
return (DDI_FAILURE);
}

if (ddi_create_minor_node(devi, DTRACEMNR_DTRACE, S_IFCHR,
DTRACEMNRN_DTRACE, DDI_PSEUDO, NULL) == DDI_FAILURE ||
ddi_create_minor_node(devi, DTRACEMNR_HELPER, S_IFCHR,
DTRACEMNRN_HELPER, DDI_PSEUDO, NULL) == DDI_FAILURE) {
cmn_err(CE_NOTE, "/dev/dtrace couldn't create minor nodes");
ddi_remove_minor_node(devi, NULL);
ddi_soft_state_fini(&dtrace_softstate);
mutex_exit(&cpu_lock);
mutex_exit(&dtrace_provider_lock);
mutex_exit(&dtrace_lock);
return (DDI_FAILURE);
}

ddi_report_dev(devi);
dtrace_devi = devi;

dtrace_modload = dtrace_module_loaded;
dtrace_modunload = dtrace_module_unloaded;
dtrace_cpu_init = dtrace_cpu_setup_initial;
dtrace_helpers_cleanup = dtrace_helpers_destroy;
dtrace_helpers_fork = dtrace_helpers_duplicate;
dtrace_cpustart_init = dtrace_suspend;
dtrace_cpustart_fini = dtrace_resume;
dtrace_debugger_init = dtrace_suspend;
dtrace_debugger_fini = dtrace_resume;

register_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);

ASSERT(MUTEX_HELD(&cpu_lock));

dtrace_arena = vmem_create("dtrace", (void *)1, UINT32_MAX, 1,
NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
dtrace_minor = vmem_create("dtrace_minor", (void *)DTRACEMNRN_CLONE,
UINT32_MAX - DTRACEMNRN_CLONE, 1, NULL, NULL, NULL, 0,
VM_SLEEP | VMC_IDENTIFIER);
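/*
* Both arenas are identifier arenas (VMC_IDENTIFIER): they hand out
* integer IDs -- probe IDs and clone-device minors, respectively --
* rather than backing memory.
*/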
dtrace_taskq = taskq_create("dtrace_taskq", 1, maxclsyspri,
1, INT_MAX, 0);

dtrace_state_cache = kmem_cache_create("dtrace_state_cache",
sizeof (dtrace_dstate_percpu_t) * NCPU, DTRACE_STATE_ALIGN,
NULL, NULL, NULL, NULL, NULL, 0);

ASSERT(MUTEX_HELD(&cpu_lock));
dtrace_bymod = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_mod),
offsetof(dtrace_probe_t, dtpr_nextmod),
offsetof(dtrace_probe_t, dtpr_prevmod));

dtrace_byfunc = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_func),
offsetof(dtrace_probe_t, dtpr_nextfunc),
offsetof(dtrace_probe_t, dtpr_prevfunc));

dtrace_byname = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_name),
offsetof(dtrace_probe_t, dtpr_nextname),
offsetof(dtrace_probe_t, dtpr_prevname));

if (dtrace_retain_max < 1) {
cmn_err(CE_WARN, "illegal value (%zu) for dtrace_retain_max; "
"setting to 1", dtrace_retain_max);
dtrace_retain_max = 1;
}

/*
* Now discover our toxic ranges.
*/
dtrace_toxic_ranges(dtrace_toxrange_add);

/*
* Before we register ourselves as a provider to our own framework,
* we would like to assert that dtrace_provider is NULL -- but that's
* not true if we were loaded as a dependency of a DTrace provider.
* Once we've registered, we can assert that dtrace_provider is our
* pseudo provider.
*/
(void) dtrace_register("dtrace", &dtrace_provider_attr,
DTRACE_PRIV_NONE, 0, &dtrace_provider_ops, NULL, &id);

ASSERT(dtrace_provider != NULL);
ASSERT((dtrace_provider_id_t)dtrace_provider == id);

dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
dtrace_provider, NULL, NULL, "BEGIN", 0, NULL);
dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t)
dtrace_provider, NULL, NULL, "END", 0, NULL);
dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
dtrace_provider, NULL, NULL, "ERROR", 1, NULL);

dtrace_anon_property();
mutex_exit(&cpu_lock);

/*
* If there are already providers, we must ask them to provide their
* probes, and then match any anonymous enabling against them. Note
* that there should be no other retained enablings at this time:
* the only retained enablings at this time should be the anonymous
* enabling.
*/
if (dtrace_anon.dta_enabling != NULL) {
ASSERT(dtrace_retained == dtrace_anon.dta_enabling);

dtrace_enabling_provide(NULL);
state = dtrace_anon.dta_state;

/*
* We couldn't hold cpu_lock across the above call to
* dtrace_enabling_provide(), but we must hold it to actually
* enable the probes. We have to drop all of our locks, pick
* up cpu_lock, and regain our locks before matching the
* retained anonymous enabling.
*/
mutex_exit(&dtrace_lock);
mutex_exit(&dtrace_provider_lock);

mutex_enter(&cpu_lock);
mutex_enter(&dtrace_provider_lock);
mutex_enter(&dtrace_lock);

if ((enab = dtrace_anon.dta_enabling) != NULL)
(void) dtrace_enabling_match(enab, NULL);

mutex_exit(&cpu_lock);
}

mutex_exit(&dtrace_lock);
mutex_exit(&dtrace_provider_lock);

if (state != NULL) {
/*
* If we created any anonymous state, set it going now.
*/
(void) dtrace_state_go(state, &dtrace_anon.dta_beganon);
}

return (DDI_SUCCESS);
}
#endif /* illumos */

#ifndef illumos
static void dtrace_dtr(void *);
#endif

/*ARGSUSED*/
static int
#ifdef illumos
dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
#else
dtrace_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
#endif
{
dtrace_state_t *state;
uint32_t priv;
uid_t uid;
zoneid_t zoneid;

#ifdef illumos
if (getminor(*devp) == DTRACEMNRN_HELPER)
return (0);

/*
* If this wasn't an open with the "helper" minor, then it must be
* the "dtrace" minor.
*/
if (getminor(*devp) != DTRACEMNRN_DTRACE)
return (ENXIO);
#else
cred_t *cred_p = NULL;
cred_p = dev->si_cred;

/*
* If no DTRACE_PRIV_* bits are set in the credential, then the
* caller lacks sufficient permission to do anything with DTrace.
*/
dtrace_cred2priv(cred_p, &priv, &uid, &zoneid);
if (priv == DTRACE_PRIV_NONE) {
#endif

return (EACCES);
}

/*
* Ask all providers to provide all their probes.
*/
mutex_enter(&dtrace_provider_lock);
dtrace_probe_provide(NULL, NULL);
mutex_exit(&dtrace_provider_lock);

mutex_enter(&cpu_lock);
mutex_enter(&dtrace_lock);
dtrace_opens++;
dtrace_membar_producer();

#ifdef illumos
/*
* If the kernel debugger is active (that is, if the kernel debugger
* modified text in some way), we won't allow the open.
*/
if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) {
dtrace_opens--;
mutex_exit(&cpu_lock);
mutex_exit(&dtrace_lock);
return (EBUSY);
}

if (dtrace_helptrace_enable && dtrace_helptrace_buffer == NULL) {
/*
* If DTrace helper tracing is enabled, we need to allocate the
* trace buffer and initialize the values.
*/
dtrace_helptrace_buffer =
kmem_zalloc(dtrace_helptrace_bufsize, KM_SLEEP);
dtrace_helptrace_next = 0;
dtrace_helptrace_wrapped = 0;
dtrace_helptrace_enable = 0;
}

state = dtrace_state_create(devp, cred_p);
#else
state = dtrace_state_create(dev, NULL);
devfs_set_cdevpriv(state, dtrace_dtr);
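/*
* On FreeBSD the consumer state is hung off the cdev's per-descriptor
* private data: dtrace_dtr() is registered as the cdevpriv destructor,
* so it runs when the descriptor is closed and serves the role that
* dtrace_close() serves on illumos.
*/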
#endif

mutex_exit(&cpu_lock);

if (state == NULL) {
#ifdef illumos
if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
#else
--dtrace_opens;
#endif
mutex_exit(&dtrace_lock);
return (EAGAIN);
}

mutex_exit(&dtrace_lock);

return (0);
}

/*ARGSUSED*/
#ifdef illumos
static int
dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
#else
static void
dtrace_dtr(void *data)
#endif
{
#ifdef illumos
minor_t minor = getminor(dev);
dtrace_state_t *state;
#endif
dtrace_helptrace_t *buf = NULL;

#ifdef illumos
if (minor == DTRACEMNRN_HELPER)
return (0);

state = ddi_get_soft_state(dtrace_softstate, minor);
#else
dtrace_state_t *state = data;
#endif

mutex_enter(&cpu_lock);
mutex_enter(&dtrace_lock);

#ifdef illumos
if (state->dts_anon)
#else
if (state != NULL && state->dts_anon)
#endif
{
/*
* There is anonymous state. Destroy that first.
*/
ASSERT(dtrace_anon.dta_state == NULL);
dtrace_state_destroy(state->dts_anon);
}

if (dtrace_helptrace_disable) {
/*
* If we have been told to disable helper tracing, set the
* buffer to NULL before calling into dtrace_state_destroy();
* we take advantage of its dtrace_sync() to know that no
* CPU is in probe context with enabled helper tracing
* after it returns.
*/
buf = dtrace_helptrace_buffer;
dtrace_helptrace_buffer = NULL;
}

#ifdef illumos
dtrace_state_destroy(state);
#else
if (state != NULL) {
dtrace_state_destroy(state);
kmem_free(state, 0);
}
#endif
ASSERT(dtrace_opens > 0);

#ifdef illumos
/*
* Only relinquish control of the kernel debugger interface when there
* are no consumers and no anonymous enablings.
*/
if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
#else
--dtrace_opens;
#endif

if (buf != NULL) {
kmem_free(buf, dtrace_helptrace_bufsize);
dtrace_helptrace_disable = 0;
}

mutex_exit(&dtrace_lock);
mutex_exit(&cpu_lock);

#ifdef illumos
return (0);
#endif
}

#ifdef illumos
/*ARGSUSED*/
static int
dtrace_ioctl_helper(int cmd, intptr_t arg, int *rv)
{
int rval;
dof_helper_t help, *dhp = NULL;

switch (cmd) {
case DTRACEHIOC_ADDDOF:
if (copyin((void *)arg, &help, sizeof (help)) != 0) {
dtrace_dof_error(NULL, "failed to copyin DOF helper");
return (EFAULT);
}

dhp = &help;
arg = (intptr_t)help.dofhp_dof;
/*FALLTHROUGH*/

case DTRACEHIOC_ADD: {
dof_hdr_t *dof = dtrace_dof_copyin(arg, &rval);

if (dof == NULL)
return (rval);

mutex_enter(&dtrace_lock);

/*
* dtrace_helper_slurp() takes responsibility for the dof --
* it may free it now or it may save it and free it later.
*/
if ((rval = dtrace_helper_slurp(dof, dhp)) != -1) {
*rv = rval;
rval = 0;
} else {
rval = EINVAL;
}

mutex_exit(&dtrace_lock);
return (rval);
}

case DTRACEHIOC_REMOVE: {
mutex_enter(&dtrace_lock);
rval = dtrace_helper_destroygen(NULL, arg);
mutex_exit(&dtrace_lock);

return (rval);
}

default:
break;
}

return (ENOTTY);
}

/*ARGSUSED*/
static int
dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
{
minor_t minor = getminor(dev);
dtrace_state_t *state;
int rval;

if (minor == DTRACEMNRN_HELPER)
return (dtrace_ioctl_helper(cmd, arg, rv));

state = ddi_get_soft_state(dtrace_softstate, minor);

if (state->dts_anon) {
ASSERT(dtrace_anon.dta_state == NULL);
state = state->dts_anon;
}

switch (cmd) {
case DTRACEIOC_PROVIDER: {
dtrace_providerdesc_t pvd;
dtrace_provider_t *pvp;

if (copyin((void *)arg, &pvd, sizeof (pvd)) != 0)
return (EFAULT);

pvd.dtvd_name[DTRACE_PROVNAMELEN - 1] = '\0';
mutex_enter(&dtrace_provider_lock);

for (pvp = dtrace_provider; pvp != NULL; pvp = pvp->dtpv_next) {
if (strcmp(pvp->dtpv_name, pvd.dtvd_name) == 0)
break;
}

mutex_exit(&dtrace_provider_lock);

if (pvp == NULL)
return (ESRCH);

bcopy(&pvp->dtpv_priv, &pvd.dtvd_priv, sizeof (dtrace_ppriv_t));
bcopy(&pvp->dtpv_attr, &pvd.dtvd_attr, sizeof (dtrace_pattr_t));

if (copyout(&pvd, (void *)arg, sizeof (pvd)) != 0)
return (EFAULT);

return (0);
}

case DTRACEIOC_EPROBE: {
dtrace_eprobedesc_t epdesc;
dtrace_ecb_t *ecb;
dtrace_action_t *act;
void *buf;
size_t size;
uintptr_t dest;
int nrecs;

if (copyin((void *)arg, &epdesc, sizeof (epdesc)) != 0)
return (EFAULT);

mutex_enter(&dtrace_lock);

if ((ecb = dtrace_epid2ecb(state, epdesc.dtepd_epid)) == NULL) {
mutex_exit(&dtrace_lock);
return (EINVAL);
}

if (ecb->dte_probe == NULL) {
mutex_exit(&dtrace_lock);
return (EINVAL);
}

epdesc.dtepd_probeid = ecb->dte_probe->dtpr_id;
epdesc.dtepd_uarg = ecb->dte_uarg;
epdesc.dtepd_size = ecb->dte_size;

nrecs = epdesc.dtepd_nrecs;
epdesc.dtepd_nrecs = 0;
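/*
* First pass: count the records to be reported. Aggregating
* actions and tuple members are skipped here; they are reported
* separately via the DTRACEIOC_AGGDESC case below.
*/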
for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple)
continue;

epdesc.dtepd_nrecs++;
}

/*
* Now that we have the size, we need to allocate a temporary
* buffer in which to store the complete description. We need
* the temporary buffer to be able to drop dtrace_lock()
* across the copyout(), below.
*/
size = sizeof (dtrace_eprobedesc_t) +
(epdesc.dtepd_nrecs * sizeof (dtrace_recdesc_t));

buf = kmem_alloc(size, KM_SLEEP);
dest = (uintptr_t)buf;

bcopy(&epdesc, (void *)dest, sizeof (epdesc));
dest += offsetof(dtrace_eprobedesc_t, dtepd_rec[0]);

for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple)
continue;

if (nrecs-- == 0)
break;

bcopy(&act->dta_rec, (void *)dest,
sizeof (dtrace_recdesc_t));
dest += sizeof (dtrace_recdesc_t);
}

mutex_exit(&dtrace_lock);

if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) {
kmem_free(buf, size);
return (EFAULT);
}

kmem_free(buf, size);
return (0);
}

case DTRACEIOC_AGGDESC: {
dtrace_aggdesc_t aggdesc;
dtrace_action_t *act;
dtrace_aggregation_t *agg;
int nrecs;
uint32_t offs;
dtrace_recdesc_t *lrec;
void *buf;
size_t size;
uintptr_t dest;

if (copyin((void *)arg, &aggdesc, sizeof (aggdesc)) != 0)
return (EFAULT);

mutex_enter(&dtrace_lock);

if ((agg = dtrace_aggid2agg(state, aggdesc.dtagd_id)) == NULL) {
mutex_exit(&dtrace_lock);
return (EINVAL);
}

aggdesc.dtagd_epid = agg->dtag_ecb->dte_epid;

nrecs = aggdesc.dtagd_nrecs;
aggdesc.dtagd_nrecs = 0;

offs = agg->dtag_base;
lrec = &agg->dtag_action.dta_rec;
aggdesc.dtagd_size = lrec->dtrd_offset + lrec->dtrd_size - offs;
17629
17630
for (act = agg->dtag_first; ; act = act->dta_next) {
17631
ASSERT(act->dta_intuple ||
17632
DTRACEACT_ISAGG(act->dta_kind));
17633
17634
/*
17635
* If this action has a record size of zero, it
17636
* denotes an argument to the aggregating action.
17637
* Because the presence of this record doesn't (or
17638
* shouldn't) affect the way the data is interpreted,
17639
* we don't copy it out to save user-level the
17640
* confusion of dealing with a zero-length record.
17641
*/
17642
if (act->dta_rec.dtrd_size == 0) {
17643
ASSERT(agg->dtag_hasarg);
17644
continue;
17645
}
17646
17647
aggdesc.dtagd_nrecs++;
17648
17649
if (act == &agg->dtag_action)
17650
break;
17651
}
17652
17653
/*
17654
* Now that we have the size, we need to allocate a temporary
17655
* buffer in which to store the complete description. We need
17656
* the temporary buffer to be able to drop dtrace_lock()
17657
* across the copyout(), below.
17658
*/
17659
size = sizeof (dtrace_aggdesc_t) +
17660
(aggdesc.dtagd_nrecs * sizeof (dtrace_recdesc_t));
17661
17662
buf = kmem_alloc(size, KM_SLEEP);
17663
dest = (uintptr_t)buf;
17664
17665
bcopy(&aggdesc, (void *)dest, sizeof (aggdesc));
17666
dest += offsetof(dtrace_aggdesc_t, dtagd_rec[0]);
17667
17668
for (act = agg->dtag_first; ; act = act->dta_next) {
17669
dtrace_recdesc_t rec = act->dta_rec;
17670
17671
/*
17672
* See the comment in the above loop for why we pass
17673
* over zero-length records.
17674
*/
17675
if (rec.dtrd_size == 0) {
17676
ASSERT(agg->dtag_hasarg);
17677
continue;
17678
}
17679
17680
if (nrecs-- == 0)
17681
break;
17682
17683
rec.dtrd_offset -= offs;
17684
bcopy(&rec, (void *)dest, sizeof (rec));
17685
dest += sizeof (dtrace_recdesc_t);
17686
17687
if (act == &agg->dtag_action)
17688
break;
17689
}
17690
17691
mutex_exit(&dtrace_lock);
17692
17693
if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) {
17694
kmem_free(buf, size);
17695
return (EFAULT);
17696
}
17697
17698
kmem_free(buf, size);
17699
return (0);
17700
}
17701
17702
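	/*
	 * Illustrative sketch (hypothetical consumer code): because each
	 * dtrd_offset above is rebased against the aggregation base, a
	 * consumer can interpret a snapshot tuple directly from the
	 * returned records; "agd" is an assumed pointer to the descriptor
	 * and "data" to one key/value tuple of dtagd_size bytes.
	 *
	 *	int i;
	 *	for (i = 0; i < agd->dtagd_nrecs; i++) {
	 *		const dtrace_recdesc_t *rec = &agd->dtagd_rec[i];
	 *		const char *fld = (const char *)data + rec->dtrd_offset;
	 *		...interpret dtrd_size bytes at fld per dtrd_action...
	 *	}
	 */
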
	case DTRACEIOC_ENABLE: {
		dof_hdr_t *dof;
		dtrace_enabling_t *enab = NULL;
		dtrace_vstate_t *vstate;
		int err = 0;

		*rv = 0;

		/*
		 * If a NULL argument has been passed, we take this as our
		 * cue to reevaluate our enablings.
		 */
		if (arg == NULL) {
			dtrace_enabling_matchall();

			return (0);
		}

		if ((dof = dtrace_dof_copyin(arg, &rval)) == NULL)
			return (rval);

		mutex_enter(&cpu_lock);
		mutex_enter(&dtrace_lock);
		vstate = &state->dts_vstate;

		if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) {
			mutex_exit(&dtrace_lock);
			mutex_exit(&cpu_lock);
			dtrace_dof_destroy(dof);
			return (EBUSY);
		}

		if (dtrace_dof_slurp(dof, vstate, cr, &enab, 0, B_TRUE) != 0) {
			mutex_exit(&dtrace_lock);
			mutex_exit(&cpu_lock);
			dtrace_dof_destroy(dof);
			return (EINVAL);
		}

		if ((rval = dtrace_dof_options(dof, state)) != 0) {
			dtrace_enabling_destroy(enab);
			mutex_exit(&dtrace_lock);
			mutex_exit(&cpu_lock);
			dtrace_dof_destroy(dof);
			return (rval);
		}

		if ((err = dtrace_enabling_match(enab, rv)) == 0) {
			err = dtrace_enabling_retain(enab);
		} else {
			dtrace_enabling_destroy(enab);
		}

		mutex_exit(&cpu_lock);
		mutex_exit(&dtrace_lock);
		dtrace_dof_destroy(dof);

		return (err);
	}

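	/*
	 * Summary (editorial sketch, not authoritative): the enabling path
	 * above is copyin DOF -> dtrace_dof_slurp() into an enabling ->
	 * dtrace_dof_options() to apply option settings ->
	 * dtrace_enabling_match() against the current probe set ->
	 * dtrace_enabling_retain() so the enabling is reevaluated as new
	 * providers and probes arrive.
	 */
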
	case DTRACEIOC_REPLICATE: {
		dtrace_repldesc_t desc;
		dtrace_probedesc_t *match = &desc.dtrpd_match;
		dtrace_probedesc_t *create = &desc.dtrpd_create;
		int err;

		if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
		match->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
		match->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
		match->dtpd_name[DTRACE_NAMELEN - 1] = '\0';

		create->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
		create->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
		create->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
		create->dtpd_name[DTRACE_NAMELEN - 1] = '\0';

		mutex_enter(&dtrace_lock);
		err = dtrace_enabling_replicate(state, match, create);
		mutex_exit(&dtrace_lock);

		return (err);
	}

	case DTRACEIOC_PROBEMATCH:
	case DTRACEIOC_PROBES: {
		dtrace_probe_t *probe = NULL;
		dtrace_probedesc_t desc;
		dtrace_probekey_t pkey;
		dtrace_id_t i;
		int m = 0;
		uint32_t priv;
		uid_t uid;
		zoneid_t zoneid;

		if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		desc.dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
		desc.dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
		desc.dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
		desc.dtpd_name[DTRACE_NAMELEN - 1] = '\0';

		/*
		 * Before we attempt to match this probe, we want to give
		 * all providers the opportunity to provide it.
		 */
		if (desc.dtpd_id == DTRACE_IDNONE) {
			mutex_enter(&dtrace_provider_lock);
			dtrace_probe_provide(&desc, NULL);
			mutex_exit(&dtrace_provider_lock);
			desc.dtpd_id++;
		}

		if (cmd == DTRACEIOC_PROBEMATCH) {
			dtrace_probekey(&desc, &pkey);
			pkey.dtpk_id = DTRACE_IDNONE;
		}

		dtrace_cred2priv(cr, &priv, &uid, &zoneid);

		mutex_enter(&dtrace_lock);

		if (cmd == DTRACEIOC_PROBEMATCH) {
			for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) {
				if ((probe = dtrace_probes[i - 1]) != NULL &&
				    (m = dtrace_match_probe(probe, &pkey,
				    priv, uid, zoneid)) != 0)
					break;
			}

			if (m < 0) {
				mutex_exit(&dtrace_lock);
				return (EINVAL);
			}

		} else {
			for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) {
				if ((probe = dtrace_probes[i - 1]) != NULL &&
				    dtrace_match_priv(probe, priv, uid, zoneid))
					break;
			}
		}

		if (probe == NULL) {
			mutex_exit(&dtrace_lock);
			return (ESRCH);
		}

		dtrace_probe_description(probe, &desc);
		mutex_exit(&dtrace_lock);

		if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
			return (EFAULT);

		return (0);
	}

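	/*
	 * Illustrative sketch (hypothetical consumer code): because the
	 * search above begins at dtpd_id and the matched probe's ID is
	 * copied back out, a consumer enumerates every visible probe by
	 * resuming from the previous ID plus one until ESRCH:
	 *
	 *	dtrace_probedesc_t pd;
	 *	bzero(&pd, sizeof (pd));
	 *	pd.dtpd_id = DTRACE_IDNONE;
	 *	while (ioctl(fd, DTRACEIOC_PROBES, &pd) == 0)
	 *		pd.dtpd_id++;
	 */
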
	case DTRACEIOC_PROBEARG: {
		dtrace_argdesc_t desc;
		dtrace_probe_t *probe;
		dtrace_provider_t *prov;

		if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		if (desc.dtargd_id == DTRACE_IDNONE)
			return (EINVAL);

		if (desc.dtargd_ndx == DTRACE_ARGNONE)
			return (EINVAL);

		mutex_enter(&dtrace_provider_lock);
		mutex_enter(&mod_lock);
		mutex_enter(&dtrace_lock);

		if (desc.dtargd_id > dtrace_nprobes) {
			mutex_exit(&dtrace_lock);
			mutex_exit(&mod_lock);
			mutex_exit(&dtrace_provider_lock);
			return (EINVAL);
		}

		if ((probe = dtrace_probes[desc.dtargd_id - 1]) == NULL) {
			mutex_exit(&dtrace_lock);
			mutex_exit(&mod_lock);
			mutex_exit(&dtrace_provider_lock);
			return (EINVAL);
		}

		mutex_exit(&dtrace_lock);

		prov = probe->dtpr_provider;

		if (prov->dtpv_pops.dtps_getargdesc == NULL) {
			/*
			 * There isn't any typed information for this probe.
			 * Set the argument number to DTRACE_ARGNONE.
			 */
			desc.dtargd_ndx = DTRACE_ARGNONE;
		} else {
			desc.dtargd_native[0] = '\0';
			desc.dtargd_xlate[0] = '\0';
			desc.dtargd_mapping = desc.dtargd_ndx;

			prov->dtpv_pops.dtps_getargdesc(prov->dtpv_arg,
			    probe->dtpr_id, probe->dtpr_arg, &desc);
		}

		mutex_exit(&mod_lock);
		mutex_exit(&dtrace_provider_lock);

		if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_GO: {
		processorid_t cpuid;
		rval = dtrace_state_go(state, &cpuid);

		if (rval != 0)
			return (rval);

		if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_STOP: {
		processorid_t cpuid;

		mutex_enter(&dtrace_lock);
		rval = dtrace_state_stop(state, &cpuid);
		mutex_exit(&dtrace_lock);

		if (rval != 0)
			return (rval);

		if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0)
			return (EFAULT);

		return (0);
	}

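	/*
	 * Illustrative sketch (hypothetical consumer code): GO and STOP
	 * bracket a tracing session; each reports the CPU on which the
	 * corresponding BEGIN or END probe fired.
	 *
	 *	processorid_t cpu;
	 *	if (ioctl(fd, DTRACEIOC_GO, &cpu) == 0) {
	 *		...consume buffers...
	 *		(void) ioctl(fd, DTRACEIOC_STOP, &cpu);
	 *	}
	 */
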
	case DTRACEIOC_DOFGET: {
		dof_hdr_t hdr, *dof;
		uint64_t len;

		if (copyin((void *)arg, &hdr, sizeof (hdr)) != 0)
			return (EFAULT);

		mutex_enter(&dtrace_lock);
		dof = dtrace_dof_create(state);
		mutex_exit(&dtrace_lock);

		len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz);
		rval = copyout(dof, (void *)arg, len);
		dtrace_dof_destroy(dof);

		return (rval == 0 ? 0 : EFAULT);
	}

	case DTRACEIOC_AGGSNAP:
	case DTRACEIOC_BUFSNAP: {
		dtrace_bufdesc_t desc;
		caddr_t cached;
		dtrace_buffer_t *buf;

		if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		if (desc.dtbd_cpu < 0 || desc.dtbd_cpu >= NCPU)
			return (EINVAL);

		mutex_enter(&dtrace_lock);

		if (cmd == DTRACEIOC_BUFSNAP) {
			buf = &state->dts_buffer[desc.dtbd_cpu];
		} else {
			buf = &state->dts_aggbuffer[desc.dtbd_cpu];
		}

		if (buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL)) {
			size_t sz = buf->dtb_offset;

			if (state->dts_activity != DTRACE_ACTIVITY_STOPPED) {
				mutex_exit(&dtrace_lock);
				return (EBUSY);
			}

			/*
			 * If this buffer has already been consumed, we're
			 * going to indicate that there's nothing left here
			 * to consume.
			 */
			if (buf->dtb_flags & DTRACEBUF_CONSUMED) {
				mutex_exit(&dtrace_lock);

				desc.dtbd_size = 0;
				desc.dtbd_drops = 0;
				desc.dtbd_errors = 0;
				desc.dtbd_oldest = 0;
				sz = sizeof (desc);

				if (copyout(&desc, (void *)arg, sz) != 0)
					return (EFAULT);

				return (0);
			}

			/*
			 * If this is a ring buffer that has wrapped, we want
			 * to copy the whole thing out.
			 */
			if (buf->dtb_flags & DTRACEBUF_WRAPPED) {
				dtrace_buffer_polish(buf);
				sz = buf->dtb_size;
			}

			if (copyout(buf->dtb_tomax, desc.dtbd_data, sz) != 0) {
				mutex_exit(&dtrace_lock);
				return (EFAULT);
			}

			desc.dtbd_size = sz;
			desc.dtbd_drops = buf->dtb_drops;
			desc.dtbd_errors = buf->dtb_errors;
			desc.dtbd_oldest = buf->dtb_xamot_offset;
			desc.dtbd_timestamp = dtrace_gethrtime();

			mutex_exit(&dtrace_lock);

			if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
				return (EFAULT);

			buf->dtb_flags |= DTRACEBUF_CONSUMED;

			return (0);
		}

		if (buf->dtb_tomax == NULL) {
			ASSERT(buf->dtb_xamot == NULL);
			mutex_exit(&dtrace_lock);
			return (ENOENT);
		}

		cached = buf->dtb_tomax;
		ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));

		dtrace_xcall(desc.dtbd_cpu,
		    (dtrace_xcall_t)dtrace_buffer_switch, buf);

		state->dts_errors += buf->dtb_xamot_errors;

		/*
		 * If the buffers did not actually switch, then the cross call
		 * did not take place -- presumably because the given CPU is
		 * not in the ready set.  If this is the case, we'll return
		 * ENOENT.
		 */
		if (buf->dtb_tomax == cached) {
			ASSERT(buf->dtb_xamot != cached);
			mutex_exit(&dtrace_lock);
			return (ENOENT);
		}

		ASSERT(cached == buf->dtb_xamot);

		/*
		 * We have our snapshot; now copy it out.
		 */
		if (copyout(buf->dtb_xamot, desc.dtbd_data,
		    buf->dtb_xamot_offset) != 0) {
			mutex_exit(&dtrace_lock);
			return (EFAULT);
		}

		desc.dtbd_size = buf->dtb_xamot_offset;
		desc.dtbd_drops = buf->dtb_xamot_drops;
		desc.dtbd_errors = buf->dtb_xamot_errors;
		desc.dtbd_oldest = 0;
		desc.dtbd_timestamp = buf->dtb_switched;

		mutex_exit(&dtrace_lock);

		/*
		 * Finally, copy out the buffer description.
		 */
		if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
			return (EFAULT);

		return (0);
	}

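	/*
	 * Illustrative sketch (hypothetical consumer code): the principal
	 * buffer is snapshotted one CPU at a time, with dtbd_data pointing
	 * at user memory at least as large as the configured buffer size;
	 * "fd", "ncpu" and "data" are assumptions for the sketch.
	 *
	 *	dtrace_bufdesc_t bd;
	 *	int cpu;
	 *	for (cpu = 0; cpu < ncpu; cpu++) {
	 *		bzero(&bd, sizeof (bd));
	 *		bd.dtbd_cpu = cpu;
	 *		bd.dtbd_data = data;
	 *		if (ioctl(fd, DTRACEIOC_BUFSNAP, &bd) == 0)
	 *			...bd.dtbd_size bytes of records at data...
	 *	}
	 */
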
	case DTRACEIOC_CONF: {
		dtrace_conf_t conf;

		bzero(&conf, sizeof (conf));
		conf.dtc_difversion = DIF_VERSION;
		conf.dtc_difintregs = DIF_DIR_NREGS;
		conf.dtc_diftupregs = DIF_DTR_NREGS;
		conf.dtc_ctfmodel = CTF_MODEL_NATIVE;

		if (copyout(&conf, (void *)arg, sizeof (conf)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_STATUS: {
		dtrace_status_t stat;
		dtrace_dstate_t *dstate;
		int i, j;
		uint64_t nerrs;

		/*
		 * See the comment in dtrace_state_deadman() for the reason
		 * for setting dts_laststatus to INT64_MAX before setting
		 * it to the correct value.
		 */
		state->dts_laststatus = INT64_MAX;
		dtrace_membar_producer();
		state->dts_laststatus = dtrace_gethrtime();

		bzero(&stat, sizeof (stat));

		mutex_enter(&dtrace_lock);

		if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) {
			mutex_exit(&dtrace_lock);
			return (ENOENT);
		}

		if (state->dts_activity == DTRACE_ACTIVITY_DRAINING)
			stat.dtst_exiting = 1;

		nerrs = state->dts_errors;
		dstate = &state->dts_vstate.dtvs_dynvars;

		for (i = 0; i < NCPU; i++) {
			dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[i];

			stat.dtst_dyndrops += dcpu->dtdsc_drops;
			stat.dtst_dyndrops_dirty += dcpu->dtdsc_dirty_drops;
			stat.dtst_dyndrops_rinsing += dcpu->dtdsc_rinsing_drops;

			if (state->dts_buffer[i].dtb_flags & DTRACEBUF_FULL)
				stat.dtst_filled++;

			nerrs += state->dts_buffer[i].dtb_errors;

			for (j = 0; j < state->dts_nspeculations; j++) {
				dtrace_speculation_t *spec;
				dtrace_buffer_t *buf;

				spec = &state->dts_speculations[j];
				buf = &spec->dtsp_buffer[i];
				stat.dtst_specdrops += buf->dtb_xamot_drops;
			}
		}

		stat.dtst_specdrops_busy = state->dts_speculations_busy;
		stat.dtst_specdrops_unavail = state->dts_speculations_unavail;
		stat.dtst_stkstroverflows = state->dts_stkstroverflows;
		stat.dtst_dblerrors = state->dts_dblerrors;
		stat.dtst_killed =
		    (state->dts_activity == DTRACE_ACTIVITY_KILLED);
		stat.dtst_errors = nerrs;

		mutex_exit(&dtrace_lock);

		if (copyout(&stat, (void *)arg, sizeof (stat)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_FORMAT: {
		dtrace_fmtdesc_t fmt;
		char *str;
		int len;

		if (copyin((void *)arg, &fmt, sizeof (fmt)) != 0)
			return (EFAULT);

		mutex_enter(&dtrace_lock);

		if (fmt.dtfd_format == 0 ||
		    fmt.dtfd_format > state->dts_nformats) {
			mutex_exit(&dtrace_lock);
			return (EINVAL);
		}

		/*
		 * Format strings are allocated contiguously and they are
		 * never freed; if a format index is less than the number
		 * of formats, we can assert that the format map is non-NULL
		 * and that the format for the specified index is non-NULL.
		 */
		ASSERT(state->dts_formats != NULL);
		str = state->dts_formats[fmt.dtfd_format - 1];
		ASSERT(str != NULL);

		len = strlen(str) + 1;

		if (len > fmt.dtfd_length) {
			fmt.dtfd_length = len;

			if (copyout(&fmt, (void *)arg, sizeof (fmt)) != 0) {
				mutex_exit(&dtrace_lock);
				return (EINVAL);
			}
		} else {
			if (copyout(str, fmt.dtfd_string, len) != 0) {
				mutex_exit(&dtrace_lock);
				return (EINVAL);
			}
		}

		mutex_exit(&dtrace_lock);
		return (0);
	}

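	/*
	 * Illustrative sketch (hypothetical consumer code): as with the
	 * other variable-length replies, a consumer first asks for the
	 * required length and then fetches the format string itself;
	 * "fd" and "format" are assumptions for the sketch.
	 *
	 *	dtrace_fmtdesc_t fmt;
	 *	bzero(&fmt, sizeof (fmt));
	 *	fmt.dtfd_format = format;
	 *	(void) ioctl(fd, DTRACEIOC_FORMAT, &fmt);
	 *	fmt.dtfd_string = malloc(fmt.dtfd_length);
	 *	(void) ioctl(fd, DTRACEIOC_FORMAT, &fmt);
	 */
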
	default:
		break;
	}

	return (ENOTTY);
}

/*ARGSUSED*/
static int
dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	dtrace_state_t *state;

	switch (cmd) {
	case DDI_DETACH:
		break;

	case DDI_SUSPEND:
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}

	mutex_enter(&cpu_lock);
	mutex_enter(&dtrace_provider_lock);
	mutex_enter(&dtrace_lock);

	ASSERT(dtrace_opens == 0);

	if (dtrace_helpers > 0) {
		mutex_exit(&dtrace_provider_lock);
		mutex_exit(&dtrace_lock);
		mutex_exit(&cpu_lock);
		return (DDI_FAILURE);
	}

	if (dtrace_unregister((dtrace_provider_id_t)dtrace_provider) != 0) {
		mutex_exit(&dtrace_provider_lock);
		mutex_exit(&dtrace_lock);
		mutex_exit(&cpu_lock);
		return (DDI_FAILURE);
	}

	dtrace_provider = NULL;

	if ((state = dtrace_anon_grab()) != NULL) {
		/*
		 * If there were ECBs on this state, the provider should
		 * have not been allowed to detach; assert that there is
		 * none.
		 */
		ASSERT(state->dts_necbs == 0);
		dtrace_state_destroy(state);

		/*
		 * If we're being detached with anonymous state, we need to
		 * indicate to the kernel debugger that DTrace is now inactive.
		 */
		(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
	}

	bzero(&dtrace_anon, sizeof (dtrace_anon_t));
	unregister_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);
	dtrace_cpu_init = NULL;
	dtrace_helpers_cleanup = NULL;
	dtrace_helpers_fork = NULL;
	dtrace_cpustart_init = NULL;
	dtrace_cpustart_fini = NULL;
	dtrace_debugger_init = NULL;
	dtrace_debugger_fini = NULL;
	dtrace_modload = NULL;
	dtrace_modunload = NULL;

	ASSERT(dtrace_getf == 0);
	ASSERT(dtrace_closef == NULL);

	mutex_exit(&cpu_lock);

	kmem_free(dtrace_probes, dtrace_nprobes * sizeof (dtrace_probe_t *));
	dtrace_probes = NULL;
	dtrace_nprobes = 0;

	dtrace_hash_destroy(dtrace_bymod);
	dtrace_hash_destroy(dtrace_byfunc);
	dtrace_hash_destroy(dtrace_byname);
	dtrace_bymod = NULL;
	dtrace_byfunc = NULL;
	dtrace_byname = NULL;

	kmem_cache_destroy(dtrace_state_cache);
	vmem_destroy(dtrace_minor);
	vmem_destroy(dtrace_arena);

	if (dtrace_toxrange != NULL) {
		kmem_free(dtrace_toxrange,
		    dtrace_toxranges_max * sizeof (dtrace_toxrange_t));
		dtrace_toxrange = NULL;
		dtrace_toxranges = 0;
		dtrace_toxranges_max = 0;
	}

	ddi_remove_minor_node(dtrace_devi, NULL);
	dtrace_devi = NULL;

	ddi_soft_state_fini(&dtrace_softstate);

	ASSERT(dtrace_vtime_references == 0);
	ASSERT(dtrace_opens == 0);
	ASSERT(dtrace_retained == NULL);

	mutex_exit(&dtrace_lock);
	mutex_exit(&dtrace_provider_lock);

	/*
	 * We don't destroy the task queue until after we have dropped our
	 * locks (taskq_destroy() may block on running tasks).  To prevent
	 * attempting to do work after we have effectively detached but before
	 * the task queue has been destroyed, all tasks dispatched via the
	 * task queue must check that DTrace is still attached before
	 * performing any operation.
	 */
	taskq_destroy(dtrace_taskq);
	dtrace_taskq = NULL;

	return (DDI_SUCCESS);
}
#endif

#ifdef illumos
/*ARGSUSED*/
static int
dtrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int error;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)dtrace_devi;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}
	return (error);
}
#endif

#ifdef illumos
static struct cb_ops dtrace_cb_ops = {
	dtrace_open,		/* open */
	dtrace_close,		/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	dtrace_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_NEW | D_MP		/* Driver compatibility flag */
};

static struct dev_ops dtrace_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	dtrace_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	dtrace_attach,		/* attach */
	dtrace_detach,		/* detach */
	nodev,			/* reset */
	&dtrace_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	nodev			/* dev power */
};

static struct modldrv modldrv = {
	&mod_driverops,		/* module type (this is a pseudo driver) */
	"Dynamic Tracing",	/* name of module */
	&dtrace_ops,		/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};

int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}
#else

static d_ioctl_t	dtrace_ioctl;
static d_ioctl_t	dtrace_ioctl_helper;
static void		dtrace_load(void *);
static int		dtrace_unload(void);
static struct cdev	*dtrace_dev;
static struct cdev	*helper_dev;

void dtrace_invop_init(void);
void dtrace_invop_uninit(void);

static struct cdevsw dtrace_cdevsw = {
	.d_version	= D_VERSION,
	.d_ioctl	= dtrace_ioctl,
	.d_open		= dtrace_open,
	.d_name		= "dtrace",
};

static struct cdevsw helper_cdevsw = {
	.d_version	= D_VERSION,
	.d_ioctl	= dtrace_ioctl_helper,
	.d_name		= "helper",
};

#include <dtrace_anon.c>
#include <dtrace_ioctl.c>
#include <dtrace_load.c>
#include <dtrace_modevent.c>
#include <dtrace_sysctl.c>
#include <dtrace_unload.c>
#include <dtrace_vtime.c>
#include <dtrace_hacks.c>

SYSINIT(dtrace_load, SI_SUB_DTRACE, SI_ORDER_FIRST, dtrace_load, NULL);
SYSUNINIT(dtrace_unload, SI_SUB_DTRACE, SI_ORDER_FIRST, dtrace_unload, NULL);
SYSINIT(dtrace_anon_init, SI_SUB_DTRACE_ANON, SI_ORDER_FIRST, dtrace_anon_init, NULL);

DEV_MODULE(dtrace, dtrace_modevent, NULL);
MODULE_VERSION(dtrace, 1);
MODULE_DEPEND(dtrace, opensolaris, 1, 1, 1);
#endif