Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/cddl/dev/profile/profile.c
48255 views
1
/*
2
* CDDL HEADER START
3
*
4
* The contents of this file are subject to the terms of the
5
* Common Development and Distribution License (the "License").
6
* You may not use this file except in compliance with the License.
7
*
8
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9
* or http://www.opensolaris.org/os/licensing.
10
* See the License for the specific language governing permissions
11
* and limitations under the License.
12
*
13
* When distributing Covered Code, include this CDDL HEADER in each
14
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15
* If applicable, add the following below this CDDL HEADER, with the
16
* fields enclosed by brackets "[]" replaced with your own identifying
17
* information: Portions Copyright [yyyy] [name of copyright owner]
18
*
19
* CDDL HEADER END
20
*
21
* Portions Copyright 2006-2008 John Birrell [email protected]
22
*
23
*/
24
25
/*
26
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27
* Use is subject to license terms.
28
*/
29
30
#include <sys/param.h>
31
#include <sys/systm.h>
32
#include <sys/conf.h>
33
#include <sys/cpuvar.h>
34
#include <sys/endian.h>
35
#include <sys/fcntl.h>
36
#include <sys/filio.h>
37
#include <sys/kdb.h>
38
#include <sys/kernel.h>
39
#include <sys/kmem.h>
40
#include <sys/kthread.h>
41
#include <sys/limits.h>
42
#include <sys/linker.h>
43
#include <sys/lock.h>
44
#include <sys/malloc.h>
45
#include <sys/module.h>
46
#include <sys/mutex.h>
47
#include <sys/poll.h>
48
#include <sys/proc.h>
49
#include <sys/selinfo.h>
50
#include <sys/smp.h>
51
#include <sys/stdarg.h>
52
#include <sys/sysctl.h>
53
#include <sys/uio.h>
54
#include <sys/unistd.h>
55
#include <machine/cpu.h>
56
57
#include <sys/dtrace.h>
58
#include <sys/dtrace_bsd.h>
59
60
#include <cddl/dev/dtrace/dtrace_cddl.h>
61
62
/* Size of the buffer used to format the names of the default probes. */
#define	PROF_NAMELEN		15

/* Probe kinds, as stored in profile_probe_t.prof_kind. */
#define	PROF_PROFILE		0
#define	PROF_TICK		1

/* Probe-name prefix that selects each kind. */
#define	PROF_PREFIX_PROFILE	"profile-"
#define	PROF_PREFIX_TICK	"tick-"
68
69
/*
70
* Regardless of platform, there are five artificial frames in the case of the
71
* profile provider:
72
*
73
* profile_fire
74
* cyclic_expire
75
* cyclic_fire
76
* [ cbe ]
77
* [ locore ]
78
*
79
* On amd64, there are two frames associated with locore: one in locore, and
80
* another in common interrupt dispatch code. (i386 has not been modified to
81
* use this common layer.) Further, on i386, the interrupted instruction
82
* appears as its own stack frame. All of this means that we need to add one
83
* frame for amd64, and then take one away for both amd64 and i386.
84
*
85
* All of the above constraints lead to the mess below. Yes, the profile
86
* provider should ideally figure this out on-the-fly by hitting one of its own
87
* probes and then walking its own stack trace. This is complicated, however,
88
* and the static definition doesn't seem to be overly brittle. Still, we
89
* allow for a manual override in case we get it completely wrong.
90
*/
91
/*
 * Per-architecture default for the number of artificial frames.  No value
 * is defined here for an architecture that is not listed.
 */
#if defined(__amd64)
#define	PROF_ARTIFICIAL_FRAMES	10
#elif defined(__i386)
#define	PROF_ARTIFICIAL_FRAMES	6
#endif

#if defined(__powerpc__)
/*
 * This value is bogus just to make module compilable on powerpc
 */
#define	PROF_ARTIFICIAL_FRAMES	8
#endif

/* Forward declaration: profile_probe_t stores pointers to per-CPU state. */
struct profile_probe_percpu;

#if defined(__arm__)
#define	PROF_ARTIFICIAL_FRAMES	3
#endif

#if defined(__aarch64__)
#define	PROF_ARTIFICIAL_FRAMES	12
#endif

#if defined(__riscv)
#define	PROF_ARTIFICIAL_FRAMES	12
#endif
119
120
typedef struct profile_probe {
121
dtrace_id_t prof_id;
122
int prof_kind;
123
#ifdef illumos
124
hrtime_t prof_interval;
125
cyclic_id_t prof_cyclic;
126
#else
127
sbintime_t prof_interval;
128
struct callout prof_cyclic;
129
sbintime_t prof_expected;
130
struct profile_probe_percpu **prof_pcpus;
131
#endif
132
} profile_probe_t;
133
134
typedef struct profile_probe_percpu {
135
hrtime_t profc_expected;
136
hrtime_t profc_interval;
137
profile_probe_t *profc_probe;
138
#ifdef __FreeBSD__
139
struct callout profc_cyclic;
140
#endif
141
} profile_probe_percpu_t;
142
143
static int profile_unload(void);
144
static void profile_create(hrtime_t, char *, int);
145
static void profile_destroy(void *, dtrace_id_t, void *);
146
static void profile_enable(void *, dtrace_id_t, void *);
147
static void profile_disable(void *, dtrace_id_t, void *);
148
static void profile_load(void *);
149
static void profile_provide(void *, dtrace_probedesc_t *);
150
151
/*
 * Rates (firings per second) of the profile-N probes that are provided by
 * default.  Zero entries are skipped by profile_provide().  Nothing in this
 * file writes to the table, so it is read-only.
 */
static const int profile_rates[] = {
	97, 199, 499, 997, 1999,
	4001, 4999, 0, 0, 0,
	0, 0, 0, 0, 0,
	0, 0, 0, 0, 0
};
157
158
/*
 * Rates (firings per second) of the tick-N probes that are provided by
 * default.  Zero entries are skipped by profile_provide().  Nothing in this
 * file writes to the table, so it is read-only.
 */
static const int profile_ticks[] = {
	1, 10, 100, 500, 1000,
	5000, 0, 0, 0, 0,
	0, 0, 0, 0, 0
};
163
164
/*
165
* profile_max defines the upper bound on the number of profile probes that
166
* can exist (this is to prevent malicious or clumsy users from exhausting
167
* system resources by creating a slew of profile probes). At mod load time,
168
* this gets its value from PROFILE_MAX_DEFAULT or profile-max-probes if it's
169
* present in the profile.conf file.
170
*/
171
#define	PROFILE_MAX_DEFAULT	1000	/* default max. number of probes */

/* maximum number of profile probes */
static uint32_t profile_max = PROFILE_MAX_DEFAULT;

/* current number of profile probes; updated with atomic_add_32() */
static uint32_t profile_total;
175
176
static dtrace_pattr_t profile_attr = {
177
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
178
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
179
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
180
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
181
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
182
};
183
184
static dtrace_pops_t profile_pops = {
185
.dtps_provide = profile_provide,
186
.dtps_provide_module = NULL,
187
.dtps_enable = profile_enable,
188
.dtps_disable = profile_disable,
189
.dtps_suspend = NULL,
190
.dtps_resume = NULL,
191
.dtps_getargdesc = NULL,
192
.dtps_getargval = NULL,
193
.dtps_usermode = NULL,
194
.dtps_destroy = profile_destroy
195
};
196
197
static dtrace_provider_id_t profile_id;
198
static hrtime_t profile_interval_min = NANOSEC / 5000; /* 5000 hz */
199
static int profile_aframes = PROF_ARTIFICIAL_FRAMES;
200
201
SYSCTL_DECL(_kern_dtrace);
202
SYSCTL_NODE(_kern_dtrace, OID_AUTO, profile, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
203
"DTrace profile parameters");
204
SYSCTL_INT(_kern_dtrace_profile, OID_AUTO, aframes, CTLFLAG_RW, &profile_aframes,
205
0, "Skipped frames for profile provider");
206
207
static sbintime_t
208
nsec_to_sbt(hrtime_t nsec)
209
{
210
time_t sec;
211
212
/*
213
* We need to calculate nsec * 2^32 / 10^9
214
* Seconds and nanoseconds are split to avoid overflow.
215
*/
216
sec = nsec / NANOSEC;
217
nsec = nsec % NANOSEC;
218
return (((sbintime_t)sec << 32) | ((sbintime_t)nsec << 32) / NANOSEC);
219
}
220
221
static hrtime_t
222
sbt_to_nsec(sbintime_t sbt)
223
{
224
225
return ((sbt >> 32) * NANOSEC +
226
(((uint32_t)sbt * (hrtime_t)NANOSEC) >> 32));
227
}
228
229
static void
230
profile_probe(profile_probe_t *prof, hrtime_t late)
231
{
232
struct thread *td;
233
struct trapframe *frame;
234
uintfptr_t pc, upc;
235
236
td = curthread;
237
pc = upc = 0;
238
239
/*
240
* td_intr_frame can be unset if this is a catch-up event upon waking up
241
* from idle sleep. This can only happen on a CPU idle thread. Use a
242
* representative arg0 value in this case so that one of the probe
243
* arguments is non-zero.
244
*/
245
frame = td->td_intr_frame;
246
if (frame != NULL) {
247
if (TRAPF_USERMODE(frame))
248
upc = TRAPF_PC(frame);
249
else {
250
pc = TRAPF_PC(frame);
251
td->t_dtrace_trapframe = frame;
252
}
253
} else if (TD_IS_IDLETHREAD(td))
254
pc = (uintfptr_t)&cpu_idle;
255
256
dtrace_probe(prof->prof_id, pc, upc, late, 0, 0);
257
td->t_dtrace_trapframe = NULL;
258
}
259
260
static void
261
profile_fire(void *arg)
262
{
263
profile_probe_percpu_t *pcpu = arg;
264
profile_probe_t *prof = pcpu->profc_probe;
265
hrtime_t late;
266
267
late = sbt_to_nsec(sbinuptime() - pcpu->profc_expected);
268
269
profile_probe(prof, late);
270
pcpu->profc_expected += pcpu->profc_interval;
271
callout_schedule_sbt_curcpu(&pcpu->profc_cyclic,
272
pcpu->profc_expected, 0, C_DIRECT_EXEC | C_ABSOLUTE);
273
}
274
275
static void
276
profile_tick(void *arg)
277
{
278
profile_probe_t *prof = arg;
279
280
profile_probe(prof, 0);
281
prof->prof_expected += prof->prof_interval;
282
callout_schedule_sbt(&prof->prof_cyclic,
283
prof->prof_expected, 0, C_DIRECT_EXEC | C_ABSOLUTE);
284
}
285
286
static void
287
profile_create(hrtime_t interval, char *name, int kind)
288
{
289
profile_probe_t *prof;
290
291
if (interval < profile_interval_min)
292
return;
293
294
if (dtrace_probe_lookup(profile_id, NULL, NULL, name) != 0)
295
return;
296
297
atomic_add_32(&profile_total, 1);
298
if (profile_total > profile_max) {
299
atomic_add_32(&profile_total, -1);
300
return;
301
}
302
303
prof = kmem_zalloc(sizeof (profile_probe_t), KM_SLEEP);
304
#ifdef illumos
305
prof->prof_interval = interval;
306
prof->prof_cyclic = CYCLIC_NONE;
307
#else
308
prof->prof_interval = nsec_to_sbt(interval);
309
callout_init(&prof->prof_cyclic, 1);
310
#endif
311
prof->prof_kind = kind;
312
prof->prof_id = dtrace_probe_create(profile_id,
313
NULL, NULL, name,
314
profile_aframes, prof);
315
}
316
317
/*ARGSUSED*/
318
static void
319
profile_provide(void *arg, dtrace_probedesc_t *desc)
320
{
321
int i, j, rate, kind;
322
hrtime_t val = 0, mult = 1, len = 0;
323
char *name, *suffix = NULL;
324
325
const struct {
326
char *prefix;
327
int kind;
328
} types[] = {
329
{ PROF_PREFIX_PROFILE, PROF_PROFILE },
330
{ PROF_PREFIX_TICK, PROF_TICK },
331
{ 0, 0 }
332
};
333
334
const struct {
335
char *name;
336
hrtime_t mult;
337
} suffixes[] = {
338
{ "ns", NANOSEC / NANOSEC },
339
{ "nsec", NANOSEC / NANOSEC },
340
{ "us", NANOSEC / MICROSEC },
341
{ "usec", NANOSEC / MICROSEC },
342
{ "ms", NANOSEC / MILLISEC },
343
{ "msec", NANOSEC / MILLISEC },
344
{ "s", NANOSEC / SEC },
345
{ "sec", NANOSEC / SEC },
346
{ "m", NANOSEC * (hrtime_t)60 },
347
{ "min", NANOSEC * (hrtime_t)60 },
348
{ "h", NANOSEC * (hrtime_t)(60 * 60) },
349
{ "hour", NANOSEC * (hrtime_t)(60 * 60) },
350
{ "d", NANOSEC * (hrtime_t)(24 * 60 * 60) },
351
{ "day", NANOSEC * (hrtime_t)(24 * 60 * 60) },
352
{ "hz", 0 },
353
{ NULL }
354
};
355
356
if (desc == NULL) {
357
char n[PROF_NAMELEN];
358
359
/*
360
* If no description was provided, provide all of our probes.
361
*/
362
for (i = 0; i < sizeof (profile_rates) / sizeof (int); i++) {
363
if ((rate = profile_rates[i]) == 0)
364
continue;
365
366
(void) snprintf(n, PROF_NAMELEN, "%s%d",
367
PROF_PREFIX_PROFILE, rate);
368
profile_create(NANOSEC / rate, n, PROF_PROFILE);
369
}
370
371
for (i = 0; i < sizeof (profile_ticks) / sizeof (int); i++) {
372
if ((rate = profile_ticks[i]) == 0)
373
continue;
374
375
(void) snprintf(n, PROF_NAMELEN, "%s%d",
376
PROF_PREFIX_TICK, rate);
377
profile_create(NANOSEC / rate, n, PROF_TICK);
378
}
379
380
return;
381
}
382
383
name = desc->dtpd_name;
384
385
for (i = 0; types[i].prefix != NULL; i++) {
386
len = strlen(types[i].prefix);
387
388
if (strncmp(name, types[i].prefix, len) != 0)
389
continue;
390
break;
391
}
392
393
if (types[i].prefix == NULL)
394
return;
395
396
kind = types[i].kind;
397
j = strlen(name) - len;
398
399
/*
400
* We need to start before any time suffix.
401
*/
402
for (j = strlen(name); j >= len; j--) {
403
if (name[j] >= '0' && name[j] <= '9')
404
break;
405
suffix = &name[j];
406
}
407
408
ASSERT(suffix != NULL);
409
410
/*
411
* Now determine the numerical value present in the probe name.
412
*/
413
for (; j >= len; j--) {
414
if (name[j] < '0' || name[j] > '9')
415
return;
416
417
val += (name[j] - '0') * mult;
418
mult *= (hrtime_t)10;
419
}
420
421
if (val == 0)
422
return;
423
424
/*
425
* Look-up the suffix to determine the multiplier.
426
*/
427
for (i = 0, mult = 0; suffixes[i].name != NULL; i++) {
428
if (strcasecmp(suffixes[i].name, suffix) == 0) {
429
mult = suffixes[i].mult;
430
break;
431
}
432
}
433
434
if (suffixes[i].name == NULL && *suffix != '\0')
435
return;
436
437
if (mult == 0) {
438
/*
439
* The default is frequency-per-second.
440
*/
441
val = NANOSEC / val;
442
} else {
443
val *= mult;
444
}
445
446
profile_create(val, name, kind);
447
}
448
449
/* ARGSUSED */
450
static void
451
profile_destroy(void *arg, dtrace_id_t id, void *parg)
452
{
453
profile_probe_t *prof = parg;
454
455
#ifdef illumos
456
ASSERT(prof->prof_cyclic == CYCLIC_NONE);
457
#else
458
ASSERT(!callout_active(&prof->prof_cyclic) && prof->prof_pcpus == NULL);
459
#endif
460
kmem_free(prof, sizeof (profile_probe_t));
461
462
ASSERT(profile_total >= 1);
463
atomic_add_32(&profile_total, -1);
464
}
465
466
#ifdef illumos
467
/*ARGSUSED*/
468
static void
469
profile_online(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
470
{
471
profile_probe_t *prof = arg;
472
profile_probe_percpu_t *pcpu;
473
474
pcpu = kmem_zalloc(sizeof (profile_probe_percpu_t), KM_SLEEP);
475
pcpu->profc_probe = prof;
476
477
hdlr->cyh_func = profile_fire;
478
hdlr->cyh_arg = pcpu;
479
480
when->cyt_interval = prof->prof_interval;
481
when->cyt_when = gethrtime() + when->cyt_interval;
482
483
pcpu->profc_expected = when->cyt_when;
484
pcpu->profc_interval = when->cyt_interval;
485
}
486
487
/*ARGSUSED*/
488
static void
489
profile_offline(void *arg, cpu_t *cpu, void *oarg)
490
{
491
profile_probe_percpu_t *pcpu = oarg;
492
493
ASSERT(pcpu->profc_probe == arg);
494
kmem_free(pcpu, sizeof (profile_probe_percpu_t));
495
}
496
497
/* ARGSUSED */
498
static void
499
profile_enable(void *arg, dtrace_id_t id, void *parg)
500
{
501
profile_probe_t *prof = parg;
502
cyc_omni_handler_t omni;
503
cyc_handler_t hdlr;
504
cyc_time_t when;
505
506
ASSERT(prof->prof_interval != 0);
507
ASSERT(MUTEX_HELD(&cpu_lock));
508
509
if (prof->prof_kind == PROF_TICK) {
510
hdlr.cyh_func = profile_tick;
511
hdlr.cyh_arg = prof;
512
513
when.cyt_interval = prof->prof_interval;
514
when.cyt_when = gethrtime() + when.cyt_interval;
515
} else {
516
ASSERT(prof->prof_kind == PROF_PROFILE);
517
omni.cyo_online = profile_online;
518
omni.cyo_offline = profile_offline;
519
omni.cyo_arg = prof;
520
}
521
522
if (prof->prof_kind == PROF_TICK) {
523
prof->prof_cyclic = cyclic_add(&hdlr, &when);
524
} else {
525
prof->prof_cyclic = cyclic_add_omni(&omni);
526
}
527
}
528
529
/* ARGSUSED */
530
static void
531
profile_disable(void *arg, dtrace_id_t id, void *parg)
532
{
533
profile_probe_t *prof = parg;
534
535
ASSERT(prof->prof_cyclic != CYCLIC_NONE);
536
ASSERT(MUTEX_HELD(&cpu_lock));
537
538
cyclic_remove(prof->prof_cyclic);
539
prof->prof_cyclic = CYCLIC_NONE;
540
}
541
542
#else
543
544
static void
545
profile_enable_omni(profile_probe_t *prof)
546
{
547
profile_probe_percpu_t *pcpu;
548
int cpu;
549
550
prof->prof_pcpus = kmem_zalloc((mp_maxid + 1) * sizeof(pcpu), KM_SLEEP);
551
CPU_FOREACH(cpu) {
552
pcpu = kmem_zalloc(sizeof(profile_probe_percpu_t), KM_SLEEP);
553
prof->prof_pcpus[cpu] = pcpu;
554
pcpu->profc_probe = prof;
555
pcpu->profc_expected = sbinuptime() + prof->prof_interval;
556
pcpu->profc_interval = prof->prof_interval;
557
callout_init(&pcpu->profc_cyclic, 1);
558
callout_reset_sbt_on(&pcpu->profc_cyclic,
559
pcpu->profc_expected, 0, profile_fire, pcpu,
560
cpu, C_DIRECT_EXEC | C_ABSOLUTE);
561
}
562
}
563
564
static void
565
profile_disable_omni(profile_probe_t *prof)
566
{
567
profile_probe_percpu_t *pcpu;
568
int cpu;
569
570
ASSERT(prof->prof_pcpus != NULL);
571
CPU_FOREACH(cpu) {
572
pcpu = prof->prof_pcpus[cpu];
573
ASSERT(pcpu->profc_probe == prof);
574
ASSERT(callout_active(&pcpu->profc_cyclic));
575
callout_stop(&pcpu->profc_cyclic);
576
callout_drain(&pcpu->profc_cyclic);
577
kmem_free(pcpu, sizeof(profile_probe_percpu_t));
578
}
579
kmem_free(prof->prof_pcpus, (mp_maxid + 1) * sizeof(pcpu));
580
prof->prof_pcpus = NULL;
581
}
582
583
/* ARGSUSED */
584
static void
585
profile_enable(void *arg, dtrace_id_t id, void *parg)
586
{
587
profile_probe_t *prof = parg;
588
589
if (prof->prof_kind == PROF_TICK) {
590
prof->prof_expected = sbinuptime() + prof->prof_interval;
591
callout_reset_sbt(&prof->prof_cyclic,
592
prof->prof_expected, 0, profile_tick, prof,
593
C_DIRECT_EXEC | C_ABSOLUTE);
594
} else {
595
ASSERT(prof->prof_kind == PROF_PROFILE);
596
profile_enable_omni(prof);
597
}
598
}
599
600
/* ARGSUSED */
601
static void
602
profile_disable(void *arg, dtrace_id_t id, void *parg)
603
{
604
profile_probe_t *prof = parg;
605
606
if (prof->prof_kind == PROF_TICK) {
607
ASSERT(callout_active(&prof->prof_cyclic));
608
callout_stop(&prof->prof_cyclic);
609
callout_drain(&prof->prof_cyclic);
610
} else {
611
ASSERT(prof->prof_kind == PROF_PROFILE);
612
profile_disable_omni(prof);
613
}
614
}
615
#endif
616
617
static void
618
profile_load(void *dummy)
619
{
620
if (dtrace_register("profile", &profile_attr, DTRACE_PRIV_USER,
621
NULL, &profile_pops, NULL, &profile_id) != 0)
622
return;
623
}
624
625
626
static int
627
profile_unload(void)
628
{
629
int error = 0;
630
631
if ((error = dtrace_unregister(profile_id)) != 0)
632
return (error);
633
634
return (error);
635
}
636
637
/* ARGSUSED */
638
static int
639
profile_modevent(module_t mod __unused, int type, void *data __unused)
640
{
641
int error = 0;
642
643
switch (type) {
644
case MOD_LOAD:
645
break;
646
647
case MOD_UNLOAD:
648
break;
649
650
case MOD_SHUTDOWN:
651
break;
652
653
default:
654
error = EOPNOTSUPP;
655
break;
656
657
}
658
return (error);
659
}
660
661
SYSINIT(profile_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, profile_load, NULL);
662
SYSUNINIT(profile_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, profile_unload, NULL);
663
664
DEV_MODULE(profile, profile_modevent, NULL);
665
MODULE_VERSION(profile, 1);
666
MODULE_DEPEND(profile, dtrace, 1, 1, 1);
667
MODULE_DEPEND(profile, opensolaris, 1, 1, 1);
668
669