GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/kern/kern_clock.c
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California. All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include "opt_kdb.h"
#include "opt_device_polling.h"
#include "opt_hwpmc_hooks.h"
#include "opt_ntp.h"
#include "opt_watchdog.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/epoch.h>
#include <sys/eventhandler.h>
#include <sys/gtaskqueue.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/sleepqueue.h>
#include <sys/smp.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <sys/sysctl.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/limits.h>
#include <sys/timetc.h>

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
PMC_SOFT_DEFINE( , , clock, hard);
PMC_SOFT_DEFINE( , , clock, stat);
PMC_SOFT_DEFINE_EX( , , clock, prof, \
    cpu_startprofclock, cpu_stopprofclock);
#endif

#ifdef DEVICE_POLLING
extern void hardclock_device_poll(void);
#endif /* DEVICE_POLLING */

/* Mutex protecting profiling statistics. */
static struct mtx time_lock;

SDT_PROVIDER_DECLARE(sched);
SDT_PROBE_DEFINE2(sched, , , tick, "struct thread *", "struct proc *");

static int
sysctl_kern_cp_time(SYSCTL_HANDLER_ARGS)
{
        int error;
        long cp_time[CPUSTATES];
#ifdef SCTL_MASK32
        int i;
        unsigned int cp_time32[CPUSTATES];
#endif

        read_cpu_time(cp_time);
#ifdef SCTL_MASK32
        if (req->flags & SCTL_MASK32) {
                if (!req->oldptr)
                        return SYSCTL_OUT(req, 0, sizeof(cp_time32));
                for (i = 0; i < CPUSTATES; i++)
                        cp_time32[i] = (unsigned int)cp_time[i];
                error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32));
        } else
#endif
        {
                if (!req->oldptr)
                        return SYSCTL_OUT(req, 0, sizeof(cp_time));
                error = SYSCTL_OUT(req, cp_time, sizeof(cp_time));
        }
        return error;
}

SYSCTL_PROC(_kern, OID_AUTO, cp_time, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE,
    0, 0, sysctl_kern_cp_time, "LU", "CPU time statistics");
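
/*
 * Illustrative sketch only (not part of the kernel build): a userland
 * program would typically read this sysctl through sysctlbyname(3),
 * using the CP_* indices from <sys/resource.h>:
 *
 *	long cp_time[CPUSTATES];
 *	size_t len = sizeof(cp_time);
 *
 *	if (sysctlbyname("kern.cp_time", cp_time, &len, NULL, 0) == 0)
 *		printf("user %ld nice %ld sys %ld intr %ld idle %ld\n",
 *		    cp_time[CP_USER], cp_time[CP_NICE], cp_time[CP_SYS],
 *		    cp_time[CP_INTR], cp_time[CP_IDLE]);
 */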

static long empty[CPUSTATES];

static int
sysctl_kern_cp_times(SYSCTL_HANDLER_ARGS)
{
        struct pcpu *pcpu;
        int error;
        int c;
        long *cp_time;
#ifdef SCTL_MASK32
        unsigned int cp_time32[CPUSTATES];
        int i;
#endif

        if (!req->oldptr) {
#ifdef SCTL_MASK32
                if (req->flags & SCTL_MASK32)
                        return SYSCTL_OUT(req, 0, sizeof(cp_time32) * (mp_maxid + 1));
                else
#endif
                        return SYSCTL_OUT(req, 0, sizeof(long) * CPUSTATES * (mp_maxid + 1));
        }
        for (error = 0, c = 0; error == 0 && c <= mp_maxid; c++) {
                if (!CPU_ABSENT(c)) {
                        pcpu = pcpu_find(c);
                        cp_time = pcpu->pc_cp_time;
                } else {
                        cp_time = empty;
                }
#ifdef SCTL_MASK32
                if (req->flags & SCTL_MASK32) {
                        for (i = 0; i < CPUSTATES; i++)
                                cp_time32[i] = (unsigned int)cp_time[i];
                        error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32));
                } else
#endif
                        error = SYSCTL_OUT(req, cp_time, sizeof(long) * CPUSTATES);
        }
        return error;
}

SYSCTL_PROC(_kern, OID_AUTO, cp_times, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE,
    0, 0, sysctl_kern_cp_times, "LU", "per-CPU time statistics");
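
/*
 * Illustrative sketch only (not part of the kernel build): the size of
 * kern.cp_times depends on mp_maxid, so userland readers usually make a
 * first call to learn the buffer size and a second call to fetch the data:
 *
 *	size_t len = 0;
 *	long *times;
 *
 *	if (sysctlbyname("kern.cp_times", NULL, &len, NULL, 0) == 0 &&
 *	    (times = malloc(len)) != NULL)
 *		sysctlbyname("kern.cp_times", times, &len, NULL, 0);
 */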

#ifdef DEADLKRES
static const char *blessed[] = {
        "getblk",
        "so_snd_sx",
        "so_rcv_sx",
        NULL
};
static int slptime_threshold = 1800;
static int blktime_threshold = 900;
static int sleepfreq = 3;

static void
deadlres_td_on_lock(struct proc *p, struct thread *td, int blkticks)
{
        int tticks;

        sx_assert(&allproc_lock, SX_LOCKED);
        PROC_LOCK_ASSERT(p, MA_OWNED);
        THREAD_LOCK_ASSERT(td, MA_OWNED);
        /*
         * The thread should be blocked on a turnstile; simply check
         * that the turnstile channel is in a good state.
         */
        MPASS(td->td_blocked != NULL);

        tticks = ticks - td->td_blktick;
        if (tticks > blkticks)
                /*
                 * According to the provided thresholds, this thread has
                 * been stuck on a turnstile for too long.
                 */
                panic("%s: possible deadlock detected for %p (%s), "
                    "blocked for %d ticks\n", __func__,
                    td, sched_tdname(td), tticks);
}

static void
deadlres_td_sleep_q(struct proc *p, struct thread *td, int slpticks)
{
        const void *wchan;
        int i, slptype, tticks;

        sx_assert(&allproc_lock, SX_LOCKED);
        PROC_LOCK_ASSERT(p, MA_OWNED);
        THREAD_LOCK_ASSERT(td, MA_OWNED);
        /*
         * Check whether the thread is sleeping on a lock; otherwise skip
         * the check. Drop the thread lock in order to avoid a LOR with
         * the sleepqueue spinlock.
         */
        wchan = td->td_wchan;
        tticks = ticks - td->td_slptick;
        slptype = sleepq_type(wchan);
        if ((slptype == SLEEPQ_SX || slptype == SLEEPQ_LK) &&
            tticks > slpticks) {
                /*
                 * According to the provided thresholds, this thread has
                 * been stuck on a sleepqueue for too long. However, since
                 * it is on a sleepqueue, we still check it against the
                 * blessed list.
                 */
                for (i = 0; blessed[i] != NULL; i++)
                        if (!strcmp(blessed[i], td->td_wmesg))
                                return;

                panic("%s: possible deadlock detected for %p (%s), "
                    "blocked for %d ticks\n", __func__,
                    td, sched_tdname(td), tticks);
        }
}

static void
deadlkres(void)
{
        struct proc *p;
        struct thread *td;
        int blkticks, slpticks, tryl;

        tryl = 0;
        for (;;) {
                blkticks = blktime_threshold * hz;
                slpticks = slptime_threshold * hz;

                /*
                 * Avoid sleeping on the sx lock in order to avoid a
                 * possible priority inversion problem leading to
                 * starvation.
                 * If the lock cannot be acquired after 100 tries, panic.
                 */
                if (!sx_try_slock(&allproc_lock)) {
                        if (tryl > 100)
                                panic("%s: possible deadlock detected "
                                    "on allproc_lock\n", __func__);
                        tryl++;
                        pause("allproc", sleepfreq * hz);
                        continue;
                }
                tryl = 0;
                FOREACH_PROC_IN_SYSTEM(p) {
                        PROC_LOCK(p);
                        if (p->p_state == PRS_NEW) {
                                PROC_UNLOCK(p);
                                continue;
                        }
                        FOREACH_THREAD_IN_PROC(p, td) {
                                thread_lock(td);
                                if (TD_ON_LOCK(td))
                                        deadlres_td_on_lock(p, td,
                                            blkticks);
                                else if (TD_IS_SLEEPING(td))
                                        deadlres_td_sleep_q(p, td,
                                            slpticks);
                                thread_unlock(td);
                        }
                        PROC_UNLOCK(p);
                }
                sx_sunlock(&allproc_lock);

                /* Sleep for sleepfreq seconds. */
                pause("-", sleepfreq * hz);
        }
}

static struct kthread_desc deadlkres_kd = {
        "deadlkres",
        deadlkres,
        (struct thread **)NULL
};

SYSINIT(deadlkres, SI_SUB_CLOCKS, SI_ORDER_ANY, kthread_start, &deadlkres_kd);

static SYSCTL_NODE(_debug, OID_AUTO, deadlkres, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Deadlock resolver");
SYSCTL_INT(_debug_deadlkres, OID_AUTO, slptime_threshold, CTLFLAG_RWTUN,
    &slptime_threshold, 0,
    "Maximum number of seconds a thread may stay on a sleepqueue before "
    "it is considered deadlocked");
SYSCTL_INT(_debug_deadlkres, OID_AUTO, blktime_threshold, CTLFLAG_RWTUN,
    &blktime_threshold, 0,
    "Maximum number of seconds a thread may block on a turnstile before "
    "it is considered deadlocked");
SYSCTL_INT(_debug_deadlkres, OID_AUTO, sleepfreq, CTLFLAG_RWTUN, &sleepfreq, 0,
    "Number of seconds between deadlock resolver runs");
#endif /* DEADLKRES */

void
read_cpu_time(long *cp_time)
{
        struct pcpu *pc;
        int i, j;

        /* Sum up global cp_time[]. */
        bzero(cp_time, sizeof(long) * CPUSTATES);
        CPU_FOREACH(i) {
                pc = pcpu_find(i);
                for (j = 0; j < CPUSTATES; j++)
                        cp_time[j] += pc->pc_cp_time[j];
        }
}

#include <sys/watchdog.h>

static long watchdog_ticks;
static int watchdog_enabled;
static void watchdog_fire(void);
static void watchdog_config(void *, u_int, int *);

static void
watchdog_attach(void)
{
        EVENTHANDLER_REGISTER(watchdog_list, watchdog_config, NULL, 0);
}

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.
 *
 * The main timer, running hz times per second, is used to trigger interval
 * timers, timeouts and rescheduling as needed.
 *
 * The second timer handles kernel and user profiling,
 * and does resource use estimation. If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks. For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires. Otherwise, it would never accumulate
 * cpu ticks. The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock. This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling. This profile clock runs at profhz. We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics. (For profiling, every tick counts.)
 *
 * Time-of-day is maintained using a "timecounter", which may or may
 * not be related to the hardware generating the above mentioned
 * interrupts.
 */

int stathz;
int profhz;
int profprocs;
int psratio;

DPCPU_DEFINE_STATIC(long, pcputicks); /* Per-CPU version of ticks. */
#ifdef DEVICE_POLLING
static int devpoll_run = 0;
#endif

static void
ast_oweupc(struct thread *td, int tda __unused)
{
        if ((td->td_proc->p_flag & P_PROFIL) == 0)
                return;
        addupc_task(td, td->td_profil_addr, td->td_profil_ticks);
        td->td_profil_ticks = 0;
        td->td_pflags &= ~TDP_OWEUPC;
}

static void
ast_alrm(struct thread *td, int tda __unused)
{
        struct proc *p;

        p = td->td_proc;
        PROC_LOCK(p);
        kern_psignal(p, SIGVTALRM);
        PROC_UNLOCK(p);
}

static void
ast_prof(struct thread *td, int tda __unused)
{
        struct proc *p;

        p = td->td_proc;
        PROC_LOCK(p);
        kern_psignal(p, SIGPROF);
        PROC_UNLOCK(p);
}

/*
 * Initialize clock frequencies and start both clocks running.
 */
static void
initclocks(void *dummy __unused)
{
        int i;

        /*
         * Set divisors to 1 (normal case) and let the machine-specific
         * code do its bit.
         */
        mtx_init(&time_lock, "time lock", NULL, MTX_DEF);
        cpu_initclocks();

        /*
         * Compute profhz/stathz, and fix profhz if needed.
         */
        i = stathz ? stathz : hz;
        if (profhz == 0)
                profhz = i;
        psratio = profhz / i;

        ast_register(TDA_OWEUPC, ASTR_ASTF_REQUIRED, 0, ast_oweupc);
        ast_register(TDA_ALRM, ASTR_ASTF_REQUIRED, 0, ast_alrm);
        ast_register(TDA_PROF, ASTR_ASTF_REQUIRED, 0, ast_prof);

#ifdef SW_WATCHDOG
        /* Enable hardclock watchdog now, even if a hardware watchdog exists. */
        watchdog_attach();
#else
        /* Volunteer to run a software watchdog. */
        if (wdog_software_attach == NULL)
                wdog_software_attach = watchdog_attach;
#endif
}
SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL);

static __noinline void
hardclock_itimer(struct thread *td, struct pstats *pstats, int cnt, int usermode)
{
        struct proc *p;
        int ast;

        ast = 0;
        p = td->td_proc;
        if (usermode &&
            timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value)) {
                PROC_ITIMLOCK(p);
                if (itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL],
                    tick * cnt) == 0)
                        ast |= TDAI(TDA_ALRM);
                PROC_ITIMUNLOCK(p);
        }
        if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)) {
                PROC_ITIMLOCK(p);
                if (itimerdecr(&pstats->p_timer[ITIMER_PROF],
                    tick * cnt) == 0)
                        ast |= TDAI(TDA_PROF);
                PROC_ITIMUNLOCK(p);
        }
        if (ast != 0)
                ast_sched_mask(td, ast);
}

void
hardclock(int cnt, int usermode)
{
        struct pstats *pstats;
        struct thread *td = curthread;
        struct proc *p = td->td_proc;
        long global, newticks, *t;

        /*
         * Update per-CPU and possibly global ticks values.
         */
        t = DPCPU_PTR(pcputicks);
        *t += cnt;
        global = atomic_load_long(&ticksl);
        do {
                newticks = *t - global;
                if (newticks <= 0) {
                        if (newticks < -1)
                                *t = global - 1;
                        newticks = 0;
                        break;
                }
        } while (!atomic_fcmpset_long(&ticksl, &global, *t));

        /*
         * Run current process's virtual and profile time, as needed.
         */
        pstats = p->p_stats;
        if (__predict_false(
            timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) ||
            timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)))
                hardclock_itimer(td, pstats, cnt, usermode);

#ifdef HWPMC_HOOKS
        if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid)))
                PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL);
        if (td->td_intr_frame != NULL)
                PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame);
#endif
        /* This CPU advanced the global ticks, so it handles this tick's duties. */
        if (newticks > 0) {
                tc_ticktock(newticks);
#ifdef DEVICE_POLLING
                /* It is both unnecessary and unsafe to run this concurrently. */
                if (atomic_cmpset_acq_int(&devpoll_run, 0, 1)) {
                        /* This is very short and quick. */
                        hardclock_device_poll();
                        atomic_store_rel_int(&devpoll_run, 0);
                }
#endif /* DEVICE_POLLING */
                if (watchdog_enabled > 0) {
                        long left;

                        left = atomic_fetchadd_long(&watchdog_ticks, -newticks);
                        if (left > 0 && left <= newticks)
                                watchdog_fire();
                }
                intr_event_handle(clk_intr_event, NULL);
        }
        if (curcpu == CPU_FIRST())
                cpu_tick_calibration();
        if (__predict_false(DPCPU_GET(epoch_cb_count)))
                GROUPTASK_ENQUEUE(DPCPU_PTR(epoch_cb_task));
}

void
hardclock_sync(int cpu)
{
        long *t;

        KASSERT(!CPU_ABSENT(cpu), ("Absent CPU %d", cpu));

        t = DPCPU_ID_PTR(cpu, pcputicks);
        *t = ticksl;
}

/*
 * Regular integer scaling formula without losing precision:
 */
#define TIME_INT_SCALE(value, mul, div) \
        (((value) / (div)) * (mul) + (((value) % (div)) * (mul)) / (div))

/*
 * Macro for converting seconds and microseconds into actual ticks,
 * based on the given hz value:
 */
#define TIME_TO_TICKS(sec, usec, hz) \
        ((sec) * (hz) + TIME_INT_SCALE(usec, hz, 1 << 6) / (1000000 >> 6))
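
/*
 * Worked example (illustration only): with hz = 1000, converting 1.5 s
 * gives
 *
 *	TIME_TO_TICKS(1, 500000, 1000)
 *	    = 1 * 1000 + TIME_INT_SCALE(500000, 1000, 64) / 15625
 *	    = 1000 + ((500000 / 64) * 1000 + ((500000 % 64) * 1000) / 64) / 15625
 *	    = 1000 + (7812000 + 500) / 15625
 *	    = 1000 + 500 = 1500 ticks,
 *
 * without ever forming the full product usec * hz, which is how
 * TIME_INT_SCALE() avoids overflowing a regular integer for large hz.
 */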

#define TIME_ASSERT_VALID_HZ(hz) \
        _Static_assert(TIME_TO_TICKS(INT_MAX / (hz) - 1, 999999, hz) >= 0 && \
            TIME_TO_TICKS(INT_MAX / (hz) - 1, 999999, hz) < INT_MAX, \
            "tvtohz() can overflow the regular integer type")

/*
 * Compile time assert the maximum and minimum values to fit into a
 * regular integer when computing TIME_TO_TICKS():
 */
TIME_ASSERT_VALID_HZ(HZ_MAXIMUM);
TIME_ASSERT_VALID_HZ(HZ_MINIMUM);

/*
 * The formula is mostly linear, but test some more common values just
 * in case:
 */
TIME_ASSERT_VALID_HZ(1024);
TIME_ASSERT_VALID_HZ(1000);
TIME_ASSERT_VALID_HZ(128);
TIME_ASSERT_VALID_HZ(100);

/*
 * Compute number of ticks representing the specified amount of time.
 * If the specified time is negative, a value of 1 is returned. This
 * function returns a value from 1 up to and including INT_MAX.
 */
int
tvtohz(struct timeval *tv)
{
        int retval;

        /*
         * The values passed here may come from user-space and these
         * checks ensure "tv_usec" is within its allowed range:
         */

        /* check for tv_usec underflow */
        if (__predict_false(tv->tv_usec < 0)) {
                tv->tv_sec += tv->tv_usec / 1000000;
                tv->tv_usec = tv->tv_usec % 1000000;
                /* convert tv_usec to a positive value */
                if (__predict_true(tv->tv_usec < 0)) {
                        tv->tv_usec += 1000000;
                        tv->tv_sec -= 1;
                }
        /* check for tv_usec overflow */
        } else if (__predict_false(tv->tv_usec >= 1000000)) {
                tv->tv_sec += tv->tv_usec / 1000000;
                tv->tv_usec = tv->tv_usec % 1000000;
        }

        /* check for tv_sec underflow */
        if (__predict_false(tv->tv_sec < 0))
                return (1);
        /* check for tv_sec overflow (including room for the tv_usec part) */
        else if (__predict_false(tv->tv_sec >= tick_seconds_max))
                return (INT_MAX);

        /* cast to "int" to avoid platform differences */
        retval = TIME_TO_TICKS((int)tv->tv_sec, (int)tv->tv_usec, hz);

        /* add one additional tick */
        return (retval + 1);
}
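
/*
 * Illustrative sketch only (not part of the kernel build): a caller that
 * wants to turn a relative timeout into ticks, e.g. to hand to tsleep(9),
 * might do the following; with hz = 1000 a 250 ms timeout comes out as
 * 250 + 1 = 251 ticks (note that tvtohz() may normalize the timeval):
 *
 *	struct timeval tv = { .tv_sec = 0, .tv_usec = 250000 };
 *	int timo = tvtohz(&tv);
 */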

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(struct proc *p)
{

        PROC_LOCK_ASSERT(p, MA_OWNED);
        if (p->p_flag & P_STOPPROF)
                return;
        if ((p->p_flag & P_PROFIL) == 0) {
                p->p_flag |= P_PROFIL;
                mtx_lock(&time_lock);
                if (++profprocs == 1)
                        cpu_startprofclock();
                mtx_unlock(&time_lock);
        }
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(struct proc *p)
{

        PROC_LOCK_ASSERT(p, MA_OWNED);
        if (p->p_flag & P_PROFIL) {
                if (p->p_profthreads != 0) {
                        while (p->p_profthreads != 0) {
                                p->p_flag |= P_STOPPROF;
                                msleep(&p->p_profthreads, &p->p_mtx, PPAUSE,
                                    "stopprof", 0);
                        }
                }
                if ((p->p_flag & P_PROFIL) == 0)
                        return;
                p->p_flag &= ~P_PROFIL;
                mtx_lock(&time_lock);
                if (--profprocs == 0)
                        cpu_stopprofclock();
                mtx_unlock(&time_lock);
        }
}

/*
 * Statistics clock. Updates rusage information and calls the scheduler
 * to adjust priorities of the active thread.
 *
 * This should be called by all active processors.
 */
void
statclock(int cnt, int usermode)
{
        struct rusage *ru;
        struct vmspace *vm;
        struct thread *td;
        struct proc *p;
        long rss;
        long *cp_time;
        uint64_t runtime, new_switchtime;

        td = curthread;
        p = td->td_proc;

        cp_time = (long *)PCPU_PTR(cp_time);
        if (usermode) {
                /*
                 * Charge the time as appropriate.
                 */
                td->td_uticks += cnt;
                if (p->p_nice > NZERO)
                        cp_time[CP_NICE] += cnt;
                else
                        cp_time[CP_USER] += cnt;
        } else {
                /*
                 * Came from kernel mode, so we were:
                 * - handling an interrupt,
                 * - doing syscall or trap work on behalf of the current
                 *   user process, or
                 * - spinning in the idle loop.
                 * Whichever it is, charge the time as appropriate.
                 * Note that we charge interrupts to the current process,
                 * regardless of whether they are ``for'' that process,
                 * so that we know how much of its real time was spent
                 * in ``non-process'' (i.e., interrupt) work.
                 */
                if ((td->td_pflags & TDP_ITHREAD) ||
                    td->td_intr_nesting_level >= 2) {
                        td->td_iticks += cnt;
                        cp_time[CP_INTR] += cnt;
                } else {
                        td->td_pticks += cnt;
                        td->td_sticks += cnt;
                        if (!TD_IS_IDLETHREAD(td))
                                cp_time[CP_SYS] += cnt;
                        else
                                cp_time[CP_IDLE] += cnt;
                }
        }

        /* Update resource usage integrals and maximums. */
        MPASS(p->p_vmspace != NULL);
        vm = p->p_vmspace;
        ru = &td->td_ru;
        ru->ru_ixrss += pgtok(vm->vm_tsize) * cnt;
        ru->ru_idrss += pgtok(vm->vm_dsize) * cnt;
        ru->ru_isrss += pgtok(vm->vm_ssize) * cnt;
        rss = pgtok(vmspace_resident_count(vm));
        if (ru->ru_maxrss < rss)
                ru->ru_maxrss = rss;
        KTR_POINT2(KTR_SCHED, "thread", sched_tdname(td), "statclock",
            "prio:%d", td->td_priority, "stathz:%d", (stathz)?stathz:hz);
        SDT_PROBE2(sched, , , tick, td, td->td_proc);
        thread_lock_flags(td, MTX_QUIET);

        /*
         * Compute the amount of time during which the current
         * thread was running, and add that to its total so far.
         */
        new_switchtime = cpu_ticks();
        runtime = new_switchtime - PCPU_GET(switchtime);
        td->td_runtime += runtime;
        td->td_incruntime += runtime;
        PCPU_SET(switchtime, new_switchtime);

        sched_clock(td, cnt);
        thread_unlock(td);
#ifdef HWPMC_HOOKS
        if (td->td_intr_frame != NULL)
                PMC_SOFT_CALL_TF( , , clock, stat, td->td_intr_frame);
#endif
}

void
profclock(int cnt, int usermode, uintfptr_t pc)
{
        struct thread *td;

        td = curthread;
        if (usermode) {
                /*
                 * Came from user mode; CPU was in user state.
                 * If this process is being profiled, record the tick.
                 * If there is no related user location yet, don't
                 * bother trying to count it.
                 */
                if (td->td_proc->p_flag & P_PROFIL)
                        addupc_intr(td, pc, cnt);
        }
#ifdef HWPMC_HOOKS
        if (td->td_intr_frame != NULL)
                PMC_SOFT_CALL_TF( , , clock, prof, td->td_intr_frame);
#endif
}

/*
 * Return information about system clocks.
 */
static int
sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS)
{
        struct clockinfo clkinfo;
        /*
         * Construct clockinfo structure.
         */
        bzero(&clkinfo, sizeof(clkinfo));
        clkinfo.hz = hz;
        clkinfo.tick = tick;
        clkinfo.profhz = profhz;
        clkinfo.stathz = stathz ? stathz : hz;
        return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
}

SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate,
    CTLTYPE_STRUCT|CTLFLAG_RD|CTLFLAG_MPSAFE,
    0, 0, sysctl_kern_clockrate, "S,clockinfo",
    "Rate and period of various kernel clocks");

static void
watchdog_config(void *unused __unused, u_int cmd, int *error)
{
        u_int u;

        u = cmd & WD_INTERVAL;
        if (u >= WD_TO_1SEC) {
                watchdog_ticks = (1 << (u - WD_TO_1SEC)) * hz;
                watchdog_enabled = 1;
                *error = 0;
        } else {
                watchdog_enabled = 0;
        }
}
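
/*
 * Worked example (illustration only, assuming the usual WD_TO_* encoding
 * from <sys/watchdog.h>, where the interval code is the base-2 logarithm
 * of the timeout in nanoseconds): with hz = 1000, a command carrying
 * WD_TO_8SEC would arm the software watchdog with
 *
 *	watchdog_ticks = (1 << (WD_TO_8SEC - WD_TO_1SEC)) * hz
 *	               = (1 << 3) * 1000 = 8000 ticks,
 *
 * i.e. roughly eight seconds' worth of hardclock ticks.
 */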

/*
 * Handle a watchdog timeout by dropping to DDB or panicking.
 */
static void
watchdog_fire(void)
{

#if defined(KDB) && !defined(KDB_UNATTENDED)
        kdb_backtrace();
        kdb_enter(KDB_WHY_WATCHDOG, "watchdog timeout");
#else
        panic("watchdog timeout");
#endif
}