GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/kern/kern_clock.c
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California. All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include "opt_kdb.h"
#include "opt_device_polling.h"
#include "opt_hwpmc_hooks.h"
#include "opt_ntp.h"
#include "opt_watchdog.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/epoch.h>
#include <sys/eventhandler.h>
#include <sys/gtaskqueue.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/sleepqueue.h>
#include <sys/smp.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <sys/sysctl.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/limits.h>
#include <sys/timetc.h>

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
PMC_SOFT_DEFINE( , , clock, hard);
PMC_SOFT_DEFINE( , , clock, stat);
PMC_SOFT_DEFINE_EX( , , clock, prof, \
    cpu_startprofclock, cpu_stopprofclock);
#endif

#ifdef DEVICE_POLLING
extern void hardclock_device_poll(void);
#endif /* DEVICE_POLLING */

/* Mutex protecting profiling statistics. */
static struct mtx time_lock;

SDT_PROVIDER_DECLARE(sched);
SDT_PROBE_DEFINE2(sched, , , tick, "struct thread *", "struct proc *");

static int
sysctl_kern_cp_time(SYSCTL_HANDLER_ARGS)
{
        int error;
        long cp_time[CPUSTATES];
#ifdef SCTL_MASK32
        int i;
        unsigned int cp_time32[CPUSTATES];
#endif

        read_cpu_time(cp_time);
#ifdef SCTL_MASK32
        if (req->flags & SCTL_MASK32) {
                if (!req->oldptr)
                        return SYSCTL_OUT(req, 0, sizeof(cp_time32));
                for (i = 0; i < CPUSTATES; i++)
                        cp_time32[i] = (unsigned int)cp_time[i];
                error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32));
        } else
#endif
        {
                if (!req->oldptr)
                        return SYSCTL_OUT(req, 0, sizeof(cp_time));
                error = SYSCTL_OUT(req, cp_time, sizeof(cp_time));
        }
        return error;
}

SYSCTL_PROC(_kern, OID_AUTO, cp_time, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE,
    0, 0, sysctl_kern_cp_time, "LU", "CPU time statistics");
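
/*
 * Illustrative sketch only (not part of the kernel build): a userland
 * program would typically read this sysctl through sysctlbyname(3),
 * using the CP_* indices from <sys/resource.h>:
 *
 *	long cp_time[CPUSTATES];
 *	size_t len = sizeof(cp_time);
 *
 *	if (sysctlbyname("kern.cp_time", cp_time, &len, NULL, 0) == 0)
 *		printf("user %ld nice %ld sys %ld intr %ld idle %ld\n",
 *		    cp_time[CP_USER], cp_time[CP_NICE], cp_time[CP_SYS],
 *		    cp_time[CP_INTR], cp_time[CP_IDLE]);
 */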

static long empty[CPUSTATES];

static int
sysctl_kern_cp_times(SYSCTL_HANDLER_ARGS)
{
        struct pcpu *pcpu;
        int error;
        int c;
        long *cp_time;
#ifdef SCTL_MASK32
        unsigned int cp_time32[CPUSTATES];
        int i;
#endif

        if (!req->oldptr) {
#ifdef SCTL_MASK32
                if (req->flags & SCTL_MASK32)
                        return SYSCTL_OUT(req, 0, sizeof(cp_time32) * (mp_maxid + 1));
                else
#endif
                        return SYSCTL_OUT(req, 0, sizeof(long) * CPUSTATES * (mp_maxid + 1));
        }
        for (error = 0, c = 0; error == 0 && c <= mp_maxid; c++) {
                if (!CPU_ABSENT(c)) {
                        pcpu = pcpu_find(c);
                        cp_time = pcpu->pc_cp_time;
                } else {
                        cp_time = empty;
                }
#ifdef SCTL_MASK32
                if (req->flags & SCTL_MASK32) {
                        for (i = 0; i < CPUSTATES; i++)
                                cp_time32[i] = (unsigned int)cp_time[i];
                        error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32));
                } else
#endif
                        error = SYSCTL_OUT(req, cp_time, sizeof(long) * CPUSTATES);
        }
        return error;
}

SYSCTL_PROC(_kern, OID_AUTO, cp_times, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE,
    0, 0, sysctl_kern_cp_times, "LU", "per-CPU time statistics");
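
/*
 * Illustrative sketch only (not part of the kernel build): the size of
 * kern.cp_times depends on mp_maxid, so userland readers usually make a
 * first call to learn the buffer size and a second call to fetch the data:
 *
 *	size_t len = 0;
 *	long *times;
 *
 *	if (sysctlbyname("kern.cp_times", NULL, &len, NULL, 0) == 0 &&
 *	    (times = malloc(len)) != NULL)
 *		sysctlbyname("kern.cp_times", times, &len, NULL, 0);
 */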

#ifdef DEADLKRES
static const char *blessed[] = {
        "getblk",
        "so_snd_sx",
        "so_rcv_sx",
        NULL
};
static int slptime_threshold = 1800;
static int blktime_threshold = 900;
static int sleepfreq = 3;

static void
deadlres_td_on_lock(struct proc *p, struct thread *td, int blkticks)
{
        int tticks;

        sx_assert(&allproc_lock, SX_LOCKED);
        PROC_LOCK_ASSERT(p, MA_OWNED);
        THREAD_LOCK_ASSERT(td, MA_OWNED);
        /*
         * The thread should be blocked on a turnstile; simply check
         * that the turnstile channel is in a good state.
         */
        MPASS(td->td_blocked != NULL);

        tticks = ticks - td->td_blktick;
        if (tticks > blkticks)
                /*
                 * According to the provided thresholds, this thread has
                 * been stuck on a turnstile for too long.
                 */
                panic("%s: possible deadlock detected for %p (%s), "
                    "blocked for %d ticks\n", __func__,
                    td, sched_tdname(td), tticks);
}

static void
deadlres_td_sleep_q(struct proc *p, struct thread *td, int slpticks)
{
        const void *wchan;
        int i, slptype, tticks;

        sx_assert(&allproc_lock, SX_LOCKED);
        PROC_LOCK_ASSERT(p, MA_OWNED);
        THREAD_LOCK_ASSERT(td, MA_OWNED);
        /*
         * Check whether the thread is sleeping on a lock; otherwise skip
         * the check. Drop the thread lock in order to avoid a LOR with
         * the sleepqueue spinlock.
         */
        wchan = td->td_wchan;
        tticks = ticks - td->td_slptick;
        slptype = sleepq_type(wchan);
        if ((slptype == SLEEPQ_SX || slptype == SLEEPQ_LK) &&
            tticks > slpticks) {
                /*
                 * According to the provided thresholds, this thread has
                 * been stuck on a sleepqueue for too long. However, since
                 * it is on a sleepqueue, we still check it against the
                 * blessed list.
                 */
                for (i = 0; blessed[i] != NULL; i++)
                        if (!strcmp(blessed[i], td->td_wmesg))
                                return;

                panic("%s: possible deadlock detected for %p (%s), "
                    "blocked for %d ticks\n", __func__,
                    td, sched_tdname(td), tticks);
        }
}

static void
deadlkres(void)
{
        struct proc *p;
        struct thread *td;
        int blkticks, slpticks, tryl;

        tryl = 0;
        for (;;) {
                blkticks = blktime_threshold * hz;
                slpticks = slptime_threshold * hz;

                /*
                 * Avoid sleeping on the sx lock in order to avoid a
                 * possible priority inversion problem leading to
                 * starvation.
                 * If the lock cannot be acquired after 100 tries, panic.
                 */
                if (!sx_try_slock(&allproc_lock)) {
                        if (tryl > 100)
                                panic("%s: possible deadlock detected "
                                    "on allproc_lock\n", __func__);
                        tryl++;
                        pause("allproc", sleepfreq * hz);
                        continue;
                }
                tryl = 0;
                FOREACH_PROC_IN_SYSTEM(p) {
                        PROC_LOCK(p);
                        if (p->p_state == PRS_NEW) {
                                PROC_UNLOCK(p);
                                continue;
                        }
                        FOREACH_THREAD_IN_PROC(p, td) {
                                thread_lock(td);
                                if (TD_ON_LOCK(td))
                                        deadlres_td_on_lock(p, td,
                                            blkticks);
                                else if (TD_IS_SLEEPING(td))
                                        deadlres_td_sleep_q(p, td,
                                            slpticks);
                                thread_unlock(td);
                        }
                        PROC_UNLOCK(p);
                }
                sx_sunlock(&allproc_lock);

                /* Sleep for sleepfreq seconds. */
                pause("-", sleepfreq * hz);
        }
}

static struct kthread_desc deadlkres_kd = {
        "deadlkres",
        deadlkres,
        (struct thread **)NULL
};

SYSINIT(deadlkres, SI_SUB_CLOCKS, SI_ORDER_ANY, kthread_start, &deadlkres_kd);

static SYSCTL_NODE(_debug, OID_AUTO, deadlkres, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Deadlock resolver");
SYSCTL_INT(_debug_deadlkres, OID_AUTO, slptime_threshold, CTLFLAG_RWTUN,
    &slptime_threshold, 0,
    "Maximum number of seconds a thread may stay on a sleepqueue before "
    "it is considered deadlocked");
SYSCTL_INT(_debug_deadlkres, OID_AUTO, blktime_threshold, CTLFLAG_RWTUN,
    &blktime_threshold, 0,
    "Maximum number of seconds a thread may block on a turnstile before "
    "it is considered deadlocked");
SYSCTL_INT(_debug_deadlkres, OID_AUTO, sleepfreq, CTLFLAG_RWTUN, &sleepfreq, 0,
    "Number of seconds between deadlock resolver runs");
#endif /* DEADLKRES */

void
read_cpu_time(long *cp_time)
{
        struct pcpu *pc;
        int i, j;

        /* Sum up global cp_time[]. */
        bzero(cp_time, sizeof(long) * CPUSTATES);
        CPU_FOREACH(i) {
                pc = pcpu_find(i);
                for (j = 0; j < CPUSTATES; j++)
                        cp_time[j] += pc->pc_cp_time[j];
        }
}

#include <sys/watchdog.h>

static long watchdog_ticks;
static int watchdog_enabled;
static void watchdog_fire(void);
static void watchdog_config(void *, u_int, int *);

static void
watchdog_attach(void)
{
        EVENTHANDLER_REGISTER(watchdog_list, watchdog_config, NULL, 0);
}

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.
 *
 * The main timer, running hz times per second, is used to trigger interval
 * timers, timeouts and rescheduling as needed.
 *
 * The second timer handles kernel and user profiling,
 * and does resource use estimation. If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks. For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires. Otherwise, it would never accumulate
 * cpu ticks. The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock. This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling. This profile clock runs at profhz. We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics. (For profiling, every tick counts.)
 *
 * Time-of-day is maintained using a "timecounter", which may or may
 * not be related to the hardware generating the above mentioned
 * interrupts.
 */

int stathz;
int profhz;
int profprocs;
int psratio;

DPCPU_DEFINE_STATIC(long, pcputicks); /* Per-CPU version of ticks. */
#ifdef DEVICE_POLLING
static int devpoll_run = 0;
#endif

static void
ast_oweupc(struct thread *td, int tda __unused)
{
        if ((td->td_proc->p_flag & P_PROFIL) == 0)
                return;
        addupc_task(td, td->td_profil_addr, td->td_profil_ticks);
        td->td_profil_ticks = 0;
        td->td_pflags &= ~TDP_OWEUPC;
}

static void
ast_alrm(struct thread *td, int tda __unused)
{
        struct proc *p;

        p = td->td_proc;
        PROC_LOCK(p);
        kern_psignal(p, SIGVTALRM);
        PROC_UNLOCK(p);
}

static void
ast_prof(struct thread *td, int tda __unused)
{
        struct proc *p;

        p = td->td_proc;
        PROC_LOCK(p);
        kern_psignal(p, SIGPROF);
        PROC_UNLOCK(p);
}

/*
 * Initialize clock frequencies and start both clocks running.
 */
static void
initclocks(void *dummy __unused)
{
        int i;

        /*
         * Set divisors to 1 (normal case) and let the machine-specific
         * code do its bit.
         */
        mtx_init(&time_lock, "time lock", NULL, MTX_DEF);
        cpu_initclocks();

        /*
         * Compute profhz/stathz, and fix profhz if needed.
         */
        i = stathz ? stathz : hz;
        if (profhz == 0)
                profhz = i;
        psratio = profhz / i;

        ast_register(TDA_OWEUPC, ASTR_ASTF_REQUIRED, 0, ast_oweupc);
        ast_register(TDA_ALRM, ASTR_ASTF_REQUIRED, 0, ast_alrm);
        ast_register(TDA_PROF, ASTR_ASTF_REQUIRED, 0, ast_prof);

#ifdef SW_WATCHDOG
        /* Enable hardclock watchdog now, even if a hardware watchdog exists. */
        watchdog_attach();
#else
        /* Volunteer to run a software watchdog. */
        if (wdog_software_attach == NULL)
                wdog_software_attach = watchdog_attach;
#endif
}
SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL);

static __noinline void
hardclock_itimer(struct thread *td, struct pstats *pstats, int cnt, int usermode)
{
        struct proc *p;
        int ast;

        ast = 0;
        p = td->td_proc;
        if (usermode &&
            timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value)) {
                PROC_ITIMLOCK(p);
                if (itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL],
                    tick * cnt) == 0)
                        ast |= TDAI(TDA_ALRM);
                PROC_ITIMUNLOCK(p);
        }
        if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)) {
                PROC_ITIMLOCK(p);
                if (itimerdecr(&pstats->p_timer[ITIMER_PROF],
                    tick * cnt) == 0)
                        ast |= TDAI(TDA_PROF);
                PROC_ITIMUNLOCK(p);
        }
        if (ast != 0)
                ast_sched_mask(td, ast);
}

void
hardclock(int cnt, int usermode)
{
        struct pstats *pstats;
        struct thread *td = curthread;
        struct proc *p = td->td_proc;
        long global, newticks, *t;

        /*
         * Update per-CPU and possibly global ticks values.
         */
        t = DPCPU_PTR(pcputicks);
        *t += cnt;
        global = atomic_load_long(&ticksl);
        do {
                newticks = *t - global;
                if (newticks <= 0) {
                        if (newticks < -1)
                                *t = global - 1;
                        newticks = 0;
                        break;
                }
        } while (!atomic_fcmpset_long(&ticksl, &global, *t));

        /*
         * Run current process's virtual and profile time, as needed.
         */
        pstats = p->p_stats;
        if (__predict_false(
            timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) ||
            timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)))
                hardclock_itimer(td, pstats, cnt, usermode);

#ifdef HWPMC_HOOKS
        if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid)))
                PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL);
        if (td->td_intr_frame != NULL)
                PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame);
#endif
        /* This CPU advanced the global ticks, so it handles this tick's duties. */
        if (newticks > 0) {
                tc_ticktock(newticks);
#ifdef DEVICE_POLLING
                /* It is both unnecessary and unsafe to run this concurrently. */
                if (atomic_cmpset_acq_int(&devpoll_run, 0, 1)) {
                        /* This is very short and quick. */
                        hardclock_device_poll();
                        atomic_store_rel_int(&devpoll_run, 0);
                }
#endif /* DEVICE_POLLING */
                if (watchdog_enabled > 0) {
                        long left;

                        left = atomic_fetchadd_long(&watchdog_ticks, -newticks);
                        if (left > 0 && left <= newticks)
                                watchdog_fire();
                }
                intr_event_handle(clk_intr_event, NULL);
        }
        if (curcpu == CPU_FIRST())
                cpu_tick_calibration();
        if (__predict_false(DPCPU_GET(epoch_cb_count)))
                GROUPTASK_ENQUEUE(DPCPU_PTR(epoch_cb_task));
}

void
hardclock_sync(int cpu)
{
        long *t;

        KASSERT(!CPU_ABSENT(cpu), ("Absent CPU %d", cpu));

        t = DPCPU_ID_PTR(cpu, pcputicks);
        *t = ticksl;
}

/*
 * Regular integer scaling formula without losing precision:
 */
#define TIME_INT_SCALE(value, mul, div) \
        (((value) / (div)) * (mul) + (((value) % (div)) * (mul)) / (div))

/*
 * Macro for converting seconds and microseconds into actual ticks,
 * based on the given hz value:
 */
#define TIME_TO_TICKS(sec, usec, hz) \
        ((sec) * (hz) + TIME_INT_SCALE(usec, hz, 1 << 6) / (1000000 >> 6))
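
/*
 * Worked example (illustration only): with hz = 1000, converting 1.5 s
 * gives
 *
 *	TIME_TO_TICKS(1, 500000, 1000)
 *	    = 1 * 1000 + TIME_INT_SCALE(500000, 1000, 64) / 15625
 *	    = 1000 + ((500000 / 64) * 1000 + ((500000 % 64) * 1000) / 64) / 15625
 *	    = 1000 + (7812000 + 500) / 15625
 *	    = 1000 + 500 = 1500 ticks,
 *
 * without ever forming the full product usec * hz, which is how
 * TIME_INT_SCALE() avoids overflowing a regular integer for large hz.
 */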

#define TIME_ASSERT_VALID_HZ(hz) \
        _Static_assert(TIME_TO_TICKS(INT_MAX / (hz) - 1, 999999, hz) >= 0 && \
            TIME_TO_TICKS(INT_MAX / (hz) - 1, 999999, hz) < INT_MAX, \
            "tvtohz() can overflow the regular integer type")

/*
 * Compile time assert the maximum and minimum values to fit into a
 * regular integer when computing TIME_TO_TICKS():
 */
TIME_ASSERT_VALID_HZ(HZ_MAXIMUM);
TIME_ASSERT_VALID_HZ(HZ_MINIMUM);

/*
 * The formula is mostly linear, but test some more common values just
 * in case:
 */
TIME_ASSERT_VALID_HZ(1024);
TIME_ASSERT_VALID_HZ(1000);
TIME_ASSERT_VALID_HZ(128);
TIME_ASSERT_VALID_HZ(100);

/*
 * Compute number of ticks representing the specified amount of time.
 * If the specified time is negative, a value of 1 is returned. This
 * function returns a value from 1 up to and including INT_MAX.
 */
int
tvtohz(struct timeval *tv)
{
        int retval;

        /*
         * The values passed here may come from user-space and these
         * checks ensure "tv_usec" is within its allowed range:
         */

        /* check for tv_usec underflow */
        if (__predict_false(tv->tv_usec < 0)) {
                tv->tv_sec += tv->tv_usec / 1000000;
                tv->tv_usec = tv->tv_usec % 1000000;
                /* convert tv_usec to a positive value */
                if (__predict_true(tv->tv_usec < 0)) {
                        tv->tv_usec += 1000000;
                        tv->tv_sec -= 1;
                }
        /* check for tv_usec overflow */
        } else if (__predict_false(tv->tv_usec >= 1000000)) {
                tv->tv_sec += tv->tv_usec / 1000000;
                tv->tv_usec = tv->tv_usec % 1000000;
        }

        /* check for tv_sec underflow */
        if (__predict_false(tv->tv_sec < 0))
                return (1);
        /* check for tv_sec overflow (including room for the tv_usec part) */
        else if (__predict_false(tv->tv_sec >= tick_seconds_max))
                return (INT_MAX);

        /* cast to "int" to avoid platform differences */
        retval = TIME_TO_TICKS((int)tv->tv_sec, (int)tv->tv_usec, hz);

        /* add one additional tick */
        return (retval + 1);
}
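
/*
 * Illustrative sketch only (not part of the kernel build): a caller that
 * wants to turn a relative timeout into ticks, e.g. to hand to tsleep(9),
 * might do the following; with hz = 1000 a 250 ms timeout comes out as
 * 250 + 1 = 251 ticks (note that tvtohz() may normalize the timeval):
 *
 *	struct timeval tv = { .tv_sec = 0, .tv_usec = 250000 };
 *	int timo = tvtohz(&tv);
 */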

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(struct proc *p)
{

        PROC_LOCK_ASSERT(p, MA_OWNED);
        if (p->p_flag & P_STOPPROF)
                return;
        if ((p->p_flag & P_PROFIL) == 0) {
                p->p_flag |= P_PROFIL;
                mtx_lock(&time_lock);
                if (++profprocs == 1)
                        cpu_startprofclock();
                mtx_unlock(&time_lock);
        }
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(struct proc *p)
{

        PROC_LOCK_ASSERT(p, MA_OWNED);
        if (p->p_flag & P_PROFIL) {
                if (p->p_profthreads != 0) {
                        while (p->p_profthreads != 0) {
                                p->p_flag |= P_STOPPROF;
                                msleep(&p->p_profthreads, &p->p_mtx, PPAUSE,
                                    "stopprof", 0);
                        }
                }
                if ((p->p_flag & P_PROFIL) == 0)
                        return;
                p->p_flag &= ~P_PROFIL;
                mtx_lock(&time_lock);
                if (--profprocs == 0)
                        cpu_stopprofclock();
                mtx_unlock(&time_lock);
        }
}

/*
 * Statistics clock. Updates rusage information and calls the scheduler
 * to adjust priorities of the active thread.
 *
 * This should be called by all active processors.
 */
void
statclock(int cnt, int usermode)
{
        struct rusage *ru;
        struct vmspace *vm;
        struct thread *td;
        struct proc *p;
        long rss;
        long *cp_time;
        uint64_t runtime, new_switchtime;

        td = curthread;
        p = td->td_proc;

        cp_time = (long *)PCPU_PTR(cp_time);
        if (usermode) {
                /*
                 * Charge the time as appropriate.
                 */
                td->td_uticks += cnt;
                if (p->p_nice > NZERO)
                        cp_time[CP_NICE] += cnt;
                else
                        cp_time[CP_USER] += cnt;
        } else {
                /*
                 * Came from kernel mode, so we were:
                 * - handling an interrupt,
                 * - doing syscall or trap work on behalf of the current
                 *   user process, or
                 * - spinning in the idle loop.
                 * Whichever it is, charge the time as appropriate.
                 * Note that we charge interrupts to the current process,
                 * regardless of whether they are ``for'' that process,
                 * so that we know how much of its real time was spent
                 * in ``non-process'' (i.e., interrupt) work.
                 */
                if ((td->td_pflags & TDP_ITHREAD) ||
                    td->td_intr_nesting_level >= 2) {
                        td->td_iticks += cnt;
                        cp_time[CP_INTR] += cnt;
                } else {
                        td->td_pticks += cnt;
                        td->td_sticks += cnt;
                        if (!TD_IS_IDLETHREAD(td))
                                cp_time[CP_SYS] += cnt;
                        else
                                cp_time[CP_IDLE] += cnt;
                }
        }

        /* Update resource usage integrals and maximums. */
        MPASS(p->p_vmspace != NULL);
        vm = p->p_vmspace;
        ru = &td->td_ru;
        ru->ru_ixrss += pgtok(vm->vm_tsize) * cnt;
        ru->ru_idrss += pgtok(vm->vm_dsize) * cnt;
        ru->ru_isrss += pgtok(vm->vm_ssize) * cnt;
        rss = pgtok(vmspace_resident_count(vm));
        if (ru->ru_maxrss < rss)
                ru->ru_maxrss = rss;
        KTR_POINT2(KTR_SCHED, "thread", sched_tdname(td), "statclock",
            "prio:%d", td->td_priority, "stathz:%d", (stathz)?stathz:hz);
        SDT_PROBE2(sched, , , tick, td, td->td_proc);
        thread_lock_flags(td, MTX_QUIET);

        /*
         * Compute the amount of time during which the current
         * thread was running, and add that to its total so far.
         */
        new_switchtime = cpu_ticks();
        runtime = new_switchtime - PCPU_GET(switchtime);
        td->td_runtime += runtime;
        td->td_incruntime += runtime;
        PCPU_SET(switchtime, new_switchtime);

        sched_clock(td, cnt);
        thread_unlock(td);
#ifdef HWPMC_HOOKS
        if (td->td_intr_frame != NULL)
                PMC_SOFT_CALL_TF( , , clock, stat, td->td_intr_frame);
#endif
}

void
profclock(int cnt, int usermode, uintfptr_t pc)
{
        struct thread *td;

        td = curthread;
        if (usermode) {
                /*
                 * Came from user mode; CPU was in user state.
                 * If this process is being profiled, record the tick.
                 * If there is no related user location yet, don't
                 * bother trying to count it.
                 */
                if (td->td_proc->p_flag & P_PROFIL)
                        addupc_intr(td, pc, cnt);
        }
#ifdef HWPMC_HOOKS
        if (td->td_intr_frame != NULL)
                PMC_SOFT_CALL_TF( , , clock, prof, td->td_intr_frame);
#endif
}

/*
 * Return information about system clocks.
 */
static int
sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS)
{
        struct clockinfo clkinfo;
        /*
         * Construct clockinfo structure.
         */
        bzero(&clkinfo, sizeof(clkinfo));
        clkinfo.hz = hz;
        clkinfo.tick = tick;
        clkinfo.profhz = profhz;
        clkinfo.stathz = stathz ? stathz : hz;
        return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
}

SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate,
    CTLTYPE_STRUCT|CTLFLAG_RD|CTLFLAG_MPSAFE,
    0, 0, sysctl_kern_clockrate, "S,clockinfo",
    "Rate and period of various kernel clocks");

static void
watchdog_config(void *unused __unused, u_int cmd, int *error)
{
        u_int u;

        u = cmd & WD_INTERVAL;
        if (u >= WD_TO_1SEC) {
                watchdog_ticks = (1 << (u - WD_TO_1SEC)) * hz;
                watchdog_enabled = 1;
                *error = 0;
        } else {
                watchdog_enabled = 0;
        }
}
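
/*
 * Worked example (illustration only, assuming the usual WD_TO_* encoding
 * from <sys/watchdog.h>, where the interval code is the base-2 logarithm
 * of the timeout in nanoseconds): with hz = 1000, a command carrying
 * WD_TO_8SEC would arm the software watchdog with
 *
 *	watchdog_ticks = (1 << (WD_TO_8SEC - WD_TO_1SEC)) * hz
 *	               = (1 << 3) * 1000 = 8000 ticks,
 *
 * i.e. roughly eight seconds' worth of hardclock ticks.
 */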

/*
 * Handle a watchdog timeout by dropping to DDB or panicking.
 */
static void
watchdog_fire(void)
{

#if defined(KDB) && !defined(KDB_UNATTENDED)
        kdb_backtrace();
        kdb_enter(KDB_WHY_WATCHDOG, "watchdog timeout");
#else
        panic("watchdog timeout");
#endif
}