/*-1* SPDX-License-Identifier: BSD-4-Clause2*3* Copyright (c) 1982, 1986, 1989, 19934* The Regents of the University of California. All rights reserved.5* (c) UNIX System Laboratories, Inc.6* Copyright (c) 2005 Robert N. M. Watson7* All rights reserved.8*9* All or some portions of this file are derived from material licensed10* to the University of California by American Telephone and Telegraph11* Co. or Unix System Laboratories, Inc. and are reproduced herein with12* the permission of UNIX System Laboratories, Inc.13*14* Redistribution and use in source and binary forms, with or without15* modification, are permitted provided that the following conditions16* are met:17* 1. Redistributions of source code must retain the above copyright18* notice, this list of conditions and the following disclaimer.19* 2. Redistributions in binary form must reproduce the above copyright20* notice, this list of conditions and the following disclaimer in the21* documentation and/or other materials provided with the distribution.22* 3. Neither the name of the University nor the names of its contributors23* may be used to endorse or promote products derived from this software24* without specific prior written permission.25*26* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND27* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE28* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE29* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE30* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL31* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS32* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)33* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT34* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY35* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF36* SUCH DAMAGE.37*38* Copyright (c) 1994 Christopher G. Demetriou39*40* Redistribution and use in source and binary forms, with or without41* modification, are permitted provided that the following conditions42* are met:43* 1. Redistributions of source code must retain the above copyright44* notice, this list of conditions and the following disclaimer.45* 2. Redistributions in binary form must reproduce the above copyright46* notice, this list of conditions and the following disclaimer in the47* documentation and/or other materials provided with the distribution.48* 3. All advertising materials mentioning features or use of this software49* must display the following acknowledgement:50* This product includes software developed by the University of51* California, Berkeley and its contributors.52* 4. Neither the name of the University nor the names of its contributors53* may be used to endorse or promote products derived from this software54* without specific prior written permission.55*56* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND57* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE58* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE59* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE60* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL61* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS62* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)63* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT64* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY65* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF66* SUCH DAMAGE.67*/6869#include <sys/param.h>70#include <sys/systm.h>71#include <sys/acct.h>72#include <sys/fcntl.h>73#include <sys/kernel.h>74#include <sys/kthread.h>75#include <sys/limits.h>76#include <sys/lock.h>77#include <sys/malloc.h>78#include <sys/mount.h>79#include <sys/mutex.h>80#include <sys/namei.h>81#include <sys/priv.h>82#include <sys/proc.h>83#include <sys/resourcevar.h>84#include <sys/sched.h>85#include <sys/sx.h>86#include <sys/sysctl.h>87#include <sys/syslog.h>88#include <sys/sysproto.h>89#include <sys/tty.h>90#include <sys/vnode.h>9192#include <security/mac/mac_framework.h>9394_Static_assert(sizeof(struct acctv3) - offsetof(struct acctv3, ac_trailer) ==95sizeof(struct acctv2) - offsetof(struct acctv2, ac_trailer), "trailer");96_Static_assert(sizeof(struct acctv3) - offsetof(struct acctv3, ac_len2) ==97sizeof(struct acctv2) - offsetof(struct acctv2, ac_len2), "len2");9899/*100* The routines implemented in this file are described in:101* Leffler, et al.: The Design and Implementation of the 4.3BSD102* UNIX Operating System (Addison Welley, 1989)103* on pages 62-63.104* On May 2007 the historic 3 bits base 8 exponent, 13 bit fraction105* compt_t representation described in the above reference was replaced106* with that of IEEE-754 floats.107*108* Arguably, to simplify accounting operations, this mechanism should109* be replaced by one in which an accounting log file (similar to /dev/klog)110* is read by a user process, etc. However, that has its own problems.111*/112113/* Floating point definitions from <float.h>. */114#define FLT_MANT_DIG 24 /* p */115#define FLT_MAX_EXP 128 /* emax */116117/*118* Internal accounting functions.119* The former's operation is described in Leffler, et al., and the latter120* was provided by UCB with the 4.4BSD-Lite release121*/122static uint32_t encode_timeval(struct timeval);123static uint32_t encode_long(long);124static void acctwatch(void);125static void acct_thread(void *);126static int acct_disable(struct thread *, int);127128/*129* Accounting vnode pointer, saved vnode pointer, and flags for each.130* acct_sx protects against changes to the active vnode and credentials131* while accounting records are being committed to disk.132*/133static int acct_configured;134static int acct_suspended;135static struct vnode *acct_vp;136static struct ucred *acct_cred;137static int acct_flags;138static struct sx acct_sx;139140SX_SYSINIT(acct, &acct_sx, "acct_sx");141142/*143* State of the accounting kthread.144*/145static int acct_state;146147#define ACCT_RUNNING 1 /* Accounting kthread is running. */148#define ACCT_EXITREQ 2 /* Accounting kthread should exit. */149150/*151* Values associated with enabling and disabling accounting152*/153static int acctsuspend = 2; /* stop accounting when < 2% free space left */154SYSCTL_INT(_kern, OID_AUTO, acct_suspend, CTLFLAG_RW,155&acctsuspend, 0, "percentage of free disk space below which accounting stops");156157static int acctresume = 4; /* resume when free space risen to > 4% */158SYSCTL_INT(_kern, OID_AUTO, acct_resume, CTLFLAG_RW,159&acctresume, 0, "percentage of free disk space above which accounting resumes");160161static int acctchkfreq = 15; /* frequency (in seconds) to check space */162163static int164sysctl_acct_chkfreq(SYSCTL_HANDLER_ARGS)165{166int error, value;167168/* Write out the old value. */169error = SYSCTL_OUT(req, &acctchkfreq, sizeof(int));170if (error || req->newptr == NULL)171return (error);172173/* Read in and verify the new value. */174error = SYSCTL_IN(req, &value, sizeof(int));175if (error)176return (error);177if (value <= 0)178return (EINVAL);179acctchkfreq = value;180return (0);181}182SYSCTL_PROC(_kern, OID_AUTO, acct_chkfreq,183CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &acctchkfreq, 0,184sysctl_acct_chkfreq, "I",185"frequency for checking the free space");186187SYSCTL_INT(_kern, OID_AUTO, acct_configured, CTLFLAG_RD, &acct_configured, 0,188"Accounting configured or not");189190SYSCTL_INT(_kern, OID_AUTO, acct_suspended, CTLFLAG_RD, &acct_suspended, 0,191"Accounting suspended or not");192193/*194* Accounting system call. Written based on the specification and previous195* implementation done by Mark Tinguely.196*/197int198sys_acct(struct thread *td, struct acct_args *uap)199{200struct nameidata nd;201int error, flags, replacing;202203error = priv_check(td, PRIV_ACCT);204if (error)205return (error);206207/*208* If accounting is to be started to a file, open that file for209* appending and make sure it's a 'normal'.210*/211if (uap->path != NULL) {212NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,213uap->path);214flags = FWRITE | O_APPEND;215error = vn_open(&nd, &flags, 0, NULL);216if (error)217return (error);218NDFREE_PNBUF(&nd);219#ifdef MAC220error = mac_system_check_acct(td->td_ucred, nd.ni_vp);221if (error) {222VOP_UNLOCK(nd.ni_vp);223vn_close(nd.ni_vp, flags, td->td_ucred, td);224return (error);225}226#endif227VOP_UNLOCK(nd.ni_vp);228if (nd.ni_vp->v_type != VREG) {229vn_close(nd.ni_vp, flags, td->td_ucred, td);230return (EACCES);231}232#ifdef MAC233} else {234error = mac_system_check_acct(td->td_ucred, NULL);235if (error)236return (error);237#endif238}239240/*241* Disallow concurrent access to the accounting vnode while we swap242* it out, in order to prevent access after close.243*/244sx_xlock(&acct_sx);245246/*247* Don't log spurious disable/enable messages if we are248* switching from one accounting file to another due to log249* rotation.250*/251replacing = (acct_vp != NULL && uap->path != NULL);252253/*254* If accounting was previously enabled, kill the old space-watcher,255* close the file, and (if no new file was specified, leave). Reset256* the suspended state regardless of whether accounting remains257* enabled.258*/259acct_suspended = 0;260if (acct_vp != NULL)261error = acct_disable(td, !replacing);262if (uap->path == NULL) {263if (acct_state & ACCT_RUNNING) {264acct_state |= ACCT_EXITREQ;265wakeup(&acct_state);266}267sx_xunlock(&acct_sx);268return (error);269}270271/*272* Save the new accounting file vnode, and schedule the new273* free space watcher.274*/275acct_vp = nd.ni_vp;276acct_cred = crhold(td->td_ucred);277acct_flags = flags;278if (acct_state & ACCT_RUNNING)279acct_state &= ~ACCT_EXITREQ;280else {281/*282* Try to start up an accounting kthread. We may start more283* than one, but if so the extras will commit suicide as284* soon as they start up.285*/286error = kproc_create(acct_thread, NULL, NULL, 0, 0,287"accounting");288if (error) {289(void) acct_disable(td, 0);290sx_xunlock(&acct_sx);291log(LOG_NOTICE, "Unable to start accounting thread\n");292return (error);293}294}295acct_configured = 1;296sx_xunlock(&acct_sx);297if (!replacing)298log(LOG_NOTICE, "Accounting enabled\n");299return (error);300}301302/*303* Disable currently in-progress accounting by closing the vnode, dropping304* our reference to the credential, and clearing the vnode's flags.305*/306static int307acct_disable(struct thread *td, int logging)308{309int error;310311sx_assert(&acct_sx, SX_XLOCKED);312error = vn_close(acct_vp, acct_flags, acct_cred, td);313crfree(acct_cred);314acct_configured = 0;315acct_vp = NULL;316acct_cred = NULL;317acct_flags = 0;318if (logging)319log(LOG_NOTICE, "Accounting disabled\n");320return (error);321}322323/*324* Write out process accounting information, on process exit.325* Data to be written out is specified in Leffler, et al.326* and are enumerated below. (They're also noted in the system327* "acct.h" header file.)328*/329int330acct_process(struct thread *td)331{332struct acctv3 acct;333struct timeval ut, st, tmp;334struct proc *p;335struct rusage ru;336int t, ret;337338/*339* Lockless check of accounting condition before doing the hard340* work.341*/342if (acct_vp == NULL || acct_suspended)343return (0);344345memset(&acct, 0, sizeof(acct));346347sx_slock(&acct_sx);348349/*350* If accounting isn't enabled, don't bother. Have to check again351* once we own the lock in case we raced with disabling of accounting352* by another thread.353*/354if (acct_vp == NULL || acct_suspended) {355sx_sunlock(&acct_sx);356return (0);357}358359p = td->td_proc;360td->td_pflags2 |= TDP2_ACCT;361362/*363* Get process accounting information.364*/365366sx_slock(&proctree_lock);367PROC_LOCK(p);368369/* (1) The terminal from which the process was started */370if ((p->p_flag & P_CONTROLT) && p->p_pgrp->pg_session->s_ttyp)371acct.ac_tty = tty_udev(p->p_pgrp->pg_session->s_ttyp);372else373acct.ac_tty = NODEV;374sx_sunlock(&proctree_lock);375376/* (2) The name of the command that ran */377bcopy(p->p_comm, acct.ac_comm, sizeof acct.ac_comm);378379/* (3) The amount of user and system time that was used */380rufetchcalc(p, &ru, &ut, &st);381acct.ac_utime = encode_timeval(ut);382acct.ac_stime = encode_timeval(st);383384/* (4) The elapsed time the command ran (and its starting time) */385getboottime(&tmp);386timevaladd(&tmp, &p->p_stats->p_start);387acct.ac_btime = tmp.tv_sec;388microuptime(&tmp);389timevalsub(&tmp, &p->p_stats->p_start);390acct.ac_etime = encode_timeval(tmp);391392/* (5) The average amount of memory used */393tmp = ut;394timevaladd(&tmp, &st);395/* Convert tmp (i.e. u + s) into hz units to match ru_i*. */396t = tmp.tv_sec * hz + tmp.tv_usec / tick;397if (t)398acct.ac_mem = encode_long((ru.ru_ixrss + ru.ru_idrss +399+ ru.ru_isrss) / t);400else401acct.ac_mem = 0;402403/* (6) The number of disk I/O operations done */404acct.ac_io = encode_long(ru.ru_inblock + ru.ru_oublock);405406/* (7) The UID and GID of the process */407acct.ac_uid = p->p_ucred->cr_ruid;408acct.ac_gid = p->p_ucred->cr_rgid;409410/* (8) The boolean flags that tell how the process terminated, etc. */411acct.ac_flagx = p->p_acflag;412413PROC_UNLOCK(p);414415/* Setup ancillary structure fields. */416acct.ac_flagx |= ANVER;417acct.ac_zero = 0;418acct.ac_version = 3;419acct.ac_len = acct.ac_len2 = sizeof(acct);420421/*422* Write the accounting information to the file.423*/424ret = vn_rdwr(UIO_WRITE, acct_vp, (caddr_t)&acct, sizeof (acct),425(off_t)0, UIO_SYSSPACE, IO_APPEND|IO_UNIT, acct_cred, NOCRED,426NULL, td);427sx_sunlock(&acct_sx);428td->td_pflags2 &= ~TDP2_ACCT;429return (ret);430}431432/* FLOAT_CONVERSION_START (Regression testing; don't remove this line.) */433434/* Convert timevals and longs into IEEE-754 bit patterns. */435436/* Mantissa mask (MSB is implied, so subtract 1). */437#define MANT_MASK ((1 << (FLT_MANT_DIG - 1)) - 1)438439/*440* We calculate integer values to a precision of approximately441* 28 bits.442* This is high-enough precision to fill the 24 float bits443* and low-enough to avoid overflowing the 32 int bits.444*/445#define CALC_BITS 28446447/* log_2(1000000). */448#define LOG2_1M 20449450/*451* Convert the elements of a timeval into a 32-bit word holding452* the bits of a IEEE-754 float.453* The float value represents the timeval's value in microsecond units.454*/455static uint32_t456encode_timeval(struct timeval tv)457{458int log2_s;459int val, exp; /* Unnormalized value and exponent */460int norm_exp; /* Normalized exponent */461int shift;462463/*464* First calculate value and exponent to about CALC_BITS precision.465* Note that the following conditionals have been ordered so that466* the most common cases appear first.467*/468if (tv.tv_sec == 0) {469if (tv.tv_usec == 0)470return (0);471exp = 0;472val = tv.tv_usec;473} else {474/*475* Calculate the value to a precision of approximately476* CALC_BITS.477*/478log2_s = fls(tv.tv_sec) - 1;479if (log2_s + LOG2_1M < CALC_BITS) {480exp = 0;481val = 1000000 * tv.tv_sec + tv.tv_usec;482} else {483exp = log2_s + LOG2_1M - CALC_BITS;484val = (unsigned int)(((uint64_t)1000000 * tv.tv_sec +485tv.tv_usec) >> exp);486}487}488/* Now normalize and pack the value into an IEEE-754 float. */489norm_exp = fls(val) - 1;490shift = FLT_MANT_DIG - norm_exp - 1;491#ifdef ACCT_DEBUG492printf("val=%d exp=%d shift=%d log2(val)=%d\n",493val, exp, shift, norm_exp);494printf("exp=%x mant=%x\n", FLT_MAX_EXP - 1 + exp + norm_exp,495((shift > 0 ? (val << shift) : (val >> -shift)) & MANT_MASK));496#endif497return (((FLT_MAX_EXP - 1 + exp + norm_exp) << (FLT_MANT_DIG - 1)) |498((shift > 0 ? val << shift : val >> -shift) & MANT_MASK));499}500501/*502* Convert a non-negative long value into the bit pattern of503* an IEEE-754 float value.504*/505static uint32_t506encode_long(long val)507{508int norm_exp; /* Normalized exponent */509int shift;510511if (val == 0)512return (0);513if (val < 0) {514log(LOG_NOTICE,515"encode_long: negative value %ld in accounting record\n",516val);517val = LONG_MAX;518}519norm_exp = fls(val) - 1;520shift = FLT_MANT_DIG - norm_exp - 1;521#ifdef ACCT_DEBUG522printf("val=%d shift=%d log2(val)=%d\n",523val, shift, norm_exp);524printf("exp=%x mant=%x\n", FLT_MAX_EXP - 1 + exp + norm_exp,525((shift > 0 ? (val << shift) : (val >> -shift)) & MANT_MASK));526#endif527return (((FLT_MAX_EXP - 1 + norm_exp) << (FLT_MANT_DIG - 1)) |528((shift > 0 ? val << shift : val >> -shift) & MANT_MASK));529}530531/* FLOAT_CONVERSION_END (Regression testing; don't remove this line.) */532533/*534* Periodically check the filesystem to see if accounting535* should be turned on or off. Beware the case where the vnode536* has been vgone()'d out from underneath us, e.g. when the file537* system containing the accounting file has been forcibly unmounted.538*/539/* ARGSUSED */540static void541acctwatch(void)542{543struct statfs *sp;544545sx_assert(&acct_sx, SX_XLOCKED);546547/*548* If accounting was disabled before our kthread was scheduled,549* then acct_vp might be NULL. If so, just ask our kthread to550* exit and return.551*/552if (acct_vp == NULL) {553acct_state |= ACCT_EXITREQ;554return;555}556557/*558* If our vnode is no longer valid, tear it down and signal the559* accounting thread to die.560*/561if (acct_vp->v_type == VBAD) {562(void) acct_disable(NULL, 1);563acct_state |= ACCT_EXITREQ;564return;565}566567/*568* Stopping here is better than continuing, maybe it will be VBAD569* next time around.570*/571sp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);572if (VFS_STATFS(acct_vp->v_mount, sp) < 0) {573free(sp, M_STATFS);574return;575}576if (acct_suspended) {577if (sp->f_bavail > (int64_t)(acctresume * sp->f_blocks /578100)) {579acct_suspended = 0;580log(LOG_NOTICE, "Accounting resumed\n");581}582} else {583if (sp->f_bavail <= (int64_t)(acctsuspend * sp->f_blocks /584100)) {585acct_suspended = 1;586log(LOG_NOTICE, "Accounting suspended\n");587}588}589free(sp, M_STATFS);590}591592/*593* The main loop for the dedicated kernel thread that periodically calls594* acctwatch().595*/596static void597acct_thread(void *dummy)598{599u_char pri;600601/* This is a low-priority kernel thread. */602pri = PRI_MAX_KERN;603thread_lock(curthread);604sched_prio(curthread, pri);605thread_unlock(curthread);606607/* If another accounting kthread is already running, just die. */608sx_xlock(&acct_sx);609if (acct_state & ACCT_RUNNING) {610sx_xunlock(&acct_sx);611kproc_exit(0);612}613acct_state |= ACCT_RUNNING;614615/* Loop until we are asked to exit. */616while (!(acct_state & ACCT_EXITREQ)) {617/* Perform our periodic checks. */618acctwatch();619620/*621* We check this flag again before sleeping since the622* acctwatch() might have shut down accounting and asked us623* to exit.624*/625if (!(acct_state & ACCT_EXITREQ)) {626sx_sleep(&acct_state, &acct_sx, 0, "-",627acctchkfreq * hz);628}629}630631/*632* Acknowledge the exit request and shutdown. We clear both the633* exit request and running flags.634*/635acct_state = 0;636sx_xunlock(&acct_sx);637kproc_exit(0);638}639640641