Path: blob/main/sys/contrib/openzfs/module/os/linux/spl/spl-condvar.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
 * Copyright (C) 2007 The Regents of the University of California.
 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
 * UCRL-CODE-235197
 *
 * This file is part of the SPL, Solaris Porting Layer.
 *
 * The SPL is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2 of the License, or (at your
 * option) any later version.
 *
 * The SPL is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with the SPL. If not, see <http://www.gnu.org/licenses/>.
 *
 * Solaris Porting Layer (SPL) Condition Variables Implementation.
 */

#include <sys/condvar.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <linux/hrtimer.h>
#include <linux/compiler_compat.h>
#include <linux/mod_compat.h>

#include <linux/sched.h>
#include <linux/sched/signal.h>

#define	MAX_HRTIMEOUT_SLACK_US	1000
static unsigned int spl_schedule_hrtimeout_slack_us = 0;

static int
param_set_hrtimeout_slack(const char *buf, zfs_kernel_param_t *kp)
{
	unsigned long val;
	int error;

	error = kstrtoul(buf, 0, &val);
	if (error)
		return (error);

	if (val > MAX_HRTIMEOUT_SLACK_US)
		return (-EINVAL);

	error = param_set_uint(buf, kp);
	if (error < 0)
		return (error);

	return (0);
}

module_param_call(spl_schedule_hrtimeout_slack_us, param_set_hrtimeout_slack,
	param_get_uint, &spl_schedule_hrtimeout_slack_us, 0644);
MODULE_PARM_DESC(spl_schedule_hrtimeout_slack_us,
	"schedule_hrtimeout_range() delta/slack value in us, default(0)");

void
__cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg)
{
	ASSERT(cvp);
	ASSERT0P(name);
	ASSERT(type == CV_DEFAULT);
	ASSERT0P(arg);

	cvp->cv_magic = CV_MAGIC;
	init_waitqueue_head(&cvp->cv_event);
	init_waitqueue_head(&cvp->cv_destroy);
	atomic_set(&cvp->cv_waiters, 0);
	atomic_set(&cvp->cv_refs, 1);
	cvp->cv_mutex = NULL;
}
EXPORT_SYMBOL(__cv_init);

static int
cv_destroy_wakeup(kcondvar_t *cvp)
{
	if (!atomic_read(&cvp->cv_waiters) && !atomic_read(&cvp->cv_refs)) {
		ASSERT0P(cvp->cv_mutex);
		ASSERT(!waitqueue_active(&cvp->cv_event));
		return (1);
	}

	return (0);
}

void
__cv_destroy(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);

	cvp->cv_magic = CV_DESTROY;
	atomic_dec(&cvp->cv_refs);

	/* Block until all waiters are woken and references dropped. */
	while (cv_destroy_wakeup(cvp) == 0)
		wait_event_timeout(cvp->cv_destroy, cv_destroy_wakeup(cvp), 1);

	ASSERT0P(cvp->cv_mutex);
	ASSERT3S(atomic_read(&cvp->cv_refs), ==, 0);
	ASSERT3S(atomic_read(&cvp->cv_waiters), ==, 0);
	ASSERT3S(waitqueue_active(&cvp->cv_event), ==, 0);
}
EXPORT_SYMBOL(__cv_destroy);
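/*
 * Illustrative lifecycle sketch (hypothetical consumer, kept out of the
 * build with #if 0): a condition variable is paired with a mutex for its
 * whole life, created with CV_DEFAULT and a NULL name per the asserts in
 * __cv_init() above, and destroyed only after cv_destroy() has observed
 * the last waiter and reference go away.  The example_* names below are
 * invented for illustration and are shared by the later sketches in this
 * file.
 */
#if 0
static kmutex_t example_lock;
static kcondvar_t example_cv;
static boolean_t example_ready;

static void
example_setup(void)
{
	mutex_init(&example_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&example_cv, NULL, CV_DEFAULT, NULL);
	example_ready = B_FALSE;
}

static void
example_teardown(void)
{
	cv_destroy(&example_cv);	/* blocks until no waiters remain */
	mutex_destroy(&example_lock);
}
#endif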
static void
cv_wait_common(kcondvar_t *cvp, kmutex_t *mp, int state, int io)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));
	atomic_inc(&cvp->cv_refs);

	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex is dropped after prepare_to_wait(); this ensures
	 * we're linked in to the waiters list and avoids the race where
	 * 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);
	if (io)
		io_schedule();
	else
		schedule();

	/* No more waiters; a different mutex could be used */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy.  But this is
		 * just for debug anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	/*
	 * Take the mutex only after we release the cvp; otherwise we could
	 * deadlock with a thread holding the mutex and calling cv_destroy().
	 */
	mutex_enter(mp);
}

void
__cv_wait(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 0);
}
EXPORT_SYMBOL(__cv_wait);

void
__cv_wait_io(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 1);
}
EXPORT_SYMBOL(__cv_wait_io);

int
__cv_wait_io_sig(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 1);

	return (signal_pending(current) ? 0 : 1);
}
EXPORT_SYMBOL(__cv_wait_io_sig);

int
__cv_wait_sig(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);

	return (signal_pending(current) ? 0 : 1);
}
EXPORT_SYMBOL(__cv_wait_sig);
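/*
 * Illustrative wait-side sketch (hypothetical consumer, #if 0'd out):
 * because waiters are queued exclusively and a wakeup may race with
 * another thread re-taking the mutex, a wait must always sit in a
 * predicate loop.  cv_wait_sig() follows the same pattern but returns 0
 * when a signal is pending, which the caller should check.
 */
#if 0
static void
example_consumer(void)
{
	mutex_enter(&example_lock);
	while (!example_ready)
		cv_wait(&example_cv, &example_lock);
	/* example_ready is true and example_lock is still held here */
	mutex_exit(&example_lock);
}
#endif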
void
__cv_wait_idle(kcondvar_t *cvp, kmutex_t *mp)
{
	sigset_t blocked, saved;

	sigfillset(&blocked);
	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);
	(void) sigprocmask(SIG_SETMASK, &saved, NULL);
}
EXPORT_SYMBOL(__cv_wait_idle);

/*
 * 'expire_time' argument is an absolute wall clock time in jiffies.
 * Returns 1 if the thread was woken before 'expire_time', or -1 if the
 * timeout expired first.
 */
static clock_t
__cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time,
    int state, int io)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;
	clock_t time_left;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));

	/* XXX - Does not handle jiffie wrap properly */
	time_left = expire_time - jiffies;
	if (time_left <= 0)
		return (-1);

	atomic_inc(&cvp->cv_refs);
	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex is dropped after prepare_to_wait(); this ensures
	 * we're linked in to the waiters list and avoids the race where
	 * 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);
	if (io)
		time_left = io_schedule_timeout(time_left);
	else
		time_left = schedule_timeout(time_left);

	/* No more waiters; a different mutex could be used */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy.  But this is
		 * just for debug anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	/*
	 * Take the mutex only after we release the cvp; otherwise we could
	 * deadlock with a thread holding the mutex and calling cv_destroy().
	 */
	mutex_enter(mp);
	return (time_left > 0 ? 1 : -1);
}

int
__cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	return (__cv_timedwait_common(cvp, mp, exp_time,
	    TASK_UNINTERRUPTIBLE, 0));
}
EXPORT_SYMBOL(__cv_timedwait);

int
__cv_timedwait_io(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	return (__cv_timedwait_common(cvp, mp, exp_time,
	    TASK_UNINTERRUPTIBLE, 1));
}
EXPORT_SYMBOL(__cv_timedwait_io);

int
__cv_timedwait_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	int rc;

	rc = __cv_timedwait_common(cvp, mp, exp_time, TASK_INTERRUPTIBLE, 0);
	return (signal_pending(current) ? 0 : rc);
}
EXPORT_SYMBOL(__cv_timedwait_sig);
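/*
 * Illustrative timed-wait sketch (hypothetical consumer, #if 0'd out):
 * cv_timedwait() takes an *absolute* expiry in jiffies, so a relative
 * timeout is built from ddi_get_lbolt().  SEC_TO_TICK() is assumed here
 * to be the SPL seconds-to-ticks helper.  A return of -1 means the
 * deadline passed before the condition was signaled.
 */
#if 0
static void
example_timed_consumer(void)
{
	clock_t deadline = ddi_get_lbolt() + SEC_TO_TICK(5);

	mutex_enter(&example_lock);
	while (!example_ready) {
		if (cv_timedwait(&example_cv, &example_lock, deadline) == -1)
			break;	/* deadline expired, condition still false */
	}
	mutex_exit(&example_lock);
}
#endif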
int
__cv_timedwait_idle(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	sigset_t blocked, saved;
	int rc;

	sigfillset(&blocked);
	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
	rc = __cv_timedwait_common(cvp, mp, exp_time,
	    TASK_INTERRUPTIBLE, 0);
	(void) sigprocmask(SIG_SETMASK, &saved, NULL);

	return (rc);
}
EXPORT_SYMBOL(__cv_timedwait_idle);

/*
 * 'expire_time' argument is an absolute clock time in nanoseconds.
 * Returns 1 if the thread was woken before 'expire_time', or -1 if the
 * timeout expired first.
 */
static clock_t
__cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time,
    hrtime_t res, int state)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;
	hrtime_t time_left;
	ktime_t ktime_left;
	u64 slack = 0;
	int rc;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));

	time_left = expire_time - gethrtime();
	if (time_left <= 0)
		return (-1);

	atomic_inc(&cvp->cv_refs);
	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex is dropped after prepare_to_wait(); this ensures
	 * we're linked in to the waiters list and avoids the race where
	 * 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);

	ktime_left = ktime_set(0, time_left);
	slack = MIN(MAX(res, spl_schedule_hrtimeout_slack_us * NSEC_PER_USEC),
	    MAX_HRTIMEOUT_SLACK_US * NSEC_PER_USEC);
	rc = schedule_hrtimeout_range(&ktime_left, slack, HRTIMER_MODE_REL);

	/* No more waiters; a different mutex could be used */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy.  But this is
		 * just for debug anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	mutex_enter(mp);
	return (rc == -EINTR ? 1 : -1);
}

/*
 * Compatibility wrapper for the cv_timedwait_hires() Illumos interface.
 */
static int
cv_timedwait_hires_common(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag, int state)
{
	if (!(flag & CALLOUT_FLAG_ABSOLUTE))
		tim += gethrtime();

	return (__cv_timedwait_hires(cvp, mp, tim, res, state));
}

int
cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res,
    int flag)
{
	return (cv_timedwait_hires_common(cvp, mp, tim, res, flag,
	    TASK_UNINTERRUPTIBLE));
}
EXPORT_SYMBOL(cv_timedwait_hires);

int
cv_timedwait_sig_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag)
{
	int rc;

	rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag,
	    TASK_INTERRUPTIBLE);
	return (signal_pending(current) ? 0 : rc);
}
EXPORT_SYMBOL(cv_timedwait_sig_hires);
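/*
 * Illustrative high-resolution sketch (hypothetical consumer, #if 0'd
 * out): with CALLOUT_FLAG_ABSOLUTE the 'tim' argument is an absolute
 * gethrtime() value in nanoseconds; without the flag it is relative.
 * The 'res' argument feeds the hrtimer slack computed above, clamped to
 * MAX_HRTIMEOUT_SLACK_US.  MSEC2NSEC() is assumed here to be the SPL
 * milliseconds-to-nanoseconds helper.
 */
#if 0
static void
example_hires_consumer(void)
{
	hrtime_t deadline = gethrtime() + MSEC2NSEC(10);

	mutex_enter(&example_lock);
	while (!example_ready) {
		if (cv_timedwait_hires(&example_cv, &example_lock, deadline,
		    MSEC2NSEC(1), CALLOUT_FLAG_ABSOLUTE) == -1)
			break;	/* deadline expired, condition still false */
	}
	mutex_exit(&example_lock);
}
#endif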
int
cv_timedwait_idle_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag)
{
	sigset_t blocked, saved;
	int rc;

	sigfillset(&blocked);
	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
	rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag,
	    TASK_INTERRUPTIBLE);
	(void) sigprocmask(SIG_SETMASK, &saved, NULL);

	return (rc);
}
EXPORT_SYMBOL(cv_timedwait_idle_hires);

void
__cv_signal(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	atomic_inc(&cvp->cv_refs);

	/*
	 * All waiters are added with WQ_FLAG_EXCLUSIVE, so only one
	 * waiter will be set runnable with each call to wake_up().
	 * Additionally wake_up() holds a spin_lock associated with
	 * the wait queue to ensure we don't race waking up processes.
	 */
	if (atomic_read(&cvp->cv_waiters) > 0)
		wake_up(&cvp->cv_event);

	atomic_dec(&cvp->cv_refs);
}
EXPORT_SYMBOL(__cv_signal);

void
__cv_broadcast(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	atomic_inc(&cvp->cv_refs);

	/*
	 * wake_up_all() will wake up all waiters even those which
	 * have the WQ_FLAG_EXCLUSIVE flag set.
	 */
	if (atomic_read(&cvp->cv_waiters) > 0)
		wake_up_all(&cvp->cv_event);

	atomic_dec(&cvp->cv_refs);
}
EXPORT_SYMBOL(__cv_broadcast);
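/*
 * Illustrative wake-side sketch (hypothetical producer, #if 0'd out):
 * the predicate must be updated while holding the same mutex the waiters
 * use; then cv_signal() makes one exclusive waiter runnable, while
 * cv_broadcast() wakes them all.
 */
#if 0
static void
example_producer(void)
{
	mutex_enter(&example_lock);
	example_ready = B_TRUE;
	cv_broadcast(&example_cv);	/* or cv_signal() to wake just one */
	mutex_exit(&example_lock);
}
#endif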