Path: blob/main/sys/compat/linuxkpi/common/src/linux_rcu.c
/*-
 * Copyright (c) 2016 Matthew Macy ([email protected])
 * Copyright (c) 2017-2021 Hans Petter Selasky ([email protected])
 * All rights reserved.
 * Copyright (c) 2024 The FreeBSD Foundation
 *
 * Portions of this software were developed by Björn Zeeb
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/kdb.h>

#include <ck_epoch.h>

#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/compat.h>
#include <linux/llist.h>
#include <linux/irq_work.h>

/*
 * By defining CONFIG_NO_RCU_SKIP LinuxKPI RCU locks and asserts will
 * not be skipped during panic().
 */
#ifdef CONFIG_NO_RCU_SKIP
#define	RCU_SKIP(void) 0
#else
#define	RCU_SKIP(void) unlikely(SCHEDULER_STOPPED() || kdb_active)
#endif

struct callback_head {
	union {
		STAILQ_ENTRY(callback_head) entry;
		struct llist_node node;
	};
	rcu_callback_t func;
};

struct linux_epoch_head {
	struct llist_head cb_head;
	struct task task;
} __aligned(CACHE_LINE_SIZE);

struct linux_epoch_record {
	ck_epoch_record_t epoch_record;
	TAILQ_HEAD(, task_struct) ts_head;
	int cpuid;
	int type;
} __aligned(CACHE_LINE_SIZE);

/*
 * Verify that "struct rcu_head" is big enough to hold "struct
 * callback_head". This has been done to avoid having to add special
 * compile flags for including ck_epoch.h to all clients of the
 * LinuxKPI.
 */
CTASSERT(sizeof(struct rcu_head) == sizeof(struct callback_head));
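
/*
 * Illustrative note: because the assertion above guarantees that
 * "struct rcu_head" and "struct callback_head" occupy the same amount
 * of storage, the LinuxKPI can overlay the two types and simply cast
 * the "struct rcu_head *" handed in by Linux consumers, as
 * linux_call_rcu() does further down:
 *
 *	rcu = (struct callback_head *)context;
 *
 * The union inside "struct callback_head" lets that same storage be
 * linked either on the lock-free llist (while queued) or on the
 * STAILQ (while being dispatched by the cleaner task).
 */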
/*
 * Verify that "rcu_section[0]" has the same size as
 * "ck_epoch_section_t". This has been done to avoid having to add
 * special compile flags for including ck_epoch.h to all clients of
 * the LinuxKPI.
 */
CTASSERT(sizeof(((struct task_struct *)0)->rcu_section[0]) ==
    sizeof(ck_epoch_section_t));

/*
 * Verify that "epoch_record" is at beginning of "struct
 * linux_epoch_record":
 */
CTASSERT(offsetof(struct linux_epoch_record, epoch_record) == 0);

CTASSERT(TS_RCU_TYPE_MAX == RCU_TYPE_MAX);

static ck_epoch_t linux_epoch[RCU_TYPE_MAX];
static struct linux_epoch_head linux_epoch_head[RCU_TYPE_MAX];
DPCPU_DEFINE_STATIC(struct linux_epoch_record, linux_epoch_record[RCU_TYPE_MAX]);

static void linux_rcu_cleaner_func(void *, int);

static void
linux_rcu_runtime_init(void *arg __unused)
{
	struct linux_epoch_head *head;
	int i;
	int j;

	for (j = 0; j != RCU_TYPE_MAX; j++) {
		ck_epoch_init(&linux_epoch[j]);

		head = &linux_epoch_head[j];

		TASK_INIT(&head->task, 0, linux_rcu_cleaner_func, head);
		init_llist_head(&head->cb_head);

		CPU_FOREACH(i) {
			struct linux_epoch_record *record;

			record = &DPCPU_ID_GET(i, linux_epoch_record[j]);

			record->cpuid = i;
			record->type = j;
			ck_epoch_register(&linux_epoch[j],
			    &record->epoch_record, NULL);
			TAILQ_INIT(&record->ts_head);
		}
	}
}
SYSINIT(linux_rcu_runtime, SI_SUB_CPU, SI_ORDER_ANY, linux_rcu_runtime_init, NULL);

static void
linux_rcu_cleaner_func(void *context, int pending __unused)
{
	struct linux_epoch_head *head = context;
	struct callback_head *rcu;
	STAILQ_HEAD(, callback_head) tmp_head;
	struct llist_node *node, *next;
	uintptr_t offset;

	/* move current callbacks into own queue */
	STAILQ_INIT(&tmp_head);
	llist_for_each_safe(node, next, llist_del_all(&head->cb_head)) {
		rcu = container_of(node, struct callback_head, node);
		/* re-reverse list to restore chronological order */
		STAILQ_INSERT_HEAD(&tmp_head, rcu, entry);
	}

	/* synchronize */
	linux_synchronize_rcu(head - linux_epoch_head);

	/* dispatch all callbacks, if any */
	while ((rcu = STAILQ_FIRST(&tmp_head)) != NULL) {
		STAILQ_REMOVE_HEAD(&tmp_head, entry);

		offset = (uintptr_t)rcu->func;

		if (offset < LINUX_KFREE_RCU_OFFSET_MAX)
			kfree((char *)rcu - offset);
		else
			rcu->func((struct rcu_head *)rcu);
	}
}
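
/*
 * Usage sketch: the "offset < LINUX_KFREE_RCU_OFFSET_MAX" test above
 * tells kfree_rcu() requests apart from plain call_rcu() callbacks.
 * For kfree_rcu() the stored "func" is not a function pointer but the
 * byte offset of the embedded rcu_head, so the cleaner can recover the
 * original allocation by subtracting it. A rough consumer-side example
 * (the structure and field names are made up for illustration):
 *
 *	struct foo {
 *		int value;
 *		struct rcu_head rcu;
 *	};
 *
 *	kfree_rcu(foo_ptr, rcu);
 *		// queues "foo_ptr->rcu" with func encoding
 *		// offsetof(struct foo, rcu), which is far smaller than
 *		// any valid callback pointer
 *
 * After the grace period the cleaner then effectively performs
 * kfree((char *)&foo_ptr->rcu - offsetof(struct foo, rcu)).
 */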
void
linux_rcu_read_lock(unsigned type)
{
	struct linux_epoch_record *record;
	struct task_struct *ts;

	MPASS(type < RCU_TYPE_MAX);

	if (RCU_SKIP())
		return;

	ts = current;

	/* assert valid refcount */
	MPASS(ts->rcu_recurse[type] != INT_MAX);

	if (++(ts->rcu_recurse[type]) != 1)
		return;

	/*
	 * Pin thread to current CPU so that the unlock code gets the
	 * same per-CPU epoch record:
	 */
	sched_pin();

	record = &DPCPU_GET(linux_epoch_record[type]);

	/*
	 * Use a critical section to prevent recursion inside
	 * ck_epoch_begin(). Else this function supports recursion.
	 */
	critical_enter();
	ck_epoch_begin(&record->epoch_record,
	    (ck_epoch_section_t *)&ts->rcu_section[type]);
	TAILQ_INSERT_TAIL(&record->ts_head, ts, rcu_entry[type]);
	critical_exit();
}

void
linux_rcu_read_unlock(unsigned type)
{
	struct linux_epoch_record *record;
	struct task_struct *ts;

	MPASS(type < RCU_TYPE_MAX);

	if (RCU_SKIP())
		return;

	ts = current;

	/* assert valid refcount */
	MPASS(ts->rcu_recurse[type] > 0);

	if (--(ts->rcu_recurse[type]) != 0)
		return;

	record = &DPCPU_GET(linux_epoch_record[type]);

	/*
	 * Use a critical section to prevent recursion inside
	 * ck_epoch_end(). Else this function supports recursion.
	 */
	critical_enter();
	ck_epoch_end(&record->epoch_record,
	    (ck_epoch_section_t *)&ts->rcu_section[type]);
	TAILQ_REMOVE(&record->ts_head, ts, rcu_entry[type]);
	critical_exit();

	sched_unpin();
}

bool
linux_rcu_read_lock_held(unsigned type)
{
#ifdef INVARIANTS
	struct linux_epoch_record *record __diagused;
	struct task_struct *ts;

	MPASS(type < RCU_TYPE_MAX);

	if (RCU_SKIP())
		return (false);

	if (__current_unallocated(curthread))
		return (false);

	ts = current;
	if (ts->rcu_recurse[type] == 0)
		return (false);

	MPASS(curthread->td_pinned != 0);
	MPASS((record = &DPCPU_GET(linux_epoch_record[type])) &&
	    record->epoch_record.active != 0);
#endif

	return (true);
}

static void
linux_synchronize_rcu_cb(ck_epoch_t *epoch __unused, ck_epoch_record_t *epoch_record, void *arg __unused)
{
	struct linux_epoch_record *record =
	    container_of(epoch_record, struct linux_epoch_record, epoch_record);
	struct thread *td = curthread;
	struct task_struct *ts;

	/* check if blocked on the current CPU */
	if (record->cpuid == PCPU_GET(cpuid)) {
		bool is_sleeping = 0;
		u_char prio = 0;

		/*
		 * Find the lowest priority or sleeping thread which
		 * is blocking synchronization on this CPU core. All
		 * the threads in the queue are CPU-pinned and cannot
		 * go anywhere while the current thread is locked.
		 */
		TAILQ_FOREACH(ts, &record->ts_head, rcu_entry[record->type]) {
			if (ts->task_thread->td_priority > prio)
				prio = ts->task_thread->td_priority;
			is_sleeping |= (ts->task_thread->td_inhibitors != 0);
		}

		if (is_sleeping) {
			thread_unlock(td);
			pause("W", 1);
			thread_lock(td);
		} else {
			/* set new thread priority */
			sched_prio(td, prio);
			/* task switch */
			mi_switch(SW_VOL | SWT_RELINQUISH);
			/*
			 * It is important the thread lock is dropped
			 * while yielding to allow other threads to
			 * acquire the lock pointed to by
			 * TDQ_LOCKPTR(td). Currently mi_switch() will
			 * unlock the thread lock before
			 * returning. Else a deadlock like situation
			 * might happen.
			 */
			thread_lock(td);
		}
	} else {
		/*
		 * To avoid spinning move execution to the other CPU
		 * which is blocking synchronization. Set highest
		 * thread priority so that code gets run. The thread
		 * priority will be restored later.
		 */
		sched_prio(td, 0);
		sched_bind(td, record->cpuid);
	}
}
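
/*
 * linux_synchronize_rcu() below backs the Linux-side synchronize_rcu()
 * and thereby the usual read-copy-update publish/retire pattern. A
 * hedged consumer-side sketch (all names are illustrative only):
 *
 *	new = kmalloc(sizeof(*new), GFP_KERNEL);
 *	*new = *old;
 *	new->value = 42;
 *	rcu_assign_pointer(gp, new);	// publish the new version
 *	synchronize_rcu();		// wait out readers that may
 *					// still reference "old"
 *	kfree(old);			// now safe to reclaim
 *
 * The waiting itself is done by ck_epoch_synchronize_wait() together
 * with the callback above, which either yields to, or migrates
 * towards, pinned readers that are still inside their read sections.
 */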
void
linux_synchronize_rcu(unsigned type)
{
	struct thread *td;
	int was_bound;
	int old_cpu;
	int old_pinned;
	u_char old_prio;

	MPASS(type < RCU_TYPE_MAX);

	if (RCU_SKIP())
		return;

	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
	    "linux_synchronize_rcu() can sleep");

	td = curthread;
	DROP_GIANT();

	/*
	 * Synchronizing RCU might change the CPU core this function
	 * is running on. Save current values:
	 */
	thread_lock(td);

	old_cpu = PCPU_GET(cpuid);
	old_pinned = td->td_pinned;
	old_prio = td->td_priority;
	was_bound = sched_is_bound(td);
	sched_unbind(td);
	td->td_pinned = 0;
	sched_bind(td, old_cpu);

	ck_epoch_synchronize_wait(&linux_epoch[type],
	    &linux_synchronize_rcu_cb, NULL);

	/* restore CPU binding, if any */
	if (was_bound != 0) {
		sched_bind(td, old_cpu);
	} else {
		/* get thread back to initial CPU, if any */
		if (old_pinned != 0)
			sched_bind(td, old_cpu);
		sched_unbind(td);
	}
	/* restore pinned after bind */
	td->td_pinned = old_pinned;

	/* restore thread priority */
	sched_prio(td, old_prio);
	thread_unlock(td);

	PICKUP_GIANT();
}

void
linux_rcu_barrier(unsigned type)
{
	struct linux_epoch_head *head;

	MPASS(type < RCU_TYPE_MAX);

	/*
	 * This function is not obligated to wait for a grace period.
	 * It only waits for RCU callbacks that have already been posted.
	 * If there are no RCU callbacks posted, rcu_barrier() can return
	 * immediately.
	 */
	head = &linux_epoch_head[type];

	/* wait for callbacks to complete */
	taskqueue_drain(linux_irq_work_tq, &head->task);
}

void
linux_call_rcu(unsigned type, struct rcu_head *context, rcu_callback_t func)
{
	struct callback_head *rcu;
	struct linux_epoch_head *head;

	MPASS(type < RCU_TYPE_MAX);

	rcu = (struct callback_head *)context;
	head = &linux_epoch_head[type];

	rcu->func = func;
	llist_add(&rcu->node, &head->cb_head);
	taskqueue_enqueue(linux_irq_work_tq, &head->task);
}

int
init_srcu_struct(struct srcu_struct *srcu)
{
	return (0);
}

void
cleanup_srcu_struct(struct srcu_struct *srcu)
{
}

int
srcu_read_lock(struct srcu_struct *srcu)
{
	linux_rcu_read_lock(RCU_TYPE_SLEEPABLE);
	return (0);
}

void
srcu_read_unlock(struct srcu_struct *srcu, int key __unused)
{
	linux_rcu_read_unlock(RCU_TYPE_SLEEPABLE);
}

void
synchronize_srcu(struct srcu_struct *srcu)
{
	linux_synchronize_rcu(RCU_TYPE_SLEEPABLE);
}

void
srcu_barrier(struct srcu_struct *srcu)
{
	linux_rcu_barrier(RCU_TYPE_SLEEPABLE);
}
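
/*
 * The SRCU wrappers above all map onto the RCU_TYPE_SLEEPABLE epoch,
 * so readers are allowed to sleep inside the read-side section. A
 * hedged usage sketch (the srcu_struct instance and the blocking
 * operation are illustrative only):
 *
 *	static struct srcu_struct foo_srcu;
 *
 *	init_srcu_struct(&foo_srcu);
 *	idx = srcu_read_lock(&foo_srcu);
 *	// ... may block, e.g. on a mutex or an allocation ...
 *	srcu_read_unlock(&foo_srcu, idx);
 *
 *	synchronize_srcu(&foo_srcu);	// waits for such readers
 *	cleanup_srcu_struct(&foo_srcu);
 *
 * Note that the "key" returned by srcu_read_lock() is always 0 and is
 * ignored on unlock; all sleepable readers share one epoch.
 */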