// SPDX-License-Identifier: GPL-2.0-only
/*
 * async.c: Asynchronous function calls for boot performance
 *
 * (C) Copyright 2009 Intel Corporation
 * Author: Arjan van de Ven <[email protected]>
 */


/*

Goals and Theory of Operation

The primary goal of this feature is to reduce the kernel boot time,
by doing various independent hardware delays and discovery operations
decoupled and not strictly serialized.

More specifically, the asynchronous function call concept allows
certain operations (primarily during system boot) to happen
asynchronously, out of order, while these operations still
have their externally visible parts happen sequentially and in-order.
(not unlike how out-of-order CPUs retire their instructions in order)

Key to the asynchronous function call implementation is the concept of
a "sequence cookie" (which, although it has an abstracted type, can be
thought of as a monotonically incrementing number).

The async core will assign each scheduled event such a sequence cookie and
pass this to the called functions.

The asynchronously called function should before doing a globally visible
operation, such as registering device numbers, call the
async_synchronize_cookie() function and pass in its own cookie. The
async_synchronize_cookie() function will make sure that all asynchronous
operations that were scheduled prior to the operation corresponding with the
cookie have completed.

Subsystem/driver initialization code that scheduled asynchronous probe
functions, but which shares global resources with other drivers/subsystems
that do not use the asynchronous call feature, need to do a full
synchronization with the async_synchronize_full() function, before returning
from their init function. This is to maintain strict ordering between the
asynchronous and synchronous parts of the kernel.

*/

#include <linux/async.h>
#include <linux/atomic.h>
#include <linux/export.h>
#include <linux/ktime.h>
#include <linux/pid.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/workqueue.h>

#include "workqueue_internal.h"

/* Next sequence cookie to hand out; read and incremented under async_lock. */
static async_cookie_t next_cookie = 1;

/* Cap on outstanding entries before schedulers fall back to synchronous execution. */
#define MAX_WORK		32768
#define ASYNC_COOKIE_MAX	ULLONG_MAX	/* infinity cookie */

static LIST_HEAD(async_global_pending);	/* pending from all registered doms */
static ASYNC_DOMAIN(async_dfl_domain);
static DEFINE_SPINLOCK(async_lock);
static struct workqueue_struct *async_wq;

/*
 * One scheduled asynchronous call. Allocated by the schedulers, queued on
 * its domain's pending list (and, for registered domains, the global
 * pending list) in cookie order, and freed by async_run_entry_fn() once
 * the function has run.
 */
struct async_entry {
	struct list_head	domain_list;	/* link on domain->pending */
	struct list_head	global_list;	/* link on async_global_pending */
	struct work_struct	work;		/* runs async_run_entry_fn() */
	async_cookie_t		cookie;
	async_func_t		func;
	void			*data;
	struct async_domain	*domain;
};

/* Woken after every completed entry; waited on by the synchronize functions. */
static DECLARE_WAIT_QUEUE_HEAD(async_done);

/* Number of entries currently allocated (pending or running). */
static atomic_t entry_count;

/* Elapsed time since @start in approximate microseconds (ns >> 10). */
static long long microseconds_since(ktime_t start)
{
	ktime_t now = ktime_get();
	return ktime_to_ns(ktime_sub(now, start)) >> 10;
}

/*
 * Return the cookie of the oldest pending entry in @domain — or, when
 * @domain is NULL, across all registered domains — or ASYNC_COOKIE_MAX
 * when nothing is pending. Entries are appended in increasing cookie
 * order under async_lock, so the first list entry is the oldest.
 */
static async_cookie_t lowest_in_progress(struct async_domain *domain)
{
	struct async_entry *first = NULL;
	async_cookie_t ret = ASYNC_COOKIE_MAX;
	unsigned long flags;

	spin_lock_irqsave(&async_lock, flags);

	if (domain) {
		if (!list_empty(&domain->pending))
			first = list_first_entry(&domain->pending,
					struct async_entry, domain_list);
	} else {
		if (!list_empty(&async_global_pending))
			first = list_first_entry(&async_global_pending,
					struct async_entry, global_list);
	}

	if (first)
		ret = first->cookie;

	spin_unlock_irqrestore(&async_lock, flags);
	return ret;
}

/*
 * pick the first pending entry and run it
 */
static void async_run_entry_fn(struct work_struct *work)
{
	struct async_entry *entry =
		container_of(work, struct async_entry, work);
	unsigned long flags;
	ktime_t calltime;

	/* 1) run (and print duration) */
	pr_debug("calling %lli_%pS @ %i\n", (long long)entry->cookie,
		 entry->func, task_pid_nr(current));
	calltime = ktime_get();

	entry->func(entry->data, entry->cookie);

	pr_debug("initcall %lli_%pS returned after %lld usecs\n",
		 (long long)entry->cookie, entry->func,
		 microseconds_since(calltime));

	/* 2) remove self from the pending queues */
	spin_lock_irqsave(&async_lock, flags);
	list_del_init(&entry->domain_list);
	list_del_init(&entry->global_list);

	/* 3) free the entry */
	kfree(entry);
	atomic_dec(&entry_count);

	spin_unlock_irqrestore(&async_lock, flags);

	/* 4) wake up any waiters */
	wake_up(&async_done);
}

/*
 * Common scheduling path: initialize the caller-allocated @entry, assign
 * its cookie and append it to the pending list(s) under async_lock, then
 * queue it on async_wq near @node. Returns the new cookie.
 */
static async_cookie_t __async_schedule_node_domain(async_func_t func,
						   void *data, int node,
						   struct async_domain *domain,
						   struct async_entry *entry)
{
	async_cookie_t newcookie;
	unsigned long flags;

	INIT_LIST_HEAD(&entry->domain_list);
	INIT_LIST_HEAD(&entry->global_list);
	INIT_WORK(&entry->work, async_run_entry_fn);
	entry->func = func;
	entry->data = data;
	entry->domain = domain;

	spin_lock_irqsave(&async_lock, flags);

	/* allocate cookie and queue */
	newcookie = entry->cookie = next_cookie++;

	list_add_tail(&entry->domain_list, &domain->pending);
	if (domain->registered)
		list_add_tail(&entry->global_list, &async_global_pending);

	atomic_inc(&entry_count);
	spin_unlock_irqrestore(&async_lock, flags);

	/* schedule for execution */
	queue_work_node(node, async_wq, &entry->work);

	return newcookie;
}

/**
 * async_schedule_node_domain - NUMA specific version of async_schedule_domain
 * @func: function to execute asynchronously
 * @data: data pointer to pass to the function
 * @node: NUMA node that we want to schedule this on or close to
 * @domain: the domain
 *
 * Returns an async_cookie_t that may be used for checkpointing later.
 * @domain may be used in the async_synchronize_*_domain() functions to
 * wait within a certain synchronization
 * domain rather than globally.
 *
 * Note: This function may be called from atomic or non-atomic contexts.
 *
 * The node requested will be honored on a best effort basis. If the node
 * has no CPUs associated with it then the work is distributed among all
 * available CPUs.
 */
async_cookie_t async_schedule_node_domain(async_func_t func, void *data,
					  int node, struct async_domain *domain)
{
	struct async_entry *entry;
	unsigned long flags;
	async_cookie_t newcookie;

	/* allow irq-off callers */
	entry = kzalloc(sizeof(struct async_entry), GFP_ATOMIC);

	/*
	 * If we're out of memory or if there's too much work
	 * pending already, we execute synchronously.
	 */
	if (!entry || atomic_read(&entry_count) > MAX_WORK) {
		kfree(entry);
		spin_lock_irqsave(&async_lock, flags);
		newcookie = next_cookie++;
		spin_unlock_irqrestore(&async_lock, flags);

		/* low on memory.. run synchronously */
		func(data, newcookie);
		return newcookie;
	}

	return __async_schedule_node_domain(func, data, node, domain, entry);
}
EXPORT_SYMBOL_GPL(async_schedule_node_domain);

/**
 * async_schedule_node - NUMA specific version of async_schedule
 * @func: function to execute asynchronously
 * @data: data pointer to pass to the function
 * @node: NUMA node that we want to schedule this on or close to
 *
 * Returns an async_cookie_t that may be used for checkpointing later.
 * Note: This function may be called from atomic or non-atomic contexts.
 *
 * The node requested will be honored on a best effort basis. If the node
 * has no CPUs associated with it then the work is distributed among all
 * available CPUs.
 */
async_cookie_t async_schedule_node(async_func_t func, void *data, int node)
{
	return async_schedule_node_domain(func, data, node, &async_dfl_domain);
}
EXPORT_SYMBOL_GPL(async_schedule_node);

/**
 * async_schedule_dev_nocall - A simplified variant of async_schedule_dev()
 * @func: function to execute asynchronously
 * @dev: device argument to be passed to function
 *
 * @dev is used as both the argument for the function and to provide NUMA
 * context for where to run the function.
 *
 * If the asynchronous execution of @func is scheduled successfully, return
 * true. Otherwise, do nothing and return false, unlike async_schedule_dev()
 * that will run the function synchronously then.
 */
bool async_schedule_dev_nocall(async_func_t func, struct device *dev)
{
	struct async_entry *entry;

	entry = kzalloc(sizeof(struct async_entry), GFP_KERNEL);

	/* Give up if there is no memory or too much work. */
	if (!entry || atomic_read(&entry_count) > MAX_WORK) {
		kfree(entry);
		return false;
	}

	__async_schedule_node_domain(func, dev, dev_to_node(dev),
				     &async_dfl_domain, entry);
	return true;
}

/**
 * async_synchronize_full - synchronize all asynchronous function calls
 *
 * This function waits until all asynchronous function calls have been done.
 */
void async_synchronize_full(void)
{
	async_synchronize_full_domain(NULL);
}
EXPORT_SYMBOL_GPL(async_synchronize_full);

/**
 * async_synchronize_full_domain - synchronize all asynchronous function within a certain domain
 * @domain: the domain to synchronize
 *
 * This function waits until all asynchronous function calls for the
 * synchronization domain specified by @domain have been done.
 */
void async_synchronize_full_domain(struct async_domain *domain)
{
	async_synchronize_cookie_domain(ASYNC_COOKIE_MAX, domain);
}
EXPORT_SYMBOL_GPL(async_synchronize_full_domain);

/**
 * async_synchronize_cookie_domain - synchronize asynchronous function calls within a certain domain with cookie checkpointing
 * @cookie: async_cookie_t to use as checkpoint
 * @domain: the domain to synchronize (%NULL for all registered domains)
 *
 * This function waits until all asynchronous function calls for the
 * synchronization domain specified by @domain submitted prior to @cookie
 * have been done.
 */
void async_synchronize_cookie_domain(async_cookie_t cookie, struct async_domain *domain)
{
	ktime_t starttime;

	pr_debug("async_waiting @ %i\n", task_pid_nr(current));
	starttime = ktime_get();

	wait_event(async_done, lowest_in_progress(domain) >= cookie);

	pr_debug("async_continuing @ %i after %lli usec\n", task_pid_nr(current),
		 microseconds_since(starttime));
}
EXPORT_SYMBOL_GPL(async_synchronize_cookie_domain);

/**
 * async_synchronize_cookie - synchronize asynchronous function calls with cookie checkpointing
 * @cookie: async_cookie_t to use as
 * checkpoint
 *
 * This function waits until all asynchronous function calls prior to @cookie
 * have been done.
 */
void async_synchronize_cookie(async_cookie_t cookie)
{
	async_synchronize_cookie_domain(cookie, &async_dfl_domain);
}
EXPORT_SYMBOL_GPL(async_synchronize_cookie);

/**
 * current_is_async - is %current an async worker task?
 *
 * Returns %true if %current is an async worker task.
 */
bool current_is_async(void)
{
	struct worker *worker = current_wq_worker();

	return worker && worker->current_func == async_run_entry_fn;
}
EXPORT_SYMBOL_GPL(current_is_async);

/* Early-boot setup of the dedicated async workqueue. */
void __init async_init(void)
{
	/*
	 * Async can schedule a number of interdependent work items. However,
	 * unbound workqueues can handle only up to min_active interdependent
	 * work items. The default min_active of 8 isn't sufficient for async
	 * and can lead to stalls. Let's use a dedicated workqueue with raised
	 * min_active.
	 */
	async_wq = alloc_workqueue("async", WQ_UNBOUND, 0);
	BUG_ON(!async_wq);
	workqueue_set_min_active(async_wq, WQ_DFL_ACTIVE);
}