/*
 * drivers/cpufreq/cpufreq_conservative.c
 *
 * Copyright (C) 2001 Russell King
 *           (C) 2003 Venkatesh Pallipadi <[email protected]>.
 *                     Jun Nakajima <[email protected]>
 *           (C) 2009 Alexander Clouter <[email protected]>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
#include <linux/cpu.h>
#include <linux/jiffies.h>
#include <linux/kernel_stat.h>
#include <linux/mutex.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/ktime.h>
#include <linux/sched.h>

/*
 * dbs is used in this file as shorthand for demand-based switching.
 * It helps to keep variable names smaller and simpler.
 */

#define DEF_FREQUENCY_UP_THRESHOLD		(80)
#define DEF_FREQUENCY_DOWN_THRESHOLD		(20)

/*
 * The polling frequency of this governor depends on the capability of
 * the processor. The default polling frequency is 1000 times the
 * transition latency of the processor. The governor will work on any
 * processor with transition latency <= 10 ms, using an appropriate
 * sampling rate.
 * For CPUs with transition latency > 10 ms (mostly drivers with
 * CPUFREQ_ETERNAL) this governor will not work.
 * All times here are in us.
 */
#define MIN_SAMPLING_RATE_RATIO			(2)

static unsigned int min_sampling_rate;

#define LATENCY_MULTIPLIER			(1000)
#define MIN_LATENCY_MULTIPLIER			(100)
#define DEF_SAMPLING_DOWN_FACTOR		(1)
#define MAX_SAMPLING_DOWN_FACTOR		(10)
#define TRANSITION_LATENCY_LIMIT		(10 * 1000 * 1000)

static void do_dbs_timer(struct work_struct *work);

struct cpu_dbs_info_s {
	cputime64_t prev_cpu_idle;
	cputime64_t prev_cpu_wall;
	cputime64_t prev_cpu_nice;
	struct cpufreq_policy *cur_policy;
	struct delayed_work work;
	unsigned int down_skip;
	unsigned int requested_freq;
	int cpu;
	unsigned int enable:1;
	/*
	 * percpu mutex that serializes governor limit change with
	 * do_dbs_timer invocation. We do not want do_dbs_timer to run
	 * when the user is changing the governor or limits.
	 */
	struct mutex timer_mutex;
};
static DEFINE_PER_CPU(struct cpu_dbs_info_s, cs_cpu_dbs_info);

static unsigned int dbs_enable;	/* number of CPUs using this policy */

/*
 * dbs_mutex protects dbs_enable in governor start/stop.
 */
static DEFINE_MUTEX(dbs_mutex);

static struct dbs_tuners {
	unsigned int sampling_rate;
	unsigned int sampling_down_factor;
	unsigned int up_threshold;
	unsigned int down_threshold;
	unsigned int ignore_nice;
	unsigned int freq_step;
} dbs_tuners_ins = {
	.up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
	.down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD,
	.sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR,
	.ignore_nice = 0,
	.freq_step = 5,
};
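/*
 * Idle time is derived indirectly below: everything the scheduler
 * accounts as busy (user, system, irq, softirq, steal and nice time)
 * is summed and subtracted from wall time. Note that nice time counts
 * as busy here; when the ignore_nice tunable is set, dbs_check_cpu()
 * adds the nice delta back into idle time to compensate.
 * get_cpu_idle_time() prefers the NO_HZ microsecond-resolution idle
 * counter and falls back to this jiffy-based estimate when that
 * counter is unavailable (-1ULL).
 */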
static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
						  cputime64_t *wall)
{
	cputime64_t idle_time;
	cputime64_t cur_wall_time;
	cputime64_t busy_time;

	cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
	busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user,
			kstat_cpu(cpu).cpustat.system);

	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq);
	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq);
	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal);
	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice);

	idle_time = cputime64_sub(cur_wall_time, busy_time);
	if (wall)
		*wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);

	return (cputime64_t)jiffies_to_usecs(idle_time);
}

static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
{
	u64 idle_time = get_cpu_idle_time_us(cpu, wall);

	if (idle_time == -1ULL)
		return get_cpu_idle_time_jiffy(cpu, wall);

	return idle_time;
}

/* keep track of frequency transitions */
static int
dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
		     void *data)
{
	struct cpufreq_freqs *freq = data;
	struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cs_cpu_dbs_info,
							freq->cpu);

	struct cpufreq_policy *policy;

	if (!this_dbs_info->enable)
		return 0;

	policy = this_dbs_info->cur_policy;

	/*
	 * we only care if our internally tracked freq moves outside the
	 * 'valid' range of frequencies available to us; otherwise we do
	 * not change it
	 */
	if (this_dbs_info->requested_freq > policy->max
			|| this_dbs_info->requested_freq < policy->min)
		this_dbs_info->requested_freq = freq->new;

	return 0;
}

static struct notifier_block dbs_cpufreq_notifier_block = {
	.notifier_call = dbs_cpufreq_notifier
};

/************************** sysfs interface ************************/
static ssize_t show_sampling_rate_min(struct kobject *kobj,
				      struct attribute *attr, char *buf)
{
	return sprintf(buf, "%u\n", min_sampling_rate);
}

define_one_global_ro(sampling_rate_min);

/* cpufreq_conservative Governor Tunables */
#define show_one(file_name, object)					\
static ssize_t show_##file_name						\
(struct kobject *kobj, struct attribute *attr, char *buf)		\
{									\
	return sprintf(buf, "%u\n", dbs_tuners_ins.object);		\
}
show_one(sampling_rate, sampling_rate);
show_one(sampling_down_factor, sampling_down_factor);
show_one(up_threshold, up_threshold);
show_one(down_threshold, down_threshold);
show_one(ignore_nice_load, ignore_nice);
show_one(freq_step, freq_step);
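/*
 * With the standard global cpufreq sysfs layout these tunables appear
 * under /sys/devices/system/cpu/cpufreq/conservative/, e.g.:
 *
 *	# cat /sys/devices/system/cpu/cpufreq/conservative/up_threshold
 *	80
 *	# echo 90 > /sys/devices/system/cpu/cpufreq/conservative/up_threshold
 */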
static ssize_t store_sampling_down_factor(struct kobject *a,
					  struct attribute *b,
					  const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
		return -EINVAL;

	dbs_tuners_ins.sampling_down_factor = input;
	return count;
}

static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b,
				   const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	if (ret != 1)
		return -EINVAL;

	dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate);
	return count;
}

static ssize_t store_up_threshold(struct kobject *a, struct attribute *b,
				  const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	if (ret != 1 || input > 100 ||
			input <= dbs_tuners_ins.down_threshold)
		return -EINVAL;

	dbs_tuners_ins.up_threshold = input;
	return count;
}

static ssize_t store_down_threshold(struct kobject *a, struct attribute *b,
				    const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	/*
	 * cannot be lower than 11, otherwise freq will never fall (see
	 * the 10-point safety margin in dbs_check_cpu())
	 */
	if (ret != 1 || input < 11 || input > 100 ||
			input >= dbs_tuners_ins.up_threshold)
		return -EINVAL;

	dbs_tuners_ins.down_threshold = input;
	return count;
}

static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
				      const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	unsigned int j;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	if (input > 1)
		input = 1;

	if (input == dbs_tuners_ins.ignore_nice) /* nothing to do */
		return count;

	dbs_tuners_ins.ignore_nice = input;

	/* we need to re-evaluate prev_cpu_idle */
	for_each_online_cpu(j) {
		struct cpu_dbs_info_s *dbs_info;
		dbs_info = &per_cpu(cs_cpu_dbs_info, j);
		dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
						&dbs_info->prev_cpu_wall);
		if (dbs_tuners_ins.ignore_nice)
			dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
	}
	return count;
}

static ssize_t store_freq_step(struct kobject *a, struct attribute *b,
			       const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	if (ret != 1)
		return -EINVAL;

	if (input > 100)
		input = 100;

	/*
	 * no need to reject a freq_step of zero here as the user might
	 * actually want it, though they would be crazy :)
	 */
	dbs_tuners_ins.freq_step = input;
	return count;
}

define_one_global_rw(sampling_rate);
define_one_global_rw(sampling_down_factor);
define_one_global_rw(up_threshold);
define_one_global_rw(down_threshold);
define_one_global_rw(ignore_nice_load);
define_one_global_rw(freq_step);

static struct attribute *dbs_attributes[] = {
	&sampling_rate_min.attr,
	&sampling_rate.attr,
	&sampling_down_factor.attr,
	&up_threshold.attr,
	&down_threshold.attr,
	&ignore_nice_load.attr,
	&freq_step.attr,
	NULL
};

static struct attribute_group dbs_attr_group = {
	.attrs = dbs_attributes,
	.name = "conservative",
};

/************************** sysfs end ************************/
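/*
 * Per-CPU load over a sampling window is computed as
 *
 *	load = 100 * (wall_time - idle_time) / wall_time
 *
 * e.g. a CPU that was idle for 20 ms of an 80 ms window reports a load
 * of 75. The decisions below act on the highest load seen across all
 * CPUs covered by the policy.
 */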
static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
{
	unsigned int load = 0;
	unsigned int max_load = 0;
	unsigned int freq_target;

	struct cpufreq_policy *policy;
	unsigned int j;

	policy = this_dbs_info->cur_policy;

	/*
	 * Every sampling_rate we check whether the current idle time is
	 * less than 20% (default); if it is, we try to increase the
	 * frequency. Every sampling_rate*sampling_down_factor we check
	 * whether the current idle time is more than 80%; if it is, we
	 * try to decrease the frequency.
	 *
	 * Frequency increases and reductions happen in steps of
	 * 5% (default) of the maximum frequency.
	 */

	/* Get Absolute Load */
	for_each_cpu(j, policy->cpus) {
		struct cpu_dbs_info_s *j_dbs_info;
		cputime64_t cur_wall_time, cur_idle_time;
		unsigned int idle_time, wall_time;

		j_dbs_info = &per_cpu(cs_cpu_dbs_info, j);

		cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);

		wall_time = (unsigned int) cputime64_sub(cur_wall_time,
				j_dbs_info->prev_cpu_wall);
		j_dbs_info->prev_cpu_wall = cur_wall_time;

		idle_time = (unsigned int) cputime64_sub(cur_idle_time,
				j_dbs_info->prev_cpu_idle);
		j_dbs_info->prev_cpu_idle = cur_idle_time;

		if (dbs_tuners_ins.ignore_nice) {
			cputime64_t cur_nice;
			unsigned long cur_nice_jiffies;

			cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice,
					j_dbs_info->prev_cpu_nice);
			/*
			 * Assumption: nice time between sampling periods will
			 * be less than 2^32 jiffies for 32 bit sys
			 */
			cur_nice_jiffies = (unsigned long)
					cputime64_to_jiffies64(cur_nice);

			j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
			idle_time += jiffies_to_usecs(cur_nice_jiffies);
		}

		if (unlikely(!wall_time || wall_time < idle_time))
			continue;

		load = 100 * (wall_time - idle_time) / wall_time;

		if (load > max_load)
			max_load = load;
	}

	/*
	 * break out if we 'cannot' reduce the speed as the user might
	 * want freq_step to be zero
	 */
	if (dbs_tuners_ins.freq_step == 0)
		return;

	/* Check for frequency increase */
	if (max_load > dbs_tuners_ins.up_threshold) {
		this_dbs_info->down_skip = 0;

		/* if we are already at full speed then break out early */
		if (this_dbs_info->requested_freq == policy->max)
			return;

		freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100;

		/* max freq cannot be less than 100. But who knows.... */
		if (unlikely(freq_target == 0))
			freq_target = 5;

		this_dbs_info->requested_freq += freq_target;
		if (this_dbs_info->requested_freq > policy->max)
			this_dbs_info->requested_freq = policy->max;

		__cpufreq_driver_target(policy, this_dbs_info->requested_freq,
			CPUFREQ_RELATION_H);
		return;
	}
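	/*
	 * Example: with the default freq_step of 5 and a policy->max of
	 * 2000000 kHz, freq_target = (5 * 2000000) / 100 = 100000 kHz,
	 * so requested_freq moves in 100 MHz steps in either direction.
	 */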
	/*
	 * The optimal frequency is the lowest frequency that can support
	 * the current CPU usage without triggering the up policy. To be
	 * safe, we require the load to fall 10 points below the
	 * threshold.
	 */
	if (max_load < (dbs_tuners_ins.down_threshold - 10)) {
		freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100;

		this_dbs_info->requested_freq -= freq_target;
		if (this_dbs_info->requested_freq < policy->min)
			this_dbs_info->requested_freq = policy->min;

		/*
		 * if we cannot reduce the frequency anymore, break out early
		 */
		if (policy->cur == policy->min)
			return;

		__cpufreq_driver_target(policy, this_dbs_info->requested_freq,
				CPUFREQ_RELATION_H);
		return;
	}
}

static void do_dbs_timer(struct work_struct *work)
{
	struct cpu_dbs_info_s *dbs_info =
		container_of(work, struct cpu_dbs_info_s, work.work);
	unsigned int cpu = dbs_info->cpu;

	/* We want all CPUs to do sampling nearly on the same jiffy */
	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);

	delay -= jiffies % delay;

	mutex_lock(&dbs_info->timer_mutex);

	dbs_check_cpu(dbs_info);

	schedule_delayed_work_on(cpu, &dbs_info->work, delay);
	mutex_unlock(&dbs_info->timer_mutex);
}

static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
{
	/* We want all CPUs to do sampling nearly on the same jiffy */
	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
	delay -= jiffies % delay;

	dbs_info->enable = 1;
	INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);
	schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay);
}

static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
{
	dbs_info->enable = 0;
	cancel_delayed_work_sync(&dbs_info->work);
}
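/*
 * Governor entry point, invoked by the cpufreq core:
 *
 *   CPUFREQ_GOV_START  - attach to a policy; the first START overall
 *                        also creates the sysfs group, registers the
 *                        transition notifier and derives the sampling
 *                        rate from the driver's transition latency.
 *   CPUFREQ_GOV_STOP   - detach from a policy; the last STOP undoes
 *                        the global setup.
 *   CPUFREQ_GOV_LIMITS - policy->min/max changed; clamp the current
 *                        frequency back into the new range.
 */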
static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
				unsigned int event)
{
	unsigned int cpu = policy->cpu;
	struct cpu_dbs_info_s *this_dbs_info;
	unsigned int j;
	int rc;

	this_dbs_info = &per_cpu(cs_cpu_dbs_info, cpu);

	switch (event) {
	case CPUFREQ_GOV_START:
		if ((!cpu_online(cpu)) || (!policy->cur))
			return -EINVAL;

		mutex_lock(&dbs_mutex);

		for_each_cpu(j, policy->cpus) {
			struct cpu_dbs_info_s *j_dbs_info;
			j_dbs_info = &per_cpu(cs_cpu_dbs_info, j);
			j_dbs_info->cur_policy = policy;

			j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
						&j_dbs_info->prev_cpu_wall);
			if (dbs_tuners_ins.ignore_nice) {
				j_dbs_info->prev_cpu_nice =
						kstat_cpu(j).cpustat.nice;
			}
		}
		this_dbs_info->down_skip = 0;
		this_dbs_info->requested_freq = policy->cur;

		mutex_init(&this_dbs_info->timer_mutex);
		dbs_enable++;
		/*
		 * Start the timerschedule work when this governor is
		 * used for the first time
		 */
		if (dbs_enable == 1) {
			unsigned int latency;
			/* policy latency is in ns. Convert it to us first */
			latency = policy->cpuinfo.transition_latency / 1000;
			if (latency == 0)
				latency = 1;

			rc = sysfs_create_group(cpufreq_global_kobject,
						&dbs_attr_group);
			if (rc) {
				mutex_unlock(&dbs_mutex);
				return rc;
			}

			/*
			 * conservative does not implement micro-accounting
			 * like the ondemand governor, thus we are bound to
			 * jiffies/HZ
			 */
			min_sampling_rate =
				MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10);
			/* Bring kernel and HW constraints together */
			min_sampling_rate = max(min_sampling_rate,
					MIN_LATENCY_MULTIPLIER * latency);
			dbs_tuners_ins.sampling_rate =
				max(min_sampling_rate,
				    latency * LATENCY_MULTIPLIER);

			cpufreq_register_notifier(
					&dbs_cpufreq_notifier_block,
					CPUFREQ_TRANSITION_NOTIFIER);
		}
		mutex_unlock(&dbs_mutex);

		dbs_timer_init(this_dbs_info);

		break;

	case CPUFREQ_GOV_STOP:
		dbs_timer_exit(this_dbs_info);

		mutex_lock(&dbs_mutex);
		dbs_enable--;
		mutex_destroy(&this_dbs_info->timer_mutex);

		/*
		 * Stop the timerschedule work when this governor is no
		 * longer used on any CPU
		 */
		if (dbs_enable == 0)
			cpufreq_unregister_notifier(
					&dbs_cpufreq_notifier_block,
					CPUFREQ_TRANSITION_NOTIFIER);

		mutex_unlock(&dbs_mutex);
		if (!dbs_enable)
			sysfs_remove_group(cpufreq_global_kobject,
					   &dbs_attr_group);

		break;

	case CPUFREQ_GOV_LIMITS:
		mutex_lock(&this_dbs_info->timer_mutex);
		if (policy->max < this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(
					this_dbs_info->cur_policy,
					policy->max, CPUFREQ_RELATION_H);
		else if (policy->min > this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(
					this_dbs_info->cur_policy,
					policy->min, CPUFREQ_RELATION_L);
		mutex_unlock(&this_dbs_info->timer_mutex);

		break;
	}
	return 0;
}

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
static
#endif
struct cpufreq_governor cpufreq_gov_conservative = {
	.name			= "conservative",
	.governor		= cpufreq_governor_dbs,
	.max_transition_latency	= TRANSITION_LATENCY_LIMIT,
	.owner			= THIS_MODULE,
};

static int __init cpufreq_gov_dbs_init(void)
{
	return cpufreq_register_governor(&cpufreq_gov_conservative);
}

static void __exit cpufreq_gov_dbs_exit(void)
{
	cpufreq_unregister_governor(&cpufreq_gov_conservative);
}


MODULE_AUTHOR("Alexander Clouter <[email protected]>");
MODULE_DESCRIPTION("'cpufreq_conservative' - A dynamic cpufreq governor for "
		"Low Latency Frequency Transition capable processors "
		"optimised for use in a battery environment");
MODULE_LICENSE("GPL");

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
fs_initcall(cpufreq_gov_dbs_init);
#else
module_init(cpufreq_gov_dbs_init);
#endif
module_exit(cpufreq_gov_dbs_exit);
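/*
 * When conservative is built in as the default governor it is
 * registered via fs_initcall() so that it is already available when
 * cpufreq drivers probe later in boot; otherwise normal module_init()
 * ordering suffices.
 */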