/* Path: blob/master/drivers/macintosh/windfarm_pm112.c (15109 views) */
/*1* Windfarm PowerMac thermal control.2* Control loops for machines with SMU and PPC970MP processors.3*4* Copyright (C) 2005 Paul Mackerras, IBM Corp. <[email protected]>5* Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corp.6*7* Use and redistribute under the terms of the GNU GPL v2.8*/9#include <linux/types.h>10#include <linux/errno.h>11#include <linux/kernel.h>12#include <linux/device.h>13#include <linux/platform_device.h>14#include <linux/reboot.h>15#include <asm/prom.h>16#include <asm/smu.h>1718#include "windfarm.h"19#include "windfarm_pid.h"2021#define VERSION "0.2"2223#define DEBUG24#undef LOTSA_DEBUG2526#ifdef DEBUG27#define DBG(args...) printk(args)28#else29#define DBG(args...) do { } while(0)30#endif3132#ifdef LOTSA_DEBUG33#define DBG_LOTS(args...) printk(args)34#else35#define DBG_LOTS(args...) do { } while(0)36#endif3738/* define this to force CPU overtemp to 60 degree, useful for testing39* the overtemp code40*/41#undef HACKED_OVERTEMP4243/* We currently only handle 2 chips, 4 cores... 
*/44#define NR_CHIPS 245#define NR_CORES 446#define NR_CPU_FANS 3 * NR_CHIPS4748/* Controls and sensors */49static struct wf_sensor *sens_cpu_temp[NR_CORES];50static struct wf_sensor *sens_cpu_power[NR_CORES];51static struct wf_sensor *hd_temp;52static struct wf_sensor *slots_power;53static struct wf_sensor *u4_temp;5455static struct wf_control *cpu_fans[NR_CPU_FANS];56static char *cpu_fan_names[NR_CPU_FANS] = {57"cpu-rear-fan-0",58"cpu-rear-fan-1",59"cpu-front-fan-0",60"cpu-front-fan-1",61"cpu-pump-0",62"cpu-pump-1",63};64static struct wf_control *cpufreq_clamp;6566/* Second pump isn't required (and isn't actually present) */67#define CPU_FANS_REQD (NR_CPU_FANS - 2)68#define FIRST_PUMP 469#define LAST_PUMP 57071/* We keep a temperature history for average calculation of 180s */72#define CPU_TEMP_HIST_SIZE 1807374/* Scale factor for fan speed, *100 */75static int cpu_fan_scale[NR_CPU_FANS] = {76100,77100,7897, /* inlet fans run at 97% of exhaust fan */7997,80100, /* updated later */81100, /* updated later */82};8384static struct wf_control *backside_fan;85static struct wf_control *slots_fan;86static struct wf_control *drive_bay_fan;8788/* PID loop state */89static struct wf_cpu_pid_state cpu_pid[NR_CORES];90static u32 cpu_thist[CPU_TEMP_HIST_SIZE];91static int cpu_thist_pt;92static s64 cpu_thist_total;93static s32 cpu_all_tmax = 100 << 16;94static int cpu_last_target;95static struct wf_pid_state backside_pid;96static int backside_tick;97static struct wf_pid_state slots_pid;98static int slots_started;99static struct wf_pid_state drive_bay_pid;100static int drive_bay_tick;101102static int nr_cores;103static int have_all_controls;104static int have_all_sensors;105static int started;106107static int failure_state;108#define FAILURE_SENSOR 1109#define FAILURE_FAN 2110#define FAILURE_PERM 4111#define FAILURE_LOW_OVERTEMP 8112#define FAILURE_HIGH_OVERTEMP 16113114/* Overtemp values */115#define LOW_OVER_AVERAGE 0116#define LOW_OVER_IMMEDIATE (10 << 16)117#define 
LOW_OVER_CLEAR ((-10) << 16)118#define HIGH_OVER_IMMEDIATE (14 << 16)119#define HIGH_OVER_AVERAGE (10 << 16)120#define HIGH_OVER_IMMEDIATE (14 << 16)121122123/* Implementation... */124static int create_cpu_loop(int cpu)125{126int chip = cpu / 2;127int core = cpu & 1;128struct smu_sdbp_header *hdr;129struct smu_sdbp_cpupiddata *piddata;130struct wf_cpu_pid_param pid;131struct wf_control *main_fan = cpu_fans[0];132s32 tmax;133int fmin;134135/* Get PID params from the appropriate SAT */136hdr = smu_sat_get_sdb_partition(chip, 0xC8 + core, NULL);137if (hdr == NULL) {138printk(KERN_WARNING"windfarm: can't get CPU PID fan config\n");139return -EINVAL;140}141piddata = (struct smu_sdbp_cpupiddata *)&hdr[1];142143/* Get FVT params to get Tmax; if not found, assume default */144hdr = smu_sat_get_sdb_partition(chip, 0xC4 + core, NULL);145if (hdr) {146struct smu_sdbp_fvt *fvt = (struct smu_sdbp_fvt *)&hdr[1];147tmax = fvt->maxtemp << 16;148} else149tmax = 95 << 16; /* default to 95 degrees C */150151/* We keep a global tmax for overtemp calculations */152if (tmax < cpu_all_tmax)153cpu_all_tmax = tmax;154155/*156* Darwin has a minimum fan speed of 1000 rpm for the 4-way and157* 515 for the 2-way. That appears to be overkill, so for now,158* impose a minimum of 750 or 515.159*/160fmin = (nr_cores > 2) ? 750 : 515;161162/* Initialize PID loop */163pid.interval = 1; /* seconds */164pid.history_len = piddata->history_len;165pid.gd = piddata->gd;166pid.gp = piddata->gp;167pid.gr = piddata->gr / piddata->history_len;168pid.pmaxadj = (piddata->max_power << 16) - (piddata->power_adj << 8);169pid.ttarget = tmax - (piddata->target_temp_delta << 16);170pid.tmax = tmax;171pid.min = main_fan->ops->get_min(main_fan);172pid.max = main_fan->ops->get_max(main_fan);173if (pid.min < fmin)174pid.min = fmin;175176wf_cpu_pid_init(&cpu_pid[cpu], &pid);177return 0;178}179180static void cpu_max_all_fans(void)181{182int i;183184/* We max all CPU fans in case of a sensor error. 
We also do the185* cpufreq clamping now, even if it's supposedly done later by the186* generic code anyway, we do it earlier here to react faster187*/188if (cpufreq_clamp)189wf_control_set_max(cpufreq_clamp);190for (i = 0; i < NR_CPU_FANS; ++i)191if (cpu_fans[i])192wf_control_set_max(cpu_fans[i]);193}194195static int cpu_check_overtemp(s32 temp)196{197int new_state = 0;198s32 t_avg, t_old;199200/* First check for immediate overtemps */201if (temp >= (cpu_all_tmax + LOW_OVER_IMMEDIATE)) {202new_state |= FAILURE_LOW_OVERTEMP;203if ((failure_state & FAILURE_LOW_OVERTEMP) == 0)204printk(KERN_ERR "windfarm: Overtemp due to immediate CPU"205" temperature !\n");206}207if (temp >= (cpu_all_tmax + HIGH_OVER_IMMEDIATE)) {208new_state |= FAILURE_HIGH_OVERTEMP;209if ((failure_state & FAILURE_HIGH_OVERTEMP) == 0)210printk(KERN_ERR "windfarm: Critical overtemp due to"211" immediate CPU temperature !\n");212}213214/* We calculate a history of max temperatures and use that for the215* overtemp management216*/217t_old = cpu_thist[cpu_thist_pt];218cpu_thist[cpu_thist_pt] = temp;219cpu_thist_pt = (cpu_thist_pt + 1) % CPU_TEMP_HIST_SIZE;220cpu_thist_total -= t_old;221cpu_thist_total += temp;222t_avg = cpu_thist_total / CPU_TEMP_HIST_SIZE;223224DBG_LOTS("t_avg = %d.%03d (out: %d.%03d, in: %d.%03d)\n",225FIX32TOPRINT(t_avg), FIX32TOPRINT(t_old), FIX32TOPRINT(temp));226227/* Now check for average overtemps */228if (t_avg >= (cpu_all_tmax + LOW_OVER_AVERAGE)) {229new_state |= FAILURE_LOW_OVERTEMP;230if ((failure_state & FAILURE_LOW_OVERTEMP) == 0)231printk(KERN_ERR "windfarm: Overtemp due to average CPU"232" temperature !\n");233}234if (t_avg >= (cpu_all_tmax + HIGH_OVER_AVERAGE)) {235new_state |= FAILURE_HIGH_OVERTEMP;236if ((failure_state & FAILURE_HIGH_OVERTEMP) == 0)237printk(KERN_ERR "windfarm: Critical overtemp due to"238" average CPU temperature !\n");239}240241/* Now handle overtemp conditions. 
We don't currently use the windfarm242* overtemp handling core as it's not fully suited to the needs of those243* new machine. This will be fixed later.244*/245if (new_state) {246/* High overtemp -> immediate shutdown */247if (new_state & FAILURE_HIGH_OVERTEMP)248machine_power_off();249if ((failure_state & new_state) != new_state)250cpu_max_all_fans();251failure_state |= new_state;252} else if ((failure_state & FAILURE_LOW_OVERTEMP) &&253(temp < (cpu_all_tmax + LOW_OVER_CLEAR))) {254printk(KERN_ERR "windfarm: Overtemp condition cleared !\n");255failure_state &= ~FAILURE_LOW_OVERTEMP;256}257258return failure_state & (FAILURE_LOW_OVERTEMP | FAILURE_HIGH_OVERTEMP);259}260261static void cpu_fans_tick(void)262{263int err, cpu;264s32 greatest_delta = 0;265s32 temp, power, t_max = 0;266int i, t, target = 0;267struct wf_sensor *sr;268struct wf_control *ct;269struct wf_cpu_pid_state *sp;270271DBG_LOTS(KERN_DEBUG);272for (cpu = 0; cpu < nr_cores; ++cpu) {273/* Get CPU core temperature */274sr = sens_cpu_temp[cpu];275err = sr->ops->get_value(sr, &temp);276if (err) {277DBG("\n");278printk(KERN_WARNING "windfarm: CPU %d temperature "279"sensor error %d\n", cpu, err);280failure_state |= FAILURE_SENSOR;281cpu_max_all_fans();282return;283}284285/* Keep track of highest temp */286t_max = max(t_max, temp);287288/* Get CPU power */289sr = sens_cpu_power[cpu];290err = sr->ops->get_value(sr, &power);291if (err) {292DBG("\n");293printk(KERN_WARNING "windfarm: CPU %d power "294"sensor error %d\n", cpu, err);295failure_state |= FAILURE_SENSOR;296cpu_max_all_fans();297return;298}299300/* Run PID */301sp = &cpu_pid[cpu];302t = wf_cpu_pid_run(sp, power, temp);303304if (cpu == 0 || sp->last_delta > greatest_delta) {305greatest_delta = sp->last_delta;306target = t;307}308DBG_LOTS("[%d] P=%d.%.3d T=%d.%.3d ",309cpu, FIX32TOPRINT(power), FIX32TOPRINT(temp));310}311DBG_LOTS("fans = %d, t_max = %d.%03d\n", target, FIX32TOPRINT(t_max));312313/* Darwin limits decrease to 20 per iteration */314if 
(target < (cpu_last_target - 20))315target = cpu_last_target - 20;316cpu_last_target = target;317for (cpu = 0; cpu < nr_cores; ++cpu)318cpu_pid[cpu].target = target;319320/* Handle possible overtemps */321if (cpu_check_overtemp(t_max))322return;323324/* Set fans */325for (i = 0; i < NR_CPU_FANS; ++i) {326ct = cpu_fans[i];327if (ct == NULL)328continue;329err = ct->ops->set_value(ct, target * cpu_fan_scale[i] / 100);330if (err) {331printk(KERN_WARNING "windfarm: fan %s reports "332"error %d\n", ct->name, err);333failure_state |= FAILURE_FAN;334break;335}336}337}338339/* Backside/U4 fan */340static struct wf_pid_param backside_param = {341.interval = 5,342.history_len = 2,343.gd = 48 << 20,344.gp = 5 << 20,345.gr = 0,346.itarget = 64 << 16,347.additive = 1,348};349350static void backside_fan_tick(void)351{352s32 temp;353int speed;354int err;355356if (!backside_fan || !u4_temp)357return;358if (!backside_tick) {359/* first time; initialize things */360printk(KERN_INFO "windfarm: Backside control loop started.\n");361backside_param.min = backside_fan->ops->get_min(backside_fan);362backside_param.max = backside_fan->ops->get_max(backside_fan);363wf_pid_init(&backside_pid, &backside_param);364backside_tick = 1;365}366if (--backside_tick > 0)367return;368backside_tick = backside_pid.param.interval;369370err = u4_temp->ops->get_value(u4_temp, &temp);371if (err) {372printk(KERN_WARNING "windfarm: U4 temp sensor error %d\n",373err);374failure_state |= FAILURE_SENSOR;375wf_control_set_max(backside_fan);376return;377}378speed = wf_pid_run(&backside_pid, temp);379DBG_LOTS("backside PID temp=%d.%.3d speed=%d\n",380FIX32TOPRINT(temp), speed);381382err = backside_fan->ops->set_value(backside_fan, speed);383if (err) {384printk(KERN_WARNING "windfarm: backside fan error %d\n", err);385failure_state |= FAILURE_FAN;386}387}388389/* Drive bay fan */390static struct wf_pid_param drive_bay_prm = {391.interval = 5,392.history_len = 2,393.gd = 30 << 20,394.gp = 5 << 20,395.gr = 0,396.itarget 
= 40 << 16,397.additive = 1,398};399400static void drive_bay_fan_tick(void)401{402s32 temp;403int speed;404int err;405406if (!drive_bay_fan || !hd_temp)407return;408if (!drive_bay_tick) {409/* first time; initialize things */410printk(KERN_INFO "windfarm: Drive bay control loop started.\n");411drive_bay_prm.min = drive_bay_fan->ops->get_min(drive_bay_fan);412drive_bay_prm.max = drive_bay_fan->ops->get_max(drive_bay_fan);413wf_pid_init(&drive_bay_pid, &drive_bay_prm);414drive_bay_tick = 1;415}416if (--drive_bay_tick > 0)417return;418drive_bay_tick = drive_bay_pid.param.interval;419420err = hd_temp->ops->get_value(hd_temp, &temp);421if (err) {422printk(KERN_WARNING "windfarm: drive bay temp sensor "423"error %d\n", err);424failure_state |= FAILURE_SENSOR;425wf_control_set_max(drive_bay_fan);426return;427}428speed = wf_pid_run(&drive_bay_pid, temp);429DBG_LOTS("drive_bay PID temp=%d.%.3d speed=%d\n",430FIX32TOPRINT(temp), speed);431432err = drive_bay_fan->ops->set_value(drive_bay_fan, speed);433if (err) {434printk(KERN_WARNING "windfarm: drive bay fan error %d\n", err);435failure_state |= FAILURE_FAN;436}437}438439/* PCI slots area fan */440/* This makes the fan speed proportional to the power consumed */441static struct wf_pid_param slots_param = {442.interval = 1,443.history_len = 2,444.gd = 0,445.gp = 0,446.gr = 0x1277952,447.itarget = 0,448.min = 1560,449.max = 3510,450};451452static void slots_fan_tick(void)453{454s32 power;455int speed;456int err;457458if (!slots_fan || !slots_power)459return;460if (!slots_started) {461/* first time; initialize things */462printk(KERN_INFO "windfarm: Slots control loop started.\n");463wf_pid_init(&slots_pid, &slots_param);464slots_started = 1;465}466467err = slots_power->ops->get_value(slots_power, &power);468if (err) {469printk(KERN_WARNING "windfarm: slots power sensor error %d\n",470err);471failure_state |= FAILURE_SENSOR;472wf_control_set_max(slots_fan);473return;474}475speed = wf_pid_run(&slots_pid, 
power);476DBG_LOTS("slots PID power=%d.%.3d speed=%d\n",477FIX32TOPRINT(power), speed);478479err = slots_fan->ops->set_value(slots_fan, speed);480if (err) {481printk(KERN_WARNING "windfarm: slots fan error %d\n", err);482failure_state |= FAILURE_FAN;483}484}485486static void set_fail_state(void)487{488int i;489490if (cpufreq_clamp)491wf_control_set_max(cpufreq_clamp);492for (i = 0; i < NR_CPU_FANS; ++i)493if (cpu_fans[i])494wf_control_set_max(cpu_fans[i]);495if (backside_fan)496wf_control_set_max(backside_fan);497if (slots_fan)498wf_control_set_max(slots_fan);499if (drive_bay_fan)500wf_control_set_max(drive_bay_fan);501}502503static void pm112_tick(void)504{505int i, last_failure;506507if (!started) {508started = 1;509printk(KERN_INFO "windfarm: CPUs control loops started.\n");510for (i = 0; i < nr_cores; ++i) {511if (create_cpu_loop(i) < 0) {512failure_state = FAILURE_PERM;513set_fail_state();514break;515}516}517DBG_LOTS("cpu_all_tmax=%d.%03d\n", FIX32TOPRINT(cpu_all_tmax));518519#ifdef HACKED_OVERTEMP520cpu_all_tmax = 60 << 16;521#endif522}523524/* Permanent failure, bail out */525if (failure_state & FAILURE_PERM)526return;527/* Clear all failure bits except low overtemp which will be eventually528* cleared by the control loop itself529*/530last_failure = failure_state;531failure_state &= FAILURE_LOW_OVERTEMP;532cpu_fans_tick();533backside_fan_tick();534slots_fan_tick();535drive_bay_fan_tick();536537DBG_LOTS("last_failure: 0x%x, failure_state: %x\n",538last_failure, failure_state);539540/* Check for failures. 
Any failure causes cpufreq clamping */541if (failure_state && last_failure == 0 && cpufreq_clamp)542wf_control_set_max(cpufreq_clamp);543if (failure_state == 0 && last_failure && cpufreq_clamp)544wf_control_set_min(cpufreq_clamp);545546/* That's it for now, we might want to deal with other failures547* differently in the future though548*/549}550551static void pm112_new_control(struct wf_control *ct)552{553int i, max_exhaust;554555if (cpufreq_clamp == NULL && !strcmp(ct->name, "cpufreq-clamp")) {556if (wf_get_control(ct) == 0)557cpufreq_clamp = ct;558}559560for (i = 0; i < NR_CPU_FANS; ++i) {561if (!strcmp(ct->name, cpu_fan_names[i])) {562if (cpu_fans[i] == NULL && wf_get_control(ct) == 0)563cpu_fans[i] = ct;564break;565}566}567if (i >= NR_CPU_FANS) {568/* not a CPU fan, try the others */569if (!strcmp(ct->name, "backside-fan")) {570if (backside_fan == NULL && wf_get_control(ct) == 0)571backside_fan = ct;572} else if (!strcmp(ct->name, "slots-fan")) {573if (slots_fan == NULL && wf_get_control(ct) == 0)574slots_fan = ct;575} else if (!strcmp(ct->name, "drive-bay-fan")) {576if (drive_bay_fan == NULL && wf_get_control(ct) == 0)577drive_bay_fan = ct;578}579return;580}581582for (i = 0; i < CPU_FANS_REQD; ++i)583if (cpu_fans[i] == NULL)584return;585586/* work out pump scaling factors */587max_exhaust = cpu_fans[0]->ops->get_max(cpu_fans[0]);588for (i = FIRST_PUMP; i <= LAST_PUMP; ++i)589if ((ct = cpu_fans[i]) != NULL)590cpu_fan_scale[i] =591ct->ops->get_max(ct) * 100 / max_exhaust;592593have_all_controls = 1;594}595596static void pm112_new_sensor(struct wf_sensor *sr)597{598unsigned int i;599600if (!strncmp(sr->name, "cpu-temp-", 9)) {601i = sr->name[9] - '0';602if (sr->name[10] == 0 && i < NR_CORES &&603sens_cpu_temp[i] == NULL && wf_get_sensor(sr) == 0)604sens_cpu_temp[i] = sr;605606} else if (!strncmp(sr->name, "cpu-power-", 10)) {607i = sr->name[10] - '0';608if (sr->name[11] == 0 && i < NR_CORES &&609sens_cpu_power[i] == NULL && wf_get_sensor(sr) == 
0)610sens_cpu_power[i] = sr;611} else if (!strcmp(sr->name, "hd-temp")) {612if (hd_temp == NULL && wf_get_sensor(sr) == 0)613hd_temp = sr;614} else if (!strcmp(sr->name, "slots-power")) {615if (slots_power == NULL && wf_get_sensor(sr) == 0)616slots_power = sr;617} else if (!strcmp(sr->name, "backside-temp")) {618if (u4_temp == NULL && wf_get_sensor(sr) == 0)619u4_temp = sr;620} else621return;622623/* check if we have all the sensors we need */624for (i = 0; i < nr_cores; ++i)625if (sens_cpu_temp[i] == NULL || sens_cpu_power[i] == NULL)626return;627628have_all_sensors = 1;629}630631static int pm112_wf_notify(struct notifier_block *self,632unsigned long event, void *data)633{634switch (event) {635case WF_EVENT_NEW_SENSOR:636pm112_new_sensor(data);637break;638case WF_EVENT_NEW_CONTROL:639pm112_new_control(data);640break;641case WF_EVENT_TICK:642if (have_all_controls && have_all_sensors)643pm112_tick();644}645return 0;646}647648static struct notifier_block pm112_events = {649.notifier_call = pm112_wf_notify,650};651652static int wf_pm112_probe(struct platform_device *dev)653{654wf_register_client(&pm112_events);655return 0;656}657658static int __devexit wf_pm112_remove(struct platform_device *dev)659{660wf_unregister_client(&pm112_events);661/* should release all sensors and controls */662return 0;663}664665static struct platform_driver wf_pm112_driver = {666.probe = wf_pm112_probe,667.remove = __devexit_p(wf_pm112_remove),668.driver = {669.name = "windfarm",670.owner = THIS_MODULE,671},672};673674static int __init wf_pm112_init(void)675{676struct device_node *cpu;677678if (!of_machine_is_compatible("PowerMac11,2"))679return -ENODEV;680681/* Count the number of CPU cores */682nr_cores = 0;683for (cpu = NULL; (cpu = of_find_node_by_type(cpu, "cpu")) != NULL; )684++nr_cores;685686printk(KERN_INFO "windfarm: initializing for dual-core desktop G5\n");687688#ifdef 
MODULE689request_module("windfarm_smu_controls");690request_module("windfarm_smu_sensors");691request_module("windfarm_smu_sat");692request_module("windfarm_lm75_sensor");693request_module("windfarm_max6690_sensor");694request_module("windfarm_cpufreq_clamp");695696#endif /* MODULE */697698platform_driver_register(&wf_pm112_driver);699return 0;700}701702static void __exit wf_pm112_exit(void)703{704platform_driver_unregister(&wf_pm112_driver);705}706707module_init(wf_pm112_init);708module_exit(wf_pm112_exit);709710MODULE_AUTHOR("Paul Mackerras <[email protected]>");711MODULE_DESCRIPTION("Thermal control for PowerMac11,2");712MODULE_LICENSE("GPL");713MODULE_ALIAS("platform:windfarm");714715716