/*
 * acpi_pad.c ACPI Processor Aggregator Driver
 *
 * Copyright (c) 2009, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 *
 */

#include <linux/kernel.h>
#include <linux/cpumask.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/cpu.h>
#include <linux/clockchips.h>
#include <linux/slab.h>
#include <acpi/acpi_bus.h>
#include <acpi/acpi_drivers.h>
#include <asm/mwait.h>

#define ACPI_PROCESSOR_AGGREGATOR_CLASS "acpi_pad"
#define ACPI_PROCESSOR_AGGREGATOR_DEVICE_NAME "Processor Aggregator"
#define ACPI_PROCESSOR_AGGREGATOR_NOTIFY 0x80
static DEFINE_MUTEX(isolated_cpus_lock);
static DEFINE_MUTEX(round_robin_lock);

static unsigned long power_saving_mwait_eax;

static unsigned char tsc_detected_unstable;
static unsigned char tsc_marked_unstable;
static unsigned char lapic_detected_unstable;
static unsigned char lapic_marked_unstable;

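/*
 * Probe CPUID for MWAIT support and remember the hint (EAX value) for the
 * deepest C-state/sub-state the CPU advertises; power_saving_thread() uses
 * this hint when forcing a CPU idle.
 */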
static void power_saving_mwait_init(void)
{
        unsigned int eax, ebx, ecx, edx;
        unsigned int highest_cstate = 0;
        unsigned int highest_subcstate = 0;
        int i;

        if (!boot_cpu_has(X86_FEATURE_MWAIT))
                return;
        if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
                return;

        cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);

        if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
            !(ecx & CPUID5_ECX_INTERRUPT_BREAK))
                return;

        edx >>= MWAIT_SUBSTATE_SIZE;
        for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
                if (edx & MWAIT_SUBSTATE_MASK) {
                        highest_cstate = i;
                        highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
                }
        }
        power_saving_mwait_eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
                (highest_subcstate - 1);

#if defined(CONFIG_X86)
        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_AMD:
        case X86_VENDOR_INTEL:
                /*
                 * AMD Fam10h TSC will tick in all
                 * C/P/S0/S1 states when this bit is set.
                 */
                if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
                        tsc_detected_unstable = 1;
                if (!boot_cpu_has(X86_FEATURE_ARAT))
                        lapic_detected_unstable = 1;
                break;
        default:
                /* TSC & LAPIC could halt in idle */
                tsc_detected_unstable = 1;
                lapic_detected_unstable = 1;
        }
#endif
}

static unsigned long cpu_weight[NR_CPUS];
static int tsk_in_cpu[NR_CPUS] = {[0 ... NR_CPUS-1] = -1};
static DECLARE_BITMAP(pad_busy_cpus_bits, NR_CPUS);
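/*
 * Pick the least-used online CPU for this forced-idle task, preferring CPUs
 * whose hyper-threading siblings are not already busy idling, and migrate
 * the calling thread there.
 */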
static void round_robin_cpu(unsigned int tsk_index)
{
        struct cpumask *pad_busy_cpus = to_cpumask(pad_busy_cpus_bits);
        cpumask_var_t tmp;
        int cpu;
        unsigned long min_weight = -1;
        unsigned long uninitialized_var(preferred_cpu);

        if (!alloc_cpumask_var(&tmp, GFP_KERNEL))
                return;

        mutex_lock(&round_robin_lock);
        cpumask_clear(tmp);
        for_each_cpu(cpu, pad_busy_cpus)
                cpumask_or(tmp, tmp, topology_thread_cpumask(cpu));
        cpumask_andnot(tmp, cpu_online_mask, tmp);
        /* avoid HT siblings if possible */
        if (cpumask_empty(tmp))
                cpumask_andnot(tmp, cpu_online_mask, pad_busy_cpus);
        if (cpumask_empty(tmp)) {
                mutex_unlock(&round_robin_lock);
                return;
        }
        for_each_cpu(cpu, tmp) {
                if (cpu_weight[cpu] < min_weight) {
                        min_weight = cpu_weight[cpu];
                        preferred_cpu = cpu;
                }
        }

        if (tsk_in_cpu[tsk_index] != -1)
                cpumask_clear_cpu(tsk_in_cpu[tsk_index], pad_busy_cpus);
        tsk_in_cpu[tsk_index] = preferred_cpu;
        cpumask_set_cpu(preferred_cpu, pad_busy_cpus);
        cpu_weight[preferred_cpu]++;
        mutex_unlock(&round_robin_lock);

        set_cpus_allowed_ptr(current, cpumask_of(preferred_cpu));
}

static void exit_round_robin(unsigned int tsk_index)
{
        struct cpumask *pad_busy_cpus = to_cpumask(pad_busy_cpus_bits);
        cpumask_clear_cpu(tsk_in_cpu[tsk_index], pad_busy_cpus);
        tsk_in_cpu[tsk_index] = -1;
}

static unsigned int idle_pct = 5; /* percentage */
static unsigned int round_robin_time = 1; /* second */
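/*
 * Per-CPU forced-idle thread: runs as a SCHED_RR real-time task, rotates to a
 * new CPU every round_robin_time seconds, and holds the CPU in MWAIT for
 * (100 - idle_pct)% of each second before napping for the remaining idle_pct%.
 */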
static int power_saving_thread(void *data)
{
        struct sched_param param = {.sched_priority = 1};
        int do_sleep;
        unsigned int tsk_index = (unsigned long)data;
        u64 last_jiffies = 0;

        sched_setscheduler(current, SCHED_RR, &param);

        while (!kthread_should_stop()) {
                int cpu;
                u64 expire_time;

                try_to_freeze();

                /* round robin to cpus */
                if (last_jiffies + round_robin_time * HZ < jiffies) {
                        last_jiffies = jiffies;
                        round_robin_cpu(tsk_index);
                }

                do_sleep = 0;

                expire_time = jiffies + HZ * (100 - idle_pct) / 100;

                while (!need_resched()) {
                        if (tsc_detected_unstable && !tsc_marked_unstable) {
                                /* TSC could halt in idle, so notify users */
                                mark_tsc_unstable("TSC halts in idle");
                                tsc_marked_unstable = 1;
                        }
                        if (lapic_detected_unstable && !lapic_marked_unstable) {
                                int i;
                                /* LAPIC could halt in idle, so notify users */
                                for_each_online_cpu(i)
                                        clockevents_notify(
                                                CLOCK_EVT_NOTIFY_BROADCAST_ON,
                                                &i);
                                lapic_marked_unstable = 1;
                        }
                        local_irq_disable();
                        cpu = smp_processor_id();
                        if (lapic_marked_unstable)
                                clockevents_notify(
                                        CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
                        stop_critical_timings();

                        __monitor((void *)&current_thread_info()->flags, 0, 0);
                        smp_mb();
                        if (!need_resched())
                                __mwait(power_saving_mwait_eax, 1);

                        start_critical_timings();
                        if (lapic_marked_unstable)
                                clockevents_notify(
                                        CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
                        local_irq_enable();

                        if (jiffies > expire_time) {
                                do_sleep = 1;
                                break;
                        }
                }

                /*
                 * The RT scheduler throttles RT task runtime: once an RT task
                 * has used 95% of CPU time, it is scheduled out for the
                 * remaining 5% so other tasks are not starved. That mechanism
                 * only works when every CPU is running an RT task; if one CPU
                 * has none, RT tasks from other CPUs borrow its CPU time and
                 * can exceed the 95% limit. To keep the anti-starvation
                 * behaviour working, take an explicit nap here.
                 */
                if (do_sleep)
                        schedule_timeout_killable(HZ * idle_pct / 100);
        }

        exit_round_robin(tsk_index);
        return 0;
}

static struct task_struct *ps_tsks[NR_CPUS];
static unsigned int ps_tsk_num;
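/*
 * One power-saving kthread is spawned for each CPU that should be forced
 * idle; ps_tsk_num tracks how many are currently running.
 */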
static int create_power_saving_task(void)
{
        int rc = -ENOMEM;

        ps_tsks[ps_tsk_num] = kthread_run(power_saving_thread,
                (void *)(unsigned long)ps_tsk_num,
                "acpi_pad/%d", ps_tsk_num);
        rc = IS_ERR(ps_tsks[ps_tsk_num]) ? PTR_ERR(ps_tsks[ps_tsk_num]) : 0;
        if (!rc)
                ps_tsk_num++;
        else
                ps_tsks[ps_tsk_num] = NULL;

        return rc;
}

static void destroy_power_saving_task(void)
{
        if (ps_tsk_num > 0) {
                ps_tsk_num--;
                kthread_stop(ps_tsks[ps_tsk_num]);
                ps_tsks[ps_tsk_num] = NULL;
        }
}

static void set_power_saving_task_num(unsigned int num)
{
        if (num > ps_tsk_num) {
                while (ps_tsk_num < num) {
                        if (create_power_saving_task())
                                return;
                }
        } else if (num < ps_tsk_num) {
                while (ps_tsk_num > num)
                        destroy_power_saving_task();
        }
}

static void acpi_pad_idle_cpus(unsigned int num_cpus)
{
        get_online_cpus();

        num_cpus = min_t(unsigned int, num_cpus, num_online_cpus());
        set_power_saving_task_num(num_cpus);

        put_online_cpus();
}

static uint32_t acpi_pad_idle_cpus_num(void)
{
        return ps_tsk_num;
}

static ssize_t acpi_pad_rrtime_store(struct device *dev,
        struct device_attribute *attr, const char *buf, size_t count)
{
        unsigned long num;
        if (kstrtoul(buf, 0, &num))
                return -EINVAL;
        if (num < 1 || num >= 100)
                return -EINVAL;
        mutex_lock(&isolated_cpus_lock);
        round_robin_time = num;
        mutex_unlock(&isolated_cpus_lock);
        return count;
}

static ssize_t acpi_pad_rrtime_show(struct device *dev,
        struct device_attribute *attr, char *buf)
{
        return scnprintf(buf, PAGE_SIZE, "%d\n", round_robin_time);
}
static DEVICE_ATTR(rrtime, S_IRUGO|S_IWUSR,
        acpi_pad_rrtime_show,
        acpi_pad_rrtime_store);

static ssize_t acpi_pad_idlepct_store(struct device *dev,
        struct device_attribute *attr, const char *buf, size_t count)
{
        unsigned long num;
        if (kstrtoul(buf, 0, &num))
                return -EINVAL;
        if (num < 1 || num >= 100)
                return -EINVAL;
        mutex_lock(&isolated_cpus_lock);
        idle_pct = num;
        mutex_unlock(&isolated_cpus_lock);
        return count;
}

static ssize_t acpi_pad_idlepct_show(struct device *dev,
        struct device_attribute *attr, char *buf)
{
        return scnprintf(buf, PAGE_SIZE, "%d\n", idle_pct);
}
static DEVICE_ATTR(idlepct, S_IRUGO|S_IWUSR,
        acpi_pad_idlepct_show,
        acpi_pad_idlepct_store);

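/*
 * "idlecpus" sysfs attribute: writing N asks the driver to force N CPUs idle;
 * reading returns the mask of CPUs currently held idle.
 */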
static ssize_t acpi_pad_idlecpus_store(struct device *dev,
        struct device_attribute *attr, const char *buf, size_t count)
{
        unsigned long num;
        if (kstrtoul(buf, 0, &num))
                return -EINVAL;
        mutex_lock(&isolated_cpus_lock);
        acpi_pad_idle_cpus(num);
        mutex_unlock(&isolated_cpus_lock);
        return count;
}

static ssize_t acpi_pad_idlecpus_show(struct device *dev,
        struct device_attribute *attr, char *buf)
{
        int n = 0;
        n = cpumask_scnprintf(buf, PAGE_SIZE-2, to_cpumask(pad_busy_cpus_bits));
        buf[n++] = '\n';
        buf[n] = '\0';
        return n;
}
static DEVICE_ATTR(idlecpus, S_IRUGO|S_IWUSR,
        acpi_pad_idlecpus_show,
        acpi_pad_idlecpus_store);

static int acpi_pad_add_sysfs(struct acpi_device *device)
{
        int result;

        result = device_create_file(&device->dev, &dev_attr_idlecpus);
        if (result)
                return -ENODEV;
        result = device_create_file(&device->dev, &dev_attr_idlepct);
        if (result) {
                device_remove_file(&device->dev, &dev_attr_idlecpus);
                return -ENODEV;
        }
        result = device_create_file(&device->dev, &dev_attr_rrtime);
        if (result) {
                device_remove_file(&device->dev, &dev_attr_idlecpus);
                device_remove_file(&device->dev, &dev_attr_idlepct);
                return -ENODEV;
        }
        return 0;
}

static void acpi_pad_remove_sysfs(struct acpi_device *device)
{
        device_remove_file(&device->dev, &dev_attr_idlecpus);
        device_remove_file(&device->dev, &dev_attr_idlepct);
        device_remove_file(&device->dev, &dev_attr_rrtime);
}

/*
 * Query firmware how many CPUs should be idle
 * return -1 on failure
 */
static int acpi_pad_pur(acpi_handle handle)
{
        struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
        union acpi_object *package;
        int num = -1;

        if (ACPI_FAILURE(acpi_evaluate_object(handle, "_PUR", NULL, &buffer)))
                return num;

        if (!buffer.length || !buffer.pointer)
                return num;

        package = buffer.pointer;

        if (package->type == ACPI_TYPE_PACKAGE &&
                package->package.count == 2 &&
                package->package.elements[0].integer.value == 1) /* rev 1 */

                num = package->package.elements[1].integer.value;

        kfree(buffer.pointer);
        return num;
}

/* Notify firmware how many CPUs are idle */
static void acpi_pad_ost(acpi_handle handle, int stat,
        uint32_t idle_cpus)
{
        union acpi_object params[3] = {
                {.type = ACPI_TYPE_INTEGER,},
                {.type = ACPI_TYPE_INTEGER,},
                {.type = ACPI_TYPE_BUFFER,},
        };
        struct acpi_object_list arg_list = {3, params};

        params[0].integer.value = ACPI_PROCESSOR_AGGREGATOR_NOTIFY;
        params[1].integer.value = stat;
        params[2].buffer.length = 4;
        params[2].buffer.pointer = (void *)&idle_cpus;
        acpi_evaluate_object(handle, "_OST", &arg_list, NULL);
}

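/*
 * Handle an ACPI notification: ask _PUR how many CPUs firmware wants idled,
 * apply that number, then report the result back through _OST.
 */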
static void acpi_pad_handle_notify(acpi_handle handle)
{
        int num_cpus;
        uint32_t idle_cpus;

        mutex_lock(&isolated_cpus_lock);
        num_cpus = acpi_pad_pur(handle);
        if (num_cpus < 0) {
                mutex_unlock(&isolated_cpus_lock);
                return;
        }
        acpi_pad_idle_cpus(num_cpus);
        idle_cpus = acpi_pad_idle_cpus_num();
        acpi_pad_ost(handle, 0, idle_cpus);
        mutex_unlock(&isolated_cpus_lock);
}

static void acpi_pad_notify(acpi_handle handle, u32 event,
        void *data)
{
        struct acpi_device *device = data;

        switch (event) {
        case ACPI_PROCESSOR_AGGREGATOR_NOTIFY:
                acpi_pad_handle_notify(handle);
                acpi_bus_generate_proc_event(device, event, 0);
                acpi_bus_generate_netlink_event(device->pnp.device_class,
                        dev_name(&device->dev), event, 0);
                break;
        default:
                pr_warn("Unsupported event [0x%x]\n", event);
                break;
        }
}

static int acpi_pad_add(struct acpi_device *device)
{
        acpi_status status;

        strcpy(acpi_device_name(device), ACPI_PROCESSOR_AGGREGATOR_DEVICE_NAME);
        strcpy(acpi_device_class(device), ACPI_PROCESSOR_AGGREGATOR_CLASS);

        if (acpi_pad_add_sysfs(device))
                return -ENODEV;

        status = acpi_install_notify_handler(device->handle,
                ACPI_DEVICE_NOTIFY, acpi_pad_notify, device);
        if (ACPI_FAILURE(status)) {
                acpi_pad_remove_sysfs(device);
                return -ENODEV;
        }

        return 0;
}

static int acpi_pad_remove(struct acpi_device *device)
{
        mutex_lock(&isolated_cpus_lock);
        acpi_pad_idle_cpus(0);
        mutex_unlock(&isolated_cpus_lock);

        acpi_remove_notify_handler(device->handle,
                ACPI_DEVICE_NOTIFY, acpi_pad_notify);
        acpi_pad_remove_sysfs(device);
        return 0;
}

static const struct acpi_device_id pad_device_ids[] = {
        {"ACPI000C", 0},
        {"", 0},
};
MODULE_DEVICE_TABLE(acpi, pad_device_ids);

static struct acpi_driver acpi_pad_driver = {
        .name = "processor_aggregator",
        .class = ACPI_PROCESSOR_AGGREGATOR_CLASS,
        .ids = pad_device_ids,
        .ops = {
                .add = acpi_pad_add,
                .remove = acpi_pad_remove,
        },
};

static int __init acpi_pad_init(void)
{
        power_saving_mwait_init();
        if (power_saving_mwait_eax == 0)
                return -EINVAL;

        return acpi_bus_register_driver(&acpi_pad_driver);
}

static void __exit acpi_pad_exit(void)
{
        acpi_bus_unregister_driver(&acpi_pad_driver);
}

module_init(acpi_pad_init);
module_exit(acpi_pad_exit);
MODULE_AUTHOR("Shaohua Li<shaohua.li@intel.com>");
MODULE_DESCRIPTION("ACPI Processor Aggregator Driver");
MODULE_LICENSE("GPL");