Root/drivers/thermal/intel_powerclamp.c

1/*
2 * intel_powerclamp.c - package c-state idle injection
3 *
4 * Copyright (c) 2012, Intel Corporation.
5 *
6 * Authors:
7 * Arjan van de Ven <arjan@linux.intel.com>
8 * Jacob Pan <jacob.jun.pan@linux.intel.com>
9 *
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms and conditions of the GNU General Public License,
12 * version 2, as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope it will be useful, but WITHOUT
15 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 * more details.
18 *
19 * You should have received a copy of the GNU General Public License along with
20 * this program; if not, write to the Free Software Foundation, Inc.,
21 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
22 *
23 *
24 * TODO:
25 * 1. better handle wakeup from external interrupts, currently a fixed
26 * compensation is added to clamping duration when excessive amount
27 * of wakeups are observed during idle time. the reason is that in
28 * case of external interrupts without need for ack, clamping down
29 * cpu in non-irq context does not reduce irq. for majority of the
30 * cases, clamping down cpu does help reduce irq as well, we should
31 * be able to differentiate the two cases and give a quantitative
32 * solution for the irqs that we can control. perhaps based on
33 * get_cpu_iowait_time_us()
34 *
35 * 2. synchronization with other hw blocks
36 *
37 *
38 */
39
40#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
41
42#include <linux/module.h>
43#include <linux/kernel.h>
44#include <linux/delay.h>
45#include <linux/kthread.h>
46#include <linux/freezer.h>
47#include <linux/cpu.h>
48#include <linux/thermal.h>
49#include <linux/slab.h>
50#include <linux/tick.h>
51#include <linux/debugfs.h>
52#include <linux/seq_file.h>
53#include <linux/sched/rt.h>
54
55#include <asm/nmi.h>
56#include <asm/msr.h>
57#include <asm/mwait.h>
58#include <asm/cpu_device_id.h>
59#include <asm/idle.h>
60#include <asm/hardirq.h>
61
/*
 * Exclusive upper bound for the target idle ratio; requested ratios are
 * clamped to MAX_TARGET_RATIO - 1 and the calibration table has one entry
 * per ratio below this bound.
 */
#define MAX_TARGET_RATIO (50U)

/*
 * Every clamping period that completes without extra wakeups during idle
 * time bumps the confidence counter of the current target ratio. Once a
 * counter reaches CONFIDENCE_OK the runtime calibration result for that
 * ratio is considered valid.
 */
#define CONFIDENCE_OK (3)

/*
 * Default idle injection duration. The driver adjusts the sleep time in
 * between injections to meet the target idle ratio, similar to frequency
 * modulation.
 */
#define DEFAULT_DURATION_JIFFIES (6)
73
74static unsigned int target_mwait;
75static struct dentry *debug_dir;
76
77/* user selected target */
78static unsigned int set_target_ratio;
79static unsigned int current_ratio;
80static bool should_skip;
81static bool reduce_irq;
82static atomic_t idle_wakeup_counter;
83static unsigned int control_cpu; /* The cpu assigned to collect stat and update
84                  * control parameters. default to BSP but BSP
85                  * can be offlined.
86                  */
87static bool clamping;
88
89
90static struct task_struct * __percpu *powerclamp_thread;
91static struct thermal_cooling_device *cooling_dev;
92static unsigned long *cpu_clamping_mask; /* bit map for tracking per cpu
93                       * clamping thread
94                       */
95
96static unsigned int duration;
97static unsigned int pkg_cstate_ratio_cur;
98static unsigned int window_size;
99
100static int duration_set(const char *arg, const struct kernel_param *kp)
101{
102    int ret = 0;
103    unsigned long new_duration;
104
105    ret = kstrtoul(arg, 10, &new_duration);
106    if (ret)
107        goto exit;
108    if (new_duration > 25 || new_duration < 6) {
109        pr_err("Out of recommended range %lu, between 6-25ms\n",
110            new_duration);
111        ret = -EINVAL;
112    }
113
114    duration = clamp(new_duration, 6ul, 25ul);
115    smp_mb();
116
117exit:
118
119    return ret;
120}
121
122static struct kernel_param_ops duration_ops = {
123    .set = duration_set,
124    .get = param_get_int,
125};
126
127
128module_param_cb(duration, &duration_ops, &duration, 0644);
129MODULE_PARM_DESC(duration, "forced idle time for each attempt in msec.");
130
131struct powerclamp_calibration_data {
132    unsigned long confidence; /* used for calibration, basically a counter
133                    * gets incremented each time a clamping
134                    * period is completed without extra wakeups
135                    * once that counter is reached given level,
136                    * compensation is deemed usable.
137                    */
138    unsigned long steady_comp; /* steady state compensation used when
139                    * no extra wakeups occurred.
140                    */
141    unsigned long dynamic_comp; /* compensate excessive wakeup from idle
142                     * mostly from external interrupts.
143                     */
144};
145
146static struct powerclamp_calibration_data cal_data[MAX_TARGET_RATIO];
147
148static int window_size_set(const char *arg, const struct kernel_param *kp)
149{
150    int ret = 0;
151    unsigned long new_window_size;
152
153    ret = kstrtoul(arg, 10, &new_window_size);
154    if (ret)
155        goto exit_win;
156    if (new_window_size > 10 || new_window_size < 2) {
157        pr_err("Out of recommended window size %lu, between 2-10\n",
158            new_window_size);
159        ret = -EINVAL;
160    }
161
162    window_size = clamp(new_window_size, 2ul, 10ul);
163    smp_mb();
164
165exit_win:
166
167    return ret;
168}
169
170static struct kernel_param_ops window_size_ops = {
171    .set = window_size_set,
172    .get = param_get_int,
173};
174
175module_param_cb(window_size, &window_size_ops, &window_size, 0644);
176MODULE_PARM_DESC(window_size, "sliding window in number of clamping cycles\n"
177    "\tpowerclamp controls idle ratio within this window. larger\n"
178    "\twindow size results in slower response time but more smooth\n"
179    "\tclamping results. default to 2.");
180
181static void find_target_mwait(void)
182{
183    unsigned int eax, ebx, ecx, edx;
184    unsigned int highest_cstate = 0;
185    unsigned int highest_subcstate = 0;
186    int i;
187
188    if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
189        return;
190
191    cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
192
193    if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
194        !(ecx & CPUID5_ECX_INTERRUPT_BREAK))
195        return;
196
197    edx >>= MWAIT_SUBSTATE_SIZE;
198    for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
199        if (edx & MWAIT_SUBSTATE_MASK) {
200            highest_cstate = i;
201            highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
202        }
203    }
204    target_mwait = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
205        (highest_subcstate - 1);
206
207}
208
209static u64 pkg_state_counter(void)
210{
211    u64 val;
212    u64 count = 0;
213
214    static bool skip_c2;
215    static bool skip_c3;
216    static bool skip_c6;
217    static bool skip_c7;
218
219    if (!skip_c2) {
220        if (!rdmsrl_safe(MSR_PKG_C2_RESIDENCY, &val))
221            count += val;
222        else
223            skip_c2 = true;
224    }
225
226    if (!skip_c3) {
227        if (!rdmsrl_safe(MSR_PKG_C3_RESIDENCY, &val))
228            count += val;
229        else
230            skip_c3 = true;
231    }
232
233    if (!skip_c6) {
234        if (!rdmsrl_safe(MSR_PKG_C6_RESIDENCY, &val))
235            count += val;
236        else
237            skip_c6 = true;
238    }
239
240    if (!skip_c7) {
241        if (!rdmsrl_safe(MSR_PKG_C7_RESIDENCY, &val))
242            count += val;
243        else
244            skip_c7 = true;
245    }
246
247    return count;
248}
249
/*
 * Timer callback for the clamping threads. Intentionally does nothing:
 * the interrupt that fires the timer is what wakes the cpu up.
 */
static void noop_timer(unsigned long data)
{
}
254
255static unsigned int get_compensation(int ratio)
256{
257    unsigned int comp = 0;
258
259    /* we only use compensation if all adjacent ones are good */
260    if (ratio == 1 &&
261        cal_data[ratio].confidence >= CONFIDENCE_OK &&
262        cal_data[ratio + 1].confidence >= CONFIDENCE_OK &&
263        cal_data[ratio + 2].confidence >= CONFIDENCE_OK) {
264        comp = (cal_data[ratio].steady_comp +
265            cal_data[ratio + 1].steady_comp +
266            cal_data[ratio + 2].steady_comp) / 3;
267    } else if (ratio == MAX_TARGET_RATIO - 1 &&
268        cal_data[ratio].confidence >= CONFIDENCE_OK &&
269        cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
270        cal_data[ratio - 2].confidence >= CONFIDENCE_OK) {
271        comp = (cal_data[ratio].steady_comp +
272            cal_data[ratio - 1].steady_comp +
273            cal_data[ratio - 2].steady_comp) / 3;
274    } else if (cal_data[ratio].confidence >= CONFIDENCE_OK &&
275        cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
276        cal_data[ratio + 1].confidence >= CONFIDENCE_OK) {
277        comp = (cal_data[ratio].steady_comp +
278            cal_data[ratio - 1].steady_comp +
279            cal_data[ratio + 1].steady_comp) / 3;
280    }
281
282    /* REVISIT: simple penalty of double idle injection */
283    if (reduce_irq)
284        comp = ratio;
285    /* do not exceed limit */
286    if (comp + ratio >= MAX_TARGET_RATIO)
287        comp = MAX_TARGET_RATIO - ratio - 1;
288
289    return comp;
290}
291
292static void adjust_compensation(int target_ratio, unsigned int win)
293{
294    int delta;
295    struct powerclamp_calibration_data *d = &cal_data[target_ratio];
296
297    /*
298     * adjust compensations if confidence level has not been reached or
299     * there are too many wakeups during the last idle injection period, we
300     * cannot trust the data for compensation.
301     */
302    if (d->confidence >= CONFIDENCE_OK ||
303        atomic_read(&idle_wakeup_counter) >
304        win * num_online_cpus())
305        return;
306
307    delta = set_target_ratio - current_ratio;
308    /* filter out bad data */
309    if (delta >= 0 && delta <= (1+target_ratio/10)) {
310        if (d->steady_comp)
311            d->steady_comp =
312                roundup(delta+d->steady_comp, 2)/2;
313        else
314            d->steady_comp = delta;
315        d->confidence++;
316    }
317}
318
319static bool powerclamp_adjust_controls(unsigned int target_ratio,
320                unsigned int guard, unsigned int win)
321{
322    static u64 msr_last, tsc_last;
323    u64 msr_now, tsc_now;
324    u64 val64;
325
326    /* check result for the last window */
327    msr_now = pkg_state_counter();
328    rdtscll(tsc_now);
329
330    /* calculate pkg cstate vs tsc ratio */
331    if (!msr_last || !tsc_last)
332        current_ratio = 1;
333    else if (tsc_now-tsc_last) {
334        val64 = 100*(msr_now-msr_last);
335        do_div(val64, (tsc_now-tsc_last));
336        current_ratio = val64;
337    }
338
339    /* update record */
340    msr_last = msr_now;
341    tsc_last = tsc_now;
342
343    adjust_compensation(target_ratio, win);
344    /*
345     * too many external interrupts, set flag such
346     * that we can take measure later.
347     */
348    reduce_irq = atomic_read(&idle_wakeup_counter) >=
349        2 * win * num_online_cpus();
350
351    atomic_set(&idle_wakeup_counter, 0);
352    /* if we are above target+guard, skip */
353    return set_target_ratio + guard <= current_ratio;
354}
355
356static int clamp_thread(void *arg)
357{
358    int cpunr = (unsigned long)arg;
359    DEFINE_TIMER(wakeup_timer, noop_timer, 0, 0);
360    static const struct sched_param param = {
361        .sched_priority = MAX_USER_RT_PRIO/2,
362    };
363    unsigned int count = 0;
364    unsigned int target_ratio;
365
366    set_bit(cpunr, cpu_clamping_mask);
367    set_freezable();
368    init_timer_on_stack(&wakeup_timer);
369    sched_setscheduler(current, SCHED_FIFO, &param);
370
371    while (true == clamping && !kthread_should_stop() &&
372        cpu_online(cpunr)) {
373        int sleeptime;
374        unsigned long target_jiffies;
375        unsigned int guard;
376        unsigned int compensation = 0;
377        int interval; /* jiffies to sleep for each attempt */
378        unsigned int duration_jiffies = msecs_to_jiffies(duration);
379        unsigned int window_size_now;
380
381        try_to_freeze();
382        /*
383         * make sure user selected ratio does not take effect until
384         * the next round. adjust target_ratio if user has changed
385         * target such that we can converge quickly.
386         */
387        target_ratio = set_target_ratio;
388        guard = 1 + target_ratio/20;
389        window_size_now = window_size;
390        count++;
391
392        /*
393         * systems may have different ability to enter package level
394         * c-states, thus we need to compensate the injected idle ratio
395         * to achieve the actual target reported by the HW.
396         */
397        compensation = get_compensation(target_ratio);
398        interval = duration_jiffies*100/(target_ratio+compensation);
399
400        /* align idle time */
401        target_jiffies = roundup(jiffies, interval);
402        sleeptime = target_jiffies - jiffies;
403        if (sleeptime <= 0)
404            sleeptime = 1;
405        schedule_timeout_interruptible(sleeptime);
406        /*
407         * only elected controlling cpu can collect stats and update
408         * control parameters.
409         */
410        if (cpunr == control_cpu && !(count%window_size_now)) {
411            should_skip =
412                powerclamp_adjust_controls(target_ratio,
413                            guard, window_size_now);
414            smp_mb();
415        }
416
417        if (should_skip)
418            continue;
419
420        target_jiffies = jiffies + duration_jiffies;
421        mod_timer(&wakeup_timer, target_jiffies);
422        if (unlikely(local_softirq_pending()))
423            continue;
424        /*
425         * stop tick sched during idle time, interrupts are still
426         * allowed. thus jiffies are updated properly.
427         */
428        preempt_disable();
429        tick_nohz_idle_enter();
430        /* mwait until target jiffies is reached */
431        while (time_before(jiffies, target_jiffies)) {
432            unsigned long ecx = 1;
433            unsigned long eax = target_mwait;
434
435            /*
436             * REVISIT: may call enter_idle() to notify drivers who
437             * can save power during cpu idle. same for exit_idle()
438             */
439            local_touch_nmi();
440            stop_critical_timings();
441            __monitor((void *)&current_thread_info()->flags, 0, 0);
442            cpu_relax(); /* allow HT sibling to run */
443            __mwait(eax, ecx);
444            start_critical_timings();
445            atomic_inc(&idle_wakeup_counter);
446        }
447        tick_nohz_idle_exit();
448        preempt_enable_no_resched();
449    }
450    del_timer_sync(&wakeup_timer);
451    clear_bit(cpunr, cpu_clamping_mask);
452
453    return 0;
454}
455
456/*
457 * 1 HZ polling while clamping is active, useful for userspace
458 * to monitor actual idle ratio.
459 */
460static void poll_pkg_cstate(struct work_struct *dummy);
461static DECLARE_DELAYED_WORK(poll_pkg_cstate_work, poll_pkg_cstate);
462static void poll_pkg_cstate(struct work_struct *dummy)
463{
464    static u64 msr_last;
465    static u64 tsc_last;
466    static unsigned long jiffies_last;
467
468    u64 msr_now;
469    unsigned long jiffies_now;
470    u64 tsc_now;
471    u64 val64;
472
473    msr_now = pkg_state_counter();
474    rdtscll(tsc_now);
475    jiffies_now = jiffies;
476
477    /* calculate pkg cstate vs tsc ratio */
478    if (!msr_last || !tsc_last)
479        pkg_cstate_ratio_cur = 1;
480    else {
481        if (tsc_now - tsc_last) {
482            val64 = 100 * (msr_now - msr_last);
483            do_div(val64, (tsc_now - tsc_last));
484            pkg_cstate_ratio_cur = val64;
485        }
486    }
487
488    /* update record */
489    msr_last = msr_now;
490    jiffies_last = jiffies_now;
491    tsc_last = tsc_now;
492
493    if (true == clamping)
494        schedule_delayed_work(&poll_pkg_cstate_work, HZ);
495}
496
497static int start_power_clamp(void)
498{
499    unsigned long cpu;
500    struct task_struct *thread;
501
502    /* check if pkg cstate counter is completely 0, abort in this case */
503    if (!pkg_state_counter()) {
504        pr_err("pkg cstate counter not functional, abort\n");
505        return -EINVAL;
506    }
507
508    set_target_ratio = clamp(set_target_ratio, 0U, MAX_TARGET_RATIO - 1);
509    /* prevent cpu hotplug */
510    get_online_cpus();
511
512    /* prefer BSP */
513    control_cpu = 0;
514    if (!cpu_online(control_cpu))
515        control_cpu = smp_processor_id();
516
517    clamping = true;
518    schedule_delayed_work(&poll_pkg_cstate_work, 0);
519
520    /* start one thread per online cpu */
521    for_each_online_cpu(cpu) {
522        struct task_struct **p =
523            per_cpu_ptr(powerclamp_thread, cpu);
524
525        thread = kthread_create_on_node(clamp_thread,
526                        (void *) cpu,
527                        cpu_to_node(cpu),
528                        "kidle_inject/%ld", cpu);
529        /* bind to cpu here */
530        if (likely(!IS_ERR(thread))) {
531            kthread_bind(thread, cpu);
532            wake_up_process(thread);
533            *p = thread;
534        }
535
536    }
537    put_online_cpus();
538
539    return 0;
540}
541
542static void end_power_clamp(void)
543{
544    int i;
545    struct task_struct *thread;
546
547    clamping = false;
548    /*
549     * make clamping visible to other cpus and give per cpu clamping threads
550     * sometime to exit, or gets killed later.
551     */
552    smp_mb();
553    msleep(20);
554    if (bitmap_weight(cpu_clamping_mask, num_possible_cpus())) {
555        for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) {
556            pr_debug("clamping thread for cpu %d alive, kill\n", i);
557            thread = *per_cpu_ptr(powerclamp_thread, i);
558            kthread_stop(thread);
559        }
560    }
561}
562
563static int powerclamp_cpu_callback(struct notifier_block *nfb,
564                unsigned long action, void *hcpu)
565{
566    unsigned long cpu = (unsigned long)hcpu;
567    struct task_struct *thread;
568    struct task_struct **percpu_thread =
569        per_cpu_ptr(powerclamp_thread, cpu);
570
571    if (false == clamping)
572        goto exit_ok;
573
574    switch (action) {
575    case CPU_ONLINE:
576        thread = kthread_create_on_node(clamp_thread,
577                        (void *) cpu,
578                        cpu_to_node(cpu),
579                        "kidle_inject/%lu", cpu);
580        if (likely(!IS_ERR(thread))) {
581            kthread_bind(thread, cpu);
582            wake_up_process(thread);
583            *percpu_thread = thread;
584        }
585        /* prefer BSP as controlling CPU */
586        if (cpu == 0) {
587            control_cpu = 0;
588            smp_mb();
589        }
590        break;
591    case CPU_DEAD:
592        if (test_bit(cpu, cpu_clamping_mask)) {
593            pr_err("cpu %lu dead but powerclamping thread is not\n",
594                cpu);
595            kthread_stop(*percpu_thread);
596        }
597        if (cpu == control_cpu) {
598            control_cpu = smp_processor_id();
599            smp_mb();
600        }
601    }
602
603exit_ok:
604    return NOTIFY_OK;
605}
606
607static struct notifier_block powerclamp_cpu_notifier = {
608    .notifier_call = powerclamp_cpu_callback,
609};
610
611static int powerclamp_get_max_state(struct thermal_cooling_device *cdev,
612                 unsigned long *state)
613{
614    *state = MAX_TARGET_RATIO;
615
616    return 0;
617}
618
619static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev,
620                 unsigned long *state)
621{
622    if (true == clamping)
623        *state = pkg_cstate_ratio_cur;
624    else
625        /* to save power, do not poll idle ratio while not clamping */
626        *state = -1; /* indicates invalid state */
627
628    return 0;
629}
630
631static int powerclamp_set_cur_state(struct thermal_cooling_device *cdev,
632                 unsigned long new_target_ratio)
633{
634    int ret = 0;
635
636    new_target_ratio = clamp(new_target_ratio, 0UL,
637                (unsigned long) (MAX_TARGET_RATIO-1));
638    if (set_target_ratio == 0 && new_target_ratio > 0) {
639        pr_info("Start idle injection to reduce power\n");
640        set_target_ratio = new_target_ratio;
641        ret = start_power_clamp();
642        goto exit_set;
643    } else if (set_target_ratio > 0 && new_target_ratio == 0) {
644        pr_info("Stop forced idle injection\n");
645        set_target_ratio = 0;
646        end_power_clamp();
647    } else /* adjust currently running */ {
648        set_target_ratio = new_target_ratio;
649        /* make new set_target_ratio visible to other cpus */
650        smp_mb();
651    }
652
653exit_set:
654    return ret;
655}
656
657/* bind to generic thermal layer as cooling device*/
658static struct thermal_cooling_device_ops powerclamp_cooling_ops = {
659    .get_max_state = powerclamp_get_max_state,
660    .get_cur_state = powerclamp_get_cur_state,
661    .set_cur_state = powerclamp_set_cur_state,
662};
663
664/* runs on Nehalem and later */
665static const struct x86_cpu_id intel_powerclamp_ids[] = {
666    { X86_VENDOR_INTEL, 6, 0x1a},
667    { X86_VENDOR_INTEL, 6, 0x1c},
668    { X86_VENDOR_INTEL, 6, 0x1e},
669    { X86_VENDOR_INTEL, 6, 0x1f},
670    { X86_VENDOR_INTEL, 6, 0x25},
671    { X86_VENDOR_INTEL, 6, 0x26},
672    { X86_VENDOR_INTEL, 6, 0x2a},
673    { X86_VENDOR_INTEL, 6, 0x2c},
674    { X86_VENDOR_INTEL, 6, 0x2d},
675    { X86_VENDOR_INTEL, 6, 0x2e},
676    { X86_VENDOR_INTEL, 6, 0x2f},
677    { X86_VENDOR_INTEL, 6, 0x3a},
678    {}
679};
680MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids);
681
682static int powerclamp_probe(void)
683{
684    if (!x86_match_cpu(intel_powerclamp_ids)) {
685        pr_err("Intel powerclamp does not run on family %d model %d\n",
686                boot_cpu_data.x86, boot_cpu_data.x86_model);
687        return -ENODEV;
688    }
689    if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC) ||
690        !boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ||
691        !boot_cpu_has(X86_FEATURE_MWAIT) ||
692        !boot_cpu_has(X86_FEATURE_ARAT))
693        return -ENODEV;
694
695    /* find the deepest mwait value */
696    find_target_mwait();
697
698    return 0;
699}
700
701static int powerclamp_debug_show(struct seq_file *m, void *unused)
702{
703    int i = 0;
704
705    seq_printf(m, "controlling cpu: %d\n", control_cpu);
706    seq_printf(m, "pct confidence steady dynamic (compensation)\n");
707    for (i = 0; i < MAX_TARGET_RATIO; i++) {
708        seq_printf(m, "%d\t%lu\t%lu\t%lu\n",
709            i,
710            cal_data[i].confidence,
711            cal_data[i].steady_comp,
712            cal_data[i].dynamic_comp);
713    }
714
715    return 0;
716}
717
718static int powerclamp_debug_open(struct inode *inode,
719            struct file *file)
720{
721    return single_open(file, powerclamp_debug_show, inode->i_private);
722}
723
724static const struct file_operations powerclamp_debug_fops = {
725    .open = powerclamp_debug_open,
726    .read = seq_read,
727    .llseek = seq_lseek,
728    .release = single_release,
729    .owner = THIS_MODULE,
730};
731
732static inline void powerclamp_create_debug_files(void)
733{
734    debug_dir = debugfs_create_dir("intel_powerclamp", NULL);
735    if (!debug_dir)
736        return;
737
738    if (!debugfs_create_file("powerclamp_calib", S_IRUGO, debug_dir,
739                    cal_data, &powerclamp_debug_fops))
740        goto file_error;
741
742    return;
743
744file_error:
745    debugfs_remove_recursive(debug_dir);
746}
747
748static int powerclamp_init(void)
749{
750    int retval;
751    int bitmap_size;
752
753    bitmap_size = BITS_TO_LONGS(num_possible_cpus()) * sizeof(long);
754    cpu_clamping_mask = kzalloc(bitmap_size, GFP_KERNEL);
755    if (!cpu_clamping_mask)
756        return -ENOMEM;
757
758    /* probe cpu features and ids here */
759    retval = powerclamp_probe();
760    if (retval)
761        return retval;
762    /* set default limit, maybe adjusted during runtime based on feedback */
763    window_size = 2;
764    register_hotcpu_notifier(&powerclamp_cpu_notifier);
765    powerclamp_thread = alloc_percpu(struct task_struct *);
766    cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL,
767                        &powerclamp_cooling_ops);
768    if (IS_ERR(cooling_dev))
769        return -ENODEV;
770
771    if (!duration)
772        duration = jiffies_to_msecs(DEFAULT_DURATION_JIFFIES);
773    powerclamp_create_debug_files();
774
775    return 0;
776}
777module_init(powerclamp_init);
778
779static void powerclamp_exit(void)
780{
781    unregister_hotcpu_notifier(&powerclamp_cpu_notifier);
782    end_power_clamp();
783    free_percpu(powerclamp_thread);
784    thermal_cooling_device_unregister(cooling_dev);
785    kfree(cpu_clamping_mask);
786
787    cancel_delayed_work_sync(&poll_pkg_cstate_work);
788    debugfs_remove_recursive(debug_dir);
789}
790module_exit(powerclamp_exit);
791
792MODULE_LICENSE("GPL");
793MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
794MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@linux.intel.com>");
795MODULE_DESCRIPTION("Package Level C-state Idle Injection for Intel CPUs");
796

Archive Download this file



interactive