drivers/cpufreq/powernow-k8.c

/*
 * (c) 2003-2012 Advanced Micro Devices, Inc.
 * Your use of this code is subject to the terms and conditions of the
 * GNU general public license version 2. See "COPYING" or
 * http://www.gnu.org/licenses/gpl.html
 *
 * Maintainer:
 * Andreas Herrmann <andreas.herrmann3@amd.com>
 *
 * Based on the powernow-k7.c module written by Dave Jones.
 * (C) 2003 Dave Jones on behalf of SuSE Labs
 * (C) 2004 Dominik Brodowski <linux@brodo.de>
 * (C) 2004 Pavel Machek <pavel@ucw.cz>
 * Licensed under the terms of the GNU GPL License version 2.
 * Based upon datasheets & sample CPUs kindly provided by AMD.
 *
 * Valuable input gratefully received from Dave Jones, Pavel Machek,
 * Dominik Brodowski, Jacob Shin, and others.
 * Originally developed by Paul Devriendt.
 *
 * Processor information obtained from Chapter 9 (Power and Thermal
 * Management) of the "BIOS and Kernel Developer's Guide (BKDG) for
 * the AMD Athlon 64 and AMD Opteron Processors" and section "2.x
 * Power Management" in BKDGs for newer AMD CPU families.
 *
 * Tables for specific CPUs can be inferred from AMD's processor
 * power and thermal data sheets, (e.g. 30417.pdf, 30430.pdf, 43375.pdf)
 */

#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/cpumask.h>
#include <linux/io.h>
#include <linux/delay.h>

#include <asm/msr.h>
#include <asm/cpu_device_id.h>

#include <linux/acpi.h>
#include <linux/mutex.h>
#include <acpi/processor.h>

#define PFX "powernow-k8: "
#define VERSION "version 2.20.00"
#include "powernow-k8.h"
#include "mperf.h"

/* serialize freq changes */
static DEFINE_MUTEX(fidvid_mutex);

static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data);

static int cpu_family = CPU_OPTERON;

/* array to map SW pstate number to acpi state */
static u32 ps_to_as[8];

/* core performance boost */
static bool cpb_capable, cpb_enabled;
static struct msr __percpu *msrs;

static struct cpufreq_driver cpufreq_amd64_driver;

#ifndef CONFIG_SMP
static inline const struct cpumask *cpu_core_mask(int cpu)
{
    return cpumask_of(0);
}
#endif

/* Return a frequency in MHz, given an input fid */
static u32 find_freq_from_fid(u32 fid)
{
    return 800 + (fid * 100);
}
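/*
 * Worked example of the formula above: fid 0x0 is 800 MHz, fid 0x2 is
 * 1000 MHz, fid 0xe is 2200 MHz (800 + fid * 100).
 */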

/* Return a frequency in KHz, given an input fid */
static u32 find_khz_freq_from_fid(u32 fid)
{
    return 1000 * find_freq_from_fid(fid);
}

static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data,
                     u32 pstate)
{
    return data[ps_to_as[pstate]].frequency;
}

/* Return the vco fid for an input fid
 *
 * Each "low" fid has a corresponding "high" fid, and you can get to the
 * "low" fids only from their corresponding "high" fids. This returns the
 * "high" fid corresponding to a "low" one.
 */
static u32 convert_fid_to_vco_fid(u32 fid)
{
    if (fid < HI_FID_TABLE_BOTTOM)
        return 8 + (2 * fid);
    else
        return fid;
}
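/*
 * For instance, a "low" fid of 0x2 maps to the vco fid 0xc (8 + 2 * 2),
 * while fids at or above HI_FID_TABLE_BOTTOM map to themselves.
 */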

/*
 * Return 1 if the pending bit is set. Unless we just instructed the processor
 * to transition to a new state, seeing this bit set is really bad news.
 */
static int pending_bit_stuck(void)
{
    u32 lo, hi;

    if (cpu_family == CPU_HW_PSTATE)
        return 0;

    rdmsr(MSR_FIDVID_STATUS, lo, hi);
    return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0;
}

/*
 * Update the global current fid / vid values from the status msr.
 * Returns 1 on error.
 */
static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
{
    u32 lo, hi;
    u32 i = 0;

    if (cpu_family == CPU_HW_PSTATE) {
        rdmsr(MSR_PSTATE_STATUS, lo, hi);
        i = lo & HW_PSTATE_MASK;
        data->currpstate = i;

        /*
         * A workaround for family 11h erratum 311 might cause
         * an "out-of-range" Pstate if the core is in Pstate-0.
         */
        if ((boot_cpu_data.x86 == 0x11) && (i >= data->numps))
            data->currpstate = HW_PSTATE_0;

        return 0;
    }
    do {
        if (i++ > 10000) {
            pr_debug("detected change pending stuck\n");
            return 1;
        }
        rdmsr(MSR_FIDVID_STATUS, lo, hi);
    } while (lo & MSR_S_LO_CHANGE_PENDING);

    data->currvid = hi & MSR_S_HI_CURRENT_VID;
    data->currfid = lo & MSR_S_LO_CURRENT_FID;

    return 0;
}

/* the isochronous relief time */
static void count_off_irt(struct powernow_k8_data *data)
{
    udelay((1 << data->irt) * 10);
    return;
}

/* the voltage stabilization time */
static void count_off_vst(struct powernow_k8_data *data)
{
    udelay(data->vstable * VST_UNITS_20US);
    return;
}

/* need to init the control msr to a safe value (for each cpu) */
static void fidvid_msr_init(void)
{
    u32 lo, hi;
    u8 fid, vid;

    rdmsr(MSR_FIDVID_STATUS, lo, hi);
    vid = hi & MSR_S_HI_CURRENT_VID;
    fid = lo & MSR_S_LO_CURRENT_FID;
    lo = fid | (vid << MSR_C_LO_VID_SHIFT);
    hi = MSR_C_HI_STP_GNT_BENIGN;
    pr_debug("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi);
    wrmsr(MSR_FIDVID_CTL, lo, hi);
}

/* write the new fid value along with the other control fields to the msr */
static int write_new_fid(struct powernow_k8_data *data, u32 fid)
{
    u32 lo;
    u32 savevid = data->currvid;
    u32 i = 0;

    if ((fid & INVALID_FID_MASK) || (data->currvid & INVALID_VID_MASK)) {
        printk(KERN_ERR PFX "internal error - overflow on fid write\n");
        return 1;
    }

    lo = fid;
    lo |= (data->currvid << MSR_C_LO_VID_SHIFT);
    lo |= MSR_C_LO_INIT_FID_VID;

    pr_debug("writing fid 0x%x, lo 0x%x, hi 0x%x\n",
        fid, lo, data->plllock * PLL_LOCK_CONVERSION);

    do {
        wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION);
        if (i++ > 100) {
            printk(KERN_ERR PFX
                "Hardware error - pending bit very stuck - "
                "no further pstate changes possible\n");
            return 1;
        }
    } while (query_current_values_with_pending_wait(data));

    count_off_irt(data);

    if (savevid != data->currvid) {
        printk(KERN_ERR PFX
            "vid change on fid trans, old 0x%x, new 0x%x\n",
            savevid, data->currvid);
        return 1;
    }

    if (fid != data->currfid) {
        printk(KERN_ERR PFX
            "fid trans failed, fid 0x%x, curr 0x%x\n", fid,
            data->currfid);
        return 1;
    }

    return 0;
}

/* Write a new vid to the hardware */
static int write_new_vid(struct powernow_k8_data *data, u32 vid)
{
    u32 lo;
    u32 savefid = data->currfid;
    int i = 0;

    if ((data->currfid & INVALID_FID_MASK) || (vid & INVALID_VID_MASK)) {
        printk(KERN_ERR PFX "internal error - overflow on vid write\n");
        return 1;
    }

    lo = data->currfid;
    lo |= (vid << MSR_C_LO_VID_SHIFT);
    lo |= MSR_C_LO_INIT_FID_VID;

    pr_debug("writing vid 0x%x, lo 0x%x, hi 0x%x\n",
        vid, lo, STOP_GRANT_5NS);

    do {
        wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS);
        if (i++ > 100) {
            printk(KERN_ERR PFX "internal error - pending bit "
                    "very stuck - no further pstate "
                    "changes possible\n");
            return 1;
        }
    } while (query_current_values_with_pending_wait(data));

    if (savefid != data->currfid) {
        printk(KERN_ERR PFX "fid changed on vid trans, old "
            "0x%x new 0x%x\n",
               savefid, data->currfid);
        return 1;
    }

    if (vid != data->currvid) {
        printk(KERN_ERR PFX "vid trans failed, vid 0x%x, "
                "curr 0x%x\n",
                vid, data->currvid);
        return 1;
    }

    return 0;
}

/*
 * Reduce the vid toward reqvid, by at most step at a time.
 * Decreasing vid codes represent increasing voltages:
 * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off.
 */
static int decrease_vid_code_by_step(struct powernow_k8_data *data,
        u32 reqvid, u32 step)
{
    if ((data->currvid - reqvid) > step)
        reqvid = data->currvid - step;

    if (write_new_vid(data, reqvid))
        return 1;

    count_off_vst(data);

    return 0;
}
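/*
 * For example, with currvid 0x12, reqvid 0x0a and step 0x2, the next
 * write targets vid 0x10; repeated calls step down until reqvid is hit,
 * waiting out the voltage stabilization time after each step.
 */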

/* Change hardware pstate by single MSR write */
static int transition_pstate(struct powernow_k8_data *data, u32 pstate)
{
    wrmsr(MSR_PSTATE_CTRL, pstate, 0);
    data->currpstate = pstate;
    return 0;
}
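/*
 * On hardware-pstate CPUs this is the whole transition protocol: write
 * the pstate number (e.g. 2 for P2) to MSR_PSTATE_CTRL and let the
 * hardware sequence the fid/vid changes itself.
 */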

/* Change Opteron/Athlon64 fid and vid, by the 3 phases. */
static int transition_fid_vid(struct powernow_k8_data *data,
        u32 reqfid, u32 reqvid)
{
    if (core_voltage_pre_transition(data, reqvid, reqfid))
        return 1;

    if (core_frequency_transition(data, reqfid))
        return 1;

    if (core_voltage_post_transition(data, reqvid))
        return 1;

    if (query_current_values_with_pending_wait(data))
        return 1;

    if ((reqfid != data->currfid) || (reqvid != data->currvid)) {
        printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, "
                "curr 0x%x 0x%x\n",
                smp_processor_id(),
                reqfid, reqvid, data->currfid, data->currvid);
        return 1;
    }

    pr_debug("transitioned (cpu%d): new fid 0x%x, vid 0x%x\n",
        smp_processor_id(), data->currfid, data->currvid);

    return 0;
}
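/*
 * In short: phase 1 raises the core voltage far enough to be safe for
 * the target frequency, phase 2 steps the fid (and thus the PLL) to the
 * requested value, and phase 3 drops the voltage to the final vid.
 */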

/* Phase 1 - core voltage transition ... setup voltage */
static int core_voltage_pre_transition(struct powernow_k8_data *data,
        u32 reqvid, u32 reqfid)
{
    u32 rvosteps = data->rvo;
    u32 savefid = data->currfid;
    u32 maxvid, lo, rvomult = 1;

    pr_debug("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, "
        "reqvid 0x%x, rvo 0x%x\n",
        smp_processor_id(),
        data->currfid, data->currvid, reqvid, data->rvo);

    if ((savefid < LO_FID_TABLE_TOP) && (reqfid < LO_FID_TABLE_TOP))
        rvomult = 2;
    rvosteps *= rvomult;
    rdmsr(MSR_FIDVID_STATUS, lo, maxvid);
    maxvid = 0x1f & (maxvid >> 16);
    pr_debug("ph1 maxvid=0x%x\n", maxvid);
    if (reqvid < maxvid) /* lower numbers are higher voltages */
        reqvid = maxvid;

    while (data->currvid > reqvid) {
        pr_debug("ph1: curr 0x%x, req vid 0x%x\n",
            data->currvid, reqvid);
        if (decrease_vid_code_by_step(data, reqvid, data->vidmvs))
            return 1;
    }

    while ((rvosteps > 0) &&
            ((rvomult * data->rvo + data->currvid) > reqvid)) {
        if (data->currvid == maxvid) {
            rvosteps = 0;
        } else {
            pr_debug("ph1: changing vid for rvo, req 0x%x\n",
                data->currvid - 1);
            if (decrease_vid_code_by_step(data, data->currvid - 1, 1))
                return 1;
            rvosteps--;
        }
    }

    if (query_current_values_with_pending_wait(data))
        return 1;

    if (savefid != data->currfid) {
        printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n",
                data->currfid);
        return 1;
    }

    pr_debug("ph1 complete, currfid 0x%x, currvid 0x%x\n",
        data->currfid, data->currvid);

    return 0;
}

/* Phase 2 - core frequency transition */
static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
{
    u32 vcoreqfid, vcocurrfid, vcofiddiff;
    u32 fid_interval, savevid = data->currvid;

    if (data->currfid == reqfid) {
        printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n",
                data->currfid);
        return 0;
    }

    pr_debug("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, "
        "reqfid 0x%x\n",
        smp_processor_id(),
        data->currfid, data->currvid, reqfid);

    vcoreqfid = convert_fid_to_vco_fid(reqfid);
    vcocurrfid = convert_fid_to_vco_fid(data->currfid);
    vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid
        : vcoreqfid - vcocurrfid;

    if ((reqfid <= LO_FID_TABLE_TOP) && (data->currfid <= LO_FID_TABLE_TOP))
        vcofiddiff = 0;

    while (vcofiddiff > 2) {
        fid_interval = (data->currfid & 1) ? 1 : 2;

        if (reqfid > data->currfid) {
            if (data->currfid > LO_FID_TABLE_TOP) {
                if (write_new_fid(data,
                        data->currfid + fid_interval))
                    return 1;
            } else {
                if (write_new_fid(data, 2 +
                        convert_fid_to_vco_fid(data->currfid)))
                    return 1;
            }
        } else {
            if (write_new_fid(data, data->currfid - fid_interval))
                return 1;
        }

        vcocurrfid = convert_fid_to_vco_fid(data->currfid);
        vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid
            : vcoreqfid - vcocurrfid;
    }

    if (write_new_fid(data, reqfid))
        return 1;

    if (query_current_values_with_pending_wait(data))
        return 1;

    if (data->currfid != reqfid) {
        printk(KERN_ERR PFX
            "ph2: mismatch, failed fid transition, "
            "curr 0x%x, req 0x%x\n",
            data->currfid, reqfid);
        return 1;
    }

    if (savevid != data->currvid) {
        printk(KERN_ERR PFX "ph2: vid changed, save 0x%x, curr 0x%x\n",
            savevid, data->currvid);
        return 1;
    }

    pr_debug("ph2 complete, currfid 0x%x, currvid 0x%x\n",
        data->currfid, data->currvid);

    return 0;
}

/* Phase 3 - core voltage transition flow ... jump to the final vid. */
static int core_voltage_post_transition(struct powernow_k8_data *data,
        u32 reqvid)
{
    u32 savefid = data->currfid;
    u32 savereqvid = reqvid;

    pr_debug("ph3 (cpu%d): starting, currfid 0x%x, currvid 0x%x\n",
        smp_processor_id(),
        data->currfid, data->currvid);

    if (reqvid != data->currvid) {
        if (write_new_vid(data, reqvid))
            return 1;

        if (savefid != data->currfid) {
            printk(KERN_ERR PFX
                   "ph3: bad fid change, save 0x%x, curr 0x%x\n",
                   savefid, data->currfid);
            return 1;
        }

        if (data->currvid != reqvid) {
            printk(KERN_ERR PFX
                   "ph3: failed vid transition, "
                   "req 0x%x, curr 0x%x\n",
                   reqvid, data->currvid);
            return 1;
        }
    }

    if (query_current_values_with_pending_wait(data))
        return 1;

    if (savereqvid != data->currvid) {
        pr_debug("ph3 failed, currvid 0x%x\n", data->currvid);
        return 1;
    }

    if (savefid != data->currfid) {
        pr_debug("ph3 failed, currfid changed 0x%x\n",
            data->currfid);
        return 1;
    }

    pr_debug("ph3 complete, currfid 0x%x, currvid 0x%x\n",
        data->currfid, data->currvid);

    return 0;
}

static const struct x86_cpu_id powernow_k8_ids[] = {
    /* IO based frequency switching */
    { X86_VENDOR_AMD, 0xf },
    /* MSR based frequency switching supported */
    X86_FEATURE_MATCH(X86_FEATURE_HW_PSTATE),
    {}
};
MODULE_DEVICE_TABLE(x86cpu, powernow_k8_ids);

static void check_supported_cpu(void *_rc)
{
    u32 eax, ebx, ecx, edx;
    int *rc = _rc;

    *rc = -ENODEV;

    eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);

    if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) {
        if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
            ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) {
            printk(KERN_INFO PFX
                "Processor cpuid %x not supported\n", eax);
            return;
        }

        eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES);
        if (eax < CPUID_FREQ_VOLT_CAPABILITIES) {
            printk(KERN_INFO PFX
                   "No frequency change capabilities detected\n");
            return;
        }

        cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
        if ((edx & P_STATE_TRANSITION_CAPABLE)
            != P_STATE_TRANSITION_CAPABLE) {
            printk(KERN_INFO PFX
                "Power state transitions not supported\n");
            return;
        }
    } else { /* must be a HW Pstate capable processor */
        cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
        if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE)
            cpu_family = CPU_HW_PSTATE;
        else
            return;
    }

    *rc = 0;
}

static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst,
        u8 maxvid)
{
    unsigned int j;
    u8 lastfid = 0xff;

    for (j = 0; j < data->numps; j++) {
        if (pst[j].vid > LEAST_VID) {
            printk(KERN_ERR FW_BUG PFX "vid %d invalid : 0x%x\n",
                   j, pst[j].vid);
            return -EINVAL;
        }
        if (pst[j].vid < data->rvo) {
            /* vid + rvo >= 0 */
            printk(KERN_ERR FW_BUG PFX "0 vid exceeded with pstate"
                   " %d\n", j);
            return -ENODEV;
        }
        if (pst[j].vid < maxvid + data->rvo) {
            /* vid + rvo >= maxvid */
            printk(KERN_ERR FW_BUG PFX "maxvid exceeded with pstate"
                   " %d\n", j);
            return -ENODEV;
        }
        if (pst[j].fid > MAX_FID) {
            printk(KERN_ERR FW_BUG PFX "maxfid exceeded with pstate"
                   " %d\n", j);
            return -ENODEV;
        }
        if (j && (pst[j].fid < HI_FID_TABLE_BOTTOM)) {
            /* Only first fid is allowed to be in "low" range */
            printk(KERN_ERR FW_BUG PFX "two low fids - %d : "
                   "0x%x\n", j, pst[j].fid);
            return -EINVAL;
        }
        if (pst[j].fid < lastfid)
            lastfid = pst[j].fid;
    }
    if (lastfid & 1) {
        printk(KERN_ERR FW_BUG PFX "lastfid invalid\n");
        return -EINVAL;
    }
    if (lastfid > LO_FID_TABLE_TOP)
        printk(KERN_INFO FW_BUG PFX
            "first fid not from lo freq table\n");

    return 0;
}

static void invalidate_entry(struct cpufreq_frequency_table *powernow_table,
        unsigned int entry)
{
    powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID;
}

static void print_basics(struct powernow_k8_data *data)
{
    int j;
    for (j = 0; j < data->numps; j++) {
        if (data->powernow_table[j].frequency !=
                CPUFREQ_ENTRY_INVALID) {
            if (cpu_family == CPU_HW_PSTATE) {
                printk(KERN_INFO PFX
                    " %d : pstate %d (%d MHz)\n", j,
                    data->powernow_table[j].index,
                    data->powernow_table[j].frequency/1000);
            } else {
                printk(KERN_INFO PFX
                    "fid 0x%x (%d MHz), vid 0x%x\n",
                    data->powernow_table[j].index & 0xff,
                    data->powernow_table[j].frequency/1000,
                    data->powernow_table[j].index >> 8);
            }
        }
    }
    if (data->batps)
        printk(KERN_INFO PFX "Only %d pstates on battery\n",
                data->batps);
}

static u32 freq_from_fid_did(u32 fid, u32 did)
{
    u32 mhz = 0;

    if (boot_cpu_data.x86 == 0x10)
        mhz = (100 * (fid + 0x10)) >> did;
    else if (boot_cpu_data.x86 == 0x11)
        mhz = (100 * (fid + 8)) >> did;
    else
        BUG();

    return mhz * 1000;
}
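/*
 * E.g. on family 0x10, fid 0x6 with did 0 yields 100 * 0x16 = 2200 MHz,
 * and did 1 halves that to 1100 MHz. Note the return value is in kHz,
 * despite the local variable's name.
 */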

static int fill_powernow_table(struct powernow_k8_data *data,
        struct pst_s *pst, u8 maxvid)
{
    struct cpufreq_frequency_table *powernow_table;
    unsigned int j;

    if (data->batps) {
        /* use ACPI support to get full speed on mains power */
        printk(KERN_WARNING PFX
            "Only %d pstates usable (use ACPI driver for full "
            "range)\n", data->batps);
        data->numps = data->batps;
    }

    for (j = 1; j < data->numps; j++) {
        if (pst[j-1].fid >= pst[j].fid) {
            printk(KERN_ERR PFX "PST out of sequence\n");
            return -EINVAL;
        }
    }

    if (data->numps < 2) {
        printk(KERN_ERR PFX "no p states to transition\n");
        return -ENODEV;
    }

    if (check_pst_table(data, pst, maxvid))
        return -EINVAL;

    powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
        * (data->numps + 1)), GFP_KERNEL);
    if (!powernow_table) {
        printk(KERN_ERR PFX "powernow_table memory alloc failure\n");
        return -ENOMEM;
    }

    for (j = 0; j < data->numps; j++) {
        int freq;
        powernow_table[j].index = pst[j].fid; /* lower 8 bits */
        powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */
        freq = find_khz_freq_from_fid(pst[j].fid);
        powernow_table[j].frequency = freq;
    }
    powernow_table[data->numps].frequency = CPUFREQ_TABLE_END;
    powernow_table[data->numps].index = 0;

    if (query_current_values_with_pending_wait(data)) {
        kfree(powernow_table);
        return -EIO;
    }

    pr_debug("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid);
    data->powernow_table = powernow_table;
    if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu)
        print_basics(data);

    for (j = 0; j < data->numps; j++)
        if ((pst[j].fid == data->currfid) &&
            (pst[j].vid == data->currvid))
            return 0;

    pr_debug("currfid/vid do not match PST, ignoring\n");
    return 0;
}
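/*
 * The table's index field packs both identifiers: fid in bits 0-7, vid
 * in bits 8-15. A fid of 0x0c with vid 0x0a is therefore stored as
 * index 0x0a0c, and unpacked the same way in the transition paths below.
 */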

/* Find and validate the PSB/PST table in BIOS. */
static int find_psb_table(struct powernow_k8_data *data)
{
    struct psb_s *psb;
    unsigned int i;
    u32 mvs;
    u8 maxvid;
    u32 cpst = 0;
    u32 thiscpuid;

    for (i = 0xc0000; i < 0xffff0; i += 0x10) {
        /* Scan BIOS looking for the signature. */
        /* It can not be at ffff0 - it is too big. */

        psb = phys_to_virt(i);
        if (memcmp(psb, PSB_ID_STRING, PSB_ID_STRING_LEN) != 0)
            continue;

        pr_debug("found PSB header at 0x%p\n", psb);

        pr_debug("table vers: 0x%x\n", psb->tableversion);
        if (psb->tableversion != PSB_VERSION_1_4) {
            printk(KERN_ERR FW_BUG PFX "PSB table is not v1.4\n");
            return -ENODEV;
        }

        pr_debug("flags: 0x%x\n", psb->flags1);
        if (psb->flags1) {
            printk(KERN_ERR FW_BUG PFX "unknown flags\n");
            return -ENODEV;
        }

        data->vstable = psb->vstable;
        pr_debug("voltage stabilization time: %d(*20us)\n",
                data->vstable);

        pr_debug("flags2: 0x%x\n", psb->flags2);
        data->rvo = psb->flags2 & 3;
        data->irt = ((psb->flags2) >> 2) & 3;
        mvs = ((psb->flags2) >> 4) & 3;
        data->vidmvs = 1 << mvs;
        data->batps = ((psb->flags2) >> 6) & 3;

        pr_debug("ramp voltage offset: %d\n", data->rvo);
        pr_debug("isochronous relief time: %d\n", data->irt);
        pr_debug("maximum voltage step: %d - 0x%x\n", mvs, data->vidmvs);

        pr_debug("numpst: 0x%x\n", psb->num_tables);
        cpst = psb->num_tables;
        if ((psb->cpuid == 0x00000fc0) ||
            (psb->cpuid == 0x00000fe0)) {
            thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
            if ((thiscpuid == 0x00000fc0) ||
                (thiscpuid == 0x00000fe0))
                cpst = 1;
        }
        if (cpst != 1) {
            printk(KERN_ERR FW_BUG PFX "numpst must be 1\n");
            return -ENODEV;
        }

        data->plllock = psb->plllocktime;
        pr_debug("plllocktime: 0x%x (units 1us)\n", psb->plllocktime);
        pr_debug("maxfid: 0x%x\n", psb->maxfid);
        pr_debug("maxvid: 0x%x\n", psb->maxvid);
        maxvid = psb->maxvid;

        data->numps = psb->numps;
        pr_debug("numpstates: 0x%x\n", data->numps);
        return fill_powernow_table(data,
                (struct pst_s *)(psb+1), maxvid);
    }
    /*
     * If you see this message, complain to BIOS manufacturer. If
     * he tells you "we do not support Linux" or some similar
     * nonsense, remember that Windows 2000 uses the same legacy
     * mechanism that the old Linux PSB driver uses. Tell them it
     * is broken with Windows 2000.
     *
     * The reference to the AMD documentation is chapter 9 in the
     * BIOS and Kernel Developer's Guide, which is available on
     * www.amd.com
     */
    printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n");
    printk(KERN_ERR PFX "Make sure that your BIOS is up to date"
        " and Cool'N'Quiet support is enabled in BIOS setup\n");
    return -ENODEV;
}

static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data,
        unsigned int index)
{
    u64 control;

    if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
        return;

    control = data->acpi_data.states[index].control;
    data->irt = (control >> IRT_SHIFT) & IRT_MASK;
    data->rvo = (control >> RVO_SHIFT) & RVO_MASK;
    data->exttype = (control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
    data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK;
    data->vidmvs = 1 << ((control >> MVS_SHIFT) & MVS_MASK);
    data->vstable = (control >> VST_SHIFT) & VST_MASK;
}

static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
{
    struct cpufreq_frequency_table *powernow_table;
    int ret_val = -ENODEV;
    u64 control, status;

    if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
        pr_debug("register performance failed: bad ACPI data\n");
        return -EIO;
    }

    /* verify the data contained in the ACPI structures */
    if (data->acpi_data.state_count <= 1) {
        pr_debug("No ACPI P-States\n");
        goto err_out;
    }

    control = data->acpi_data.control_register.space_id;
    status = data->acpi_data.status_register.space_id;

    if ((control != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
        (status != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
        pr_debug("Invalid control/status registers (%llx - %llx)\n",
            control, status);
        goto err_out;
    }

    /* fill in data->powernow_table */
    powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
        * (data->acpi_data.state_count + 1)), GFP_KERNEL);
    if (!powernow_table) {
        pr_debug("powernow_table memory alloc failure\n");
        goto err_out;
    }

    /* fill in data */
    data->numps = data->acpi_data.state_count;
    powernow_k8_acpi_pst_values(data, 0);

    if (cpu_family == CPU_HW_PSTATE)
        ret_val = fill_powernow_table_pstate(data, powernow_table);
    else
        ret_val = fill_powernow_table_fidvid(data, powernow_table);
    if (ret_val)
        goto err_out_mem;

    powernow_table[data->acpi_data.state_count].frequency =
        CPUFREQ_TABLE_END;
    powernow_table[data->acpi_data.state_count].index = 0;
    data->powernow_table = powernow_table;

    if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu)
        print_basics(data);

    /* notify BIOS that we exist */
    acpi_processor_notify_smm(THIS_MODULE);

    if (!zalloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) {
        printk(KERN_ERR PFX
                "unable to alloc powernow_k8_data cpumask\n");
        ret_val = -ENOMEM;
        goto err_out_mem;
    }

    return 0;

err_out_mem:
    kfree(powernow_table);

err_out:
    acpi_processor_unregister_performance(&data->acpi_data, data->cpu);

    /* data->acpi_data.state_count informs us at ->exit()
     * whether ACPI was used */
    data->acpi_data.state_count = 0;

    return ret_val;
}

static int fill_powernow_table_pstate(struct powernow_k8_data *data,
        struct cpufreq_frequency_table *powernow_table)
{
    int i;
    u32 hi = 0, lo = 0;
    rdmsr(MSR_PSTATE_CUR_LIMIT, lo, hi);
    data->max_hw_pstate = (lo & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT;

    for (i = 0; i < data->acpi_data.state_count; i++) {
        u32 index;

        index = data->acpi_data.states[i].control & HW_PSTATE_MASK;
        if (index > data->max_hw_pstate) {
            printk(KERN_ERR PFX "invalid pstate %d - "
                    "bad value %d.\n", i, index);
            printk(KERN_ERR PFX "Please report to BIOS "
                    "manufacturer\n");
            invalidate_entry(powernow_table, i);
            continue;
        }

        ps_to_as[index] = i;

        /* Frequency may be rounded for these */
        if ((boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model < 10)
                 || boot_cpu_data.x86 == 0x11) {

            rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi);
            if (!(hi & HW_PSTATE_VALID_MASK)) {
                pr_debug("invalid pstate %d, ignoring\n", index);
                invalidate_entry(powernow_table, i);
                continue;
            }

            powernow_table[i].frequency =
                freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7);
        } else
            powernow_table[i].frequency =
                data->acpi_data.states[i].core_frequency * 1000;

        powernow_table[i].index = index;
    }
    return 0;
}
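/*
 * ps_to_as inverts the ACPI-state -> hardware-pstate mapping built above:
 * if ACPI state 1 carries hardware pstate 2, then ps_to_as[2] == 1, so
 * find_khz_freq_from_pstate(table, 2) returns table[1].frequency.
 */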

static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
        struct cpufreq_frequency_table *powernow_table)
{
    int i;

    for (i = 0; i < data->acpi_data.state_count; i++) {
        u32 fid;
        u32 vid;
        u32 freq, index;
        u64 status, control;

        if (data->exttype) {
            status = data->acpi_data.states[i].status;
            fid = status & EXT_FID_MASK;
            vid = (status >> VID_SHIFT) & EXT_VID_MASK;
        } else {
            control = data->acpi_data.states[i].control;
            fid = control & FID_MASK;
            vid = (control >> VID_SHIFT) & VID_MASK;
        }

        pr_debug(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid);

        index = fid | (vid<<8);
        powernow_table[i].index = index;

        freq = find_khz_freq_from_fid(fid);
        powernow_table[i].frequency = freq;

        /* verify frequency is OK */
        if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) {
            pr_debug("invalid freq %u kHz, ignoring\n", freq);
            invalidate_entry(powernow_table, i);
            continue;
        }

        /* verify voltage is OK -
         * BIOSs are using "off" to indicate invalid */
        if (vid == VID_OFF) {
            pr_debug("invalid vid %u, ignoring\n", vid);
            invalidate_entry(powernow_table, i);
            continue;
        }

        if (freq != (data->acpi_data.states[i].core_frequency * 1000)) {
            printk(KERN_INFO PFX "invalid freq entries "
                "%u kHz vs. %u kHz\n", freq,
                (unsigned int)
                (data->acpi_data.states[i].core_frequency
                 * 1000));
            invalidate_entry(powernow_table, i);
            continue;
        }
    }
    return 0;
}

static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
{
    if (data->acpi_data.state_count)
        acpi_processor_unregister_performance(&data->acpi_data,
                data->cpu);
    free_cpumask_var(data->acpi_data.shared_cpu_map);
}

static int get_transition_latency(struct powernow_k8_data *data)
{
    int max_latency = 0;
    int i;
    for (i = 0; i < data->acpi_data.state_count; i++) {
        int cur_latency = data->acpi_data.states[i].transition_latency
            + data->acpi_data.states[i].bus_master_latency;
        if (cur_latency > max_latency)
            max_latency = cur_latency;
    }
    if (max_latency == 0) {
        /*
         * Fam 11h and later may return 0 as transition latency. This
         * is intended and means "very fast". While cpufreq core and
         * governors currently can handle that gracefully, better set it
         * to 1 to avoid problems in the future.
         */
        if (boot_cpu_data.x86 < 0x11)
            printk(KERN_ERR FW_WARN PFX "Invalid zero transition "
                "latency\n");
        max_latency = 1;
    }
    /* value in usecs, needs to be in nanoseconds */
    return 1000 * max_latency;
}

/* Take a frequency, and issue the fid/vid transition command */
static int transition_frequency_fidvid(struct powernow_k8_data *data,
        unsigned int index)
{
    u32 fid = 0;
    u32 vid = 0;
    int res, i;
    struct cpufreq_freqs freqs;

    pr_debug("cpu %d transition to index %u\n", smp_processor_id(), index);

    /* fid/vid correctness check for k8 */
    /* fid are the lower 8 bits of the index we stored into
     * the cpufreq frequency table in find_psb_table, vid
     * are the upper 8 bits.
     */
    fid = data->powernow_table[index].index & 0xFF;
    vid = (data->powernow_table[index].index & 0xFF00) >> 8;

    pr_debug("table matched fid 0x%x, giving vid 0x%x\n", fid, vid);

    if (query_current_values_with_pending_wait(data))
        return 1;

    if ((data->currvid == vid) && (data->currfid == fid)) {
        pr_debug("target matches current values (fid 0x%x, vid 0x%x)\n",
            fid, vid);
        return 0;
    }

    pr_debug("cpu %d, changing to fid 0x%x, vid 0x%x\n",
        smp_processor_id(), fid, vid);
    freqs.old = find_khz_freq_from_fid(data->currfid);
    freqs.new = find_khz_freq_from_fid(fid);

    for_each_cpu(i, data->available_cores) {
        freqs.cpu = i;
        cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
    }

    res = transition_fid_vid(data, fid, vid);
    if (res)
        return res;

    freqs.new = find_khz_freq_from_fid(data->currfid);

    for_each_cpu(i, data->available_cores) {
        freqs.cpu = i;
        cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
    }
    return res;
}

/* Take a frequency, and issue the hardware pstate transition command */
static int transition_frequency_pstate(struct powernow_k8_data *data,
        unsigned int index)
{
    u32 pstate = 0;
    int res, i;
    struct cpufreq_freqs freqs;

    pr_debug("cpu %d transition to index %u\n", smp_processor_id(), index);

    /* get MSR index for hardware pstate transition */
    pstate = index & HW_PSTATE_MASK;
    if (pstate > data->max_hw_pstate)
        return -EINVAL;

    freqs.old = find_khz_freq_from_pstate(data->powernow_table,
            data->currpstate);
    freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);

    for_each_cpu(i, data->available_cores) {
        freqs.cpu = i;
        cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
    }

    res = transition_pstate(data, pstate);
    freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);

    for_each_cpu(i, data->available_cores) {
        freqs.cpu = i;
        cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
    }
    return res;
}

struct powernowk8_target_arg {
    struct cpufreq_policy *pol;
    unsigned targfreq;
    unsigned relation;
};

static long powernowk8_target_fn(void *arg)
{
    struct powernowk8_target_arg *pta = arg;
    struct cpufreq_policy *pol = pta->pol;
    unsigned targfreq = pta->targfreq;
    unsigned relation = pta->relation;
    struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
    u32 checkfid;
    u32 checkvid;
    unsigned int newstate;
    int ret;

    if (!data)
        return -EINVAL;

    checkfid = data->currfid;
    checkvid = data->currvid;

    if (pending_bit_stuck()) {
        printk(KERN_ERR PFX "failing targ, change pending bit set\n");
        return -EIO;
    }

    pr_debug("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n",
        pol->cpu, targfreq, pol->min, pol->max, relation);

    if (query_current_values_with_pending_wait(data))
        return -EIO;

    if (cpu_family != CPU_HW_PSTATE) {
        pr_debug("targ: curr fid 0x%x, vid 0x%x\n",
            data->currfid, data->currvid);

        if ((checkvid != data->currvid) ||
            (checkfid != data->currfid)) {
            printk(KERN_INFO PFX
                "error - out of sync, fid 0x%x 0x%x, "
                "vid 0x%x 0x%x\n",
                checkfid, data->currfid,
                checkvid, data->currvid);
        }
    }

    if (cpufreq_frequency_table_target(pol, data->powernow_table,
                targfreq, relation, &newstate))
        return -EIO;

    mutex_lock(&fidvid_mutex);

    powernow_k8_acpi_pst_values(data, newstate);

    if (cpu_family == CPU_HW_PSTATE)
        ret = transition_frequency_pstate(data,
            data->powernow_table[newstate].index);
    else
        ret = transition_frequency_fidvid(data, newstate);
    if (ret) {
        printk(KERN_ERR PFX "transition frequency failed\n");
        mutex_unlock(&fidvid_mutex);
        return 1;
    }
    mutex_unlock(&fidvid_mutex);

    if (cpu_family == CPU_HW_PSTATE)
        pol->cur = find_khz_freq_from_pstate(data->powernow_table,
                data->powernow_table[newstate].index);
    else
        pol->cur = find_khz_freq_from_fid(data->currfid);

    return 0;
}

/* Driver entry point to switch to the target frequency */
static int powernowk8_target(struct cpufreq_policy *pol,
        unsigned targfreq, unsigned relation)
{
    struct powernowk8_target_arg pta = { .pol = pol, .targfreq = targfreq,
                         .relation = relation };

    /*
     * Must run on @pol->cpu. cpufreq core is responsible for ensuring
     * that we're bound to the current CPU and pol->cpu stays online.
     */
    if (smp_processor_id() == pol->cpu)
        return powernowk8_target_fn(&pta);
    else
        return work_on_cpu(pol->cpu, powernowk8_target_fn, &pta);
}

/* Driver entry point to verify the policy and range of frequencies */
static int powernowk8_verify(struct cpufreq_policy *pol)
{
    struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);

    if (!data)
        return -EINVAL;

    return cpufreq_frequency_table_verify(pol, data->powernow_table);
}

struct init_on_cpu {
    struct powernow_k8_data *data;
    int rc;
};

static void __cpuinit powernowk8_cpu_init_on_cpu(void *_init_on_cpu)
{
    struct init_on_cpu *init_on_cpu = _init_on_cpu;

    if (pending_bit_stuck()) {
        printk(KERN_ERR PFX "failing init, change pending bit set\n");
        init_on_cpu->rc = -ENODEV;
        return;
    }

    if (query_current_values_with_pending_wait(init_on_cpu->data)) {
        init_on_cpu->rc = -ENODEV;
        return;
    }

    if (cpu_family == CPU_OPTERON)
        fidvid_msr_init();

    init_on_cpu->rc = 0;
}

/* per CPU init entry point to the driver */
static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
{
    static const char ACPI_PSS_BIOS_BUG_MSG[] =
        KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n"
        FW_BUG PFX "Try again with latest BIOS.\n";
    struct powernow_k8_data *data;
    struct init_on_cpu init_on_cpu;
    int rc;
    struct cpuinfo_x86 *c = &cpu_data(pol->cpu);

    if (!cpu_online(pol->cpu))
        return -ENODEV;

    smp_call_function_single(pol->cpu, check_supported_cpu, &rc, 1);
    if (rc)
        return -ENODEV;

    data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL);
    if (!data) {
        printk(KERN_ERR PFX "unable to alloc powernow_k8_data\n");
        return -ENOMEM;
    }

    data->cpu = pol->cpu;
    data->currpstate = HW_PSTATE_INVALID;

    if (powernow_k8_cpu_init_acpi(data)) {
        /*
         * Use the PSB BIOS structure. This is only available on
         * an UP version, and is deprecated by AMD.
         */
        if (num_online_cpus() != 1) {
            printk_once(ACPI_PSS_BIOS_BUG_MSG);
            goto err_out;
        }
        if (pol->cpu != 0) {
            printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for "
                   "CPU other than CPU0. Complain to your BIOS "
                   "vendor.\n");
            goto err_out;
        }
        rc = find_psb_table(data);
        if (rc)
            goto err_out;

        /* Take a crude guess here.
         * That guess was in microseconds, so multiply by 1000 */
        pol->cpuinfo.transition_latency = (
             ((data->rvo + 8) * data->vstable * VST_UNITS_20US) +
             ((1 << data->irt) * 30)) * 1000;
    } else /* ACPI _PSS objects available */
        pol->cpuinfo.transition_latency = get_transition_latency(data);

    /* only run on specific CPU from here on */
    init_on_cpu.data = data;
    smp_call_function_single(data->cpu, powernowk8_cpu_init_on_cpu,
                 &init_on_cpu, 1);
    rc = init_on_cpu.rc;
    if (rc != 0)
        goto err_out_exit_acpi;

    if (cpu_family == CPU_HW_PSTATE)
        cpumask_copy(pol->cpus, cpumask_of(pol->cpu));
    else
        cpumask_copy(pol->cpus, cpu_core_mask(pol->cpu));
    data->available_cores = pol->cpus;

    if (cpu_family == CPU_HW_PSTATE)
        pol->cur = find_khz_freq_from_pstate(data->powernow_table,
                data->currpstate);
    else
        pol->cur = find_khz_freq_from_fid(data->currfid);
    pr_debug("policy current frequency %d kHz\n", pol->cur);

    /* min/max the cpu is capable of */
    if (cpufreq_frequency_table_cpuinfo(pol, data->powernow_table)) {
        printk(KERN_ERR FW_BUG PFX "invalid powernow_table\n");
        powernow_k8_cpu_exit_acpi(data);
        kfree(data->powernow_table);
        kfree(data);
        return -EINVAL;
    }

    /* Check for APERF/MPERF support in hardware */
    if (cpu_has(c, X86_FEATURE_APERFMPERF))
        cpufreq_amd64_driver.getavg = cpufreq_get_measured_perf;

    cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu);

    if (cpu_family == CPU_HW_PSTATE)
        pr_debug("cpu_init done, current pstate 0x%x\n",
                data->currpstate);
    else
        pr_debug("cpu_init done, current fid 0x%x, vid 0x%x\n",
            data->currfid, data->currvid);

    per_cpu(powernow_data, pol->cpu) = data;

    return 0;

err_out_exit_acpi:
    powernow_k8_cpu_exit_acpi(data);

err_out:
    kfree(data);
    return -ENODEV;
}

static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol)
{
    struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);

    if (!data)
        return -EINVAL;

    powernow_k8_cpu_exit_acpi(data);

    cpufreq_frequency_table_put_attr(pol->cpu);

    kfree(data->powernow_table);
    kfree(data);
    per_cpu(powernow_data, pol->cpu) = NULL;

    return 0;
}

static void query_values_on_cpu(void *_err)
{
    int *err = _err;
    struct powernow_k8_data *data = __this_cpu_read(powernow_data);

    *err = query_current_values_with_pending_wait(data);
}

static unsigned int powernowk8_get(unsigned int cpu)
{
    struct powernow_k8_data *data = per_cpu(powernow_data, cpu);
    unsigned int khz = 0;
    int err;

    if (!data)
        return 0;

    smp_call_function_single(cpu, query_values_on_cpu, &err, true);
    if (err)
        goto out;

    if (cpu_family == CPU_HW_PSTATE)
        khz = find_khz_freq_from_pstate(data->powernow_table,
                        data->currpstate);
    else
        khz = find_khz_freq_from_fid(data->currfid);

out:
    return khz;
}

static void _cpb_toggle_msrs(bool t)
{
    int cpu;

    get_online_cpus();

    rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);

    for_each_cpu(cpu, cpu_online_mask) {
        struct msr *reg = per_cpu_ptr(msrs, cpu);
        if (t)
            reg->l &= ~BIT(25);
        else
            reg->l |= BIT(25);
    }
    wrmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);

    put_online_cpus();
}
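/*
 * Bit 25 of MSR_K7_HWCR is the boost-disable bit here, so enabling boost
 * (t == true) means clearing the bit on every online cpu, and disabling
 * means setting it; cpb_notify() below keeps hotplugged cpus in sync.
 */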

/*
 * Switch on/off core performance boosting.
 *
 * 0=disable
 * 1=enable.
 */
static void cpb_toggle(bool t)
{
    if (!cpb_capable)
        return;

    if (t && !cpb_enabled) {
        cpb_enabled = true;
        _cpb_toggle_msrs(t);
        printk(KERN_INFO PFX "Core Boosting enabled.\n");
    } else if (!t && cpb_enabled) {
        cpb_enabled = false;
        _cpb_toggle_msrs(t);
        printk(KERN_INFO PFX "Core Boosting disabled.\n");
    }
}

static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
                 size_t count)
{
    int ret = -EINVAL;
    unsigned long val = 0;

    ret = strict_strtoul(buf, 10, &val);
    if (!ret && (val == 0 || val == 1) && cpb_capable)
        cpb_toggle(val);
    else
        return -EINVAL;

    return count;
}

static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf)
{
    return sprintf(buf, "%u\n", cpb_enabled);
}

#define define_one_rw(_name) \
static struct freq_attr _name = \
__ATTR(_name, 0644, show_##_name, store_##_name)
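/*
 * define_one_rw(cpb) below thus expands to a read/write sysfs attribute
 * named "cpb", wired to show_cpb() and store_cpb(); boost can be toggled
 * with e.g. "echo 0 > /sys/devices/system/cpu/cpu0/cpufreq/cpb".
 */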

define_one_rw(cpb);

static struct freq_attr *powernow_k8_attr[] = {
    &cpufreq_freq_attr_scaling_available_freqs,
    &cpb,
    NULL,
};

static struct cpufreq_driver cpufreq_amd64_driver = {
    .verify = powernowk8_verify,
    .target = powernowk8_target,
    .bios_limit = acpi_processor_get_bios_limit,
    .init = powernowk8_cpu_init,
    .exit = __devexit_p(powernowk8_cpu_exit),
    .get = powernowk8_get,
    .name = "powernow-k8",
    .owner = THIS_MODULE,
    .attr = powernow_k8_attr,
};

/*
 * Clear the boost-disable flag on the CPU_DOWN path so that this cpu
 * cannot block the remaining ones from boosting. On the CPU_UP path we
 * simply keep the boost-disable flag in sync with the current global
 * state.
 */
static int cpb_notify(struct notifier_block *nb, unsigned long action,
              void *hcpu)
{
    unsigned cpu = (long)hcpu;
    u32 lo, hi;

    switch (action) {
    case CPU_UP_PREPARE:
    case CPU_UP_PREPARE_FROZEN:

        if (!cpb_enabled) {
            rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
            lo |= BIT(25);
            wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi);
        }
        break;

    case CPU_DOWN_PREPARE:
    case CPU_DOWN_PREPARE_FROZEN:
        rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
        lo &= ~BIT(25);
        wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi);
        break;

    default:
        break;
    }

    return NOTIFY_OK;
}

static struct notifier_block cpb_nb = {
    .notifier_call = cpb_notify,
};

/* driver entry point for init */
static int __cpuinit powernowk8_init(void)
{
    unsigned int i, supported_cpus = 0, cpu;
    int rv;

    if (!x86_match_cpu(powernow_k8_ids))
        return -ENODEV;

    for_each_online_cpu(i) {
        int rc;
        smp_call_function_single(i, check_supported_cpu, &rc, 1);
        if (rc == 0)
            supported_cpus++;
    }

    if (supported_cpus != num_online_cpus())
        return -ENODEV;

    printk(KERN_INFO PFX "Found %d %s (%d cpu cores) (" VERSION ")\n",
        num_online_nodes(), boot_cpu_data.x86_model_id, supported_cpus);

    if (boot_cpu_has(X86_FEATURE_CPB)) {

        cpb_capable = true;

        msrs = msrs_alloc();
        if (!msrs) {
            printk(KERN_ERR "%s: Error allocating msrs!\n", __func__);
            return -ENOMEM;
        }

        register_cpu_notifier(&cpb_nb);

        rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);

        for_each_cpu(cpu, cpu_online_mask) {
            struct msr *reg = per_cpu_ptr(msrs, cpu);
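            /* boost is enabled if the boost-disable bit is clear */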
            cpb_enabled |= !(reg->l & BIT(25));
        }

        printk(KERN_INFO PFX "Core Performance Boosting: %s.\n",
            (cpb_enabled ? "on" : "off"));
    }

    rv = cpufreq_register_driver(&cpufreq_amd64_driver);
    if (rv < 0 && boot_cpu_has(X86_FEATURE_CPB)) {
        unregister_cpu_notifier(&cpb_nb);
        msrs_free(msrs);
        msrs = NULL;
    }
    return rv;
}

/* driver entry point for term */
static void __exit powernowk8_exit(void)
{
    pr_debug("exit\n");

    if (boot_cpu_has(X86_FEATURE_CPB)) {
        msrs_free(msrs);
        msrs = NULL;

        unregister_cpu_notifier(&cpb_nb);
    }

    cpufreq_unregister_driver(&cpufreq_amd64_driver);
}

MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and "
        "Mark Langsdorf <mark.langsdorf@amd.com>");
MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver.");
MODULE_LICENSE("GPL");

late_initcall(powernowk8_init);
module_exit(powernowk8_exit);