Root/drivers/iommu/intel_irq_remapping.c

1#include <linux/interrupt.h>
2#include <linux/dmar.h>
3#include <linux/spinlock.h>
4#include <linux/slab.h>
5#include <linux/jiffies.h>
6#include <linux/hpet.h>
7#include <linux/pci.h>
8#include <linux/irq.h>
9#include <asm/io_apic.h>
10#include <asm/smp.h>
11#include <asm/cpu.h>
12#include <linux/intel-iommu.h>
13#include <acpi/acpi.h>
14#include <asm/irq_remapping.h>
15#include <asm/pci-direct.h>
16#include <asm/msidef.h>
17
18#include "irq_remapping.h"
19
/*
 * One IO-APIC found in a DRHD device scope, together with the remapping
 * unit it was listed under and the PCI location derived from its path.
 */
struct ioapic_scope {
    struct intel_iommu *iommu; /* unit whose scope listed this IO-APIC */
    unsigned int id;           /* enumeration id taken from the scope */
    unsigned int bus;          /* PCI bus number */
    unsigned int devfn;        /* PCI devfn number */
};
26
27struct hpet_scope {
28    struct intel_iommu *iommu;
29    u8 id;
30    unsigned int bus;
31    unsigned int devfn;
32};
33
/*
 * IR_X2APIC_MODE(mode): bit 11 when @mode is non-zero, otherwise 0; OR-ed
 * into the value written to the interrupt-remapping table address register.
 *
 * IRTE_DEST(dest): destination id as stored in an IRTE; @dest is used as-is
 * when the global x2apic_mode is set and shifted left by 8 otherwise.
 *
 * Every argument is parenthesized so that expression arguments such as
 * "a | b" keep their intended grouping after expansion.
 */
#define IR_X2APIC_MODE(mode) ((mode) ? (1 << 11) : 0)
#define IRTE_DEST(dest) ((x2apic_mode) ? (dest) : (dest) << 8)
36
37static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
38static struct hpet_scope ir_hpet[MAX_HPET_TBS];
39static int ir_ioapic_num, ir_hpet_num;
40
41static DEFINE_RAW_SPINLOCK(irq_2_ir_lock);
42
43static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
44{
45    struct irq_cfg *cfg = irq_get_chip_data(irq);
46    return cfg ? &cfg->irq_2_iommu : NULL;
47}
48
49int get_irte(int irq, struct irte *entry)
50{
51    struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
52    unsigned long flags;
53    int index;
54
55    if (!entry || !irq_iommu)
56        return -1;
57
58    raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
59
60    index = irq_iommu->irte_index + irq_iommu->sub_handle;
61    *entry = *(irq_iommu->iommu->ir_table->base + index);
62
63    raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
64    return 0;
65}
66
67static int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
68{
69    struct ir_table *table = iommu->ir_table;
70    struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
71    u16 index, start_index;
72    unsigned int mask = 0;
73    unsigned long flags;
74    int i;
75
76    if (!count || !irq_iommu)
77        return -1;
78
79    /*
80     * start the IRTE search from index 0.
81     */
82    index = start_index = 0;
83
84    if (count > 1) {
85        count = __roundup_pow_of_two(count);
86        mask = ilog2(count);
87    }
88
89    if (mask > ecap_max_handle_mask(iommu->ecap)) {
90        printk(KERN_ERR
91               "Requested mask %x exceeds the max invalidation handle"
92               " mask value %Lx\n", mask,
93               ecap_max_handle_mask(iommu->ecap));
94        return -1;
95    }
96
97    raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
98    do {
99        for (i = index; i < index + count; i++)
100            if (table->base[i].present)
101                break;
102        /* empty index found */
103        if (i == index + count)
104            break;
105
106        index = (index + count) % INTR_REMAP_TABLE_ENTRIES;
107
108        if (index == start_index) {
109            raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
110            printk(KERN_ERR "can't allocate an IRTE\n");
111            return -1;
112        }
113    } while (1);
114
115    for (i = index; i < index + count; i++)
116        table->base[i].present = 1;
117
118    irq_iommu->iommu = iommu;
119    irq_iommu->irte_index = index;
120    irq_iommu->sub_handle = 0;
121    irq_iommu->irte_mask = mask;
122
123    raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
124
125    return index;
126}
127
128static int qi_flush_iec(struct intel_iommu *iommu, int index, int mask)
129{
130    struct qi_desc desc;
131
132    desc.low = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask)
133           | QI_IEC_SELECTIVE;
134    desc.high = 0;
135
136    return qi_submit_sync(&desc, iommu);
137}
138
139static int map_irq_to_irte_handle(int irq, u16 *sub_handle)
140{
141    struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
142    unsigned long flags;
143    int index;
144
145    if (!irq_iommu)
146        return -1;
147
148    raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
149    *sub_handle = irq_iommu->sub_handle;
150    index = irq_iommu->irte_index;
151    raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
152    return index;
153}
154
155static int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
156{
157    struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
158    unsigned long flags;
159
160    if (!irq_iommu)
161        return -1;
162
163    raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
164
165    irq_iommu->iommu = iommu;
166    irq_iommu->irte_index = index;
167    irq_iommu->sub_handle = subhandle;
168    irq_iommu->irte_mask = 0;
169
170    raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
171
172    return 0;
173}
174
175static int modify_irte(int irq, struct irte *irte_modified)
176{
177    struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
178    struct intel_iommu *iommu;
179    unsigned long flags;
180    struct irte *irte;
181    int rc, index;
182
183    if (!irq_iommu)
184        return -1;
185
186    raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
187
188    iommu = irq_iommu->iommu;
189
190    index = irq_iommu->irte_index + irq_iommu->sub_handle;
191    irte = &iommu->ir_table->base[index];
192
193    set_64bit(&irte->low, irte_modified->low);
194    set_64bit(&irte->high, irte_modified->high);
195    __iommu_flush_cache(iommu, irte, sizeof(*irte));
196
197    rc = qi_flush_iec(iommu, index, 0);
198    raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
199
200    return rc;
201}
202
203static struct intel_iommu *map_hpet_to_ir(u8 hpet_id)
204{
205    int i;
206
207    for (i = 0; i < MAX_HPET_TBS; i++)
208        if (ir_hpet[i].id == hpet_id)
209            return ir_hpet[i].iommu;
210    return NULL;
211}
212
213static struct intel_iommu *map_ioapic_to_ir(int apic)
214{
215    int i;
216
217    for (i = 0; i < MAX_IO_APICS; i++)
218        if (ir_ioapic[i].id == apic)
219            return ir_ioapic[i].iommu;
220    return NULL;
221}
222
223static struct intel_iommu *map_dev_to_ir(struct pci_dev *dev)
224{
225    struct dmar_drhd_unit *drhd;
226
227    drhd = dmar_find_matched_drhd_unit(dev);
228    if (!drhd)
229        return NULL;
230
231    return drhd->iommu;
232}
233
234static int clear_entries(struct irq_2_iommu *irq_iommu)
235{
236    struct irte *start, *entry, *end;
237    struct intel_iommu *iommu;
238    int index;
239
240    if (irq_iommu->sub_handle)
241        return 0;
242
243    iommu = irq_iommu->iommu;
244    index = irq_iommu->irte_index + irq_iommu->sub_handle;
245
246    start = iommu->ir_table->base + index;
247    end = start + (1 << irq_iommu->irte_mask);
248
249    for (entry = start; entry < end; entry++) {
250        set_64bit(&entry->low, 0);
251        set_64bit(&entry->high, 0);
252    }
253
254    return qi_flush_iec(iommu, index, irq_iommu->irte_mask);
255}
256
257static int free_irte(int irq)
258{
259    struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
260    unsigned long flags;
261    int rc;
262
263    if (!irq_iommu)
264        return -1;
265
266    raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
267
268    rc = clear_entries(irq_iommu);
269
270    irq_iommu->iommu = NULL;
271    irq_iommu->irte_index = 0;
272    irq_iommu->sub_handle = 0;
273    irq_iommu->irte_mask = 0;
274
275    raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
276
277    return rc;
278}
279
/*
 * Source validation type (SVT field of an IRTE).
 */
#define SVT_NO_VERIFY 0x0 /* no verification is required */
#define SVT_VERIFY_SID_SQ 0x1 /* verify using SID and SQ fields */
#define SVT_VERIFY_BUS 0x2 /* verify bus of request-id */

/*
 * Source-id qualifier (SQ field of an IRTE): which bits of the request-id
 * are compared.
 */
#define SQ_ALL_16 0x0 /* verify all 16 bits of request-id */
#define SQ_13_IGNORE_1 0x1 /* verify the 13 most significant bits and
                  * ignore the third least significant bit
                  */
#define SQ_13_IGNORE_2 0x2 /* verify the 13 most significant bits and
                  * ignore the second and third least significant bits
                  */
#define SQ_13_IGNORE_3 0x3 /* verify the 13 most significant bits and
                  * ignore the three least significant bits
                  */
300
301/*
302 * set SVT, SQ and SID fields of irte to verify
303 * source ids of interrupt requests
304 */
305static void set_irte_sid(struct irte *irte, unsigned int svt,
306             unsigned int sq, unsigned int sid)
307{
308    if (disable_sourceid_checking)
309        svt = SVT_NO_VERIFY;
310    irte->svt = svt;
311    irte->sq = sq;
312    irte->sid = sid;
313}
314
315static int set_ioapic_sid(struct irte *irte, int apic)
316{
317    int i;
318    u16 sid = 0;
319
320    if (!irte)
321        return -1;
322
323    for (i = 0; i < MAX_IO_APICS; i++) {
324        if (ir_ioapic[i].id == apic) {
325            sid = (ir_ioapic[i].bus << 8) | ir_ioapic[i].devfn;
326            break;
327        }
328    }
329
330    if (sid == 0) {
331        pr_warning("Failed to set source-id of IOAPIC (%d)\n", apic);
332        return -1;
333    }
334
335    set_irte_sid(irte, 1, 0, sid);
336
337    return 0;
338}
339
340static int set_hpet_sid(struct irte *irte, u8 id)
341{
342    int i;
343    u16 sid = 0;
344
345    if (!irte)
346        return -1;
347
348    for (i = 0; i < MAX_HPET_TBS; i++) {
349        if (ir_hpet[i].id == id) {
350            sid = (ir_hpet[i].bus << 8) | ir_hpet[i].devfn;
351            break;
352        }
353    }
354
355    if (sid == 0) {
356        pr_warning("Failed to set source-id of HPET block (%d)\n", id);
357        return -1;
358    }
359
360    /*
361     * Should really use SQ_ALL_16. Some platforms are broken.
362     * While we figure out the right quirks for these broken platforms, use
363     * SQ_13_IGNORE_3 for now.
364     */
365    set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_13_IGNORE_3, sid);
366
367    return 0;
368}
369
370static int set_msi_sid(struct irte *irte, struct pci_dev *dev)
371{
372    struct pci_dev *bridge;
373
374    if (!irte || !dev)
375        return -1;
376
377    /* PCIe device or Root Complex integrated PCI device */
378    if (pci_is_pcie(dev) || !dev->bus->parent) {
379        set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16,
380                 (dev->bus->number << 8) | dev->devfn);
381        return 0;
382    }
383
384    bridge = pci_find_upstream_pcie_bridge(dev);
385    if (bridge) {
386        if (pci_is_pcie(bridge))/* this is a PCIe-to-PCI/PCIX bridge */
387            set_irte_sid(irte, SVT_VERIFY_BUS, SQ_ALL_16,
388                (bridge->bus->number << 8) | dev->bus->number);
389        else /* this is a legacy PCI bridge */
390            set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16,
391                (bridge->bus->number << 8) | bridge->devfn);
392    }
393
394    return 0;
395}
396
397static void iommu_set_irq_remapping(struct intel_iommu *iommu, int mode)
398{
399    u64 addr;
400    u32 sts;
401    unsigned long flags;
402
403    addr = virt_to_phys((void *)iommu->ir_table->base);
404
405    raw_spin_lock_irqsave(&iommu->register_lock, flags);
406
407    dmar_writeq(iommu->reg + DMAR_IRTA_REG,
408            (addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE);
409
410    /* Set interrupt-remapping table pointer */
411    iommu->gcmd |= DMA_GCMD_SIRTP;
412    writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
413
414    IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
415              readl, (sts & DMA_GSTS_IRTPS), sts);
416    raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
417
418    /*
419     * global invalidation of interrupt entry cache before enabling
420     * interrupt-remapping.
421     */
422    qi_global_iec(iommu);
423
424    raw_spin_lock_irqsave(&iommu->register_lock, flags);
425
426    /* Enable interrupt-remapping */
427    iommu->gcmd |= DMA_GCMD_IRE;
428    writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
429
430    IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
431              readl, (sts & DMA_GSTS_IRES), sts);
432
433    raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
434}
435
436
437static int intel_setup_irq_remapping(struct intel_iommu *iommu, int mode)
438{
439    struct ir_table *ir_table;
440    struct page *pages;
441
442    ir_table = iommu->ir_table = kzalloc(sizeof(struct ir_table),
443                         GFP_ATOMIC);
444
445    if (!iommu->ir_table)
446        return -ENOMEM;
447
448    pages = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
449                 INTR_REMAP_PAGE_ORDER);
450
451    if (!pages) {
452        printk(KERN_ERR "failed to allocate pages of order %d\n",
453               INTR_REMAP_PAGE_ORDER);
454        kfree(iommu->ir_table);
455        return -ENOMEM;
456    }
457
458    ir_table->base = page_address(pages);
459
460    iommu_set_irq_remapping(iommu, mode);
461    return 0;
462}
463
464/*
465 * Disable Interrupt Remapping.
466 */
467static void iommu_disable_irq_remapping(struct intel_iommu *iommu)
468{
469    unsigned long flags;
470    u32 sts;
471
472    if (!ecap_ir_support(iommu->ecap))
473        return;
474
475    /*
476     * global invalidation of interrupt entry cache before disabling
477     * interrupt-remapping.
478     */
479    qi_global_iec(iommu);
480
481    raw_spin_lock_irqsave(&iommu->register_lock, flags);
482
483    sts = dmar_readq(iommu->reg + DMAR_GSTS_REG);
484    if (!(sts & DMA_GSTS_IRES))
485        goto end;
486
487    iommu->gcmd &= ~DMA_GCMD_IRE;
488    writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
489
490    IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
491              readl, !(sts & DMA_GSTS_IRES), sts);
492
493end:
494    raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
495}
496
497static int __init dmar_x2apic_optout(void)
498{
499    struct acpi_table_dmar *dmar;
500    dmar = (struct acpi_table_dmar *)dmar_tbl;
501    if (!dmar || no_x2apic_optout)
502        return 0;
503    return dmar->flags & DMAR_X2APIC_OPT_OUT;
504}
505
506static int __init intel_irq_remapping_supported(void)
507{
508    struct dmar_drhd_unit *drhd;
509
510    if (disable_irq_remap)
511        return 0;
512
513    if (!dmar_ir_support())
514        return 0;
515
516    for_each_drhd_unit(drhd) {
517        struct intel_iommu *iommu = drhd->iommu;
518
519        if (!ecap_ir_support(iommu->ecap))
520            return 0;
521    }
522
523    return 1;
524}
525
526static int __init intel_enable_irq_remapping(void)
527{
528    struct dmar_drhd_unit *drhd;
529    int setup = 0;
530    int eim = 0;
531
532    if (parse_ioapics_under_ir() != 1) {
533        printk(KERN_INFO "Not enable interrupt remapping\n");
534        return -1;
535    }
536
537    if (x2apic_supported()) {
538        eim = !dmar_x2apic_optout();
539        WARN(!eim, KERN_WARNING
540               "Your BIOS is broken and requested that x2apic be disabled\n"
541               "This will leave your machine vulnerable to irq-injection attacks\n"
542               "Use 'intremap=no_x2apic_optout' to override BIOS request\n");
543    }
544
545    for_each_drhd_unit(drhd) {
546        struct intel_iommu *iommu = drhd->iommu;
547
548        /*
549         * If the queued invalidation is already initialized,
550         * shouldn't disable it.
551         */
552        if (iommu->qi)
553            continue;
554
555        /*
556         * Clear previous faults.
557         */
558        dmar_fault(-1, iommu);
559
560        /*
561         * Disable intr remapping and queued invalidation, if already
562         * enabled prior to OS handover.
563         */
564        iommu_disable_irq_remapping(iommu);
565
566        dmar_disable_qi(iommu);
567    }
568
569    /*
570     * check for the Interrupt-remapping support
571     */
572    for_each_drhd_unit(drhd) {
573        struct intel_iommu *iommu = drhd->iommu;
574
575        if (!ecap_ir_support(iommu->ecap))
576            continue;
577
578        if (eim && !ecap_eim_support(iommu->ecap)) {
579            printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD, "
580                   " ecap %Lx\n", drhd->reg_base_addr, iommu->ecap);
581            return -1;
582        }
583    }
584
585    /*
586     * Enable queued invalidation for all the DRHD's.
587     */
588    for_each_drhd_unit(drhd) {
589        int ret;
590        struct intel_iommu *iommu = drhd->iommu;
591        ret = dmar_enable_qi(iommu);
592
593        if (ret) {
594            printk(KERN_ERR "DRHD %Lx: failed to enable queued, "
595                   " invalidation, ecap %Lx, ret %d\n",
596                   drhd->reg_base_addr, iommu->ecap, ret);
597            return -1;
598        }
599    }
600
601    /*
602     * Setup Interrupt-remapping for all the DRHD's now.
603     */
604    for_each_drhd_unit(drhd) {
605        struct intel_iommu *iommu = drhd->iommu;
606
607        if (!ecap_ir_support(iommu->ecap))
608            continue;
609
610        if (intel_setup_irq_remapping(iommu, eim))
611            goto error;
612
613        setup = 1;
614    }
615
616    if (!setup)
617        goto error;
618
619    irq_remapping_enabled = 1;
620    pr_info("Enabled IRQ remapping in %s mode\n", eim ? "x2apic" : "xapic");
621
622    return eim ? IRQ_REMAP_X2APIC_MODE : IRQ_REMAP_XAPIC_MODE;
623
624error:
625    /*
626     * handle error condition gracefully here!
627     */
628    return -1;
629}
630
631static void ir_parse_one_hpet_scope(struct acpi_dmar_device_scope *scope,
632                      struct intel_iommu *iommu)
633{
634    struct acpi_dmar_pci_path *path;
635    u8 bus;
636    int count;
637
638    bus = scope->bus;
639    path = (struct acpi_dmar_pci_path *)(scope + 1);
640    count = (scope->length - sizeof(struct acpi_dmar_device_scope))
641        / sizeof(struct acpi_dmar_pci_path);
642
643    while (--count > 0) {
644        /*
645         * Access PCI directly due to the PCI
646         * subsystem isn't initialized yet.
647         */
648        bus = read_pci_config_byte(bus, path->dev, path->fn,
649                       PCI_SECONDARY_BUS);
650        path++;
651    }
652    ir_hpet[ir_hpet_num].bus = bus;
653    ir_hpet[ir_hpet_num].devfn = PCI_DEVFN(path->dev, path->fn);
654    ir_hpet[ir_hpet_num].iommu = iommu;
655    ir_hpet[ir_hpet_num].id = scope->enumeration_id;
656    ir_hpet_num++;
657}
658
659static void ir_parse_one_ioapic_scope(struct acpi_dmar_device_scope *scope,
660                      struct intel_iommu *iommu)
661{
662    struct acpi_dmar_pci_path *path;
663    u8 bus;
664    int count;
665
666    bus = scope->bus;
667    path = (struct acpi_dmar_pci_path *)(scope + 1);
668    count = (scope->length - sizeof(struct acpi_dmar_device_scope))
669        / sizeof(struct acpi_dmar_pci_path);
670
671    while (--count > 0) {
672        /*
673         * Access PCI directly due to the PCI
674         * subsystem isn't initialized yet.
675         */
676        bus = read_pci_config_byte(bus, path->dev, path->fn,
677                       PCI_SECONDARY_BUS);
678        path++;
679    }
680
681    ir_ioapic[ir_ioapic_num].bus = bus;
682    ir_ioapic[ir_ioapic_num].devfn = PCI_DEVFN(path->dev, path->fn);
683    ir_ioapic[ir_ioapic_num].iommu = iommu;
684    ir_ioapic[ir_ioapic_num].id = scope->enumeration_id;
685    ir_ioapic_num++;
686}
687
688static int ir_parse_ioapic_hpet_scope(struct acpi_dmar_header *header,
689                      struct intel_iommu *iommu)
690{
691    struct acpi_dmar_hardware_unit *drhd;
692    struct acpi_dmar_device_scope *scope;
693    void *start, *end;
694
695    drhd = (struct acpi_dmar_hardware_unit *)header;
696
697    start = (void *)(drhd + 1);
698    end = ((void *)drhd) + header->length;
699
700    while (start < end) {
701        scope = start;
702        if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC) {
703            if (ir_ioapic_num == MAX_IO_APICS) {
704                printk(KERN_WARNING "Exceeded Max IO APICS\n");
705                return -1;
706            }
707
708            printk(KERN_INFO "IOAPIC id %d under DRHD base "
709                   " 0x%Lx IOMMU %d\n", scope->enumeration_id,
710                   drhd->address, iommu->seq_id);
711
712            ir_parse_one_ioapic_scope(scope, iommu);
713        } else if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_HPET) {
714            if (ir_hpet_num == MAX_HPET_TBS) {
715                printk(KERN_WARNING "Exceeded Max HPET blocks\n");
716                return -1;
717            }
718
719            printk(KERN_INFO "HPET id %d under DRHD base"
720                   " 0x%Lx\n", scope->enumeration_id,
721                   drhd->address);
722
723            ir_parse_one_hpet_scope(scope, iommu);
724        }
725        start += scope->length;
726    }
727
728    return 0;
729}
730
731/*
732 * Finds the assocaition between IOAPIC's and its Interrupt-remapping
733 * hardware unit.
734 */
735int __init parse_ioapics_under_ir(void)
736{
737    struct dmar_drhd_unit *drhd;
738    int ir_supported = 0;
739    int ioapic_idx;
740
741    for_each_drhd_unit(drhd) {
742        struct intel_iommu *iommu = drhd->iommu;
743
744        if (ecap_ir_support(iommu->ecap)) {
745            if (ir_parse_ioapic_hpet_scope(drhd->hdr, iommu))
746                return -1;
747
748            ir_supported = 1;
749        }
750    }
751
752    if (!ir_supported)
753        return 0;
754
755    for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) {
756        int ioapic_id = mpc_ioapic_id(ioapic_idx);
757        if (!map_ioapic_to_ir(ioapic_id)) {
758            pr_err(FW_BUG "ioapic %d has no mapping iommu, "
759                   "interrupt remapping will be disabled\n",
760                   ioapic_id);
761            return -1;
762        }
763    }
764
765    return 1;
766}
767
768int __init ir_dev_scope_init(void)
769{
770    if (!irq_remapping_enabled)
771        return 0;
772
773    return dmar_dev_scope_init();
774}
775rootfs_initcall(ir_dev_scope_init);
776
777static void disable_irq_remapping(void)
778{
779    struct dmar_drhd_unit *drhd;
780    struct intel_iommu *iommu = NULL;
781
782    /*
783     * Disable Interrupt-remapping for all the DRHD's now.
784     */
785    for_each_iommu(iommu, drhd) {
786        if (!ecap_ir_support(iommu->ecap))
787            continue;
788
789        iommu_disable_irq_remapping(iommu);
790    }
791}
792
793static int reenable_irq_remapping(int eim)
794{
795    struct dmar_drhd_unit *drhd;
796    int setup = 0;
797    struct intel_iommu *iommu = NULL;
798
799    for_each_iommu(iommu, drhd)
800        if (iommu->qi)
801            dmar_reenable_qi(iommu);
802
803    /*
804     * Setup Interrupt-remapping for all the DRHD's now.
805     */
806    for_each_iommu(iommu, drhd) {
807        if (!ecap_ir_support(iommu->ecap))
808            continue;
809
810        /* Set up interrupt remapping for iommu.*/
811        iommu_set_irq_remapping(iommu, eim);
812        setup = 1;
813    }
814
815    if (!setup)
816        goto error;
817
818    return 0;
819
820error:
821    /*
822     * handle error condition gracefully here!
823     */
824    return -1;
825}
826
827static void prepare_irte(struct irte *irte, int vector,
828             unsigned int dest)
829{
830    memset(irte, 0, sizeof(*irte));
831
832    irte->present = 1;
833    irte->dst_mode = apic->irq_dest_mode;
834    /*
835     * Trigger mode in the IRTE will always be edge, and for IO-APIC, the
836     * actual level or edge trigger will be setup in the IO-APIC
837     * RTE. This will help simplify level triggered irq migration.
838     * For more details, see the comments (in io_apic.c) explainig IO-APIC
839     * irq migration in the presence of interrupt-remapping.
840    */
841    irte->trigger_mode = 0;
842    irte->dlvry_mode = apic->irq_delivery_mode;
843    irte->vector = vector;
844    irte->dest_id = IRTE_DEST(dest);
845    irte->redir_hint = 1;
846}
847
848static int intel_setup_ioapic_entry(int irq,
849                    struct IO_APIC_route_entry *route_entry,
850                    unsigned int destination, int vector,
851                    struct io_apic_irq_attr *attr)
852{
853    int ioapic_id = mpc_ioapic_id(attr->ioapic);
854    struct intel_iommu *iommu = map_ioapic_to_ir(ioapic_id);
855    struct IR_IO_APIC_route_entry *entry;
856    struct irte irte;
857    int index;
858
859    if (!iommu) {
860        pr_warn("No mapping iommu for ioapic %d\n", ioapic_id);
861        return -ENODEV;
862    }
863
864    entry = (struct IR_IO_APIC_route_entry *)route_entry;
865
866    index = alloc_irte(iommu, irq, 1);
867    if (index < 0) {
868        pr_warn("Failed to allocate IRTE for ioapic %d\n", ioapic_id);
869        return -ENOMEM;
870    }
871
872    prepare_irte(&irte, vector, destination);
873
874    /* Set source-id of interrupt request */
875    set_ioapic_sid(&irte, ioapic_id);
876
877    modify_irte(irq, &irte);
878
879    apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: "
880        "Set IRTE entry (P:%d FPD:%d Dst_Mode:%d "
881        "Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X "
882        "Avail:%X Vector:%02X Dest:%08X "
883        "SID:%04X SQ:%X SVT:%X)\n",
884        attr->ioapic, irte.present, irte.fpd, irte.dst_mode,
885        irte.redir_hint, irte.trigger_mode, irte.dlvry_mode,
886        irte.avail, irte.vector, irte.dest_id,
887        irte.sid, irte.sq, irte.svt);
888
889    memset(entry, 0, sizeof(*entry));
890
891    entry->index2 = (index >> 15) & 0x1;
892    entry->zero = 0;
893    entry->format = 1;
894    entry->index = (index & 0x7fff);
895    /*
896     * IO-APIC RTE will be configured with virtual vector.
897     * irq handler will do the explicit EOI to the io-apic.
898     */
899    entry->vector = attr->ioapic_pin;
900    entry->mask = 0; /* enable IRQ */
901    entry->trigger = attr->trigger;
902    entry->polarity = attr->polarity;
903
904    /* Mask level triggered irqs.
905     * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
906     */
907    if (attr->trigger)
908        entry->mask = 1;
909
910    return 0;
911}
912
913/*
914 * Migrate the IO-APIC irq in the presence of intr-remapping.
915 *
916 * For both level and edge triggered, irq migration is a simple atomic
917 * update(of vector and cpu destination) of IRTE and flush the hardware cache.
918 *
919 * For level triggered, we eliminate the io-apic RTE modification (with the
920 * updated vector information), by using a virtual vector (io-apic pin number).
921 * Real vector that is used for interrupting cpu will be coming from
922 * the interrupt-remapping table entry.
923 *
924 * As the migration is a simple atomic update of IRTE, the same mechanism
925 * is used to migrate MSI irq's in the presence of interrupt-remapping.
926 */
927static int
928intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
929              bool force)
930{
931    struct irq_cfg *cfg = data->chip_data;
932    unsigned int dest, irq = data->irq;
933    struct irte irte;
934    int err;
935
936    if (!config_enabled(CONFIG_SMP))
937        return -EINVAL;
938
939    if (!cpumask_intersects(mask, cpu_online_mask))
940        return -EINVAL;
941
942    if (get_irte(irq, &irte))
943        return -EBUSY;
944
945    err = assign_irq_vector(irq, cfg, mask);
946    if (err)
947        return err;
948
949    err = apic->cpu_mask_to_apicid_and(cfg->domain, mask, &dest);
950    if (err) {
951        if (assign_irq_vector(irq, cfg, data->affinity))
952            pr_err("Failed to recover vector for irq %d\n", irq);
953        return err;
954    }
955
956    irte.vector = cfg->vector;
957    irte.dest_id = IRTE_DEST(dest);
958
959    /*
960     * Atomically updates the IRTE with the new destination, vector
961     * and flushes the interrupt entry cache.
962     */
963    modify_irte(irq, &irte);
964
965    /*
966     * After this point, all the interrupts will start arriving
967     * at the new destination. So, time to cleanup the previous
968     * vector allocation.
969     */
970    if (cfg->move_in_progress)
971        send_cleanup_vector(cfg);
972
973    cpumask_copy(data->affinity, mask);
974    return 0;
975}
976
977static void intel_compose_msi_msg(struct pci_dev *pdev,
978                  unsigned int irq, unsigned int dest,
979                  struct msi_msg *msg, u8 hpet_id)
980{
981    struct irq_cfg *cfg;
982    struct irte irte;
983    u16 sub_handle = 0;
984    int ir_index;
985
986    cfg = irq_get_chip_data(irq);
987
988    ir_index = map_irq_to_irte_handle(irq, &sub_handle);
989    BUG_ON(ir_index == -1);
990
991    prepare_irte(&irte, cfg->vector, dest);
992
993    /* Set source-id of interrupt request */
994    if (pdev)
995        set_msi_sid(&irte, pdev);
996    else
997        set_hpet_sid(&irte, hpet_id);
998
999    modify_irte(irq, &irte);
1000
1001    msg->address_hi = MSI_ADDR_BASE_HI;
1002    msg->data = sub_handle;
1003    msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
1004              MSI_ADDR_IR_SHV |
1005              MSI_ADDR_IR_INDEX1(ir_index) |
1006              MSI_ADDR_IR_INDEX2(ir_index);
1007}
1008
1009/*
1010 * Map the PCI dev to the corresponding remapping hardware unit
1011 * and allocate 'nvec' consecutive interrupt-remapping table entries
1012 * in it.
1013 */
1014static int intel_msi_alloc_irq(struct pci_dev *dev, int irq, int nvec)
1015{
1016    struct intel_iommu *iommu;
1017    int index;
1018
1019    iommu = map_dev_to_ir(dev);
1020    if (!iommu) {
1021        printk(KERN_ERR
1022               "Unable to map PCI %s to iommu\n", pci_name(dev));
1023        return -ENOENT;
1024    }
1025
1026    index = alloc_irte(iommu, irq, nvec);
1027    if (index < 0) {
1028        printk(KERN_ERR
1029               "Unable to allocate %d IRTE for PCI %s\n", nvec,
1030               pci_name(dev));
1031        return -ENOSPC;
1032    }
1033    return index;
1034}
1035
1036static int intel_msi_setup_irq(struct pci_dev *pdev, unsigned int irq,
1037                   int index, int sub_handle)
1038{
1039    struct intel_iommu *iommu;
1040
1041    iommu = map_dev_to_ir(pdev);
1042    if (!iommu)
1043        return -ENOENT;
1044    /*
1045     * setup the mapping between the irq and the IRTE
1046     * base index, the sub_handle pointing to the
1047     * appropriate interrupt remap table entry.
1048     */
1049    set_irte_irq(irq, iommu, index, sub_handle);
1050
1051    return 0;
1052}
1053
/*
 * Allocate a single IRTE for @irq on the IOMMU covering HPET block @id.
 * Returns 0 on success, -1 when no IOMMU is found or allocation fails.
 */
static int intel_setup_hpet_msi(unsigned int irq, unsigned int id)
{
    struct intel_iommu *iommu = map_hpet_to_ir(id);

    if (!iommu)
        return -1;

    return (alloc_irte(iommu, irq, 1) < 0) ? -1 : 0;
}
1068
1069struct irq_remap_ops intel_irq_remap_ops = {
1070    .supported = intel_irq_remapping_supported,
1071    .prepare = dmar_table_init,
1072    .enable = intel_enable_irq_remapping,
1073    .disable = disable_irq_remapping,
1074    .reenable = reenable_irq_remapping,
1075    .enable_faulting = enable_drhd_fault_handling,
1076    .setup_ioapic_entry = intel_setup_ioapic_entry,
1077    .set_affinity = intel_ioapic_set_affinity,
1078    .free_irq = free_irte,
1079    .compose_msi_msg = intel_compose_msi_msg,
1080    .msi_alloc_irq = intel_msi_alloc_irq,
1081    .msi_setup_irq = intel_msi_setup_irq,
1082    .setup_hpet_msi = intel_setup_hpet_msi,
1083};
1084

Archive Download this file



interactive