drivers/iommu/dmar.c

/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Copyright (C) 2006-2008 Intel Corporation
 * Author: Ashok Raj <ashok.raj@intel.com>
 * Author: Shaohua Li <shaohua.li@intel.com>
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 *
 * This file implements early detection/parsing of remapping devices
 * reported to the OS through the BIOS via the DMA Remapping Reporting
 * (DMAR) ACPI table.
 *
 * These routines are used by both DMA-remapping and interrupt-remapping.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt /* has to precede printk.h */

#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/iova.h>
#include <linux/intel-iommu.h>
#include <linux/timer.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <linux/slab.h>
#include <asm/irq_remapping.h>
#include <asm/iommu_table.h>

/* No locks are needed, as the DMA remapping hardware unit
 * list is constructed at boot time and hotplug of
 * these units is not supported by the architecture.
 */
LIST_HEAD(dmar_drhd_units);

struct acpi_table_header * __initdata dmar_tbl;
static acpi_size dmar_tbl_size;

static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
{
    /*
     * Add INCLUDE_ALL at the tail, so a scan of the list will find it
     * at the very end.
     */
    if (drhd->include_all)
        list_add_tail(&drhd->list, &dmar_drhd_units);
    else
        list_add(&drhd->list, &dmar_drhd_units);
}

static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
                       struct pci_dev **dev, u16 segment)
{
    struct pci_bus *bus;
    struct pci_dev *pdev = NULL;
    struct acpi_dmar_pci_path *path;
    int count;

    bus = pci_find_bus(segment, scope->bus);
    path = (struct acpi_dmar_pci_path *)(scope + 1);
    count = (scope->length - sizeof(struct acpi_dmar_device_scope))
        / sizeof(struct acpi_dmar_pci_path);

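    /*
     * Walk the ACPI device-scope path one hop at a time: each path entry
     * names a (device, function) under the current bus, and a bridge
     * hands us its subordinate bus for the next hop.
     */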
    while (count) {
        if (pdev)
            pci_dev_put(pdev);
        /*
         * Some BIOSes list nonexistent devices in the DMAR table;
         * just ignore them.
         */
        if (!bus) {
            pr_warn("Device scope bus [%d] not found\n", scope->bus);
            break;
        }
        pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
        if (!pdev) {
            /* warning will be printed below */
            break;
        }
        path++;
        count--;
        bus = pdev->subordinate;
    }
    if (!pdev) {
        pr_warn("Device scope device [%04x:%02x:%02x.%02x] not found\n",
            segment, scope->bus, path->dev, path->fn);
        *dev = NULL;
        return 0;
    }
    if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
            pdev->subordinate) || (scope->entry_type ==
            ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
        /* report the mismatch before dropping our reference to pdev */
        pr_warn("Device scope type does not match for %s\n",
            pci_name(pdev));
        pci_dev_put(pdev);
        return -EINVAL;
    }
    *dev = pdev;
    return 0;
}

int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
                struct pci_dev ***devices, u16 segment)
{
    struct acpi_dmar_device_scope *scope;
    void *tmp = start;
    int index;
    int ret;

    *cnt = 0;
    while (start < end) {
        scope = start;
        if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
            scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
            (*cnt)++;
        else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC) {
            pr_warn("Unsupported device scope\n");
        }
        start += scope->length;
    }
    if (*cnt == 0)
        return 0;

    *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
    if (!*devices)
        return -ENOMEM;

    start = tmp;
    index = 0;
    while (start < end) {
        scope = start;
        if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
            scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
            ret = dmar_parse_one_dev_scope(scope,
                &(*devices)[index], segment);
            if (ret) {
                kfree(*devices);
                return ret;
            }
            index++;
        }
        start += scope->length;
    }

    return 0;
}

/**
 * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
 * structure, which uniquely represents one DMA remapping hardware unit
 * present in the platform
 */
static int __init
dmar_parse_one_drhd(struct acpi_dmar_header *header)
{
    struct acpi_dmar_hardware_unit *drhd;
    struct dmar_drhd_unit *dmaru;
    int ret = 0;

    drhd = (struct acpi_dmar_hardware_unit *)header;
    dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
    if (!dmaru)
        return -ENOMEM;

    dmaru->hdr = header;
    dmaru->reg_base_addr = drhd->address;
    dmaru->segment = drhd->segment;
    dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */

    ret = alloc_iommu(dmaru);
    if (ret) {
        kfree(dmaru);
        return ret;
    }
    dmar_register_drhd_unit(dmaru);
    return 0;
}

static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
{
    struct acpi_dmar_hardware_unit *drhd;
    int ret = 0;

    drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;

    if (dmaru->include_all)
        return 0;

    ret = dmar_parse_dev_scope((void *)(drhd + 1),
                ((void *)drhd) + drhd->header.length,
                &dmaru->devices_cnt, &dmaru->devices,
                drhd->segment);
    if (ret) {
        list_del(&dmaru->list);
        kfree(dmaru);
    }
    return ret;
}

#ifdef CONFIG_ACPI_NUMA
static int __init
dmar_parse_one_rhsa(struct acpi_dmar_header *header)
{
    struct acpi_dmar_rhsa *rhsa;
    struct dmar_drhd_unit *drhd;

    rhsa = (struct acpi_dmar_rhsa *)header;
    for_each_drhd_unit(drhd) {
        if (drhd->reg_base_addr == rhsa->base_address) {
            int node = acpi_map_pxm_to_node(rhsa->proximity_domain);

            if (!node_online(node))
                node = -1;
            drhd->iommu->node = node;
            return 0;
        }
    }
    /*
     * The loop cursor is past the end of the list here, so report the
     * address from the RHSA entry itself rather than dereferencing it.
     */
    WARN_TAINT(
        1, TAINT_FIRMWARE_WORKAROUND,
        "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
        "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
        rhsa->base_address,
        dmi_get_system_info(DMI_BIOS_VENDOR),
        dmi_get_system_info(DMI_BIOS_VERSION),
        dmi_get_system_info(DMI_PRODUCT_VERSION));

    return 0;
}
#endif

static void __init
dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
{
    struct acpi_dmar_hardware_unit *drhd;
    struct acpi_dmar_reserved_memory *rmrr;
    struct acpi_dmar_atsr *atsr;
    struct acpi_dmar_rhsa *rhsa;

    switch (header->type) {
    case ACPI_DMAR_TYPE_HARDWARE_UNIT:
        drhd = container_of(header, struct acpi_dmar_hardware_unit,
                    header);
        pr_info("DRHD base: %#016Lx flags: %#x\n",
            (unsigned long long)drhd->address, drhd->flags);
        break;
    case ACPI_DMAR_TYPE_RESERVED_MEMORY:
        rmrr = container_of(header, struct acpi_dmar_reserved_memory,
                    header);
        pr_info("RMRR base: %#016Lx end: %#016Lx\n",
            (unsigned long long)rmrr->base_address,
            (unsigned long long)rmrr->end_address);
        break;
    case ACPI_DMAR_TYPE_ATSR:
        atsr = container_of(header, struct acpi_dmar_atsr, header);
        pr_info("ATSR flags: %#x\n", atsr->flags);
        break;
    case ACPI_DMAR_HARDWARE_AFFINITY:
        rhsa = container_of(header, struct acpi_dmar_rhsa, header);
        pr_info("RHSA base: %#016Lx proximity domain: %#x\n",
               (unsigned long long)rhsa->base_address,
               rhsa->proximity_domain);
        break;
    }
}

/**
 * dmar_table_detect - checks to see if the platform supports DMAR devices
 */
static int __init dmar_table_detect(void)
{
    acpi_status status = AE_OK;

    /* if we can find the DMAR table, then there are DMAR devices */
    status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
                (struct acpi_table_header **)&dmar_tbl,
                &dmar_tbl_size);

    if (ACPI_SUCCESS(status) && !dmar_tbl) {
        pr_warn("Unable to map DMAR\n");
        status = AE_NOT_FOUND;
    }

    return (ACPI_SUCCESS(status) ? 1 : 0);
}

/**
 * parse_dmar_table - parses the DMA reporting table
 */
static int __init
parse_dmar_table(void)
{
    struct acpi_table_dmar *dmar;
    struct acpi_dmar_header *entry_header;
    int ret = 0;

    /*
     * Do the detection again; the earlier dmar_tbl mapping may have
     * been made with the fixed map.
     */
    dmar_table_detect();

    /*
     * ACPI tables may not be DMA protected by tboot, so use the DMAR
     * copy that SINIT saved in SinitMleData in the TXT heap (which is
     * DMA protected).
     */
    dmar_tbl = tboot_get_dmar_table(dmar_tbl);

    dmar = (struct acpi_table_dmar *)dmar_tbl;
    if (!dmar)
        return -ENODEV;

    if (dmar->width < PAGE_SHIFT - 1) {
        pr_warn("Invalid DMAR haw\n");
        return -EINVAL;
    }

    pr_info("Host address width %d\n", dmar->width + 1);

    entry_header = (struct acpi_dmar_header *)(dmar + 1);
    while (((unsigned long)entry_header) <
            (((unsigned long)dmar) + dmar_tbl->length)) {
        /* Avoid looping forever on bad ACPI tables */
        if (entry_header->length == 0) {
            pr_warn("Invalid 0-length structure\n");
            ret = -EINVAL;
            break;
        }

        dmar_table_print_dmar_entry(entry_header);

        switch (entry_header->type) {
        case ACPI_DMAR_TYPE_HARDWARE_UNIT:
            ret = dmar_parse_one_drhd(entry_header);
            break;
        case ACPI_DMAR_TYPE_RESERVED_MEMORY:
            ret = dmar_parse_one_rmrr(entry_header);
            break;
        case ACPI_DMAR_TYPE_ATSR:
            ret = dmar_parse_one_atsr(entry_header);
            break;
        case ACPI_DMAR_HARDWARE_AFFINITY:
#ifdef CONFIG_ACPI_NUMA
            ret = dmar_parse_one_rhsa(entry_header);
#endif
            break;
        default:
            pr_warn("Unknown DMAR structure type %d\n",
                entry_header->type);
            ret = 0; /* for forward compatibility */
            break;
        }
        if (ret)
            break;

        entry_header = ((void *)entry_header + entry_header->length);
    }
    return ret;
}

static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
              struct pci_dev *dev)
{
    int index;

    while (dev) {
        for (index = 0; index < cnt; index++)
            if (dev == devices[index])
                return 1;

        /* Check our parent */
        dev = dev->bus->self;
    }

    return 0;
}

struct dmar_drhd_unit *
dmar_find_matched_drhd_unit(struct pci_dev *dev)
{
    struct dmar_drhd_unit *dmaru = NULL;
    struct acpi_dmar_hardware_unit *drhd;

    dev = pci_physfn(dev);

    list_for_each_entry(dmaru, &dmar_drhd_units, list) {
        drhd = container_of(dmaru->hdr,
                    struct acpi_dmar_hardware_unit,
                    header);

        if (dmaru->include_all &&
            drhd->segment == pci_domain_nr(dev->bus))
            return dmaru;

        if (dmar_pci_device_match(dmaru->devices,
                      dmaru->devices_cnt, dev))
            return dmaru;
    }

    return NULL;
}

int __init dmar_dev_scope_init(void)
{
    static int dmar_dev_scope_initialized;
    struct dmar_drhd_unit *drhd, *drhd_n;
    int ret = -ENODEV;

    if (dmar_dev_scope_initialized)
        return dmar_dev_scope_initialized;

    if (list_empty(&dmar_drhd_units))
        goto fail;

    list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
        ret = dmar_parse_dev(drhd);
        if (ret)
            goto fail;
    }

    ret = dmar_parse_rmrr_atsr_dev();
    if (ret)
        goto fail;

    dmar_dev_scope_initialized = 1;
    return 0;

fail:
    dmar_dev_scope_initialized = ret;
    return ret;
}


int __init dmar_table_init(void)
{
    static int dmar_table_initialized;
    int ret;

    if (dmar_table_initialized)
        return 0;

    dmar_table_initialized = 1;

    ret = parse_dmar_table();
    if (ret) {
        if (ret != -ENODEV)
            pr_info("parse DMAR table failure.\n");
        return ret;
    }

    if (list_empty(&dmar_drhd_units)) {
        pr_info("No DMAR devices found\n");
        return -ENODEV;
    }

    return 0;
}

static void warn_invalid_dmar(u64 addr, const char *message)
{
    WARN_TAINT_ONCE(
        1, TAINT_FIRMWARE_WORKAROUND,
        "Your BIOS is broken; DMAR reported at address %llx%s!\n"
        "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
        addr, message,
        dmi_get_system_info(DMI_BIOS_VENDOR),
        dmi_get_system_info(DMI_BIOS_VERSION),
        dmi_get_system_info(DMI_PRODUCT_VERSION));
}

int __init check_zero_address(void)
{
    struct acpi_table_dmar *dmar;
    struct acpi_dmar_header *entry_header;
    struct acpi_dmar_hardware_unit *drhd;

    dmar = (struct acpi_table_dmar *)dmar_tbl;
    entry_header = (struct acpi_dmar_header *)(dmar + 1);

    while (((unsigned long)entry_header) <
            (((unsigned long)dmar) + dmar_tbl->length)) {
        /* Avoid looping forever on bad ACPI tables */
        if (entry_header->length == 0) {
            pr_warn("Invalid 0-length structure\n");
            return 0;
        }

        if (entry_header->type == ACPI_DMAR_TYPE_HARDWARE_UNIT) {
            void __iomem *addr;
            u64 cap, ecap;

            drhd = (void *)entry_header;
            if (!drhd->address) {
                warn_invalid_dmar(0, "");
                goto failed;
            }

            addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
            if (!addr) {
                pr_warn("IOMMU: can't validate: %llx\n", drhd->address);
                goto failed;
            }
            cap = dmar_readq(addr + DMAR_CAP_REG);
            ecap = dmar_readq(addr + DMAR_ECAP_REG);
            early_iounmap(addr, VTD_PAGE_SIZE);
            if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
                warn_invalid_dmar(drhd->address,
                          " returns all ones");
                goto failed;
            }
        }

        entry_header = ((void *)entry_header + entry_header->length);
    }
    return 1;

failed:
    return 0;
}

int __init detect_intel_iommu(void)
{
    int ret;

    ret = dmar_table_detect();
    if (ret)
        ret = check_zero_address();
    {
        struct acpi_table_dmar *dmar;

        dmar = (struct acpi_table_dmar *) dmar_tbl;

        if (ret && irq_remapping_enabled && cpu_has_x2apic &&
            dmar->flags & 0x1)
            pr_info("Queued invalidation will be enabled to support x2apic and Intr-remapping.\n");

        if (ret && !no_iommu && !iommu_detected && !dmar_disabled) {
            iommu_detected = 1;
            /* Make sure ACS will be enabled */
            pci_request_acs();
        }

#ifdef CONFIG_X86
        if (ret)
            x86_init.iommu.iommu_init = intel_iommu_init;
#endif
    }
    early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
    dmar_tbl = NULL;

    return ret ? 1 : -ENODEV;
}


static void unmap_iommu(struct intel_iommu *iommu)
{
    iounmap(iommu->reg);
    release_mem_region(iommu->reg_phys, iommu->reg_size);
}

/**
 * map_iommu: map the iommu's registers
 * @iommu: the iommu to map
 * @phys_addr: the physical address of the base register
 *
 * Memory map the iommu's registers. Start with a single page, and
 * possibly expand if that turns out to be insufficient.
 */
static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
{
    int map_size, err = 0;

    iommu->reg_phys = phys_addr;
    iommu->reg_size = VTD_PAGE_SIZE;

    if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
        pr_err("IOMMU: can't reserve memory\n");
        err = -EBUSY;
        goto out;
    }

    iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
    if (!iommu->reg) {
        pr_err("IOMMU: can't map the region\n");
        err = -ENOMEM;
        goto release;
    }

    iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
    iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);

    if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
        err = -EINVAL;
        warn_invalid_dmar(phys_addr, " returns all ones");
        goto unmap;
    }

    /* the registers might be more than one page */
    map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
             cap_max_fault_reg_offset(iommu->cap));
    map_size = VTD_PAGE_ALIGN(map_size);
    if (map_size > iommu->reg_size) {
        iounmap(iommu->reg);
        release_mem_region(iommu->reg_phys, iommu->reg_size);
        iommu->reg_size = map_size;
        if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
                    iommu->name)) {
            pr_err("IOMMU: can't reserve memory\n");
            err = -EBUSY;
            goto out;
        }
        iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
        if (!iommu->reg) {
            pr_err("IOMMU: can't map the region\n");
            err = -ENOMEM;
            goto release;
        }
    }
    err = 0;
    goto out;

unmap:
    iounmap(iommu->reg);
release:
    release_mem_region(iommu->reg_phys, iommu->reg_size);
out:
    return err;
}

int alloc_iommu(struct dmar_drhd_unit *drhd)
{
    struct intel_iommu *iommu;
    u32 ver;
    static int iommu_allocated = 0;
    int agaw = 0;
    int msagaw = 0;
    int err;

    if (!drhd->reg_base_addr) {
        warn_invalid_dmar(0, "");
        return -EINVAL;
    }

    iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
    if (!iommu)
        return -ENOMEM;

    iommu->seq_id = iommu_allocated++;
    sprintf(iommu->name, "dmar%d", iommu->seq_id);

    err = map_iommu(iommu, drhd->reg_base_addr);
    if (err) {
        pr_err("IOMMU: failed to map %s\n", iommu->name);
        goto error;
    }

    err = -EINVAL;
    agaw = iommu_calculate_agaw(iommu);
    if (agaw < 0) {
        pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
            iommu->seq_id);
        goto err_unmap;
    }
    msagaw = iommu_calculate_max_sagaw(iommu);
    if (msagaw < 0) {
        pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
            iommu->seq_id);
        goto err_unmap;
    }
    iommu->agaw = agaw;
    iommu->msagaw = msagaw;

    iommu->node = -1;

    ver = readl(iommu->reg + DMAR_VER_REG);
    pr_info("IOMMU %d: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
        iommu->seq_id,
        (unsigned long long)drhd->reg_base_addr,
        DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
        (unsigned long long)iommu->cap,
        (unsigned long long)iommu->ecap);

    raw_spin_lock_init(&iommu->register_lock);

    drhd->iommu = iommu;
    return 0;

 err_unmap:
    unmap_iommu(iommu);
 error:
    kfree(iommu);
    return err;
}

void free_iommu(struct intel_iommu *iommu)
{
    if (!iommu)
        return;

    free_dmar_iommu(iommu);

    if (iommu->reg)
        unmap_iommu(iommu);

    kfree(iommu);
}

/*
 * Reclaim all the submitted descriptors that have completed their work.
 */
static inline void reclaim_free_desc(struct q_inval *qi)
{
    while (qi->desc_status[qi->free_tail] == QI_DONE ||
           qi->desc_status[qi->free_tail] == QI_ABORT) {
        qi->desc_status[qi->free_tail] = QI_FREE;
        qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
        qi->free_cnt++;
    }
}

static int qi_check_fault(struct intel_iommu *iommu, int index)
{
    u32 fault;
    int head, tail;
    struct q_inval *qi = iommu->qi;
    int wait_index = (index + 1) % QI_LENGTH;

    if (qi->desc_status[wait_index] == QI_ABORT)
        return -EAGAIN;

    fault = readl(iommu->reg + DMAR_FSTS_REG);

    /*
     * If IQE happens, the head points to the descriptor associated
     * with the error. No new descriptors are fetched until the IQE
     * is cleared.
     */
    if (fault & DMA_FSTS_IQE) {
        head = readl(iommu->reg + DMAR_IQH_REG);
        if ((head >> DMAR_IQ_SHIFT) == index) {
            pr_err("VT-d detected invalid descriptor: "
                "low=%llx, high=%llx\n",
                (unsigned long long)qi->desc[index].low,
                (unsigned long long)qi->desc[index].high);
            memcpy(&qi->desc[index], &qi->desc[wait_index],
                    sizeof(struct qi_desc));
            __iommu_flush_cache(iommu, &qi->desc[index],
                    sizeof(struct qi_desc));
            writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
            return -EINVAL;
        }
    }

    /*
     * If ITE happens, all pending wait_desc commands are aborted.
     * No new descriptors are fetched until the ITE is cleared.
     */
    if (fault & DMA_FSTS_ITE) {
        head = readl(iommu->reg + DMAR_IQH_REG);
        head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
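        /*
         * Descriptors are always queued in pairs (work descriptor at an
         * even slot, wait descriptor at the following odd slot), so
         * round the head back up to the wait-descriptor slot.
         */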
        head |= 1;
        tail = readl(iommu->reg + DMAR_IQT_REG);
        tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;

        writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);

        do {
            if (qi->desc_status[head] == QI_IN_USE)
                qi->desc_status[head] = QI_ABORT;
            head = (head - 2 + QI_LENGTH) % QI_LENGTH;
        } while (head != tail);

        if (qi->desc_status[wait_index] == QI_ABORT)
            return -EAGAIN;
    }

    if (fault & DMA_FSTS_ICE)
        writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);

    return 0;
}

/*
 * Submit the queued invalidation descriptor to the remapping
 * hardware unit and wait for its completion.
 */
int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
{
    int rc;
    struct q_inval *qi = iommu->qi;
    struct qi_desc *hw, wait_desc;
    int wait_index, index;
    unsigned long flags;

    if (!qi)
        return 0;

    hw = qi->desc;

restart:
    rc = 0;

    raw_spin_lock_irqsave(&qi->q_lock, flags);
    while (qi->free_cnt < 3) {
        raw_spin_unlock_irqrestore(&qi->q_lock, flags);
        cpu_relax();
        raw_spin_lock_irqsave(&qi->q_lock, flags);
    }

    index = qi->free_head;
    wait_index = (index + 1) % QI_LENGTH;

    qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;

    hw[index] = *desc;

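    /*
     * Queue a wait descriptor right behind the caller's descriptor;
     * hardware writes QI_DONE into its status word once everything
     * queued ahead of it has completed, which is what we poll below.
     */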
    wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
            QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
    wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);

    hw[wait_index] = wait_desc;

    __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
    __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));

    qi->free_head = (qi->free_head + 2) % QI_LENGTH;
    qi->free_cnt -= 2;

    /*
     * update the HW tail register indicating the presence of
     * new descriptors.
     */
    writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);

    while (qi->desc_status[wait_index] != QI_DONE) {
        /*
         * We leave the interrupts disabled to prevent interrupt
         * context from queueing another cmd while a cmd is already
         * submitted and waiting for completion on this cpu. This
         * avoids a deadlock where the interrupt context could wait
         * indefinitely for free slots in the queue.
         */
        rc = qi_check_fault(iommu, index);
        if (rc)
            break;

        raw_spin_unlock(&qi->q_lock);
        cpu_relax();
        raw_spin_lock(&qi->q_lock);
    }

    qi->desc_status[index] = QI_DONE;

    reclaim_free_desc(qi);
    raw_spin_unlock_irqrestore(&qi->q_lock, flags);

    if (rc == -EAGAIN)
        goto restart;

    return rc;
}

/*
 * Flush the global interrupt entry cache.
 */
void qi_global_iec(struct intel_iommu *iommu)
{
    struct qi_desc desc;

    desc.low = QI_IEC_TYPE;
    desc.high = 0;

    /* should never fail */
    qi_submit_sync(&desc, iommu);
}

void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
              u64 type)
{
    struct qi_desc desc;

    desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
            | QI_CC_GRAN(type) | QI_CC_TYPE;
    desc.high = 0;

    qi_submit_sync(&desc, iommu);
}

void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
            unsigned int size_order, u64 type)
{
    u8 dw = 0, dr = 0;

    struct qi_desc desc;
    int ih = 0;

    if (cap_write_drain(iommu->cap))
        dw = 1;

    if (cap_read_drain(iommu->cap))
        dr = 1;

    desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
        | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
    desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
        | QI_IOTLB_AM(size_order);

    qi_submit_sync(&desc, iommu);
}

void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
            u64 addr, unsigned mask)
{
    struct qi_desc desc;

    if (mask) {
        BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
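        /*
         * Encode the 2^mask page range in the address itself: set every
         * address bit below the (already zero) bit at
         * VTD_PAGE_SHIFT + mask - 1, so the lowest clear bit marks the
         * invalidation size.
         */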
        addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
        desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
    } else
        desc.high = QI_DEV_IOTLB_ADDR(addr);

    if (qdep >= QI_DEV_IOTLB_MAX_INVS)
        qdep = 0;

    desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
           QI_DIOTLB_TYPE;

    qi_submit_sync(&desc, iommu);
}

/*
 * Disable Queued Invalidation interface.
 */
void dmar_disable_qi(struct intel_iommu *iommu)
{
    unsigned long flags;
    u32 sts;
    cycles_t start_time = get_cycles();

    if (!ecap_qis(iommu->ecap))
        return;

    raw_spin_lock_irqsave(&iommu->register_lock, flags);

    sts = dmar_readq(iommu->reg + DMAR_GSTS_REG);
    if (!(sts & DMA_GSTS_QIES))
        goto end;

    /*
     * Give the HW a chance to complete the pending invalidation requests.
     */
    while ((readl(iommu->reg + DMAR_IQT_REG) !=
        readl(iommu->reg + DMAR_IQH_REG)) &&
        (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
        cpu_relax();

    iommu->gcmd &= ~DMA_GCMD_QIE;
    writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

    IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
              !(sts & DMA_GSTS_QIES), sts);
end:
    raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}

/*
 * Enable queued invalidation.
 */
static void __dmar_enable_qi(struct intel_iommu *iommu)
{
    u32 sts;
    unsigned long flags;
    struct q_inval *qi = iommu->qi;

    qi->free_head = qi->free_tail = 0;
    qi->free_cnt = QI_LENGTH;

    raw_spin_lock_irqsave(&iommu->register_lock, flags);

    /* write zero to the tail reg */
    writel(0, iommu->reg + DMAR_IQT_REG);

    dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));

    iommu->gcmd |= DMA_GCMD_QIE;
    writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

    /* Make sure hardware completes it */
    IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);

    raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}

/*
 * Enable Queued Invalidation interface. This is a must to support
 * interrupt-remapping. Also used by DMA-remapping, which replaces
 * register based IOTLB invalidation.
 */
int dmar_enable_qi(struct intel_iommu *iommu)
{
    struct q_inval *qi;
    struct page *desc_page;

    if (!ecap_qis(iommu->ecap))
        return -ENOENT;

    /*
     * queued invalidation is already set up and enabled.
     */
    if (iommu->qi)
        return 0;

    iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
    if (!iommu->qi)
        return -ENOMEM;

    qi = iommu->qi;

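    /* the invalidation queue itself is a single zeroed page of 16-byte descriptors */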
    desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0);
    if (!desc_page) {
        kfree(qi);
        iommu->qi = NULL;
        return -ENOMEM;
    }

    qi->desc = page_address(desc_page);

    /* zero the status array so that every slot starts out as QI_FREE */
    qi->desc_status = kzalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
    if (!qi->desc_status) {
        free_page((unsigned long) qi->desc);
        kfree(qi);
        iommu->qi = NULL;
        return -ENOMEM;
    }

    qi->free_head = qi->free_tail = 0;
    qi->free_cnt = QI_LENGTH;

    raw_spin_lock_init(&qi->q_lock);

    __dmar_enable_qi(iommu);

    return 0;
}

/* iommu interrupt handling. Most of it is MSI-like. */

enum faulttype {
    DMA_REMAP,
    INTR_REMAP,
    UNKNOWN,
};

static const char *dma_remap_fault_reasons[] =
{
    "Software",
    "Present bit in root entry is clear",
    "Present bit in context entry is clear",
    "Invalid context entry",
    "Access beyond MGAW",
    "PTE Write access is not set",
    "PTE Read access is not set",
    "Next page table ptr is invalid",
    "Root table address invalid",
    "Context table ptr is invalid",
    "non-zero reserved fields in RTP",
    "non-zero reserved fields in CTP",
    "non-zero reserved fields in PTE",
};

static const char *irq_remap_fault_reasons[] =
{
    "Detected reserved fields in the decoded interrupt-remapped request",
    "Interrupt index exceeded the interrupt-remapping table size",
    "Present field in the IRTE entry is clear",
    "Error accessing interrupt-remapping table pointed by IRTA_REG",
    "Detected reserved fields in the IRTE entry",
    "Blocked a compatibility format interrupt request",
    "Blocked an interrupt request due to source-id verification failure",
};

#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)

const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
{
    if (fault_reason >= 0x20 && (fault_reason - 0x20 <
                    ARRAY_SIZE(irq_remap_fault_reasons))) {
        *fault_type = INTR_REMAP;
        return irq_remap_fault_reasons[fault_reason - 0x20];
    } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
        *fault_type = DMA_REMAP;
        return dma_remap_fault_reasons[fault_reason];
    } else {
        *fault_type = UNKNOWN;
        return "Unknown";
    }
}

void dmar_msi_unmask(struct irq_data *data)
{
    struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
    unsigned long flag;

    /* unmask it */
    raw_spin_lock_irqsave(&iommu->register_lock, flag);
    writel(0, iommu->reg + DMAR_FECTL_REG);
    /* Read a reg to force flush the post write */
    readl(iommu->reg + DMAR_FECTL_REG);
    raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_mask(struct irq_data *data)
{
    unsigned long flag;
    struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);

    /* mask it */
    raw_spin_lock_irqsave(&iommu->register_lock, flag);
    writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
    /* Read a reg to force flush the post write */
    readl(iommu->reg + DMAR_FECTL_REG);
    raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_write(int irq, struct msi_msg *msg)
{
    struct intel_iommu *iommu = irq_get_handler_data(irq);
    unsigned long flag;

    raw_spin_lock_irqsave(&iommu->register_lock, flag);
    writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
    writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
    writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
    raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_read(int irq, struct msi_msg *msg)
{
    struct intel_iommu *iommu = irq_get_handler_data(irq);
    unsigned long flag;

    raw_spin_lock_irqsave(&iommu->register_lock, flag);
    msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
    msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
    msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
    raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
        u8 fault_reason, u16 source_id, unsigned long long addr)
{
    const char *reason;
    int fault_type;

    reason = dmar_get_fault_reason(fault_reason, &fault_type);

    if (fault_type == INTR_REMAP)
        pr_err("INTR-REMAP: Request device [%02x:%02x.%d] "
               "fault index %llx\n"
            "INTR-REMAP:[fault reason %02d] %s\n",
            (source_id >> 8), PCI_SLOT(source_id & 0xFF),
            PCI_FUNC(source_id & 0xFF), addr >> 48,
            fault_reason, reason);
    else
        pr_err("DMAR:[%s] Request device [%02x:%02x.%d] "
               "fault addr %llx\n"
               "DMAR:[fault reason %02d] %s\n",
               (type ? "DMA Read" : "DMA Write"),
               (source_id >> 8), PCI_SLOT(source_id & 0xFF),
               PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
    return 0;
}

#define PRIMARY_FAULT_REG_LEN (16)
irqreturn_t dmar_fault(int irq, void *dev_id)
{
    struct intel_iommu *iommu = dev_id;
    int reg, fault_index;
    u32 fault_status;
    unsigned long flag;

    raw_spin_lock_irqsave(&iommu->register_lock, flag);
    fault_status = readl(iommu->reg + DMAR_FSTS_REG);
    if (fault_status)
        pr_err("DRHD: handling fault status reg %x\n", fault_status);

    /* TBD: ignore advanced fault log currently */
    if (!(fault_status & DMA_FSTS_PPF))
        goto clear_rest;

    fault_index = dma_fsts_fault_record_index(fault_status);
    reg = cap_fault_reg_offset(iommu->cap);
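    /*
     * Walk the primary fault recording registers, each
     * PRIMARY_FAULT_REG_LEN bytes wide, until we hit a record whose
     * Fault bit is clear.
     */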
    while (1) {
        u8 fault_reason;
        u16 source_id;
        u64 guest_addr;
        int type;
        u32 data;

        /* highest 32 bits */
        data = readl(iommu->reg + reg +
                fault_index * PRIMARY_FAULT_REG_LEN + 12);
        if (!(data & DMA_FRCD_F))
            break;

        fault_reason = dma_frcd_fault_reason(data);
        type = dma_frcd_type(data);

        data = readl(iommu->reg + reg +
                fault_index * PRIMARY_FAULT_REG_LEN + 8);
        source_id = dma_frcd_source_id(data);

        guest_addr = dmar_readq(iommu->reg + reg +
                fault_index * PRIMARY_FAULT_REG_LEN);
        guest_addr = dma_frcd_page_addr(guest_addr);
        /* clear the fault */
        writel(DMA_FRCD_F, iommu->reg + reg +
            fault_index * PRIMARY_FAULT_REG_LEN + 12);

        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);

        dmar_fault_do_one(iommu, type, fault_reason,
                source_id, guest_addr);

        fault_index++;
        if (fault_index >= cap_num_fault_regs(iommu->cap))
            fault_index = 0;
        raw_spin_lock_irqsave(&iommu->register_lock, flag);
    }
clear_rest:
    /* clear all the other faults */
    fault_status = readl(iommu->reg + DMAR_FSTS_REG);
    writel(fault_status, iommu->reg + DMAR_FSTS_REG);

    raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
    return IRQ_HANDLED;
}

int dmar_set_interrupt(struct intel_iommu *iommu)
{
    int irq, ret;

    /*
     * Check if the fault interrupt is already initialized.
     */
    if (iommu->irq)
        return 0;

    irq = create_irq();
    if (!irq) {
        pr_err("IOMMU: no free vectors\n");
        return -EINVAL;
    }

    irq_set_handler_data(irq, iommu);
    iommu->irq = irq;

    ret = arch_setup_dmar_msi(irq);
    if (ret) {
        irq_set_handler_data(irq, NULL);
        iommu->irq = 0;
        destroy_irq(irq);
        return ret;
    }

    ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
    if (ret)
        pr_err("IOMMU: can't request irq\n");
    return ret;
}

int __init enable_drhd_fault_handling(void)
{
    struct dmar_drhd_unit *drhd;

    /*
     * Enable fault control interrupt.
     */
    for_each_drhd_unit(drhd) {
        int ret;
        struct intel_iommu *iommu = drhd->iommu;
        ret = dmar_set_interrupt(iommu);

        if (ret) {
            pr_err("DRHD %Lx: failed to enable fault interrupt, ret %d\n",
                   (unsigned long long)drhd->reg_base_addr, ret);
            return -1;
        }

        /*
         * Clear any previous faults.
         */
        dmar_fault(iommu->irq, iommu);
    }

    return 0;
}

/*
 * Re-enable Queued Invalidation interface.
 */
int dmar_reenable_qi(struct intel_iommu *iommu)
{
    if (!ecap_qis(iommu->ecap))
        return -ENOENT;

    if (!iommu->qi)
        return -ENOENT;

    /*
     * First disable queued invalidation.
     */
    dmar_disable_qi(iommu);
    /*
     * Then enable queued invalidation again. Since there are no pending
     * invalidation requests now, it's safe to re-enable queued
     * invalidation.
     */
    __dmar_enable_qi(iommu);

    return 0;
}

/*
 * Check interrupt remapping support in the DMAR table description.
 */
int __init dmar_ir_support(void)
{
    struct acpi_table_dmar *dmar;

    dmar = (struct acpi_table_dmar *)dmar_tbl;
    if (!dmar)
        return 0;
    return dmar->flags & 0x1;
}
IOMMU_INIT_POST(detect_intel_iommu);

