/*
 * VFIO: IOMMU DMA mapping support for Type1 IOMMU
 *
 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
 * Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc. All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 *
 * We arbitrarily define a Type1 IOMMU as one matching the below code.
 * It could be called the x86 IOMMU as it's designed for AMD-Vi & Intel
 * VT-d, but that makes it harder to re-use as theoretically anyone
 * implementing a similar IOMMU could make use of this. We expect the
 * IOMMU to support the IOMMU API and have few to no restrictions around
 * the IOVA range that can be mapped. The Type1 IOMMU is currently
 * optimized for relatively static mappings of a userspace process with
 * userspace pages pinned into memory. We also assume devices and IOMMU
 * domains are PCI based as the IOMMU API is still centered around a
 * device/bus interface rather than a group interface.
 */

#include <linux/compat.h>
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/iommu.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/pci.h>		/* pci_bus_type */
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/workqueue.h>

#define DRIVER_VERSION  "0.2"
#define DRIVER_AUTHOR   "Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC     "Type1 IOMMU driver for VFIO"

static bool allow_unsafe_interrupts;
module_param_named(allow_unsafe_interrupts,
                   allow_unsafe_interrupts, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(allow_unsafe_interrupts,
                 "Enable VFIO IOMMU support on platforms without interrupt remapping support.");

struct vfio_iommu {
        struct iommu_domain     *domain;
        struct mutex            lock;
        struct list_head        dma_list;
        struct list_head        group_list;
        bool                    cache;
};

struct vfio_dma {
        struct list_head        next;
        dma_addr_t              iova;           /* Device address */
        unsigned long           vaddr;          /* Process virtual addr */
        long                    npage;          /* Number of pages */
        int                     prot;           /* IOMMU_READ/WRITE */
};

struct vfio_group {
        struct iommu_group      *iommu_group;
        struct list_head        next;
};

/*
 * This code handles mapping and unmapping of user data buffers
 * into DMA'ble space using the IOMMU
 */
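
/*
 * A rough sketch of how userspace typically drives these paths, for
 * illustration only.  Container setup through the core VFIO ioctls is
 * assumed and not shown; "container_fd", "buf", "len" and the example
 * IOVA are hypothetical:
 *
 *      struct vfio_iommu_type1_dma_map map = {
 *              .argsz = sizeof(map),
 *              .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
 *              .vaddr = (__u64)(unsigned long)buf,
 *              .iova  = 0x100000,
 *              .size  = len,
 *      };
 *      struct vfio_iommu_type1_dma_unmap unmap = {
 *              .argsz = sizeof(unmap),
 *              .iova  = 0x100000,
 *              .size  = len,
 *      };
 *
 *      ioctl(container_fd, VFIO_IOMMU_MAP_DMA, &map);
 *      ...
 *      ioctl(container_fd, VFIO_IOMMU_UNMAP_DMA, &unmap);
 *
 * vaddr, iova and size must all be multiples of the IOMMU's minimum page
 * size, as enforced in vfio_dma_do_map() below.
 */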

#define NPAGE_TO_SIZE(npage)	((size_t)(npage) << PAGE_SHIFT)

struct vwork {
        struct mm_struct        *mm;
        long                    npage;
        struct work_struct      work;
};

/* delayed decrement/increment for locked_vm */
static void vfio_lock_acct_bg(struct work_struct *work)
{
        struct vwork *vwork = container_of(work, struct vwork, work);
        struct mm_struct *mm;

        mm = vwork->mm;
        down_write(&mm->mmap_sem);
        mm->locked_vm += vwork->npage;
        up_write(&mm->mmap_sem);
        mmput(mm);
        kfree(vwork);
}

static void vfio_lock_acct(long npage)
{
        struct vwork *vwork;
        struct mm_struct *mm;

        if (!current->mm)
                return; /* process exited */

        if (down_write_trylock(&current->mm->mmap_sem)) {
                current->mm->locked_vm += npage;
                up_write(&current->mm->mmap_sem);
                return;
        }

        /*
         * Couldn't get mmap_sem lock, so must setup to update
         * mm->locked_vm later. If locked_vm were atomic, we
         * wouldn't need this silliness
         */
        vwork = kmalloc(sizeof(struct vwork), GFP_KERNEL);
        if (!vwork)
                return;
        mm = get_task_mm(current);
        if (!mm) {
                kfree(vwork);
                return;
        }
        INIT_WORK(&vwork->work, vfio_lock_acct_bg);
        vwork->mm = mm;
        vwork->npage = npage;
        schedule_work(&vwork->work);
}

/*
 * Some mappings aren't backed by a struct page, for example an mmap'd
 * MMIO range for our own or another device. These use a different
 * pfn conversion and shouldn't be tracked as locked pages.
 */
static bool is_invalid_reserved_pfn(unsigned long pfn)
{
        if (pfn_valid(pfn)) {
                bool reserved;
                struct page *tail = pfn_to_page(pfn);
                struct page *head = compound_trans_head(tail);
                reserved = !!(PageReserved(head));
                if (head != tail) {
                        /*
                         * "head" is not a dangling pointer
                         * (compound_trans_head takes care of that)
                         * but the hugepage may have been split
                         * from under us (and we may not hold a
                         * reference count on the head page so it can
                         * be reused before we run PageReferenced), so
                         * we have to check PageTail before returning
                         * what we just read.
                         */
                        smp_rmb();
                        if (PageTail(tail))
                                return reserved;
                }
                return PageReserved(tail);
        }

        return true;
}

static int put_pfn(unsigned long pfn, int prot)
{
        if (!is_invalid_reserved_pfn(pfn)) {
                struct page *page = pfn_to_page(pfn);
                if (prot & IOMMU_WRITE)
                        SetPageDirty(page);
                put_page(page);
                return 1;
        }
        return 0;
}

/* Unmap DMA region */
static long __vfio_dma_do_unmap(struct vfio_iommu *iommu, dma_addr_t iova,
                                long npage, int prot)
{
        long i, unlocked = 0;

        for (i = 0; i < npage; i++, iova += PAGE_SIZE) {
                unsigned long pfn;

                pfn = iommu_iova_to_phys(iommu->domain, iova) >> PAGE_SHIFT;
                if (pfn) {
                        iommu_unmap(iommu->domain, iova, PAGE_SIZE);
                        unlocked += put_pfn(pfn, prot);
                }
        }
        return unlocked;
}

static void vfio_dma_unmap(struct vfio_iommu *iommu, dma_addr_t iova,
                           long npage, int prot)
{
        long unlocked;

        unlocked = __vfio_dma_do_unmap(iommu, iova, npage, prot);
        vfio_lock_acct(-unlocked);
}

static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn)
{
        struct page *page[1];
        struct vm_area_struct *vma;
        int ret = -EFAULT;

        if (get_user_pages_fast(vaddr, 1, !!(prot & IOMMU_WRITE), page) == 1) {
                *pfn = page_to_pfn(page[0]);
                return 0;
        }

        down_read(&current->mm->mmap_sem);

        vma = find_vma_intersection(current->mm, vaddr, vaddr + 1);

        if (vma && vma->vm_flags & VM_PFNMAP) {
                *pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
                if (is_invalid_reserved_pfn(*pfn))
                        ret = 0;
        }

        up_read(&current->mm->mmap_sem);

        return ret;
}

/* Map DMA region */
static int __vfio_dma_map(struct vfio_iommu *iommu, dma_addr_t iova,
                          unsigned long vaddr, long npage, int prot)
{
        dma_addr_t start = iova;
        long i, locked = 0;
        int ret;

        /* Verify that pages are not already mapped */
        for (i = 0; i < npage; i++, iova += PAGE_SIZE)
                if (iommu_iova_to_phys(iommu->domain, iova))
                        return -EBUSY;

        iova = start;

        if (iommu->cache)
                prot |= IOMMU_CACHE;

        /*
         * XXX We break mappings into pages and use get_user_pages_fast to
         * pin the pages in memory. It's been suggested that mlock might
         * provide a more efficient mechanism, but nothing prevents the
         * user from munlocking the pages, which could then allow the user
         * access to random host memory. We also have no guarantee from the
         * IOMMU API that the iommu driver can unmap sub-pages of previous
         * mappings. This means we might lose an entire range if a single
         * page within it is unmapped. Single page mappings are inefficient,
         * but provide the most flexibility for now.
         */
        for (i = 0; i < npage; i++, iova += PAGE_SIZE, vaddr += PAGE_SIZE) {
                unsigned long pfn = 0;

                ret = vaddr_get_pfn(vaddr, prot, &pfn);
                if (ret) {
                        __vfio_dma_do_unmap(iommu, start, i, prot);
                        return ret;
                }

                /*
                 * Only add actual locked pages to accounting
                 * XXX We're effectively marking a page locked for every
                 * IOVA page even though it's possible the user could be
                 * backing multiple IOVAs with the same vaddr. This over-
                 * penalizes the user process, but we currently have no
                 * easy way to do this properly.
                 */
                if (!is_invalid_reserved_pfn(pfn))
                        locked++;

                ret = iommu_map(iommu->domain, iova,
                                (phys_addr_t)pfn << PAGE_SHIFT,
                                PAGE_SIZE, prot);
                if (ret) {
                        /* Back out mappings on error */
                        put_pfn(pfn, prot);
                        __vfio_dma_do_unmap(iommu, start, i, prot);
                        return ret;
                }
        }
        vfio_lock_acct(locked);
        return 0;
}

static inline bool ranges_overlap(dma_addr_t start1, size_t size1,
                                  dma_addr_t start2, size_t size2)
{
        if (start1 < start2)
                return (start2 - start1 < size1);
        else if (start2 < start1)
                return (start1 - start2 < size2);
        return (size1 > 0 && size2 > 0);
}
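
/*
 * Worked example for ranges_overlap() above: [0x1000, 0x1000 + 0x2000)
 * and [0x2000, 0x2000 + 0x1000) overlap, since 0x2000 - 0x1000 < 0x2000;
 * [0x1000, 0x1000 + 0x1000) and [0x2000, 0x2000 + 0x1000) merely abut and
 * do not.  A zero-sized range never overlaps anything.
 */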

static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu,
                                      dma_addr_t start, size_t size)
{
        struct vfio_dma *dma;

        list_for_each_entry(dma, &iommu->dma_list, next) {
                if (ranges_overlap(dma->iova, NPAGE_TO_SIZE(dma->npage),
                                   start, size))
                        return dma;
        }
        return NULL;
}

static long vfio_remove_dma_overlap(struct vfio_iommu *iommu, dma_addr_t start,
                                    size_t size, struct vfio_dma *dma)
{
        struct vfio_dma *split;
        long npage_lo, npage_hi;

        /* Existing dma region is completely covered, unmap all */
        if (start <= dma->iova &&
            start + size >= dma->iova + NPAGE_TO_SIZE(dma->npage)) {
                vfio_dma_unmap(iommu, dma->iova, dma->npage, dma->prot);
                list_del(&dma->next);
                npage_lo = dma->npage;
                kfree(dma);
                return npage_lo;
        }

        /* Overlap low address of existing range */
        if (start <= dma->iova) {
                size_t overlap;

                overlap = start + size - dma->iova;
                npage_lo = overlap >> PAGE_SHIFT;

                vfio_dma_unmap(iommu, dma->iova, npage_lo, dma->prot);
                dma->iova += overlap;
                dma->vaddr += overlap;
                dma->npage -= npage_lo;
                return npage_lo;
        }

        /* Overlap high address of existing range */
        if (start + size >= dma->iova + NPAGE_TO_SIZE(dma->npage)) {
                size_t overlap;

                overlap = dma->iova + NPAGE_TO_SIZE(dma->npage) - start;
                npage_hi = overlap >> PAGE_SHIFT;

                vfio_dma_unmap(iommu, start, npage_hi, dma->prot);
                dma->npage -= npage_hi;
                return npage_hi;
        }

        /* Split existing */
        npage_lo = (start - dma->iova) >> PAGE_SHIFT;
        npage_hi = dma->npage - (size >> PAGE_SHIFT) - npage_lo;

        split = kzalloc(sizeof *split, GFP_KERNEL);
        if (!split)
                return -ENOMEM;

        vfio_dma_unmap(iommu, start, size >> PAGE_SHIFT, dma->prot);

        dma->npage = npage_lo;

        split->npage = npage_hi;
        split->iova = start + size;
        split->vaddr = dma->vaddr + NPAGE_TO_SIZE(npage_lo) + size;
        split->prot = dma->prot;
        list_add(&split->next, &iommu->dma_list);
        return size >> PAGE_SHIFT;
}
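
/*
 * Worked example for the "split" case above, with illustrative numbers:
 * unmapping [0x3000, 0x3000 + 0x1000) from a vfio_dma covering
 * [0x1000, 0x1000 + 0x5000) gives npage_lo = 2 and npage_hi = 2.  The
 * existing entry is trimmed to the two pages below the hole and a new
 * entry is added for [0x4000, 0x4000 + 0x2000), with its vaddr advanced
 * 0x3000 past the original vaddr.
 */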

static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
                             struct vfio_iommu_type1_dma_unmap *unmap)
{
        long ret = 0, npage = unmap->size >> PAGE_SHIFT;
        struct vfio_dma *dma, *tmp;
        uint64_t mask;

        mask = ((uint64_t)1 << __ffs(iommu->domain->ops->pgsize_bitmap)) - 1;

        if (unmap->iova & mask)
                return -EINVAL;
        if (unmap->size & mask)
                return -EINVAL;

        /* XXX We still break these down into PAGE_SIZE */
        WARN_ON(mask & PAGE_MASK);

        mutex_lock(&iommu->lock);

        list_for_each_entry_safe(dma, tmp, &iommu->dma_list, next) {
                if (ranges_overlap(dma->iova, NPAGE_TO_SIZE(dma->npage),
                                   unmap->iova, unmap->size)) {
                        ret = vfio_remove_dma_overlap(iommu, unmap->iova,
                                                      unmap->size, dma);
                        if (ret > 0)
                                npage -= ret;
                        if (ret < 0 || npage == 0)
                                break;
                }
        }
        mutex_unlock(&iommu->lock);
        return ret > 0 ? 0 : (int)ret;
}

static int vfio_dma_do_map(struct vfio_iommu *iommu,
                           struct vfio_iommu_type1_dma_map *map)
{
        struct vfio_dma *dma, *pdma = NULL;
        dma_addr_t iova = map->iova;
        unsigned long locked, lock_limit, vaddr = map->vaddr;
        size_t size = map->size;
        int ret = 0, prot = 0;
        uint64_t mask;
        long npage;

        mask = ((uint64_t)1 << __ffs(iommu->domain->ops->pgsize_bitmap)) - 1;

        /* READ/WRITE from device perspective */
        if (map->flags & VFIO_DMA_MAP_FLAG_WRITE)
                prot |= IOMMU_WRITE;
        if (map->flags & VFIO_DMA_MAP_FLAG_READ)
                prot |= IOMMU_READ;

        if (!prot)
                return -EINVAL; /* No READ/WRITE? */

        if (vaddr & mask)
                return -EINVAL;
        if (iova & mask)
                return -EINVAL;
        if (size & mask)
                return -EINVAL;

        /* XXX We still break these down into PAGE_SIZE */
        WARN_ON(mask & PAGE_MASK);

        /* Don't allow IOVA wrap */
        if (iova + size && iova + size < iova)
                return -EINVAL;

        /* Don't allow virtual address wrap */
        if (vaddr + size && vaddr + size < vaddr)
                return -EINVAL;

        npage = size >> PAGE_SHIFT;
        if (!npage)
                return -EINVAL;

        mutex_lock(&iommu->lock);

        if (vfio_find_dma(iommu, iova, size)) {
                ret = -EBUSY;
                goto out_lock;
        }

        /* account for locked pages */
        locked = current->mm->locked_vm + npage;
        lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
        if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
                pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
                        __func__, rlimit(RLIMIT_MEMLOCK));
                ret = -ENOMEM;
                goto out_lock;
        }

        ret = __vfio_dma_map(iommu, iova, vaddr, npage, prot);
        if (ret)
                goto out_lock;

        /* Check if we abut a region below - nothing below 0 */
        if (iova) {
                dma = vfio_find_dma(iommu, iova - 1, 1);
                if (dma && dma->prot == prot &&
                    dma->vaddr + NPAGE_TO_SIZE(dma->npage) == vaddr) {

                        dma->npage += npage;
                        iova = dma->iova;
                        vaddr = dma->vaddr;
                        npage = dma->npage;
                        size = NPAGE_TO_SIZE(npage);

                        pdma = dma;
                }
        }

        /* Check if we abut a region above - nothing above ~0 + 1 */
        if (iova + size) {
                dma = vfio_find_dma(iommu, iova + size, 1);
                if (dma && dma->prot == prot &&
                    dma->vaddr == vaddr + size) {

                        dma->npage += npage;
                        dma->iova = iova;
                        dma->vaddr = vaddr;

                        /*
                         * If merged above and below, remove previously
                         * merged entry. New entry covers it.
                         */
                        if (pdma) {
                                list_del(&pdma->next);
                                kfree(pdma);
                        }
                        pdma = dma;
                }
        }

        /* Isolated, new region */
        if (!pdma) {
                dma = kzalloc(sizeof *dma, GFP_KERNEL);
                if (!dma) {
                        ret = -ENOMEM;
                        vfio_dma_unmap(iommu, iova, npage, prot);
                        goto out_lock;
                }

                dma->npage = npage;
                dma->iova = iova;
                dma->vaddr = vaddr;
                dma->prot = prot;
                list_add(&dma->next, &iommu->dma_list);
        }

out_lock:
        mutex_unlock(&iommu->lock);
        return ret;
}
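
/*
 * Example of the coalescing above, using illustrative addresses: if
 * [0x1000, 0x1000 + 0x1000) is already tracked for vaddr 0x7f0000001000
 * and the user maps [0x2000, 0x2000 + 0x1000) for vaddr 0x7f0000002000
 * with the same protection, the new mapping abuts the existing entry in
 * both IOVA and vaddr space, so that entry simply grows to two pages
 * instead of a second vfio_dma being added to dma_list.
 */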

static int vfio_iommu_type1_attach_group(void *iommu_data,
                                         struct iommu_group *iommu_group)
{
        struct vfio_iommu *iommu = iommu_data;
        struct vfio_group *group, *tmp;
        int ret;

        group = kzalloc(sizeof(*group), GFP_KERNEL);
        if (!group)
                return -ENOMEM;

        mutex_lock(&iommu->lock);

        list_for_each_entry(tmp, &iommu->group_list, next) {
                if (tmp->iommu_group == iommu_group) {
                        mutex_unlock(&iommu->lock);
                        kfree(group);
                        return -EINVAL;
                }
        }

        /*
         * TODO: Domains have capabilities that might change as we add
         * groups (see iommu->cache, currently never set). Check for
         * them and potentially disallow groups to be attached when it
         * would change capabilities (ugh).
         */
        ret = iommu_attach_group(iommu->domain, iommu_group);
        if (ret) {
                mutex_unlock(&iommu->lock);
                kfree(group);
                return ret;
        }

        group->iommu_group = iommu_group;
        list_add(&group->next, &iommu->group_list);

        mutex_unlock(&iommu->lock);

        return 0;
}

static void vfio_iommu_type1_detach_group(void *iommu_data,
                                          struct iommu_group *iommu_group)
{
        struct vfio_iommu *iommu = iommu_data;
        struct vfio_group *group;

        mutex_lock(&iommu->lock);

        list_for_each_entry(group, &iommu->group_list, next) {
                if (group->iommu_group == iommu_group) {
                        iommu_detach_group(iommu->domain, iommu_group);
                        list_del(&group->next);
                        kfree(group);
                        break;
                }
        }

        mutex_unlock(&iommu->lock);
}

static void *vfio_iommu_type1_open(unsigned long arg)
{
        struct vfio_iommu *iommu;

        if (arg != VFIO_TYPE1_IOMMU)
                return ERR_PTR(-EINVAL);

        iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
        if (!iommu)
                return ERR_PTR(-ENOMEM);

        INIT_LIST_HEAD(&iommu->group_list);
        INIT_LIST_HEAD(&iommu->dma_list);
        mutex_init(&iommu->lock);

        /*
         * Wish we didn't have to know about bus_type here.
         */
        iommu->domain = iommu_domain_alloc(&pci_bus_type);
        if (!iommu->domain) {
                kfree(iommu);
                return ERR_PTR(-EIO);
        }

        /*
         * Wish we could specify required capabilities rather than create
         * a domain, see what comes out and hope it doesn't change along
         * the way. Fortunately we know interrupt remapping is global for
         * our iommus.
         */
        if (!allow_unsafe_interrupts &&
            !iommu_domain_has_cap(iommu->domain, IOMMU_CAP_INTR_REMAP)) {
                pr_warn("%s: No interrupt remapping support. Use the module param \"allow_unsafe_interrupts\" to enable VFIO IOMMU support on this platform\n",
                        __func__);
                iommu_domain_free(iommu->domain);
                kfree(iommu);
                return ERR_PTR(-EPERM);
        }

        return iommu;
}

static void vfio_iommu_type1_release(void *iommu_data)
{
        struct vfio_iommu *iommu = iommu_data;
        struct vfio_group *group, *group_tmp;
        struct vfio_dma *dma, *dma_tmp;

        list_for_each_entry_safe(group, group_tmp, &iommu->group_list, next) {
                iommu_detach_group(iommu->domain, group->iommu_group);
                list_del(&group->next);
                kfree(group);
        }

        list_for_each_entry_safe(dma, dma_tmp, &iommu->dma_list, next) {
                vfio_dma_unmap(iommu, dma->iova, dma->npage, dma->prot);
                list_del(&dma->next);
                kfree(dma);
        }

        iommu_domain_free(iommu->domain);
        iommu->domain = NULL;
        kfree(iommu);
}

static long vfio_iommu_type1_ioctl(void *iommu_data,
                                   unsigned int cmd, unsigned long arg)
{
        struct vfio_iommu *iommu = iommu_data;
        unsigned long minsz;

        if (cmd == VFIO_CHECK_EXTENSION) {
                switch (arg) {
                case VFIO_TYPE1_IOMMU:
                        return 1;
                default:
                        return 0;
                }
        } else if (cmd == VFIO_IOMMU_GET_INFO) {
                struct vfio_iommu_type1_info info;

                minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);

                if (copy_from_user(&info, (void __user *)arg, minsz))
                        return -EFAULT;

                if (info.argsz < minsz)
                        return -EINVAL;

                info.flags = 0;

                info.iova_pgsizes = iommu->domain->ops->pgsize_bitmap;

                return copy_to_user((void __user *)arg, &info, minsz);

        } else if (cmd == VFIO_IOMMU_MAP_DMA) {
                struct vfio_iommu_type1_dma_map map;
                uint32_t mask = VFIO_DMA_MAP_FLAG_READ |
                                VFIO_DMA_MAP_FLAG_WRITE;

                minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);

                if (copy_from_user(&map, (void __user *)arg, minsz))
                        return -EFAULT;

                if (map.argsz < minsz || map.flags & ~mask)
                        return -EINVAL;

                return vfio_dma_do_map(iommu, &map);

        } else if (cmd == VFIO_IOMMU_UNMAP_DMA) {
                struct vfio_iommu_type1_dma_unmap unmap;

                minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size);

                if (copy_from_user(&unmap, (void __user *)arg, minsz))
                        return -EFAULT;

                if (unmap.argsz < minsz || unmap.flags)
                        return -EINVAL;

                return vfio_dma_do_unmap(iommu, &unmap);
        }

        return -ENOTTY;
}
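
/*
 * Illustrative userspace view of the argsz handshake used above
 * ("container_fd" is a hypothetical, already configured VFIO container
 * backed by this driver):
 *
 *      struct vfio_iommu_type1_info info = { .argsz = sizeof(info) };
 *
 *      if (ioctl(container_fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU) != 1)
 *              ... this container does not support the Type1 backend ...
 *
 *      ioctl(container_fd, VFIO_IOMMU_GET_INFO, &info);
 *
 * On return, info.iova_pgsizes holds the IOMMU page size bitmap.  An argsz
 * smaller than the fields the kernel needs is rejected with -EINVAL.
 */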

static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
        .name           = "vfio-iommu-type1",
        .owner          = THIS_MODULE,
        .open           = vfio_iommu_type1_open,
        .release        = vfio_iommu_type1_release,
        .ioctl          = vfio_iommu_type1_ioctl,
        .attach_group   = vfio_iommu_type1_attach_group,
        .detach_group   = vfio_iommu_type1_detach_group,
};

static int __init vfio_iommu_type1_init(void)
{
        if (!iommu_present(&pci_bus_type))
                return -ENODEV;

        return vfio_register_iommu_driver(&vfio_iommu_driver_ops_type1);
}

static void __exit vfio_iommu_type1_cleanup(void)
{
        vfio_unregister_iommu_driver(&vfio_iommu_driver_ops_type1);
}

module_init(vfio_iommu_type1_init);
module_exit(vfio_iommu_type1_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);