/*
 * Kernel-based Virtual Machine - device assignment support
 *
 * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates.
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/fs.h>
#include "irq.h"

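/*
 * Look up an assigned device by its user-space device ID on the VM's
 * assigned device list; returns NULL if no entry matches.
 */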
static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
                                                             int assigned_dev_id)
{
        struct list_head *ptr;
        struct kvm_assigned_dev_kernel *match;

        list_for_each(ptr, head) {
                match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
                if (match->assigned_dev_id == assigned_dev_id)
                        return match;
        }
        return NULL;
}

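/*
 * Translate a host IRQ number back to its index in the device's MSI-X
 * entry array; returns -1 if the vector is not found.
 */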
static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
                                    *assigned_dev, int irq)
{
        int i, index;
        struct msix_entry *host_msix_entries;

        host_msix_entries = assigned_dev->host_msix_entries;

        index = -1;
        for (i = 0; i < assigned_dev->entries_nr; i++)
                if (irq == host_msix_entries[i].vector) {
                        index = i;
                        break;
                }
        if (index < 0)
                printk(KERN_WARNING "Failed to find correlated MSI-X entry!\n");

        return index;
}

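/*
 * Hard IRQ handler for PCI 2.3 compliant devices that share their INTx
 * line: mask the interrupt at device level and, if it was our device
 * that raised it, wake the threaded handler for guest injection.
 */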
static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id)
{
        struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
        int ret;

        spin_lock(&assigned_dev->intx_lock);
        if (pci_check_and_mask_intx(assigned_dev->dev)) {
                assigned_dev->host_irq_disabled = true;
                ret = IRQ_WAKE_THREAD;
        } else
                ret = IRQ_NONE;
        spin_unlock(&assigned_dev->intx_lock);

        return ret;
}

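/*
 * Inject an interrupt into the guest. For INTx, honor a pending
 * user-space mask (KVM_DEV_ASSIGN_MASK_INTX) before asserting the line.
 */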
static void
kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev,
                                 int vector)
{
        if (unlikely(assigned_dev->irq_requested_type &
                     KVM_DEV_IRQ_GUEST_INTX)) {
                spin_lock(&assigned_dev->intx_mask_lock);
                if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX))
                        kvm_set_irq(assigned_dev->kvm,
                                    assigned_dev->irq_source_id, vector, 1);
                spin_unlock(&assigned_dev->intx_mask_lock);
        } else
                kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
                            vector, 1);
}

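/*
 * Threaded INTx handler. Without PCI 2.3 masking support the host IRQ
 * line stays disabled until the guest acks the interrupt.
 */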
static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
{
        struct kvm_assigned_dev_kernel *assigned_dev = dev_id;

        if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
                spin_lock_irq(&assigned_dev->intx_lock);
                disable_irq_nosync(irq);
                assigned_dev->host_irq_disabled = true;
                spin_unlock_irq(&assigned_dev->intx_lock);
        }

        kvm_assigned_dev_raise_guest_irq(assigned_dev,
                                         assigned_dev->guest_irq);

        return IRQ_HANDLED;
}

#ifdef __KVM_HAVE_MSI
static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
{
        struct kvm_assigned_dev_kernel *assigned_dev = dev_id;

        kvm_assigned_dev_raise_guest_irq(assigned_dev,
                                         assigned_dev->guest_irq);

        return IRQ_HANDLED;
}
#endif

#ifdef __KVM_HAVE_MSIX
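/*
 * Threaded MSI-X handler: map the host vector back to the corresponding
 * guest MSI-X entry and inject it.
 */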
static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
{
        struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
        int index = find_index_from_host_irq(assigned_dev, irq);
        u32 vector;

        if (index >= 0) {
                vector = assigned_dev->guest_msix_entries[index].vector;
                kvm_assigned_dev_raise_guest_irq(assigned_dev, vector);
        }

        return IRQ_HANDLED;
}
#endif

/* Ack the irq line for an assigned device */
static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
{
        struct kvm_assigned_dev_kernel *dev =
                container_of(kian, struct kvm_assigned_dev_kernel,
                             ack_notifier);

        kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0);

        spin_lock(&dev->intx_mask_lock);

        if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) {
                bool reassert = false;

                spin_lock_irq(&dev->intx_lock);
                /*
                 * The guest IRQ may be shared so this ack can come from an
                 * IRQ for another guest device.
                 */
                if (dev->host_irq_disabled) {
                        if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3))
                                enable_irq(dev->host_irq);
                        else if (!pci_check_and_unmask_intx(dev->dev))
                                reassert = true;
                        dev->host_irq_disabled = reassert;
                }
                spin_unlock_irq(&dev->intx_lock);

                if (reassert)
                        kvm_set_irq(dev->kvm, dev->irq_source_id,
                                    dev->guest_irq, 1);
        }

        spin_unlock(&dev->intx_mask_lock);
}

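/*
 * Tear down the guest side of an IRQ assignment: unregister the ack
 * notifier, deassert the guest line and release the IRQ source ID.
 */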
static void deassign_guest_irq(struct kvm *kvm,
                               struct kvm_assigned_dev_kernel *assigned_dev)
{
        if (assigned_dev->ack_notifier.gsi != -1)
                kvm_unregister_irq_ack_notifier(kvm,
                                                &assigned_dev->ack_notifier);

        kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
                    assigned_dev->guest_irq, 0);

        if (assigned_dev->irq_source_id != -1)
                kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
        assigned_dev->irq_source_id = -1;
        assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
}

/* This function implicitly holds the kvm->lock mutex due to cancel_work_sync(). */
static void deassign_host_irq(struct kvm *kvm,
                              struct kvm_assigned_dev_kernel *assigned_dev)
{
        /*
         * We disable the irq here to prevent further events.
         *
         * Note that this may result in nested disabling if the interrupt
         * type is INTx, but that's OK since we are going to free it anyway.
         *
         * If this function is part of VM destruction, make sure the kvm
         * state is still valid at this point, since we may also have to
         * wait on a currently running IRQ handler.
         */
        if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
                int i;
                for (i = 0; i < assigned_dev->entries_nr; i++)
                        disable_irq(assigned_dev->host_msix_entries[i].vector);

                for (i = 0; i < assigned_dev->entries_nr; i++)
                        free_irq(assigned_dev->host_msix_entries[i].vector,
                                 assigned_dev);

                assigned_dev->entries_nr = 0;
                kfree(assigned_dev->host_msix_entries);
                kfree(assigned_dev->guest_msix_entries);
                pci_disable_msix(assigned_dev->dev);
        } else {
                /* Deal with MSI and INTx */
                if ((assigned_dev->irq_requested_type &
                     KVM_DEV_IRQ_HOST_INTX) &&
                    (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
                        spin_lock_irq(&assigned_dev->intx_lock);
                        pci_intx(assigned_dev->dev, false);
                        spin_unlock_irq(&assigned_dev->intx_lock);
                        synchronize_irq(assigned_dev->host_irq);
                } else
                        disable_irq(assigned_dev->host_irq);

                free_irq(assigned_dev->host_irq, assigned_dev);

                if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
                        pci_disable_msi(assigned_dev->dev);
        }

        assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
}

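/*
 * Deassign the host and/or guest halves of an IRQ assignment, as
 * selected by irq_requested_type.
 */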
static int kvm_deassign_irq(struct kvm *kvm,
                            struct kvm_assigned_dev_kernel *assigned_dev,
                            unsigned long irq_requested_type)
{
        unsigned long guest_irq_type, host_irq_type;

        if (!irqchip_in_kernel(kvm))
                return -EINVAL;
        /* no irq assignment to deassign */
        if (!assigned_dev->irq_requested_type)
                return -ENXIO;

        host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
        guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;

        if (host_irq_type)
                deassign_host_irq(kvm, assigned_dev);
        if (guest_irq_type)
                deassign_guest_irq(kvm, assigned_dev);

        return 0;
}

static void kvm_free_assigned_irq(struct kvm *kvm,
                                  struct kvm_assigned_dev_kernel *assigned_dev)
{
        kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
}

static void kvm_free_assigned_device(struct kvm *kvm,
                                     struct kvm_assigned_dev_kernel
                                     *assigned_dev)
{
        kvm_free_assigned_irq(kvm, assigned_dev);

        pci_reset_function(assigned_dev->dev);
        if (pci_load_and_free_saved_state(assigned_dev->dev,
                                          &assigned_dev->pci_saved_state))
                printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
                       __func__, dev_name(&assigned_dev->dev->dev));
        else
                pci_restore_state(assigned_dev->dev);

        assigned_dev->dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;

        pci_release_regions(assigned_dev->dev);
        pci_disable_device(assigned_dev->dev);
        pci_dev_put(assigned_dev->dev);

        list_del(&assigned_dev->list);
        kfree(assigned_dev);
}

void kvm_free_all_assigned_devices(struct kvm *kvm)
{
        struct list_head *ptr, *ptr2;
        struct kvm_assigned_dev_kernel *assigned_dev;

        list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
                assigned_dev = list_entry(ptr,
                                          struct kvm_assigned_dev_kernel,
                                          list);

                kvm_free_assigned_device(kvm, assigned_dev);
        }
}

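/*
 * Set up the host side of an INTx assignment. The line can only be
 * shared (IRQF_SHARED) if PCI 2.3 device-level masking is available;
 * otherwise IRQF_ONESHOT keeps it disabled until the threaded handler
 * has run.
 */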
static int assigned_device_enable_host_intx(struct kvm *kvm,
                                            struct kvm_assigned_dev_kernel *dev)
{
        irq_handler_t irq_handler;
        unsigned long flags;

        dev->host_irq = dev->dev->irq;

        /*
         * We can only share the IRQ line with other host devices if we are
         * able to disable the IRQ source at device-level - independently of
         * the guest driver. Otherwise host devices may suffer from unbounded
         * IRQ latencies when the guest keeps the line asserted.
         */
        if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
                irq_handler = kvm_assigned_dev_intx;
                flags = IRQF_SHARED;
        } else {
                irq_handler = NULL;
                flags = IRQF_ONESHOT;
        }
        if (request_threaded_irq(dev->host_irq, irq_handler,
                                 kvm_assigned_dev_thread_intx, flags,
                                 dev->irq_name, dev))
                return -EIO;

        if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
                spin_lock_irq(&dev->intx_lock);
                pci_intx(dev->dev, true);
                spin_unlock_irq(&dev->intx_lock);
        }
        return 0;
}

#ifdef __KVM_HAVE_MSI
static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
{
        return IRQ_WAKE_THREAD;
}

static int assigned_device_enable_host_msi(struct kvm *kvm,
                                           struct kvm_assigned_dev_kernel *dev)
{
        int r;

        if (!dev->dev->msi_enabled) {
                r = pci_enable_msi(dev->dev);
                if (r)
                        return r;
        }

        dev->host_irq = dev->dev->irq;
        if (request_threaded_irq(dev->host_irq, kvm_assigned_dev_msi,
                                 kvm_assigned_dev_thread_msi, 0,
                                 dev->irq_name, dev)) {
                pci_disable_msi(dev->dev);
                return -EIO;
        }

        return 0;
}
#endif

#ifdef __KVM_HAVE_MSIX
static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
{
        return IRQ_WAKE_THREAD;
}

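/*
 * Enable MSI-X on the device and request one threaded IRQ per entry;
 * on failure, free the vectors requested so far and disable MSI-X again.
 */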
static int assigned_device_enable_host_msix(struct kvm *kvm,
                                            struct kvm_assigned_dev_kernel *dev)
{
        int i, r = -EINVAL;

        /* host_msix_entries and guest_msix_entries should have been
         * initialized */
        if (dev->entries_nr == 0)
                return r;

        r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr);
        if (r)
                return r;

        for (i = 0; i < dev->entries_nr; i++) {
                r = request_threaded_irq(dev->host_msix_entries[i].vector,
                                         kvm_assigned_dev_msix,
                                         kvm_assigned_dev_thread_msix,
                                         0, dev->irq_name, dev);
                if (r)
                        goto err;
        }

        return 0;
err:
        for (i -= 1; i >= 0; i--)
                free_irq(dev->host_msix_entries[i].vector, dev);
        pci_disable_msix(dev->dev);
        return r;
}

#endif

static int assigned_device_enable_guest_intx(struct kvm *kvm,
                                             struct kvm_assigned_dev_kernel *dev,
                                             struct kvm_assigned_irq *irq)
{
        dev->guest_irq = irq->guest_irq;
        dev->ack_notifier.gsi = irq->guest_irq;
        return 0;
}

#ifdef __KVM_HAVE_MSI
static int assigned_device_enable_guest_msi(struct kvm *kvm,
                                            struct kvm_assigned_dev_kernel *dev,
                                            struct kvm_assigned_irq *irq)
{
        dev->guest_irq = irq->guest_irq;
        dev->ack_notifier.gsi = -1;
        return 0;
}
#endif

#ifdef __KVM_HAVE_MSIX
static int assigned_device_enable_guest_msix(struct kvm *kvm,
                                             struct kvm_assigned_dev_kernel *dev,
                                             struct kvm_assigned_irq *irq)
{
        dev->guest_irq = irq->guest_irq;
        dev->ack_notifier.gsi = -1;
        return 0;
}
#endif

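/*
 * Request the host IRQ resources for the given type (INTx, MSI or
 * MSI-X). Only one host IRQ type may be active at a time.
 */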
static int assign_host_irq(struct kvm *kvm,
                           struct kvm_assigned_dev_kernel *dev,
                           __u32 host_irq_type)
{
        int r = -EEXIST;

        if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
                return r;

        snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s",
                 pci_name(dev->dev));

        switch (host_irq_type) {
        case KVM_DEV_IRQ_HOST_INTX:
                r = assigned_device_enable_host_intx(kvm, dev);
                break;
#ifdef __KVM_HAVE_MSI
        case KVM_DEV_IRQ_HOST_MSI:
                r = assigned_device_enable_host_msi(kvm, dev);
                break;
#endif
#ifdef __KVM_HAVE_MSIX
        case KVM_DEV_IRQ_HOST_MSIX:
                r = assigned_device_enable_host_msix(kvm, dev);
                break;
#endif
        default:
                r = -EINVAL;
        }
        dev->host_irq_disabled = false;

        if (!r)
                dev->irq_requested_type |= host_irq_type;

        return r;
}

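/*
 * Wire up the guest side: allocate an IRQ source ID, record the guest
 * GSI and register the ack notifier where INTx semantics require it.
 */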
static int assign_guest_irq(struct kvm *kvm,
                            struct kvm_assigned_dev_kernel *dev,
                            struct kvm_assigned_irq *irq,
                            unsigned long guest_irq_type)
{
        int id;
        int r = -EEXIST;

        if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
                return r;

        id = kvm_request_irq_source_id(kvm);
        if (id < 0)
                return id;

        dev->irq_source_id = id;

        switch (guest_irq_type) {
        case KVM_DEV_IRQ_GUEST_INTX:
                r = assigned_device_enable_guest_intx(kvm, dev, irq);
                break;
#ifdef __KVM_HAVE_MSI
        case KVM_DEV_IRQ_GUEST_MSI:
                r = assigned_device_enable_guest_msi(kvm, dev, irq);
                break;
#endif
#ifdef __KVM_HAVE_MSIX
        case KVM_DEV_IRQ_GUEST_MSIX:
                r = assigned_device_enable_guest_msix(kvm, dev, irq);
                break;
#endif
        default:
                r = -EINVAL;
        }

        if (!r) {
                dev->irq_requested_type |= guest_irq_type;
                if (dev->ack_notifier.gsi != -1)
                        kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
        } else
                kvm_free_irq_source_id(kvm, dev->irq_source_id);

        return r;
}

/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
                                   struct kvm_assigned_irq *assigned_irq)
{
        int r = -EINVAL;
        struct kvm_assigned_dev_kernel *match;
        unsigned long host_irq_type, guest_irq_type;

        if (!irqchip_in_kernel(kvm))
                return r;

        mutex_lock(&kvm->lock);
        r = -ENODEV;
        match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                      assigned_irq->assigned_dev_id);
        if (!match)
                goto out;

        host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
        guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);

        r = -EINVAL;
        /* can only assign one type at a time */
        if (hweight_long(host_irq_type) > 1)
                goto out;
        if (hweight_long(guest_irq_type) > 1)
                goto out;
        if (host_irq_type == 0 && guest_irq_type == 0)
                goto out;

        r = 0;
        if (host_irq_type)
                r = assign_host_irq(kvm, match, host_irq_type);
        if (r)
                goto out;

        if (guest_irq_type)
                r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
out:
        mutex_unlock(&kvm->lock);
        return r;
}

static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
                                         struct kvm_assigned_irq
                                         *assigned_irq)
{
        int r = -ENODEV;
        struct kvm_assigned_dev_kernel *match;
        unsigned long irq_type;

        mutex_lock(&kvm->lock);

        match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                      assigned_irq->assigned_dev_id);
        if (!match)
                goto out;

        irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK |
                                          KVM_DEV_IRQ_GUEST_MASK);
        r = kvm_deassign_irq(kvm, match, irq_type);
out:
        mutex_unlock(&kvm->lock);
        return r;
}

/*
 * We want to test whether the caller has been granted permissions to
 * use this device. To be able to configure and control the device,
 * the user needs access to PCI configuration space and BAR resources.
 * These are accessed through PCI sysfs. PCI config space is often
 * passed to the process calling this ioctl via file descriptor, so we
 * can't rely on access to that file. We can check for permissions
 * on each of the BAR resource files, which is a pretty clear
 * indicator that the user has been granted access to the device.
 */
static int probe_sysfs_permissions(struct pci_dev *dev)
{
#ifdef CONFIG_SYSFS
        int i;
        bool bar_found = false;

        for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) {
                char *kpath, *syspath;
                struct path path;
                struct inode *inode;
                int r;

                if (!pci_resource_len(dev, i))
                        continue;

                kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
                if (!kpath)
                        return -ENOMEM;

                /* Per sysfs-rules, sysfs is always at /sys */
                syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i);
                kfree(kpath);
                if (!syspath)
                        return -ENOMEM;

                r = kern_path(syspath, LOOKUP_FOLLOW, &path);
                kfree(syspath);
                if (r)
                        return r;

                inode = path.dentry->d_inode;

                r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
                path_put(&path);
                if (r)
                        return r;

                bar_found = true;
        }

        /* If no resources, probably something special */
        if (!bar_found)
                return -EPERM;

        return 0;
#else
        return -EINVAL; /* No way to control the device without sysfs */
#endif
}

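/*
 * Handle KVM_ASSIGN_PCI_DEVICE: take ownership of the PCI device,
 * store its config state for later restore and attach it to the VM's
 * IOMMU domain.
 */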
static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
                                      struct kvm_assigned_pci_dev *assigned_dev)
{
        int r = 0, idx;
        struct kvm_assigned_dev_kernel *match;
        struct pci_dev *dev;

        if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
                return -EINVAL;

        mutex_lock(&kvm->lock);
        idx = srcu_read_lock(&kvm->srcu);

        match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                      assigned_dev->assigned_dev_id);
        if (match) {
                /* device already assigned */
                r = -EEXIST;
                goto out;
        }

        match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
        if (match == NULL) {
                printk(KERN_INFO "%s: Couldn't allocate memory\n",
                       __func__);
                r = -ENOMEM;
                goto out;
        }
        dev = pci_get_domain_bus_and_slot(assigned_dev->segnr,
                                          assigned_dev->busnr,
                                          assigned_dev->devfn);
        if (!dev) {
                printk(KERN_INFO "%s: host device not found\n", __func__);
                r = -EINVAL;
                goto out_free;
        }

        /* Don't allow bridges to be assigned */
        if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) {
                r = -EPERM;
                goto out_put;
        }

        r = probe_sysfs_permissions(dev);
        if (r)
                goto out_put;

        if (pci_enable_device(dev)) {
                printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
                r = -EBUSY;
                goto out_put;
        }
        r = pci_request_regions(dev, "kvm_assigned_device");
        if (r) {
                printk(KERN_INFO "%s: Could not get access to device regions\n",
                       __func__);
                goto out_disable;
        }

        pci_reset_function(dev);
        pci_save_state(dev);
        match->pci_saved_state = pci_store_saved_state(dev);
        if (!match->pci_saved_state)
                printk(KERN_DEBUG "%s: Couldn't store %s saved state\n",
                       __func__, dev_name(&dev->dev));

        if (!pci_intx_mask_supported(dev))
                assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3;

        match->assigned_dev_id = assigned_dev->assigned_dev_id;
        match->host_segnr = assigned_dev->segnr;
        match->host_busnr = assigned_dev->busnr;
        match->host_devfn = assigned_dev->devfn;
        match->flags = assigned_dev->flags;
        match->dev = dev;
        spin_lock_init(&match->intx_lock);
        spin_lock_init(&match->intx_mask_lock);
        match->irq_source_id = -1;
        match->kvm = kvm;
        match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;

        list_add(&match->list, &kvm->arch.assigned_dev_head);

        if (!kvm->arch.iommu_domain) {
                r = kvm_iommu_map_guest(kvm);
                if (r)
                        goto out_list_del;
        }
        r = kvm_assign_device(kvm, match);
        if (r)
                goto out_list_del;

out:
        srcu_read_unlock(&kvm->srcu, idx);
        mutex_unlock(&kvm->lock);
        return r;
out_list_del:
        if (pci_load_and_free_saved_state(dev, &match->pci_saved_state))
                printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
                       __func__, dev_name(&dev->dev));
        list_del(&match->list);
        pci_release_regions(dev);
out_disable:
        pci_disable_device(dev);
out_put:
        pci_dev_put(dev);
out_free:
        kfree(match);
        srcu_read_unlock(&kvm->srcu, idx);
        mutex_unlock(&kvm->lock);
        return r;
}

static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
                                        struct kvm_assigned_pci_dev *assigned_dev)
{
        int r = 0;
        struct kvm_assigned_dev_kernel *match;

        mutex_lock(&kvm->lock);

        match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                      assigned_dev->assigned_dev_id);
        if (!match) {
                printk(KERN_INFO "%s: device hasn't been assigned before, "
                       "so cannot be deassigned\n", __func__);
                r = -EINVAL;
                goto out;
        }

        kvm_deassign_device(kvm, match);

        kvm_free_assigned_device(kvm, match);

out:
        mutex_unlock(&kvm->lock);
        return r;
}


#ifdef __KVM_HAVE_MSIX
static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
                                    struct kvm_assigned_msix_nr *entry_nr)
{
        int r = 0;
        struct kvm_assigned_dev_kernel *adev;

        mutex_lock(&kvm->lock);

        adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                     entry_nr->assigned_dev_id);
        if (!adev) {
                r = -EINVAL;
                goto msix_nr_out;
        }

        if (adev->entries_nr == 0) {
                adev->entries_nr = entry_nr->entry_nr;
                if (adev->entries_nr == 0 ||
                    adev->entries_nr > KVM_MAX_MSIX_PER_DEV) {
                        r = -EINVAL;
                        goto msix_nr_out;
                }

                adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
                                                  entry_nr->entry_nr,
                                                  GFP_KERNEL);
                if (!adev->host_msix_entries) {
                        r = -ENOMEM;
                        goto msix_nr_out;
                }
                adev->guest_msix_entries =
                        kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr,
                                GFP_KERNEL);
                if (!adev->guest_msix_entries) {
                        kfree(adev->host_msix_entries);
                        r = -ENOMEM;
                        goto msix_nr_out;
                }
        } else /* Setting the MSI-X entry count twice is not allowed */
                r = -EINVAL;
msix_nr_out:
        mutex_unlock(&kvm->lock);
        return r;
}

static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
                                       struct kvm_assigned_msix_entry *entry)
{
        int r = 0, i;
        struct kvm_assigned_dev_kernel *adev;

        mutex_lock(&kvm->lock);

        adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                     entry->assigned_dev_id);

        if (!adev) {
                r = -EINVAL;
                goto msix_entry_out;
        }

        for (i = 0; i < adev->entries_nr; i++)
                if (adev->guest_msix_entries[i].vector == 0 ||
                    adev->guest_msix_entries[i].entry == entry->entry) {
                        adev->guest_msix_entries[i].entry = entry->entry;
                        adev->guest_msix_entries[i].vector = entry->gsi;
                        adev->host_msix_entries[i].entry = entry->entry;
                        break;
                }
        if (i == adev->entries_nr) {
                r = -ENOSPC;
                goto msix_entry_out;
        }

msix_entry_out:
        mutex_unlock(&kvm->lock);

        return r;
}
#endif

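/*
 * Handle KVM_ASSIGN_SET_INTX_MASK: record the user-space INTx mask and
 * either deassert the guest line or re-enable the host IRQ accordingly.
 */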
static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm,
                                         struct kvm_assigned_pci_dev *assigned_dev)
{
        int r = 0;
        struct kvm_assigned_dev_kernel *match;

        mutex_lock(&kvm->lock);

        match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                      assigned_dev->assigned_dev_id);
        if (!match) {
                r = -ENODEV;
                goto out;
        }

        spin_lock(&match->intx_mask_lock);

        match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX;
        match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX;

        if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
                if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) {
                        kvm_set_irq(match->kvm, match->irq_source_id,
                                    match->guest_irq, 0);
                        /*
                         * Masking at hardware-level is performed on demand,
                         * i.e. when an IRQ actually arrives at the host.
                         */
                } else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
                        /*
                         * Unmask the IRQ line if required. Unmasking at
                         * device level will be performed by user space.
                         */
                        spin_lock_irq(&match->intx_lock);
                        if (match->host_irq_disabled) {
                                enable_irq(match->host_irq);
                                match->host_irq_disabled = false;
                        }
                        spin_unlock_irq(&match->intx_lock);
                }
        }

        spin_unlock(&match->intx_mask_lock);

out:
        mutex_unlock(&kvm->lock);
        return r;
}

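/*
 * Entry point for the device assignment ioctls on the VM fd; each
 * command copies its argument structure in from user space before
 * dispatching.
 */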
long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
                                  unsigned long arg)
{
        void __user *argp = (void __user *)arg;
        int r;

        switch (ioctl) {
        case KVM_ASSIGN_PCI_DEVICE: {
                struct kvm_assigned_pci_dev assigned_dev;

                r = -EFAULT;
                if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
                        goto out;
                r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
                if (r)
                        goto out;
                break;
        }
        case KVM_ASSIGN_IRQ: {
                r = -EOPNOTSUPP;
                break;
        }
        case KVM_ASSIGN_DEV_IRQ: {
                struct kvm_assigned_irq assigned_irq;

                r = -EFAULT;
                if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
                        goto out;
                r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
                if (r)
                        goto out;
                break;
        }
        case KVM_DEASSIGN_DEV_IRQ: {
                struct kvm_assigned_irq assigned_irq;

                r = -EFAULT;
                if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
                        goto out;
                r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
                if (r)
                        goto out;
                break;
        }
        case KVM_DEASSIGN_PCI_DEVICE: {
                struct kvm_assigned_pci_dev assigned_dev;

                r = -EFAULT;
                if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
                        goto out;
                r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
                if (r)
                        goto out;
                break;
        }
#ifdef KVM_CAP_IRQ_ROUTING
        case KVM_SET_GSI_ROUTING: {
                struct kvm_irq_routing routing;
                struct kvm_irq_routing __user *urouting;
                struct kvm_irq_routing_entry *entries;

                r = -EFAULT;
                if (copy_from_user(&routing, argp, sizeof(routing)))
                        goto out;
                r = -EINVAL;
                if (routing.nr >= KVM_MAX_IRQ_ROUTES)
                        goto out;
                if (routing.flags)
                        goto out;
                r = -ENOMEM;
                entries = vmalloc(routing.nr * sizeof(*entries));
                if (!entries)
                        goto out;
                r = -EFAULT;
                urouting = argp;
                if (copy_from_user(entries, urouting->entries,
                                   routing.nr * sizeof(*entries)))
                        goto out_free_irq_routing;
                r = kvm_set_irq_routing(kvm, entries, routing.nr,
                                        routing.flags);
out_free_irq_routing:
                vfree(entries);
                break;
        }
#endif /* KVM_CAP_IRQ_ROUTING */
#ifdef __KVM_HAVE_MSIX
        case KVM_ASSIGN_SET_MSIX_NR: {
                struct kvm_assigned_msix_nr entry_nr;
                r = -EFAULT;
                if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
                        goto out;
                r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
                if (r)
                        goto out;
                break;
        }
        case KVM_ASSIGN_SET_MSIX_ENTRY: {
                struct kvm_assigned_msix_entry entry;
                r = -EFAULT;
                if (copy_from_user(&entry, argp, sizeof entry))
                        goto out;
                r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
                if (r)
                        goto out;
                break;
        }
#endif
        case KVM_ASSIGN_SET_INTX_MASK: {
                struct kvm_assigned_pci_dev assigned_dev;

                r = -EFAULT;
                if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
                        goto out;
                r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev);
                break;
        }
        default:
                r = -ENOTTY;
                break;
        }
out:
        return r;
}