/******************************************************************************
 * privcmd.c
 *
 * Interface to privileged domain-0 commands.
 *
 * Copyright (c) 2002-2004, K A Fraser, B Dragovic
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/uaccess.h>
#include <linux/swap.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/seq_file.h>
#include <linux/miscdevice.h>

#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/privcmd.h>
#include <xen/interface/xen.h>
#include <xen/features.h>
#include <xen/page.h>
#include <xen/xen-ops.h>
#include <xen/balloon.h>

#include "privcmd.h"

MODULE_LICENSE("GPL");

#define PRIV_VMA_LOCKED ((void *)1)

#ifndef HAVE_ARCH_PRIVCMD_MMAP
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
#endif

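/*
 * IOCTL_PRIVCMD_HYPERCALL: copy a hypercall descriptor in from userspace
 * and issue it via privcmd_call(), passing the hypervisor's return value
 * straight back to the caller.
 */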
static long privcmd_ioctl_hypercall(void __user *udata)
{
        struct privcmd_hypercall hypercall;
        long ret;

        if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
                return -EFAULT;

        ret = privcmd_call(hypercall.op,
                           hypercall.arg[0], hypercall.arg[1],
                           hypercall.arg[2], hypercall.arg[3],
                           hypercall.arg[4]);

        return ret;
}

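/* Free every page on the list and leave the list re-initialised (empty). */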
static void free_page_list(struct list_head *pages)
{
        struct page *p, *n;

        list_for_each_entry_safe(p, n, pages, lru)
                __free_page(p);

        INIT_LIST_HEAD(pages);
}

/*
 * Given an array of items in userspace, return a list of pages
 * containing the data.  If copying fails, either because of memory
 * allocation failure or a problem reading user memory, return an
 * error code; it's up to the caller to dispose of any partial list.
 */
static int gather_array(struct list_head *pagelist,
                        unsigned nelem, size_t size,
                        const void __user *data)
{
        unsigned pageidx;
        void *pagedata;
        int ret;

        if (size > PAGE_SIZE)
                return 0;

        pageidx = PAGE_SIZE;
        pagedata = NULL;        /* quiet, gcc */
        while (nelem--) {
                if (pageidx > PAGE_SIZE-size) {
                        struct page *page = alloc_page(GFP_KERNEL);

                        ret = -ENOMEM;
                        if (page == NULL)
                                goto fail;

                        pagedata = page_address(page);

                        list_add_tail(&page->lru, pagelist);
                        pageidx = 0;
                }

                ret = -EFAULT;
                if (copy_from_user(pagedata + pageidx, data, size))
                        goto fail;

                data += size;
                pageidx += size;
        }

        ret = 0;

fail:
        return ret;
}

/*
 * Call function "fn" on each element of the array fragmented
 * over a list of pages.
 */
static int traverse_pages(unsigned nelem, size_t size,
                          struct list_head *pos,
                          int (*fn)(void *data, void *state),
                          void *state)
{
        void *pagedata;
        unsigned pageidx;
        int ret = 0;

        BUG_ON(size > PAGE_SIZE);

        pageidx = PAGE_SIZE;
        pagedata = NULL;        /* hush, gcc */

        while (nelem--) {
                if (pageidx > PAGE_SIZE-size) {
                        struct page *page;
                        pos = pos->next;
                        page = list_entry(pos, struct page, lru);
                        pagedata = page_address(page);
                        pageidx = 0;
                }

                ret = (*fn)(pagedata + pageidx, state);
                if (ret)
                        break;
                pageidx += size;
        }

        return ret;
}

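/*
 * Per-call state for IOCTL_PRIVCMD_MMAP: the next expected virtual
 * address, the VMA being populated and the foreign domain whose frames
 * are being mapped.
 */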
struct mmap_mfn_state {
        unsigned long va;
        struct vm_area_struct *vma;
        domid_t domain;
};

static int mmap_mfn_range(void *data, void *state)
{
        struct privcmd_mmap_entry *msg = data;
        struct mmap_mfn_state *st = state;
        struct vm_area_struct *vma = st->vma;
        int rc;

        /* Do not allow range to wrap the address space. */
        if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
            ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
                return -EINVAL;

        /* Range chunks must be contiguous in va space. */
        if ((msg->va != st->va) ||
            ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
                return -EINVAL;

        rc = xen_remap_domain_mfn_range(vma,
                                        msg->va & PAGE_MASK,
                                        msg->mfn, msg->npages,
                                        vma->vm_page_prot,
                                        st->domain, NULL);
        if (rc < 0)
                return rc;

        st->va += msg->npages << PAGE_SHIFT;

        return 0;
}

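/*
 * IOCTL_PRIVCMD_MMAP (V1): gather the privcmd_mmap_entry array from
 * userspace, find the target VMA, claim it for single-shot use, and map
 * each contiguous MFN chunk with mmap_mfn_range().
 */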
static long privcmd_ioctl_mmap(void __user *udata)
{
        struct privcmd_mmap mmapcmd;
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        int rc;
        LIST_HEAD(pagelist);
        struct mmap_mfn_state state;

        /* We only support privcmd_ioctl_mmap_batch for auto translated. */
        if (xen_feature(XENFEAT_auto_translated_physmap))
                return -ENOSYS;

        if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
                return -EFAULT;

        rc = gather_array(&pagelist,
                          mmapcmd.num, sizeof(struct privcmd_mmap_entry),
                          mmapcmd.entry);

        if (rc || list_empty(&pagelist))
                goto out;

        down_write(&mm->mmap_sem);

        {
                struct page *page = list_first_entry(&pagelist,
                                                     struct page, lru);
                struct privcmd_mmap_entry *msg = page_address(page);

                vma = find_vma(mm, msg->va);
                rc = -EINVAL;

                if (!vma || (msg->va != vma->vm_start) ||
                    !privcmd_enforce_singleshot_mapping(vma))
                        goto out_up;
        }

        state.va = vma->vm_start;
        state.vma = vma;
        state.domain = mmapcmd.dom;

        rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
                            &pagelist,
                            mmap_mfn_range, &state);

out_up:
        up_write(&mm->mmap_sem);

out:
        free_page_list(&pagelist);

        return rc;
}

struct mmap_batch_state {
        domid_t domain;
        unsigned long va;
        struct vm_area_struct *vma;
        int index;
        /* A tristate:
         *      0 for no errors
         *      1 if at least one error has happened (and no
         *          -ENOENT errors have happened)
         *      -ENOENT if at least 1 -ENOENT has happened.
         */
        int global_error;
        int version;

        /* User-space mfn array to store errors in the second pass for V1. */
        xen_pfn_t __user *user_mfn;
        /* User-space int array to store errors in the second pass for V2. */
        int __user *user_err;
};

/* auto translated dom0 note: if domU being created is PV, then mfn is
 * mfn(addr on bus). If it's auto xlated, then mfn is pfn (input to HAP).
 */
static int mmap_batch_fn(void *data, void *state)
{
        xen_pfn_t *mfnp = data;
        struct mmap_batch_state *st = state;
        struct vm_area_struct *vma = st->vma;
        struct page **pages = vma->vm_private_data;
        struct page *cur_page = NULL;
        int ret;

        if (xen_feature(XENFEAT_auto_translated_physmap))
                cur_page = pages[st->index++];

        ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
                                         st->vma->vm_page_prot, st->domain,
                                         &cur_page);

        /* Store error code for second pass. */
        if (st->version == 1) {
                if (ret < 0) {
                        /*
                         * V1 encodes the error codes in the 32bit top nibble of the
                         * mfn (with its known limitations vis-a-vis 64 bit callers).
                         */
                        *mfnp |= (ret == -ENOENT) ?
                                                PRIVCMD_MMAPBATCH_PAGED_ERROR :
                                                PRIVCMD_MMAPBATCH_MFN_ERROR;
                }
        } else { /* st->version == 2 */
                *((int *) mfnp) = ret;
        }

        /* And see if it affects the global_error. */
        if (ret < 0) {
                if (ret == -ENOENT)
                        st->global_error = -ENOENT;
                else {
                        /* Record that at least one error has happened. */
                        if (st->global_error == 0)
                                st->global_error = 1;
                }
        }
        st->va += PAGE_SIZE;

        return 0;
}

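/*
 * Second pass over the gathered array: copy any per-frame error back to
 * the caller, into the mfn array itself (V1) or the separate err array (V2).
 */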
static int mmap_return_errors(void *data, void *state)
{
        struct mmap_batch_state *st = state;

        if (st->version == 1) {
                xen_pfn_t mfnp = *((xen_pfn_t *) data);
                if (mfnp & PRIVCMD_MMAPBATCH_MFN_ERROR)
                        return __put_user(mfnp, st->user_mfn++);
                else
                        st->user_mfn++;
        } else { /* st->version == 2 */
                int err = *((int *) data);
                if (err)
                        return __put_user(err, st->user_err++);
                else
                        st->user_err++;
        }

        return 0;
}

/* Allocate pfns that are then mapped with gmfns from foreign domid. Update
 * the vma with the page info to use later.
 * Returns: 0 if success, otherwise -errno
 */
static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs)
{
        int rc;
        struct page **pages;

        pages = kcalloc(numpgs, sizeof(pages[0]), GFP_KERNEL);
        if (pages == NULL)
                return -ENOMEM;

        rc = alloc_xenballooned_pages(numpgs, pages, 0);
        if (rc != 0) {
                pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__,
                        numpgs, rc);
                kfree(pages);
                return -ENOMEM;
        }
        BUG_ON(vma->vm_private_data != PRIV_VMA_LOCKED);
        vma->vm_private_data = pages;

        return 0;
}

static struct vm_operations_struct privcmd_vm_ops;

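/*
 * IOCTL_PRIVCMD_MMAPBATCH{,_V2}: map an array of foreign frames into a
 * single VMA.  A first pass maps every frame via mmap_batch_fn(); a
 * second pass reports per-frame failures back to userspace.  V1 and V2
 * differ only in how those errors are returned.
 */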
static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
{
        int ret;
        struct privcmd_mmapbatch_v2 m;
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long nr_pages;
        LIST_HEAD(pagelist);
        struct mmap_batch_state state;

        switch (version) {
        case 1:
                if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
                        return -EFAULT;
                /* Returns per-frame error in m.arr. */
                m.err = NULL;
                if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
                        return -EFAULT;
                break;
        case 2:
                if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
                        return -EFAULT;
                /* Returns per-frame error code in m.err. */
                if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
                        return -EFAULT;
                break;
        default:
                return -EINVAL;
        }

        nr_pages = m.num;
        if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
                return -EINVAL;

        ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);

        if (ret)
                goto out;
        if (list_empty(&pagelist)) {
                ret = -EINVAL;
                goto out;
        }

        if (version == 2) {
                /* Zero error array now to only copy back actual errors. */
                if (clear_user(m.err, sizeof(int) * m.num)) {
                        ret = -EFAULT;
                        goto out;
                }
        }

        down_write(&mm->mmap_sem);

        vma = find_vma(mm, m.addr);
        if (!vma ||
            vma->vm_ops != &privcmd_vm_ops ||
            (m.addr != vma->vm_start) ||
            ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
            !privcmd_enforce_singleshot_mapping(vma)) {
                up_write(&mm->mmap_sem);
                ret = -EINVAL;
                goto out;
        }
        if (xen_feature(XENFEAT_auto_translated_physmap)) {
                ret = alloc_empty_pages(vma, m.num);
                if (ret < 0) {
                        up_write(&mm->mmap_sem);
                        goto out;
                }
        }

        state.domain = m.dom;
        state.vma = vma;
        state.va = m.addr;
        state.index = 0;
        state.global_error = 0;
        state.version = version;

        /* mmap_batch_fn guarantees ret == 0 */
        BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
                              &pagelist, mmap_batch_fn, &state));

        up_write(&mm->mmap_sem);

        if (state.global_error) {
                /* Write back errors in second pass. */
                state.user_mfn = (xen_pfn_t *)m.arr;
                state.user_err = m.err;
                ret = traverse_pages(m.num, sizeof(xen_pfn_t),
                                     &pagelist, mmap_return_errors, &state);
        } else
                ret = 0;

        /* If we have not had any EFAULT-like global errors then set the global
         * error to -ENOENT if necessary. */
        if ((ret == 0) && (state.global_error == -ENOENT))
                ret = -ENOENT;

out:
        free_page_list(&pagelist);

        return ret;
}

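/*
 * Top-level ioctl dispatcher for /dev/xen/privcmd.  A minimal,
 * hypothetical userspace sketch (error handling omitted; assumes the
 * uapi definitions from xen/privcmd.h and a sufficiently privileged
 * caller):
 *
 *      int fd = open("/dev/xen/privcmd", O_RDWR);
 *      struct privcmd_hypercall call = {
 *              .op  = __HYPERVISOR_xen_version,
 *              .arg = { XENVER_version },
 *      };
 *      long ver = ioctl(fd, IOCTL_PRIVCMD_HYPERCALL, &call);
 */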
static long privcmd_ioctl(struct file *file,
                          unsigned int cmd, unsigned long data)
{
        int ret = -ENOSYS;
        void __user *udata = (void __user *) data;

        switch (cmd) {
        case IOCTL_PRIVCMD_HYPERCALL:
                ret = privcmd_ioctl_hypercall(udata);
                break;

        case IOCTL_PRIVCMD_MMAP:
                ret = privcmd_ioctl_mmap(udata);
                break;

        case IOCTL_PRIVCMD_MMAPBATCH:
                ret = privcmd_ioctl_mmap_batch(udata, 1);
                break;

        case IOCTL_PRIVCMD_MMAPBATCH_V2:
                ret = privcmd_ioctl_mmap_batch(udata, 2);
                break;

        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

static void privcmd_close(struct vm_area_struct *vma)
{
        struct page **pages = vma->vm_private_data;
        int numpgs = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;

        if (!xen_feature(XENFEAT_auto_translated_physmap) || !numpgs || !pages)
                return;

        xen_unmap_domain_mfn_range(vma, numpgs, pages);
        free_xenballooned_pages(numpgs, pages);
        kfree(pages);
}

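/*
 * All valid mappings are installed at ioctl time, so any fault on a
 * privcmd VMA indicates a stale or bogus access: log it and raise SIGBUS.
 */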
static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
        printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
               vma, vma->vm_start, vma->vm_end,
               vmf->pgoff, vmf->virtual_address);

        return VM_FAULT_SIGBUS;
}

static struct vm_operations_struct privcmd_vm_ops = {
        .close = privcmd_close,
        .fault = privcmd_fault
};

static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
{
        /* DONTCOPY is essential for Xen because copy_page_range doesn't know
         * how to recreate these mappings */
        vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTCOPY |
                         VM_DONTEXPAND | VM_DONTDUMP;
        vma->vm_ops = &privcmd_vm_ops;
        vma->vm_private_data = NULL;

        return 0;
}

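/*
 * Atomically claim the VMA for mapping: vm_private_data goes from NULL
 * (set in privcmd_mmap()) to PRIV_VMA_LOCKED exactly once, so a second
 * mmap/mmapbatch ioctl on the same VMA fails the cmpxchg and is rejected.
 */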
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
{
        return !cmpxchg(&vma->vm_private_data, NULL, PRIV_VMA_LOCKED);
}

const struct file_operations xen_privcmd_fops = {
        .owner = THIS_MODULE,
        .unlocked_ioctl = privcmd_ioctl,
        .mmap = privcmd_mmap,
};
EXPORT_SYMBOL_GPL(xen_privcmd_fops);

static struct miscdevice privcmd_dev = {
        .minor = MISC_DYNAMIC_MINOR,
        .name = "xen/privcmd",
        .fops = &xen_privcmd_fops,
};

static int __init privcmd_init(void)
{
        int err;

        if (!xen_domain())
                return -ENODEV;

        err = misc_register(&privcmd_dev);
        if (err != 0) {
                printk(KERN_ERR "Could not register Xen privcmd device\n");
                return err;
        }
        return 0;
}

static void __exit privcmd_exit(void)
{
        misc_deregister(&privcmd_dev);
}

module_init(privcmd_init);
module_exit(privcmd_exit);