Root/
1 | /*P:050 |
2 | * Lguest guests use a very simple method to describe devices. It's a |
3 | * series of device descriptors contained just above the top of normal Guest |
4 | * memory. |
5 | * |
6 | * We use the standard "virtio" device infrastructure, which provides us with a |
7 | * console, a network and a block driver. Each one expects some configuration |
8 | * information and a "virtqueue" or two to send and receive data. |
9 | :*/ |
10 | #include <linux/init.h> |
11 | #include <linux/bootmem.h> |
12 | #include <linux/lguest_launcher.h> |
13 | #include <linux/virtio.h> |
14 | #include <linux/virtio_config.h> |
15 | #include <linux/interrupt.h> |
16 | #include <linux/virtio_ring.h> |
17 | #include <linux/err.h> |
18 | #include <linux/export.h> |
19 | #include <linux/slab.h> |
20 | #include <asm/io.h> |
21 | #include <asm/paravirt.h> |
22 | #include <asm/lguest_hcall.h> |
23 | |
/* Base address of the mapped page that holds the device descriptors. */
static void *lguest_devices;
26 | |
27 | /* |
28 | * For Guests, device memory can be used as normal memory, so we cast away the |
29 | * __iomem to quieten sparse. |
30 | */ |
31 | static inline void *lguest_map(unsigned long phys_addr, unsigned long pages) |
32 | { |
33 | return (__force void *)ioremap_cache(phys_addr, PAGE_SIZE*pages); |
34 | } |
35 | |
36 | static inline void lguest_unmap(void *addr) |
37 | { |
38 | iounmap((__force void __iomem *)addr); |
39 | } |
40 | |
41 | /*D:100 |
42 | * Each lguest device is just a virtio device plus a pointer to its entry |
43 | * in the lguest_devices page. |
44 | */ |
45 | struct lguest_device { |
46 | struct virtio_device vdev; |
47 | |
48 | /* The entry in the lguest_devices page for this device. */ |
49 | struct lguest_device_desc *desc; |
50 | }; |
51 | |
52 | /* |
53 | * Since the virtio infrastructure hands us a pointer to the virtio_device all |
54 | * the time, it helps to have a curt macro to get a pointer to the struct |
55 | * lguest_device it's enclosed in. |
56 | */ |
#define to_lgdev(vd) container_of((vd), struct lguest_device, vdev)
58 | |
59 | /*D:130 |
60 | * Device configurations |
61 | * |
62 | * The configuration information for a device consists of one or more |
63 | * virtqueues, a feature bitmap, and some configuration bytes. The |
64 | * configuration bytes don't really matter to us: the Launcher sets them up, and |
65 | * the driver will look at them during setup. |
66 | * |
67 | * A convenient routine to return the device's virtqueue config array: |
68 | * immediately after the descriptor. |
69 | */ |
70 | static struct lguest_vqconfig *lg_vq(const struct lguest_device_desc *desc) |
71 | { |
72 | return (void *)(desc + 1); |
73 | } |
74 | |
75 | /* The features come immediately after the virtqueues. */ |
76 | static u8 *lg_features(const struct lguest_device_desc *desc) |
77 | { |
78 | return (void *)(lg_vq(desc) + desc->num_vq); |
79 | } |
80 | |
81 | /* The config space comes after the two feature bitmasks. */ |
82 | static u8 *lg_config(const struct lguest_device_desc *desc) |
83 | { |
84 | return lg_features(desc) + desc->feature_len * 2; |
85 | } |
86 | |
87 | /* The total size of the config page used by this device (incl. desc) */ |
88 | static unsigned desc_size(const struct lguest_device_desc *desc) |
89 | { |
90 | return sizeof(*desc) |
91 | + desc->num_vq * sizeof(struct lguest_vqconfig) |
92 | + desc->feature_len * 2 |
93 | + desc->config_len; |
94 | } |
95 | |
96 | /* This gets the device's feature bits. */ |
97 | static u32 lg_get_features(struct virtio_device *vdev) |
98 | { |
99 | unsigned int i; |
100 | u32 features = 0; |
101 | struct lguest_device_desc *desc = to_lgdev(vdev)->desc; |
102 | u8 *in_features = lg_features(desc); |
103 | |
104 | /* We do this the slow but generic way. */ |
105 | for (i = 0; i < min(desc->feature_len * 8, 32); i++) |
106 | if (in_features[i / 8] & (1 << (i % 8))) |
107 | features |= (1 << i); |
108 | |
109 | return features; |
110 | } |
111 | |
112 | /* |
113 | * To notify on reset or feature finalization, we (ab)use the NOTIFY |
114 | * hypercall, with the descriptor address of the device. |
115 | */ |
116 | static void status_notify(struct virtio_device *vdev) |
117 | { |
118 | unsigned long offset = (void *)to_lgdev(vdev)->desc - lguest_devices; |
119 | |
120 | hcall(LHCALL_NOTIFY, (max_pfn << PAGE_SHIFT) + offset, 0, 0, 0); |
121 | } |
122 | |
123 | /* |
124 | * The virtio core takes the features the Host offers, and copies the ones |
125 | * supported by the driver into the vdev->features array. Once that's all |
126 | * sorted out, this routine is called so we can tell the Host which features we |
127 | * understand and accept. |
128 | */ |
129 | static void lg_finalize_features(struct virtio_device *vdev) |
130 | { |
131 | unsigned int i, bits; |
132 | struct lguest_device_desc *desc = to_lgdev(vdev)->desc; |
133 | /* Second half of bitmap is features we accept. */ |
134 | u8 *out_features = lg_features(desc) + desc->feature_len; |
135 | |
136 | /* Give virtio_ring a chance to accept features. */ |
137 | vring_transport_features(vdev); |
138 | |
139 | /* |
140 | * The vdev->feature array is a Linux bitmask: this isn't the same as a |
141 | * the simple array of bits used by lguest devices for features. So we |
142 | * do this slow, manual conversion which is completely general. |
143 | */ |
144 | memset(out_features, 0, desc->feature_len); |
145 | bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8; |
146 | for (i = 0; i < bits; i++) { |
147 | if (test_bit(i, vdev->features)) |
148 | out_features[i / 8] |= (1 << (i % 8)); |
149 | } |
150 | |
151 | /* Tell Host we've finished with this device's feature negotiation */ |
152 | status_notify(vdev); |
153 | } |
154 | |
155 | /* Once they've found a field, getting a copy of it is easy. */ |
156 | static void lg_get(struct virtio_device *vdev, unsigned int offset, |
157 | void *buf, unsigned len) |
158 | { |
159 | struct lguest_device_desc *desc = to_lgdev(vdev)->desc; |
160 | |
161 | /* Check they didn't ask for more than the length of the config! */ |
162 | BUG_ON(offset + len > desc->config_len); |
163 | memcpy(buf, lg_config(desc) + offset, len); |
164 | } |
165 | |
166 | /* Setting the contents is also trivial. */ |
167 | static void lg_set(struct virtio_device *vdev, unsigned int offset, |
168 | const void *buf, unsigned len) |
169 | { |
170 | struct lguest_device_desc *desc = to_lgdev(vdev)->desc; |
171 | |
172 | /* Check they didn't ask for more than the length of the config! */ |
173 | BUG_ON(offset + len > desc->config_len); |
174 | memcpy(lg_config(desc) + offset, buf, len); |
175 | } |
176 | |
177 | /* |
178 | * The operations to get and set the status word just access the status field |
179 | * of the device descriptor. |
180 | */ |
181 | static u8 lg_get_status(struct virtio_device *vdev) |
182 | { |
183 | return to_lgdev(vdev)->desc->status; |
184 | } |
185 | |
186 | static void lg_set_status(struct virtio_device *vdev, u8 status) |
187 | { |
188 | BUG_ON(!status); |
189 | to_lgdev(vdev)->desc->status = status; |
190 | |
191 | /* Tell Host immediately if we failed. */ |
192 | if (status & VIRTIO_CONFIG_S_FAILED) |
193 | status_notify(vdev); |
194 | } |
195 | |
196 | static void lg_reset(struct virtio_device *vdev) |
197 | { |
198 | /* 0 status means "reset" */ |
199 | to_lgdev(vdev)->desc->status = 0; |
200 | status_notify(vdev); |
201 | } |
202 | |
203 | /* |
204 | * Virtqueues |
205 | * |
206 | * The other piece of infrastructure virtio needs is a "virtqueue": a way of |
207 | * the Guest device registering buffers for the other side to read from or |
208 | * write into (ie. send and receive buffers). Each device can have multiple |
209 | * virtqueues: for example the console driver uses one queue for sending and |
210 | * another for receiving. |
211 | * |
212 | * Fortunately for us, a very fast shared-memory-plus-descriptors virtqueue |
213 | * already exists in virtio_ring.c. We just need to connect it up. |
214 | * |
215 | * We start with the information we need to keep about each virtqueue. |
216 | */ |
217 | |
218 | /*D:140 This is the information we remember about each virtqueue. */ |
219 | struct lguest_vq_info { |
220 | /* A copy of the information contained in the device config. */ |
221 | struct lguest_vqconfig config; |
222 | |
223 | /* The address where we mapped the virtio ring, so we can unmap it. */ |
224 | void *pages; |
225 | }; |
226 | |
227 | /* |
228 | * When the virtio_ring code wants to prod the Host, it calls us here and we |
229 | * make a hypercall. We hand the physical address of the virtqueue so the Host |
230 | * knows which virtqueue we're talking about. |
231 | */ |
232 | static void lg_notify(struct virtqueue *vq) |
233 | { |
234 | /* |
235 | * We store our virtqueue information in the "priv" pointer of the |
236 | * virtqueue structure. |
237 | */ |
238 | struct lguest_vq_info *lvq = vq->priv; |
239 | |
240 | hcall(LHCALL_NOTIFY, lvq->config.pfn << PAGE_SHIFT, 0, 0, 0); |
241 | } |
242 | |
/*
 * Declaring an extern function inside a C file is bad form: don't copy it.
 * lg_find_vq() calls this to make sure a virtqueue's interrupt is allocated.
 */
extern int lguest_setup_irq(unsigned int irq);
245 | |
246 | /* |
247 | * This routine finds the Nth virtqueue described in the configuration of |
248 | * this device and sets it up. |
249 | * |
250 | * This is kind of an ugly duckling. It'd be nicer to have a standard |
251 | * representation of a virtqueue in the configuration space, but it seems that |
252 | * everyone wants to do it differently. The KVM coders want the Guest to |
253 | * allocate its own pages and tell the Host where they are, but for lguest it's |
254 | * simpler for the Host to simply tell us where the pages are. |
255 | */ |
256 | static struct virtqueue *lg_find_vq(struct virtio_device *vdev, |
257 | unsigned index, |
258 | void (*callback)(struct virtqueue *vq), |
259 | const char *name) |
260 | { |
261 | struct lguest_device *ldev = to_lgdev(vdev); |
262 | struct lguest_vq_info *lvq; |
263 | struct virtqueue *vq; |
264 | int err; |
265 | |
266 | if (!name) |
267 | return NULL; |
268 | |
269 | /* We must have this many virtqueues. */ |
270 | if (index >= ldev->desc->num_vq) |
271 | return ERR_PTR(-ENOENT); |
272 | |
273 | lvq = kmalloc(sizeof(*lvq), GFP_KERNEL); |
274 | if (!lvq) |
275 | return ERR_PTR(-ENOMEM); |
276 | |
277 | /* |
278 | * Make a copy of the "struct lguest_vqconfig" entry, which sits after |
279 | * the descriptor. We need a copy because the config space might not |
280 | * be aligned correctly. |
281 | */ |
282 | memcpy(&lvq->config, lg_vq(ldev->desc)+index, sizeof(lvq->config)); |
283 | |
284 | printk("Mapping virtqueue %i addr %lx\n", index, |
285 | (unsigned long)lvq->config.pfn << PAGE_SHIFT); |
286 | /* Figure out how many pages the ring will take, and map that memory */ |
287 | lvq->pages = lguest_map((unsigned long)lvq->config.pfn << PAGE_SHIFT, |
288 | DIV_ROUND_UP(vring_size(lvq->config.num, |
289 | LGUEST_VRING_ALIGN), |
290 | PAGE_SIZE)); |
291 | if (!lvq->pages) { |
292 | err = -ENOMEM; |
293 | goto free_lvq; |
294 | } |
295 | |
296 | /* |
297 | * OK, tell virtio_ring.c to set up a virtqueue now we know its size |
298 | * and we've got a pointer to its pages. Note that we set weak_barriers |
299 | * to 'true': the host just a(nother) SMP CPU, so we only need inter-cpu |
300 | * barriers. |
301 | */ |
302 | vq = vring_new_virtqueue(index, lvq->config.num, LGUEST_VRING_ALIGN, vdev, |
303 | true, lvq->pages, lg_notify, callback, name); |
304 | if (!vq) { |
305 | err = -ENOMEM; |
306 | goto unmap; |
307 | } |
308 | |
309 | /* Make sure the interrupt is allocated. */ |
310 | err = lguest_setup_irq(lvq->config.irq); |
311 | if (err) |
312 | goto destroy_vring; |
313 | |
314 | /* |
315 | * Tell the interrupt for this virtqueue to go to the virtio_ring |
316 | * interrupt handler. |
317 | * |
318 | * FIXME: We used to have a flag for the Host to tell us we could use |
319 | * the interrupt as a source of randomness: it'd be nice to have that |
320 | * back. |
321 | */ |
322 | err = request_irq(lvq->config.irq, vring_interrupt, IRQF_SHARED, |
323 | dev_name(&vdev->dev), vq); |
324 | if (err) |
325 | goto free_desc; |
326 | |
327 | /* |
328 | * Last of all we hook up our 'struct lguest_vq_info" to the |
329 | * virtqueue's priv pointer. |
330 | */ |
331 | vq->priv = lvq; |
332 | return vq; |
333 | |
334 | free_desc: |
335 | irq_free_desc(lvq->config.irq); |
336 | destroy_vring: |
337 | vring_del_virtqueue(vq); |
338 | unmap: |
339 | lguest_unmap(lvq->pages); |
340 | free_lvq: |
341 | kfree(lvq); |
342 | return ERR_PTR(err); |
343 | } |
344 | /*:*/ |
345 | |
346 | /* Cleaning up a virtqueue is easy */ |
347 | static void lg_del_vq(struct virtqueue *vq) |
348 | { |
349 | struct lguest_vq_info *lvq = vq->priv; |
350 | |
351 | /* Release the interrupt */ |
352 | free_irq(lvq->config.irq, vq); |
353 | /* Tell virtio_ring.c to free the virtqueue. */ |
354 | vring_del_virtqueue(vq); |
355 | /* Unmap the pages containing the ring. */ |
356 | lguest_unmap(lvq->pages); |
357 | /* Free our own queue information. */ |
358 | kfree(lvq); |
359 | } |
360 | |
361 | static void lg_del_vqs(struct virtio_device *vdev) |
362 | { |
363 | struct virtqueue *vq, *n; |
364 | |
365 | list_for_each_entry_safe(vq, n, &vdev->vqs, list) |
366 | lg_del_vq(vq); |
367 | } |
368 | |
369 | static int lg_find_vqs(struct virtio_device *vdev, unsigned nvqs, |
370 | struct virtqueue *vqs[], |
371 | vq_callback_t *callbacks[], |
372 | const char *names[]) |
373 | { |
374 | struct lguest_device *ldev = to_lgdev(vdev); |
375 | int i; |
376 | |
377 | /* We must have this many virtqueues. */ |
378 | if (nvqs > ldev->desc->num_vq) |
379 | return -ENOENT; |
380 | |
381 | for (i = 0; i < nvqs; ++i) { |
382 | vqs[i] = lg_find_vq(vdev, i, callbacks[i], names[i]); |
383 | if (IS_ERR(vqs[i])) |
384 | goto error; |
385 | } |
386 | return 0; |
387 | |
388 | error: |
389 | lg_del_vqs(vdev); |
390 | return PTR_ERR(vqs[i]); |
391 | } |
392 | |
static const char *lg_bus_name(struct virtio_device *vdev)
{
	/* Every lguest device reports the same, empty, bus name. */
	static const char no_name[] = "";

	return no_name;
}
397 | |
398 | /* The ops structure which hooks everything together. */ |
399 | static const struct virtio_config_ops lguest_config_ops = { |
400 | .get_features = lg_get_features, |
401 | .finalize_features = lg_finalize_features, |
402 | .get = lg_get, |
403 | .set = lg_set, |
404 | .get_status = lg_get_status, |
405 | .set_status = lg_set_status, |
406 | .reset = lg_reset, |
407 | .find_vqs = lg_find_vqs, |
408 | .del_vqs = lg_del_vqs, |
409 | .bus_name = lg_bus_name, |
410 | }; |
411 | |
/*
 * Parent device for every lguest virtio device, so that they show up as
 * /sys/devices/lguest/0,1,2 rather than /sys/devices/0,1,2.
 */
static struct device *lguest_root;
417 | |
418 | /*D:120 |
419 | * This is the core of the lguest bus: actually adding a new device. |
420 | * It's a separate function because it's neater that way, and because an |
421 | * earlier version of the code supported hotplug and unplug. They were removed |
422 | * early on because they were never used. |
423 | * |
424 | * As Andrew Tridgell says, "Untested code is buggy code". |
425 | * |
426 | * It's worth reading this carefully: we start with a pointer to the new device |
427 | * descriptor in the "lguest_devices" page, and the offset into the device |
428 | * descriptor page so we can uniquely identify it if things go badly wrong. |
429 | */ |
430 | static void add_lguest_device(struct lguest_device_desc *d, |
431 | unsigned int offset) |
432 | { |
433 | struct lguest_device *ldev; |
434 | |
435 | /* Start with zeroed memory; Linux's device layer counts on it. */ |
436 | ldev = kzalloc(sizeof(*ldev), GFP_KERNEL); |
437 | if (!ldev) { |
438 | printk(KERN_EMERG "Cannot allocate lguest dev %u type %u\n", |
439 | offset, d->type); |
440 | return; |
441 | } |
442 | |
443 | /* This devices' parent is the lguest/ dir. */ |
444 | ldev->vdev.dev.parent = lguest_root; |
445 | /* |
446 | * The device type comes straight from the descriptor. There's also a |
447 | * device vendor field in the virtio_device struct, which we leave as |
448 | * 0. |
449 | */ |
450 | ldev->vdev.id.device = d->type; |
451 | /* |
452 | * We have a simple set of routines for querying the device's |
453 | * configuration information and setting its status. |
454 | */ |
455 | ldev->vdev.config = &lguest_config_ops; |
456 | /* And we remember the device's descriptor for lguest_config_ops. */ |
457 | ldev->desc = d; |
458 | |
459 | /* |
460 | * register_virtio_device() sets up the generic fields for the struct |
461 | * virtio_device and calls device_register(). This makes the bus |
462 | * infrastructure look for a matching driver. |
463 | */ |
464 | if (register_virtio_device(&ldev->vdev) != 0) { |
465 | printk(KERN_ERR "Failed to register lguest dev %u type %u\n", |
466 | offset, d->type); |
467 | kfree(ldev); |
468 | } |
469 | } |
470 | |
471 | /*D:110 |
472 | * scan_devices() simply iterates through the device page. The type 0 is |
473 | * reserved to mean "end of devices". |
474 | */ |
475 | static void scan_devices(void) |
476 | { |
477 | unsigned int i; |
478 | struct lguest_device_desc *d; |
479 | |
480 | /* We start at the page beginning, and skip over each entry. */ |
481 | for (i = 0; i < PAGE_SIZE; i += desc_size(d)) { |
482 | d = lguest_devices + i; |
483 | |
484 | /* Once we hit a zero, stop. */ |
485 | if (d->type == 0) |
486 | break; |
487 | |
488 | printk("Device at %i has size %u\n", i, desc_size(d)); |
489 | add_lguest_device(d, i); |
490 | } |
491 | } |
492 | |
493 | /*D:105 |
494 | * Fairly early in boot, lguest_devices_init() is called to set up the |
495 | * lguest device infrastructure. We check that we are a Guest by checking |
496 | * pv_info.name: there are other ways of checking, but this seems most |
497 | * obvious to me. |
498 | * |
499 | * So we can access the "struct lguest_device_desc"s easily, we map that memory |
500 | * and store the pointer in the global "lguest_devices". Then we register a |
501 | * root device from which all our devices will hang (this seems to be the |
502 | * correct sysfs incantation). |
503 | * |
504 | * Finally we call scan_devices() which adds all the devices found in the |
505 | * lguest_devices page. |
506 | */ |
507 | static int __init lguest_devices_init(void) |
508 | { |
509 | if (strcmp(pv_info.name, "lguest") != 0) |
510 | return 0; |
511 | |
512 | lguest_root = root_device_register("lguest"); |
513 | if (IS_ERR(lguest_root)) |
514 | panic("Could not register lguest root"); |
515 | |
516 | /* Devices are in a single page above top of "normal" mem */ |
517 | lguest_devices = lguest_map(max_pfn<<PAGE_SHIFT, 1); |
518 | |
519 | scan_devices(); |
520 | return 0; |
521 | } |
/* Run once the core is up, but ahead of the device drivers. */
postcore_initcall(lguest_devices_init);
524 | |
525 | /*D:150 |
526 | * At this point in the journey we used to now wade through the lguest |
527 | * devices themselves: net, block and console. Since they're all now virtio |
528 | * devices rather than lguest-specific, I've decided to ignore them. Mostly, |
529 | * they're kind of boring. But this does mean you'll never experience the |
530 | * thrill of reading the forbidden love scene buried deep in the block driver. |
531 | * |
532 | * "make Launcher" beckons, where we answer questions like "Where do Guests |
533 | * come from?", and "What do you do when someone asks for optimization?". |
534 | */ |
535 |
Branches:
ben-wpan
ben-wpan-stefan
javiroman/ks7010
jz-2.6.34
jz-2.6.34-rc5
jz-2.6.34-rc6
jz-2.6.34-rc7
jz-2.6.35
jz-2.6.36
jz-2.6.37
jz-2.6.38
jz-2.6.39
jz-3.0
jz-3.1
jz-3.11
jz-3.12
jz-3.13
jz-3.15
jz-3.16
jz-3.18-dt
jz-3.2
jz-3.3
jz-3.4
jz-3.5
jz-3.6
jz-3.6-rc2-pwm
jz-3.9
jz-3.9-clk
jz-3.9-rc8
jz47xx
jz47xx-2.6.38
master
Tags:
od-2011-09-04
od-2011-09-18
v2.6.34-rc5
v2.6.34-rc6
v2.6.34-rc7
v3.9