wok view qemu/stuff/virtio.u @ rev 698

qemu: add virtio support
author Pascal Bellard <pascal.bellard@slitaz.org>
date Mon Apr 28 11:41:08 2008 +0000 (2008-04-28)
parents
children
line source
1 Index: qemu-0.9.1/Makefile.target
2 ===================================================================
3 --- qemu-0.9.1.orig/Makefile.target 2008-01-06 19:38:41.000000000 +0000
4 +++ qemu-0.9.1/Makefile.target 2008-02-07 13:36:23.000000000 +0000
5 @@ -436,6 +436,9 @@
6 VL_OBJS += pcnet.o
7 VL_OBJS += rtl8139.o
9 +# virtio devices
10 +VL_OBJS += virtio.o
11 +
12 ifeq ($(TARGET_BASE_ARCH), i386)
13 # Hardware support
14 VL_OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) dma.o
15 Index: qemu-0.9.1/hw/virtio.c
16 ===================================================================
17 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
18 +++ qemu-0.9.1/hw/virtio.c 2008-02-07 13:36:23.000000000 +0000
19 @@ -0,0 +1,422 @@
20 +/*
21 + * Virtio Support
22 + *
23 + * Copyright IBM, Corp. 2007
24 + *
25 + * Authors:
26 + * Anthony Liguori <address@hidden>
27 + *
28 + * This work is licensed under the terms of the GNU GPL, version 2. See
29 + * the COPYING file in the top-level directory.
30 + *
31 + */
32 +
33 +#include <inttypes.h>
34 +#include <err.h>
35 +
36 +#include "virtio.h"
37 +#include "sysemu.h"
38 +
39 +/* from Linux's linux/virtio_pci.h */
40 +
41 +/* A 32-bit r/o bitmask of the features supported by the host */
42 +#define VIRTIO_PCI_HOST_FEATURES 0
43 +
44 +/* A 32-bit r/w bitmask of features activated by the guest */
45 +#define VIRTIO_PCI_GUEST_FEATURES 4
46 +
47 +/* A 32-bit r/w PFN for the currently selected queue */
48 +#define VIRTIO_PCI_QUEUE_PFN 8
49 +
50 +/* A 16-bit r/o queue size for the currently selected queue */
51 +#define VIRTIO_PCI_QUEUE_NUM 12
52 +
53 +/* A 16-bit r/w queue selector */
54 +#define VIRTIO_PCI_QUEUE_SEL 14
55 +
56 +/* A 16-bit r/w queue notifier */
57 +#define VIRTIO_PCI_QUEUE_NOTIFY 16
58 +
59 +/* An 8-bit device status register. */
60 +#define VIRTIO_PCI_STATUS 18
61 +
62 +/* An 8-bit r/o interrupt status register. Reading the value will return the
63 + * current contents of the ISR and will also clear it. This is effectively
64 + * a read-and-acknowledge. */
65 +#define VIRTIO_PCI_ISR 19
66 +
67 +#define VIRTIO_PCI_CONFIG 20
68 +
69 +/* QEMU doesn't strictly need write barriers since everything runs in
70 + * lock-step. We'll leave the calls to wmb() in though to make it obvious for
71 + * KVM or if kqemu gets SMP support.
72 + */
73 +#define wmb() do { } while (0)
74 +
75 +/* virt queue functions */
76 +/* Point the ring at host address p: descriptor table, then avail ring, then the page-aligned used ring. */
77 +static void virtqueue_init(VirtQueue *vq, void *p)
78 +{
79 +    vq->vring.desc = (VRingDesc *)p;
80 +    vq->vring.avail = (VRingAvail *)(vq->vring.desc + vq->vring.num);
81 +    vq->vring.used = (VRingUsed *)TARGET_PAGE_ALIGN((unsigned long)(vq->vring.avail->ring + vq->vring.num));
82 +}
83 +/* Return the index of the descriptor chained after i, or vring.num when i ends the chain. */
84 +static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i)
85 +{
86 +    const VRingDesc *desc = &vq->vring.desc[i];
87 +    unsigned int limit = vq->vring.num;
88 +    unsigned int next;
89 +
90 +    /* A descriptor without the NEXT flag terminates the chain. */
91 +    if ((desc->flags & VRING_DESC_F_NEXT) == 0)
92 +        return limit;
93 +
94 +    /* Copy the link into a local before validating it: we don't want it changing! */
95 +    next = desc->next;
96 +    wmb();
97 +    if (next >= limit)
98 +        errx(1, "Desc next is %u", next);
99 +
100 +    return next;
101 +}
102 +/* Publish a completed element in the used ring: record its head index and length, then advance used->idx. */
103 +void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
104 +                    unsigned int len)
105 +{
106 +    VRingUsed *ring = vq->vring.used;
107 +    VRingUsedElem *slot = &ring->ring[ring->idx % vq->vring.num];
108 +
109 +    /* Fill in the slot first ... */
110 +    slot->id = elem->index;
111 +    slot->len = len;
112 +    /* ... and only then expose it by bumping the index. */
113 +    wmb();
114 +    ring->idx++;
115 +}
116 +/* Pop the next available descriptor chain into elem; returns the number of buffers, or 0 if nothing is pending. */
117 +int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
118 +{
119 +    unsigned int i, head;
120 +
121 +    /* Check it isn't doing very strange things with descriptor numbers. */
122 +    if ((uint16_t)(vq->vring.avail->idx - vq->last_avail_idx) > vq->vring.num)
123 +        errx(1, "Guest moved used index from %u to %u",
124 +             vq->last_avail_idx, vq->vring.avail->idx);
125 +
126 +    /* If there's nothing new since last we looked, return invalid. */
127 +    if (vq->vring.avail->idx == vq->last_avail_idx)
128 +        return 0;
129 +
130 +    /* Grab the next descriptor number they're advertising, and increment
131 +     * the index we've seen. */
132 +    head = vq->vring.avail->ring[vq->last_avail_idx++ % vq->vring.num];
133 +
134 +    /* If their number is silly, that's a fatal mistake. */
135 +    if (head >= vq->vring.num)
136 +        errx(1, "Guest says index %u is available", head);
137 +
138 +    /* When we start there are none of either input nor output. */
139 +    elem->out_num = elem->in_num = 0;
140 +
141 +    i = head;
142 +    do {
143 +        const VRingDesc *desc = &vq->vring.desc[i];
144 +        struct iovec *sg;
145 +
146 +        /* More buffers than descriptors implies a loop. Test before
147 +         * storing so in_sg[]/out_sg[] are never written past their end. */
148 +        if ((elem->in_num + elem->out_num) >= vq->vring.num)
149 +            errx(1, "Looped descriptor");
150 +
151 +        /* Overflow-safe bounds check: addr + len could wrap in 64 bits. */
152 +        if (desc->addr > ram_size || desc->len > ram_size - desc->addr)
153 +            errx(1, "Guest sent invalid pointer");
154 +
155 +        if (desc->flags & VRING_DESC_F_WRITE)
156 +            sg = &elem->in_sg[elem->in_num++];
157 +        else
158 +            sg = &elem->out_sg[elem->out_num++];
159 +        /* Translate the guest-physical buffer into a host iovec. */
160 +        sg->iov_len = desc->len;
161 +        sg->iov_base = phys_ram_base + desc->addr;
162 +    } while ((i = virtqueue_next_desc(vq, i)) != vq->vring.num);
163 +
164 +    elem->index = head;
165 +    return elem->in_num + elem->out_num;
166 +}
167 +
168 +/* virtio device */
169 +/* Downcast a PCIDevice to its VirtIODevice; relies on pci_dev being the first member of VirtIODevice. */
170 +static VirtIODevice *to_virtio_device(PCIDevice *pci_dev)
171 +{
172 + return (VirtIODevice *)pci_dev;
173 +}
174 +/* Drive the device's first PCI interrupt line from bit 0 of the ISR. */
175 +static void virtio_update_irq(VirtIODevice *vdev)
176 +{
177 + qemu_set_irq(vdev->pci_dev.irq[0], vdev->isr & 1);
178 +}
179 +/* Handle a guest write to the common virtio PCI registers (features, queue PFN/select/notify, status). */
180 +static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
181 +{
182 +    VirtIODevice *vdev = to_virtio_device(opaque);
183 +    VirtQueue *vq = &vdev->vq[vdev->queue_sel];
184 +    ram_addr_t pa;
185 +
186 +    addr -= vdev->addr;
187 +    switch (addr) {
188 +    case VIRTIO_PCI_GUEST_FEATURES:
189 +        if (vdev->set_features)
190 +            vdev->set_features(vdev, val);
191 +        vdev->features = val;
192 +        break;
193 +    case VIRTIO_PCI_QUEUE_PFN:
194 +        pa = (ram_addr_t)val << TARGET_PAGE_BITS;
195 +        vq->pfn = val;
196 +        /* Always drop the old mapping so a zero or rejected PFN leaves no stale ring pointers. */
197 +        vq->vring.desc = NULL;
198 +        vq->vring.avail = NULL;
199 +        vq->vring.used = NULL;
200 +        /* Map registered queues only (num != 0). NOTE(review): only the first ring page is range-checked. */
201 +        if (pa != 0 && vq->vring.num != 0 && pa < (ram_size - TARGET_PAGE_SIZE))
202 +            virtqueue_init(vq, phys_ram_base + pa);
203 +        /* FIXME if pa == 0, deal with device tear down */
204 +        break;
205 +    case VIRTIO_PCI_QUEUE_SEL:
206 +        if (val < VIRTIO_PCI_QUEUE_MAX)
207 +            vdev->queue_sel = val;
208 +        break;
209 +    case VIRTIO_PCI_QUEUE_NOTIFY:
210 +        if (val < VIRTIO_PCI_QUEUE_MAX && vdev->vq[val].vring.desc && vdev->vq[val].handle_output)
211 +            vdev->vq[val].handle_output(vdev, &vdev->vq[val]);
212 +        break;
213 +    case VIRTIO_PCI_STATUS:
214 +        vdev->status = val & 0xFF;
215 +        break;
216 +    }
217 +}
218 +/* Handle a guest read of the common virtio PCI registers; offsets with no case read as all ones. */
219 +static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
220 +{
221 +    VirtIODevice *vdev = to_virtio_device(opaque);
222 +    uint32_t pending;
223 +
224 +    addr -= vdev->addr;
225 +
226 +    switch (addr) {
227 +    case VIRTIO_PCI_HOST_FEATURES:
228 +        /* feature bits reported by the device's get_features callback */
229 +        return vdev->get_features(vdev);
230 +    case VIRTIO_PCI_GUEST_FEATURES:
231 +        /* value last written by the guest */
232 +        return vdev->features;
233 +    case VIRTIO_PCI_QUEUE_PFN:
234 +        /* PFN last written for the selected queue */
235 +        return vdev->vq[vdev->queue_sel].pfn;
236 +    case VIRTIO_PCI_QUEUE_NUM:
237 +        /* ring size of the selected queue */
238 +        return vdev->vq[vdev->queue_sel].vring.num;
239 +    case VIRTIO_PCI_QUEUE_SEL:
240 +        /* current queue selector */
241 +        return vdev->queue_sel;
242 +    case VIRTIO_PCI_STATUS:
243 +        /* device status byte */
244 +        return vdev->status;
245 +    case VIRTIO_PCI_ISR:
246 +        /* reading from the ISR also clears it and updates the IRQ line. */
247 +        pending = vdev->isr;
248 +        vdev->isr = 0;
249 +        virtio_update_irq(vdev);
250 +        return pending;
251 +    default:
252 +        break;
253 +    }
254 +
255 +    return 0xFFFFFFFF;
256 +}
257 +/* Byte read from the device-specific config area; yields all ones when out of range. */
258 +static uint32_t virtio_config_readb(void *opaque, uint32_t addr)
259 +{
260 +    VirtIODevice *vdev = opaque;
261 +    uint8_t val;
262 +
263 +    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
264 +    /* Test config_len first: the size_t subtraction wraps if config_len < sizeof(val). */
265 +    if (vdev->config_len < sizeof(val) || addr > (vdev->config_len - sizeof(val)))
266 +        return (uint32_t)-1;
267 +    memcpy(&val, (uint8_t *)vdev->config + addr, sizeof(val));
268 +    return val;
269 +}
270 +/* 16-bit read from the device-specific config area; yields all ones when out of range. */
271 +static uint32_t virtio_config_readw(void *opaque, uint32_t addr)
272 +{
273 +    VirtIODevice *vdev = opaque;
274 +    uint16_t val;
275 +
276 +    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
277 +    /* Test config_len first: the size_t subtraction wraps if config_len < sizeof(val). */
278 +    if (vdev->config_len < sizeof(val) || addr > (vdev->config_len - sizeof(val)))
279 +        return (uint32_t)-1;
280 +    memcpy(&val, (uint8_t *)vdev->config + addr, sizeof(val));
281 +    return val;
282 +}
283 +/* 32-bit read from the device-specific config area; yields all ones when out of range. */
284 +static uint32_t virtio_config_readl(void *opaque, uint32_t addr)
285 +{
286 +    VirtIODevice *vdev = opaque;
287 +    uint32_t val;
288 +
289 +    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
290 +    /* Test config_len first: the size_t subtraction wraps if config_len < sizeof(val). */
291 +    if (vdev->config_len < sizeof(val) || addr > (vdev->config_len - sizeof(val)))
292 +        return (uint32_t)-1;
293 +    memcpy(&val, (uint8_t *)vdev->config + addr, sizeof(val));
294 +    return val;
295 +}
296 +/* Byte write to the device-specific config area; out-of-range writes are ignored. */
297 +static void virtio_config_writeb(void *opaque, uint32_t addr, uint32_t data)
298 +{
299 +    VirtIODevice *vdev = opaque;
300 +    uint8_t val = data;
301 +
302 +    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
303 +    /* Test config_len first: the size_t subtraction wraps if config_len < sizeof(val). */
304 +    if (vdev->config_len < sizeof(val) || addr > (vdev->config_len - sizeof(val)))
305 +        return;
306 +    memcpy((uint8_t *)vdev->config + addr, &val, sizeof(val));
307 +}
308 +/* 16-bit write to the device-specific config area; out-of-range writes are ignored. */
309 +static void virtio_config_writew(void *opaque, uint32_t addr, uint32_t data)
310 +{
311 +    VirtIODevice *vdev = opaque;
312 +    uint16_t val = data;
313 +
314 +    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
315 +    /* Test config_len first: the size_t subtraction wraps if config_len < sizeof(val). */
316 +    if (vdev->config_len < sizeof(val) || addr > (vdev->config_len - sizeof(val)))
317 +        return;
318 +    memcpy((uint8_t *)vdev->config + addr, &val, sizeof(val));
319 +}
320 +/* 32-bit write to the device-specific config area; out-of-range writes are ignored. */
321 +static void virtio_config_writel(void *opaque, uint32_t addr, uint32_t data)
322 +{
323 +    VirtIODevice *vdev = opaque;
324 +    uint32_t val = data;
325 +
326 +    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
327 +    /* Test config_len first: the size_t subtraction wraps if config_len < sizeof(val). */
328 +    if (vdev->config_len < sizeof(val) || addr > (vdev->config_len - sizeof(val)))
329 +        return;
330 +    memcpy((uint8_t *)vdev->config + addr, &val, sizeof(val));
331 +}
332 +/* I/O region mapping callback: record the base address and install the port handlers for the registers at addr. */
333 +static void virtio_map(PCIDevice *pci_dev, int region_num,
334 +                       uint32_t addr, uint32_t size, int type)
335 +{
336 +    VirtIODevice *vdev = to_virtio_device(pci_dev);
337 +    uint32_t config_base = addr + VIRTIO_PCI_CONFIG;
338 +    int i;
339 +
340 +    vdev->addr = addr;
341 +    /* The common registers take 1-, 2- and 4-byte accesses through one handler pair. */
342 +    for (i = 0; i < 3; i++) {
343 +        register_ioport_write(addr, VIRTIO_PCI_CONFIG, 1 << i, virtio_ioport_write, vdev);
344 +        register_ioport_read(addr, VIRTIO_PCI_CONFIG, 1 << i, virtio_ioport_read, vdev);
345 +    }
346 +    if (vdev->config_len) {
347 +        register_ioport_write(config_base, vdev->config_len, 1,
348 +                              virtio_config_writeb, vdev);
349 +        register_ioport_write(config_base, vdev->config_len, 2,
350 +                              virtio_config_writew, vdev);
351 +        register_ioport_write(config_base, vdev->config_len, 4,
352 +                              virtio_config_writel, vdev);
353 +        register_ioport_read(config_base, vdev->config_len, 1,
354 +                             virtio_config_readb, vdev);
355 +        register_ioport_read(config_base, vdev->config_len, 2,
356 +                             virtio_config_readw, vdev);
357 +        register_ioport_read(config_base, vdev->config_len, 4,
358 +                             virtio_config_readl, vdev);
359 +        vdev->update_config(vdev, vdev->config); /* let the device fill in its config bytes */
360 +    }
361 +}
362 +/* Claim the first unused queue slot for a ring of queue_size entries; aborts if no slot is free or the size is invalid. */
363 +VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
364 +                            void (*handle_output)(VirtIODevice *, VirtQueue *))
365 +{
366 +    int i;
367 +
368 +    /* A slot counts as free while its ring size is still zero. */
369 +    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
370 +        if (vdev->vq[i].vring.num == 0)
371 +            break;
372 +    }
373 +    /* Reject sizes <= 0 too: zero leaves the slot looking free, negatives wrap when stored in the unsigned num. */
374 +    if (i == VIRTIO_PCI_QUEUE_MAX || queue_size <= 0 || queue_size > VIRTQUEUE_MAX_SIZE)
375 +        abort();
376 +
377 +    vdev->vq[i].vring.num = queue_size;
378 +    vdev->vq[i].handle_output = handle_output;
379 +    vdev->vq[i].index = i;
380 +    return &vdev->vq[i];
381 +}
382 +/* Raise the device interrupt for vq unless the guest set VRING_AVAIL_F_NO_INTERRUPT in the avail ring. */
383 +void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
384 +{
385 +    /* Latch the ISR bit and propagate it to the IRQ line. */
386 +    if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
387 +        vdev->isr = 1;
388 +        virtio_update_irq(vdev);
389 +    }
390 +}
391 +/* Register a PCI device of struct_size bytes, fill in its PCI IDs and set up the virtio state and I/O region. */
392 +VirtIODevice *virtio_init_pci(PCIBus *bus, const char *name,
393 +                              uint16_t vendor, uint16_t device,
394 +                              uint16_t subvendor, uint16_t subdevice,
395 +                              uint8_t class_code, uint8_t subclass_code,
396 +                              uint8_t pif, size_t config_size,
397 +                              size_t struct_size)
398 +{
399 +    PCIDevice *pci_dev = pci_register_device(bus, name, struct_size,
400 +                                             -1, NULL, NULL);
401 +    VirtIODevice *vdev = to_virtio_device(pci_dev);
402 +    uint8_t *cfg = pci_dev->config;
403 +
404 +    /* Vendor ID and device ID, low byte first. */
405 +    cfg[0x00] = vendor & 0xFF;
406 +    cfg[0x01] = (vendor >> 8) & 0xFF;
407 +    cfg[0x02] = device & 0xFF;
408 +    cfg[0x03] = (device >> 8) & 0xFF;
409 +
410 +    /* Programming interface, subclass, base class, header type. */
411 +    cfg[0x09] = pif;
412 +    cfg[0x0a] = subclass_code;
413 +    cfg[0x0b] = class_code;
414 +    cfg[0x0e] = 0x00;
415 +
416 +    /* Subsystem vendor ID and subsystem ID, low byte first. */
417 +    cfg[0x2c] = subvendor & 0xFF;
418 +    cfg[0x2d] = (subvendor >> 8) & 0xFF;
419 +    cfg[0x2e] = subdevice & 0xFF;
420 +    cfg[0x2f] = (subdevice >> 8) & 0xFF;
421 +
422 +    /* Interrupt pin register. */
423 +    cfg[0x3d] = 1;
424 +
425 +    /* Fresh device state: nothing pending, queue 0 selected, no queues defined. */
426 +    vdev->name = name;
427 +    vdev->status = 0;
428 +    vdev->isr = 0;
429 +    vdev->queue_sel = 0;
430 +    memset(vdev->vq, 0, sizeof(vdev->vq));
431 +
432 +    /* Device-specific config bytes come from qemu_mallocz, or are absent when config_size is 0. */
433 +    vdev->config_len = config_size;
434 +    vdev->config = config_size ? qemu_mallocz(config_size) : NULL;
435 +
436 +    /* Region 0 is I/O space: 20 bytes of common registers followed by the device config. */
437 +    pci_register_io_region(pci_dev, 0, 20 + config_size, PCI_ADDRESS_SPACE_IO,
438 +                           virtio_map);
439 +
440 +    return vdev;
441 +}
442 Index: qemu-0.9.1/hw/virtio.h
443 ===================================================================
444 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
445 +++ qemu-0.9.1/hw/virtio.h 2008-02-07 13:36:23.000000000 +0000
446 @@ -0,0 +1,143 @@
447 +/*
448 + * Virtio Support
449 + *
450 + * Copyright IBM, Corp. 2007
451 + *
452 + * Authors:
453 + * Anthony Liguori <address@hidden>
454 + *
455 + * This work is licensed under the terms of the GNU GPL, version 2. See
456 + * the COPYING file in the top-level directory.
457 + *
458 + */
459 +
460 +#ifndef _QEMU_VIRTIO_H
461 +#define _QEMU_VIRTIO_H
462 +
463 +#include <sys/uio.h>
464 +#include "hw.h"
465 +#include "pci.h"
466 +
467 +/* from Linux's linux/virtio_config.h */
468 +
469 +/* Status byte for guest to report progress, and synchronize features. */
470 +/* We have seen device and processed generic fields (VIRTIO_CONFIG_F_VIRTIO) */
471 +#define VIRTIO_CONFIG_S_ACKNOWLEDGE 1
472 +/* We have found a driver for the device. */
473 +#define VIRTIO_CONFIG_S_DRIVER 2
474 +/* Driver has used its parts of the config, and is happy */
475 +#define VIRTIO_CONFIG_S_DRIVER_OK 4
476 +/* We've given up on this device. */
477 +#define VIRTIO_CONFIG_S_FAILED 0x80
478 +
479 +/* from Linux's linux/virtio_ring.h */
480 +
481 +/* This marks a buffer as continuing via the next field. */
482 +#define VRING_DESC_F_NEXT 1
483 +/* This marks a buffer as write-only (otherwise read-only). */
484 +#define VRING_DESC_F_WRITE 2
485 +
486 +/* This means don't notify other side when buffer added. */
487 +#define VRING_USED_F_NO_NOTIFY 1
488 +/* This means don't interrupt guest when buffer consumed. */
489 +#define VRING_AVAIL_F_NO_INTERRUPT 1
490 +
491 +typedef struct VirtQueue VirtQueue;
492 +typedef struct VirtIODevice VirtIODevice;
493 +
494 +typedef struct VRingDesc
495 +{
496 + uint64_t addr; /* guest-physical buffer address; virtqueue_pop adds it to phys_ram_base */
497 + uint32_t len; /* buffer length in bytes */
498 + uint16_t flags; /* VRING_DESC_F_NEXT and/or VRING_DESC_F_WRITE */
499 + uint16_t next; /* index of the chained descriptor, used when VRING_DESC_F_NEXT is set */
500 +} VRingDesc;
501 +
502 +typedef struct VRingAvail
503 +{
504 + uint16_t flags; /* VRING_AVAIL_F_NO_INTERRUPT is checked by virtio_notify */
505 + uint16_t idx; /* compared with VirtQueue.last_avail_idx to detect new entries */
506 + uint16_t ring[0]; /* head descriptor indices, indexed modulo vring.num */
507 +} VRingAvail;
508 +
509 +typedef struct VRingUsedElem
510 +{
511 + uint32_t id; /* head descriptor index of the completed chain (VirtQueueElement.index) */
512 + uint32_t len; /* length value passed to virtqueue_push */
513 +} VRingUsedElem;
514 +
515 +typedef struct VRingUsed
516 +{
517 + uint16_t flags; /* not read or written by the code in virtio.c */
518 + uint16_t idx; /* incremented by virtqueue_push after each completed element */
519 + VRingUsedElem ring[0]; /* completed elements, indexed modulo vring.num */
520 +} VRingUsed;
521 +
522 +typedef struct VRing
523 +{
524 + unsigned int num; /* ring size, set by virtio_add_queue; 0 marks an unused queue slot */
525 + VRingDesc *desc; /* host pointer to the descriptor table; NULL until the guest writes a PFN */
526 + VRingAvail *avail; /* host pointer to the avail ring, set by virtqueue_init */
527 + VRingUsed *used; /* host pointer to the page-aligned used ring, set by virtqueue_init */
528 +} VRing;
529 +
530 +struct VirtQueue
531 +{
532 + VRing vring; /* ring size plus host pointers into the guest's ring */
533 + uint32_t pfn; /* last value the guest wrote to VIRTIO_PCI_QUEUE_PFN */
534 + uint16_t last_avail_idx; /* next avail-ring position virtqueue_pop will consume */
535 + void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq); /* invoked on a VIRTIO_PCI_QUEUE_NOTIFY write */
536 + int index; /* slot number in VirtIODevice.vq[] */
537 +};
538 +
539 +#define VIRTQUEUE_MAX_SIZE 1024
540 +
541 +typedef struct VirtQueueElement
542 +{
543 + unsigned int index; /* head descriptor index of the chain, reported back by virtqueue_push */
544 + unsigned int out_num; /* number of valid entries in out_sg */
545 + unsigned int in_num; /* number of valid entries in in_sg */
546 + struct iovec in_sg[VIRTQUEUE_MAX_SIZE]; /* buffers whose descriptor has VRING_DESC_F_WRITE set */
547 + struct iovec out_sg[VIRTQUEUE_MAX_SIZE]; /* buffers whose descriptor lacks VRING_DESC_F_WRITE */
548 +} VirtQueueElement;
549 +
550 +#define VIRTIO_PCI_QUEUE_MAX 16
551 +
552 +struct VirtIODevice
553 +{
554 + PCIDevice pci_dev; /* must stay first: to_virtio_device() casts PCIDevice * to VirtIODevice * */
555 + const char *name; /* name passed to virtio_init_pci */
556 + uint32_t addr; /* I/O base address recorded by virtio_map */
557 + uint16_t vendor; /* NOTE(review): not assigned anywhere in virtio.c */
558 + uint16_t device; /* NOTE(review): not assigned anywhere in virtio.c */
559 + uint8_t status; /* low byte of the last VIRTIO_PCI_STATUS write */
560 + uint8_t isr; /* interrupt status; cleared when the guest reads VIRTIO_PCI_ISR */
561 + uint16_t queue_sel; /* queue selected through VIRTIO_PCI_QUEUE_SEL */
562 + uint32_t features; /* last value written to VIRTIO_PCI_GUEST_FEATURES */
563 + size_t config_len; /* size in bytes of the device-specific config area */
564 + void *config; /* device-specific config bytes, NULL when config_len is 0 */
565 + uint32_t (*get_features)(VirtIODevice *vdev); /* called on VIRTIO_PCI_HOST_FEATURES reads */
566 + void (*set_features)(VirtIODevice *vdev, uint32_t val); /* optional; called on guest feature writes */
567 + void (*update_config)(VirtIODevice *vdev, uint8_t *config); /* called by virtio_map when config_len != 0 */
568 + VirtQueue vq[VIRTIO_PCI_QUEUE_MAX]; /* queue slots, handed out by virtio_add_queue */
569 +};
570 +
571 +VirtIODevice *virtio_init_pci(PCIBus *bus, const char *name,
572 + uint16_t vendor, uint16_t device,
573 + uint16_t subvendor, uint16_t subdevice,
574 + uint8_t class_code, uint8_t subclass_code,
575 + uint8_t pif, size_t config_size,
576 + size_t struct_size);
577 +
578 +VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
579 + void (*handle_output)(VirtIODevice *,
580 + VirtQueue *));
581 +
582 +void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
583 + unsigned int len);
584 +
585 +int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem);
586 +
587 +void virtio_notify(VirtIODevice *vdev, VirtQueue *vq);
588 +
589 +#endif
590 Index: qemu-0.9.1/Makefile.target
591 ===================================================================
592 --- qemu-0.9.1.orig/Makefile.target 2008-02-07 13:36:23.000000000 +0000
593 +++ qemu-0.9.1/Makefile.target 2008-02-07 13:36:37.000000000 +0000
594 @@ -437,7 +437,7 @@
595 VL_OBJS += rtl8139.o
597 # virtio devices
598 -VL_OBJS += virtio.o
599 +VL_OBJS += virtio.o virtio-net.o
601 ifeq ($(TARGET_BASE_ARCH), i386)
602 # Hardware support
603 Index: qemu-0.9.1/hw/pc.h
604 ===================================================================
605 --- qemu-0.9.1.orig/hw/pc.h 2008-01-06 19:38:42.000000000 +0000
606 +++ qemu-0.9.1/hw/pc.h 2008-02-07 13:36:37.000000000 +0000
607 @@ -142,4 +142,9 @@
609 void isa_ne2000_init(int base, qemu_irq irq, NICInfo *nd);
611 +/* virtio-net.c */
612 +
613 +void *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn);
614 +
615 +
616 #endif
617 Index: qemu-0.9.1/hw/pci.c
618 ===================================================================
619 --- qemu-0.9.1.orig/hw/pci.c 2008-01-06 19:38:42.000000000 +0000
620 +++ qemu-0.9.1/hw/pci.c 2008-02-07 13:36:37.000000000 +0000
621 @@ -25,6 +25,7 @@
622 #include "pci.h"
623 #include "console.h"
624 #include "net.h"
625 +#include "pc.h"
627 //#define DEBUG_PCI
629 @@ -638,9 +639,11 @@
630 pci_rtl8139_init(bus, nd, devfn);
631 } else if (strcmp(nd->model, "pcnet") == 0) {
632 pci_pcnet_init(bus, nd, devfn);
633 + } else if (strcmp(nd->model, "virtio") == 0) {
634 + virtio_net_init(bus, nd, devfn);
635 } else if (strcmp(nd->model, "?") == 0) {
636 fprintf(stderr, "qemu: Supported PCI NICs: i82551 i82557b i82559er"
637 - " ne2k_pci pcnet rtl8139\n");
638 + " ne2k_pci pcnet rtl8139 virtio\n");
639 exit (1);
640 } else {
641 fprintf(stderr, "qemu: Unsupported NIC: %s\n", nd->model);
642 Index: qemu-0.9.1/hw/virtio-net.c
643 ===================================================================
644 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
645 +++ qemu-0.9.1/hw/virtio-net.c 2008-02-07 13:36:37.000000000 +0000
646 @@ -0,0 +1,178 @@
647 +/*
648 + * Virtio Network Device
649 + *
650 + * Copyright IBM, Corp. 2007
651 + *
652 + * Authors:
653 + * Anthony Liguori <address@hidden>
654 + *
655 + * This work is licensed under the terms of the GNU GPL, version 2. See
656 + * the COPYING file in the top-level directory.
657 + *
658 + */
659 +
660 +#include "virtio.h"
661 +#include "net.h"
662 +#include "pc.h"
663 +
664 +/* from Linux's virtio_net.h */
665 +
666 +/* The ID for virtio_net */
667 +#define VIRTIO_ID_NET 1
668 +
669 +/* The feature bitmap for virtio net */
670 +#define VIRTIO_NET_F_NO_CSUM 0
671 +#define VIRTIO_NET_F_TSO4 1
672 +#define VIRTIO_NET_F_UFO 2
673 +#define VIRTIO_NET_F_TSO4_ECN 3
674 +#define VIRTIO_NET_F_TSO6 4
675 +#define VIRTIO_NET_F_MAC 5
676 +
677 +/* The config defining mac address (6 bytes) */
678 +struct virtio_net_config
679 +{
680 + uint8_t mac[6]; /* filled from VirtIONet.mac by virtio_net_update_config */
681 +} __attribute__((packed));
682 +
683 +/* This is the first element of the scatter-gather list. If you don't
684 + * specify GSO or CSUM features, you can simply ignore the header. */
685 +struct virtio_net_hdr
686 +{
687 +#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 // Use csum_start, csum_offset
688 + uint8_t flags; /* virtio_net_receive writes 0 here */
689 +#define VIRTIO_NET_HDR_GSO_NONE 0 // Not a GSO frame
690 +#define VIRTIO_NET_HDR_GSO_TCPV4 1 // GSO frame, IPv4 TCP (TSO)
691 +/* FIXME: Do we need this? If they said they can handle ECN, do they care? */
692 +#define VIRTIO_NET_HDR_GSO_TCPV4_ECN 2 // GSO frame, IPv4 TCP w/ ECN
693 +#define VIRTIO_NET_HDR_GSO_UDP 3 // GSO frame, IPv4 UDP (UFO)
694 +#define VIRTIO_NET_HDR_GSO_TCPV6 4 // GSO frame, IPv6 TCP
695 + uint8_t gso_type; /* virtio_net_receive writes VIRTIO_NET_HDR_GSO_NONE here */
696 + uint16_t gso_size; /* not touched by this device model */
697 + uint16_t csum_start; /* not touched by this device model */
698 + uint16_t csum_offset; /* not touched by this device model */
699 +};
700 +
701 +typedef struct VirtIONet
702 +{
703 + VirtIODevice vdev; /* must stay first: to_virtio_net() casts VirtIODevice * to VirtIONet * */
704 + uint8_t mac[6]; /* copied from NICInfo.macaddr in virtio_net_init */
705 + VirtQueue *rx_vq; /* receive queue, added first */
706 + VirtQueue *tx_vq; /* transmit queue, added second */
707 + VLANClientState *vc; /* VLAN client created by qemu_new_vlan_client */
708 + int can_receive; /* set when the guest kicks the RX queue, cleared when no RX buffer is available */
709 +} VirtIONet;
710 +/* Downcast a VirtIODevice to its VirtIONet; relies on vdev being the first member of VirtIONet. */
711 +static VirtIONet *to_virtio_net(VirtIODevice *vdev)
712 +{
713 + return (VirtIONet *)vdev;
714 +}
715 +/* Fill the device config area with the NIC's MAC address. */
716 +static void virtio_net_update_config(VirtIODevice *vdev, uint8_t *config)
717 +{
718 +    const VirtIONet *net = to_virtio_net(vdev);
719 +    struct virtio_net_config netcfg;
720 +
721 +    memcpy(&netcfg.mac[0], &net->mac[0], sizeof(netcfg.mac));
722 +    memcpy(config, &netcfg, sizeof(netcfg));
723 +}
724 +/* Host feature bitmask: only the VIRTIO_NET_F_MAC bit is offered. */
725 +static uint32_t virtio_net_get_features(VirtIODevice *vdev)
726 +{
727 + return (1 << VIRTIO_NET_F_MAC);
728 +}
729 +
730 +/* RX */
731 +/* RX queue notification from the guest: mark the device as able to receive again. */
732 +static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
733 +{
734 + VirtIONet *n = to_virtio_net(vdev);
735 + n->can_receive = 1;
736 +}
737 +/* Non-zero only when the guest has set DRIVER_OK in the status byte and can_receive is set. */
738 +static int virtio_net_can_receive(void *opaque)
739 +{
740 + VirtIONet *n = opaque;
741 +
742 + return (n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK) && n->can_receive;
743 +}
744 +/* Copy one incoming packet into the guest's next RX chain: header in in_sg[0], payload in the following buffers. */
745 +static void virtio_net_receive(void *opaque, const uint8_t *buf, int size)
746 +{
747 +    VirtIONet *n = opaque;
748 +    VirtQueueElement elem;
749 +    struct virtio_net_hdr *hdr;
750 +    int offset, i;
751 +
752 +    /* FIXME: the drivers really need to set their status better.
753 +     * No ring yet, or no rx bufs posted: wait until the guest adds some. */
754 +    if (n->rx_vq->vring.avail == NULL || virtqueue_pop(n->rx_vq, &elem) == 0) {
755 +        n->can_receive = 0;
756 +        return;
757 +    }
758 +
759 +    /* in_sg[0] is only valid if the chain has a writable buffer that can hold the header; else drop the packet. */
760 +    if (elem.in_num < 1 || elem.in_sg[0].iov_len < sizeof(*hdr)) {
761 +        virtqueue_push(n->rx_vq, &elem, 0);
762 +        return;
763 +    }
764 +    hdr = (void *)elem.in_sg[0].iov_base;
765 +    hdr->flags = 0;
766 +    hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
767 +
768 +    /* copy in packet. ugh */
769 +    offset = 0;
770 +    i = 1;
771 +    while (offset < size && i < elem.in_num) {
772 +        int len = MIN(elem.in_sg[i].iov_len, size - offset);
773 +        memcpy(elem.in_sg[i].iov_base, buf + offset, len);
774 +        offset += len;
775 +        i++;
776 +    }
777 +
778 +    /* signal other side */
779 +    virtqueue_push(n->rx_vq, &elem, sizeof(*hdr) + offset);
780 +    virtio_notify(&n->vdev, n->rx_vq);
781 +}
782 +
783 +/* TX: send every pending guest chain to the VLAN, then return the chain through the used ring. */
784 +static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
785 +{
786 +    VirtIONet *net = to_virtio_net(vdev);
787 +    VirtQueueElement elem;
788 +
789 +    while (virtqueue_pop(vq, &elem) != 0) {
790 +        size_t sent = 0;
791 +        int idx = 1;
792 +
793 +        /* out_sg[0] (the header) is ignored for now; each later segment goes out as its own packet */
794 +        while (idx < elem.out_num) {
795 +            const struct iovec *seg = &elem.out_sg[idx++];
796 +            qemu_send_packet(net->vc, seg->iov_base, seg->iov_len);
797 +            sent += seg->iov_len;
798 +        }
799 +
800 +        virtqueue_push(vq, &elem, sizeof(struct virtio_net_hdr) + sent);
801 +        virtio_notify(&net->vdev, vq);
802 +    }
803 +}
804 +/* Create a virtio network device on bus for NIC nd and attach it to nd's VLAN; devfn is not used here. */
805 +void *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn)
806 +{
807 +    VirtIODevice *vdev = virtio_init_pci(bus, "virtio-net", 6900, 0x1000,
808 +                                         0, VIRTIO_ID_NET,
809 +                                         0x02, 0x00, 0x00,
810 +                                         6, sizeof(VirtIONet));
811 +    VirtIONet *net = to_virtio_net(vdev);
812 +
813 +    memcpy(net->mac, nd->macaddr, 6);
814 +    net->can_receive = 0;
815 +    vdev->update_config = virtio_net_update_config;
816 +    vdev->get_features = virtio_net_get_features;
817 +    /* Order matters: virtio_add_queue hands out slots in sequence, RX first, then TX. */
818 +    net->rx_vq = virtio_add_queue(vdev, 512, virtio_net_handle_rx);
819 +    net->tx_vq = virtio_add_queue(vdev, 128, virtio_net_handle_tx);
820 +
821 +    net->vc = qemu_new_vlan_client(nd->vlan, virtio_net_receive,
822 +                                   virtio_net_can_receive, net);
823 +    return vdev;
824 +}
825 Index: qemu-0.9.1/Makefile.target
826 ===================================================================
827 --- qemu-0.9.1.orig/Makefile.target 2008-02-07 13:36:37.000000000 +0000
828 +++ qemu-0.9.1/Makefile.target 2008-02-07 13:38:53.000000000 +0000
829 @@ -437,7 +437,7 @@
830 VL_OBJS += rtl8139.o
832 # virtio devices
833 -VL_OBJS += virtio.o virtio-net.o
834 +VL_OBJS += virtio.o virtio-net.o virtio-blk.o
836 ifeq ($(TARGET_BASE_ARCH), i386)
837 # Hardware support
838 Index: qemu-0.9.1/hw/pc.c
839 ===================================================================
840 --- qemu-0.9.1.orig/hw/pc.c 2008-01-06 19:38:42.000000000 +0000
841 +++ qemu-0.9.1/hw/pc.c 2008-02-07 13:38:53.000000000 +0000
842 @@ -1008,6 +1008,18 @@
843 }
844 }
845 }
846 +
847 + /* Add virtio block devices */
848 + if (pci_enabled) {
849 + int index;
850 + int unit_id = 0;
851 +
852 + while ((index = drive_get_index(IF_VIRTIO, 0, unit_id)) != -1) {
853 + virtio_blk_init(pci_bus, 0x5002, 0x2258,
854 + drives_table[index].bdrv);
855 + unit_id++;
856 + }
857 + }
858 }
860 static void pc_init_pci(int ram_size, int vga_ram_size,
861 Index: qemu-0.9.1/hw/pc.h
862 ===================================================================
863 --- qemu-0.9.1.orig/hw/pc.h 2008-02-07 13:36:37.000000000 +0000
864 +++ qemu-0.9.1/hw/pc.h 2008-02-07 13:38:53.000000000 +0000
865 @@ -147,4 +147,8 @@
866 void *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn);
869 +/* virtio-blk.c */
870 +void *virtio_blk_init(PCIBus *bus, uint16_t vendor, uint16_t device,
871 + BlockDriverState *bs);
872 +
873 #endif
874 Index: qemu-0.9.1/hw/virtio-blk.c
875 ===================================================================
876 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
877 +++ qemu-0.9.1/hw/virtio-blk.c 2008-02-07 13:38:53.000000000 +0000
878 @@ -0,0 +1,163 @@
879 +/*
880 + * Virtio Block Device
881 + *
882 + * Copyright IBM, Corp. 2007
883 + *
884 + * Authors:
885 + * Anthony Liguori <address@hidden>
886 + *
887 + * This work is licensed under the terms of the GNU GPL, version 2. See
888 + * the COPYING file in the top-level directory.
889 + *
890 + */
891 +
892 +#include "virtio.h"
893 +#include "block.h"
894 +#include "pc.h"
895 +
896 +/* from Linux's linux/virtio_blk.h */
897 +
898 +/* The ID for virtio_block */
899 +#define VIRTIO_ID_BLOCK 2
900 +
901 +/* Feature bits */
902 +#define VIRTIO_BLK_F_BARRIER 0 /* Does host support barriers? */
903 +#define VIRTIO_BLK_F_SIZE_MAX 1 /* Indicates maximum segment size */
904 +#define VIRTIO_BLK_F_SEG_MAX 2 /* Indicates maximum # of segments */
905 +
906 +struct virtio_blk_config
907 +{
908 + uint64_t capacity; /* value returned by bdrv_get_geometry */
909 + uint32_t size_max; /* not explicitly assigned by virtio_blk_update_config */
910 + uint32_t seg_max; /* set to 128 - 2 by virtio_blk_update_config */
911 +};
912 +
913 +/* These two define direction. */
914 +#define VIRTIO_BLK_T_IN 0
915 +#define VIRTIO_BLK_T_OUT 1
916 +
917 +/* This bit says it's a scsi command, not an actual read or write. */
918 +#define VIRTIO_BLK_T_SCSI_CMD 2
919 +
920 +/* Barrier before this op. */
921 +#define VIRTIO_BLK_T_BARRIER 0x80000000
922 +
923 +/* This is the first element of the read scatter-gather list. */
924 +struct virtio_blk_outhdr
925 +{
926 + /* VIRTIO_BLK_T* */
927 + uint32_t type; /* tested for VIRTIO_BLK_T_SCSI_CMD and VIRTIO_BLK_T_OUT by virtio_blk_handle_output */
928 + /* io priority. */
929 + uint32_t ioprio; /* not read by virtio_blk_handle_output */
930 + /* Sector (ie. 512 byte offset) */
931 + uint64_t sector; /* starting sector passed to bdrv_read/bdrv_write */
932 + /* Where to put reply. */
933 + uint64_t id; /* not read by virtio_blk_handle_output */
934 +};
935 +
936 +#define VIRTIO_BLK_S_OK 0
937 +#define VIRTIO_BLK_S_IOERR 1
938 +#define VIRTIO_BLK_S_UNSUPP 2
939 +
940 +/* This is the last element of the write scatter-gather list */
941 +struct virtio_blk_inhdr
942 +{
943 + unsigned char status; /* one of the VIRTIO_BLK_S_* codes, written by virtio_blk_handle_output */
944 +};
945 +
946 +typedef struct VirtIOBlock
947 +{
948 + VirtIODevice vdev; /* must stay first: to_virtio_blk() casts VirtIODevice * to VirtIOBlock * */
949 + BlockDriverState *bs; /* backing block device passed to virtio_blk_init */
950 +} VirtIOBlock;
951 +/* Downcast a VirtIODevice to its VirtIOBlock; relies on vdev being the first member of VirtIOBlock. */
952 +static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
953 +{
954 + return (VirtIOBlock *)vdev;
955 +}
956 +/* Serve every pending request on vq: validate the chain, do the read or write, store a status byte, return the chain. */
957 +static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
958 +{
959 +    VirtIOBlock *s = to_virtio_blk(vdev);
960 +    VirtQueueElement elem;
961 +
962 +    while (virtqueue_pop(vq, &elem) != 0) {
963 +        struct virtio_blk_inhdr *in;
964 +        struct virtio_blk_outhdr *out;
965 +        unsigned int wlen = sizeof(*in);
966 +        unsigned int i;
967 +        int failed = 0;
968 +        off_t off;
969 +
970 +        /* Need a readable header (type and sector; the trailing id is unused) and a writable status byte. */
971 +        if (elem.out_num < 1 || elem.out_sg[0].iov_len < sizeof(*out) - sizeof(out->id) ||
972 +            elem.in_num < 1 || elem.in_sg[elem.in_num - 1].iov_len < sizeof(*in)) {
973 +            virtqueue_push(vq, &elem, 0); /* malformed: hand the chain back untouched */
974 +            continue;
975 +        }
976 +        out = (void *)elem.out_sg[0].iov_base;
977 +        in = (void *)elem.in_sg[elem.in_num - 1].iov_base;
978 +        off = out->sector;
979 +
980 +        if (out->type & VIRTIO_BLK_T_SCSI_CMD) {
981 +            in->status = VIRTIO_BLK_S_UNSUPP;
982 +        } else if (out->type & VIRTIO_BLK_T_OUT) {
983 +            for (i = 1; i < elem.out_num; i++) {
984 +                if (bdrv_write(s->bs, off, elem.out_sg[i].iov_base,
985 +                               elem.out_sg[i].iov_len / 512) < 0)
986 +                    failed = 1;
987 +                off += elem.out_sg[i].iov_len / 512;
988 +            }
989 +            in->status = failed ? VIRTIO_BLK_S_IOERR : VIRTIO_BLK_S_OK;
990 +        } else {
991 +            for (i = 0; i < elem.in_num - 1; i++) {
992 +                if (bdrv_read(s->bs, off, elem.in_sg[i].iov_base,
993 +                              elem.in_sg[i].iov_len / 512) < 0)
994 +                    failed = 1;
995 +                off += elem.in_sg[i].iov_len / 512;
996 +                wlen += elem.in_sg[i].iov_len;
997 +            }
998 +            in->status = failed ? VIRTIO_BLK_S_IOERR : VIRTIO_BLK_S_OK;
999 +        }
1000 +
1001 +        /* wlen covers the status byte plus, for reads, the data buffers. */
1002 +        virtqueue_push(vq, &elem, wlen);
1003 +        virtio_notify(vdev, vq);
1004 +    }
1005 +}
1006 +/* Fill the config area with the capacity reported by bdrv_get_geometry and the segment limit. */
1007 +static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
1008 +{
1009 +    VirtIOBlock *s = to_virtio_blk(vdev);
1010 +    struct virtio_blk_config blkcfg = { 0 }; /* zero size_max so no stack garbage reaches the guest */
1011 +    int64_t capacity;
1012 +
1013 +    bdrv_get_geometry(s->bs, &capacity);
1014 +    blkcfg.capacity = capacity;
1015 +    blkcfg.seg_max = 128 - 2; /* presumably the 128-entry queue minus header and status descriptors */
1016 +    memcpy(config, &blkcfg, sizeof(blkcfg));
1017 +}
1018 +/* Host feature bitmask: only the VIRTIO_BLK_F_SEG_MAX bit is offered. */
1019 +static uint32_t virtio_blk_get_features(VirtIODevice *vdev)
1020 +{
1021 + return (1 << VIRTIO_BLK_F_SEG_MAX);
1022 +}
1023 +/* Create a virtio block device on bus backed by bs. NOTE: the vendor and device arguments are not used. */
1024 +void *virtio_blk_init(PCIBus *bus, uint16_t vendor, uint16_t device,
1025 +                      BlockDriverState *bs)
1026 +{
1027 +    VirtIODevice *vdev;
1028 +    VirtIOBlock *blk;
1029 +
1030 +    vdev = virtio_init_pci(bus, "virtio-blk", 6900, 0x1001,
1031 +                           0, VIRTIO_ID_BLOCK,
1032 +                           0x01, 0x80, 0x00,
1033 +                           16, sizeof(VirtIOBlock));
1034 +    blk = to_virtio_blk(vdev);
1035 +    blk->bs = bs;
1036 +    vdev->update_config = virtio_blk_update_config;
1037 +    vdev->get_features = virtio_blk_get_features;
1038 +
1039 +    virtio_add_queue(vdev, 128, virtio_blk_handle_output);
1040 +    return vdev;
1041 +}
1042 Index: qemu-0.9.1/sysemu.h
1043 ===================================================================
1044 --- qemu-0.9.1.orig/sysemu.h 2008-01-06 19:38:42.000000000 +0000
1045 +++ qemu-0.9.1/sysemu.h 2008-02-07 13:38:53.000000000 +0000
1046 @@ -117,7 +117,7 @@
1047 #endif
1049 typedef enum {
1050 - IF_IDE, IF_SCSI, IF_FLOPPY, IF_PFLASH, IF_MTD, IF_SD
1051 + IF_IDE, IF_SCSI, IF_FLOPPY, IF_PFLASH, IF_MTD, IF_SD, IF_VIRTIO
1052 } BlockInterfaceType;
1054 typedef struct DriveInfo {
1055 Index: qemu-0.9.1/vl.c
1056 ===================================================================
1057 --- qemu-0.9.1.orig/vl.c 2008-01-06 19:38:42.000000000 +0000
1058 +++ qemu-0.9.1/vl.c 2008-02-07 13:40:52.000000000 +0000
1059 @@ -4953,6 +4953,9 @@
1060 } else if (!strcmp(buf, "sd")) {
1061 type = IF_SD;
1062 max_devs = 0;
1063 + } else if (!strcmp(buf, "virtio")) {
1064 + type = IF_VIRTIO;
1065 + max_devs = 0;
1066 } else {
1067 fprintf(stderr, "qemu: '%s' unsupported bus type '%s'\n", str, buf);
1068 return -1;
1069 @@ -5141,6 +5144,7 @@
1070 break;
1071 case IF_PFLASH:
1072 case IF_MTD:
1073 + case IF_VIRTIO:
1074 break;
1076 if (!file[0])