wok rev 698

qemu: add virtio support
author Pascal Bellard <pascal.bellard@slitaz.org>
date Mon Apr 28 11:41:08 2008 +0000 (2008-04-28)
parents 41832070af28
children 036b1f427445
files qemu/receipt qemu/stuff/virtio.u
line diff
     1.1 --- a/qemu/receipt	Sun Apr 27 12:33:42 2008 +0200
     1.2 +++ b/qemu/receipt	Mon Apr 28 11:41:08 2008 +0000
     1.3 @@ -15,6 +15,7 @@
     1.4  compile_rules()
     1.5  {
     1.6  	cd $src
     1.7 +	patch -p1 < ../stuff/virtio.u
     1.8  	#./configure --prefix=/usr --enable-alsa --disable-gfx-check \
     1.9  
    1.10  	./configure --prefix=/usr --enable-alsa \
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/qemu/stuff/virtio.u	Mon Apr 28 11:41:08 2008 +0000
     2.3 @@ -0,0 +1,1076 @@
     2.4 +Index: qemu-0.9.1/Makefile.target
     2.5 +===================================================================
     2.6 +--- qemu-0.9.1.orig/Makefile.target	2008-01-06 19:38:41.000000000 +0000
     2.7 ++++ qemu-0.9.1/Makefile.target	2008-02-07 13:36:23.000000000 +0000
     2.8 +@@ -436,6 +436,9 @@
     2.9 + VL_OBJS += pcnet.o
    2.10 + VL_OBJS += rtl8139.o
    2.11 + 
    2.12 ++# virtio devices
    2.13 ++VL_OBJS += virtio.o
    2.14 ++
    2.15 + ifeq ($(TARGET_BASE_ARCH), i386)
    2.16 + # Hardware support
    2.17 + VL_OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) dma.o
    2.18 +Index: qemu-0.9.1/hw/virtio.c
    2.19 +===================================================================
    2.20 +--- /dev/null	1970-01-01 00:00:00.000000000 +0000
    2.21 ++++ qemu-0.9.1/hw/virtio.c	2008-02-07 13:36:23.000000000 +0000
    2.22 +@@ -0,0 +1,422 @@
    2.23 ++/*
    2.24 ++ * Virtio Support
    2.25 ++ *
    2.26 ++ * Copyright IBM, Corp. 2007
    2.27 ++ *
    2.28 ++ * Authors:
    2.29 ++ *  Anthony Liguori   <address@hidden>
    2.30 ++ *
    2.31 ++ * This work is licensed under the terms of the GNU GPL, version 2.  See
    2.32 ++ * the COPYING file in the top-level directory.
    2.33 ++ *
    2.34 ++ */
    2.35 ++
    2.36 ++#include <inttypes.h>
    2.37 ++#include <err.h>
    2.38 ++
    2.39 ++#include "virtio.h"
    2.40 ++#include "sysemu.h"
    2.41 ++
    2.42 ++/* from Linux's linux/virtio_pci.h */
    2.43 ++
    2.44 ++/* A 32-bit r/o bitmask of the features supported by the host */
    2.45 ++#define VIRTIO_PCI_HOST_FEATURES       0
    2.46 ++
    2.47 ++/* A 32-bit r/w bitmask of features activated by the guest */
    2.48 ++#define VIRTIO_PCI_GUEST_FEATURES      4
    2.49 ++
    2.50 ++/* A 32-bit r/w PFN for the currently selected queue */
    2.51 ++#define VIRTIO_PCI_QUEUE_PFN           8
    2.52 ++
    2.53 ++/* A 16-bit r/o queue size for the currently selected queue */
    2.54 ++#define VIRTIO_PCI_QUEUE_NUM           12
    2.55 ++
    2.56 ++/* A 16-bit r/w queue selector */
    2.57 ++#define VIRTIO_PCI_QUEUE_SEL           14
    2.58 ++
    2.59 ++/* A 16-bit r/w queue notifier */
    2.60 ++#define VIRTIO_PCI_QUEUE_NOTIFY                16
    2.61 ++
    2.62 ++/* An 8-bit device status register.  */
    2.63 ++#define VIRTIO_PCI_STATUS              18
    2.64 ++
    2.65 ++/* An 8-bit r/o interrupt status register.  Reading the value will return the
    2.66 ++ * current contents of the ISR and will also clear it.  This is effectively
    2.67 ++ * a read-and-acknowledge. */
    2.68 ++#define VIRTIO_PCI_ISR                 19
    2.69 ++
    2.70 ++#define VIRTIO_PCI_CONFIG              20
    2.71 ++
    2.72 ++/* QEMU doesn't strictly need write barriers since everything runs in
    2.73 ++ * lock-step.  We'll leave the calls to wmb() in though to make it obvious for
    2.74 ++ * KVM or if kqemu gets SMP support.
    2.75 ++ */
    2.76 ++#define wmb() do { } while (0)
    2.77 ++
    2.78 ++/* virt queue functions */
    2.79 ++/* Point vq->vring's desc, avail and used pointers into the ring memory at p. */
    2.80 ++static void virtqueue_init(VirtQueue *vq, void *p)
    2.81 ++{
    2.82 ++    vq->vring.desc = p; /* descriptor table sits at the start */
    2.83 ++    vq->vring.avail = p + vq->vring.num * sizeof(VRingDesc); /* void * arithmetic is a GNU C extension */
    2.84 ++    vq->vring.used = (void *)TARGET_PAGE_ALIGN((unsigned long)&vq->vring.avail->ring[vq->vring.num]); /* address just past avail->ring[num], passed through TARGET_PAGE_ALIGN */
    2.85 ++}
    2.86 ++/* Return the index of the descriptor chained after i, or vring.num at chain end. */
    2.87 ++static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i)
    2.88 ++{
    2.89 ++    unsigned int next;
    2.90 ++
    2.91 ++    /* If this descriptor says it doesn't chain, we're done. */
    2.92 ++    if (!(vq->vring.desc[i].flags & VRING_DESC_F_NEXT))
    2.93 ++       return vq->vring.num;
    2.94 ++
    2.95 ++    /* Read the link once into a local, then validate the local copy below. */
    2.96 ++    next = vq->vring.desc[i].next;
    2.97 ++    /* Marker only: wmb() is defined as an empty statement in this file. */
    2.98 ++    wmb();
    2.99 ++
   2.100 ++    if (next >= vq->vring.num) /* a link past the table is fatal: errx() exits */
   2.101 ++       errx(1, "Desc next is %u", next);
   2.102 ++
   2.103 ++    return next;
   2.104 ++}
   2.105 ++/* Record elem as consumed in the used ring, reporting len bytes, and bump used->idx. */
   2.106 ++void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
   2.107 ++                   unsigned int len)
   2.108 ++{
   2.109 ++    VRingUsedElem *used;
   2.110 ++
   2.111 ++    /* Get a pointer to the next entry in the used ring (slot = idx modulo ring size). */
   2.112 ++    used = &vq->vring.used->ring[vq->vring.used->idx % vq->vring.num];
   2.113 ++    used->id = elem->index; /* head descriptor index saved by virtqueue_pop() */
   2.114 ++    used->len = len;
   2.115 ++    /* Make sure buffer is written before we update index (wmb() is empty in this file). */
   2.116 ++    wmb();
   2.117 ++    vq->vring.used->idx++;
   2.118 ++}
   2.119 ++/* Pop the next guest-published descriptor chain into elem; 0 means none pending. */
   2.120 ++int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
   2.121 ++{
   2.122 ++    unsigned int i, head;
   2.123 ++    /* Returns in_num + out_num; any malformed guest ring data is fatal (errx). */
   2.124 ++
   2.125 ++    /* Check it isn't doing very strange things with descriptor numbers. */
   2.126 ++    if ((uint16_t)(vq->vring.avail->idx - vq->last_avail_idx) > vq->vring.num)
   2.127 ++       errx(1, "Guest moved avail index from %u to %u",
   2.128 ++            vq->last_avail_idx, vq->vring.avail->idx);
   2.129 ++
   2.130 ++    /* If there's nothing new since last we looked, return invalid. */
   2.131 ++    if (vq->vring.avail->idx == vq->last_avail_idx)
   2.132 ++       return 0;
   2.133 ++
   2.134 ++    /* Grab the next descriptor number they're advertising, and increment
   2.135 ++     * the index we've seen. */
   2.136 ++    head = vq->vring.avail->ring[vq->last_avail_idx++ % vq->vring.num];
   2.137 ++
   2.138 ++    /* If their number is silly, that's a fatal mistake. */
   2.139 ++    if (head >= vq->vring.num)
   2.140 ++       errx(1, "Guest says index %u is available", head);
   2.141 ++
   2.142 ++    /* When we start there are none of either input nor output. */
   2.143 ++    elem->out_num = elem->in_num = 0;
   2.144 ++
   2.145 ++    i = head;
   2.146 ++    do {
   2.147 ++       struct iovec *sg;
   2.148 ++
   2.149 ++       /* Bound the chain before storing, so a looped chain cannot overrun sg[]. */
   2.150 ++       if ((elem->in_num + elem->out_num) >= vq->vring.num)
   2.151 ++           errx(1, "Looped descriptor");
   2.152 ++
   2.153 ++       /* Range-check without forming addr + len, which can wrap in 64 bits. */
   2.154 ++       if (vq->vring.desc[i].addr > ram_size ||
   2.155 ++           vq->vring.desc[i].len > ram_size - vq->vring.desc[i].addr)
   2.156 ++           errx(1, "Guest sent invalid pointer");
   2.157 ++
   2.158 ++       if (vq->vring.desc[i].flags & VRING_DESC_F_WRITE)
   2.159 ++           sg = &elem->in_sg[elem->in_num++];
   2.160 ++       else
   2.161 ++           sg = &elem->out_sg[elem->out_num++];
   2.162 ++       sg->iov_len = vq->vring.desc[i].len;
   2.163 ++       sg->iov_base = phys_ram_base + vq->vring.desc[i].addr;
   2.164 ++    } while ((i = virtqueue_next_desc(vq, i)) != vq->vring.num);
   2.165 ++
   2.166 ++    elem->index = head;
   2.167 ++
   2.168 ++    return elem->in_num + elem->out_num;
   2.169 ++}
   2.170 ++
   2.171 ++/* virtio device */
   2.172 ++/* Downcast; valid because pci_dev is the first member of struct VirtIODevice. */
   2.173 ++static VirtIODevice *to_virtio_device(PCIDevice *pci_dev)
   2.174 ++{
   2.175 ++    return (VirtIODevice *)pci_dev;
   2.176 ++}
   2.177 ++/* Drive the device's first PCI irq line from bit 0 of the ISR. */
   2.178 ++static void virtio_update_irq(VirtIODevice *vdev)
   2.179 ++{
   2.180 ++    qemu_set_irq(vdev->pci_dev.irq[0], vdev->isr & 1);
   2.181 ++}
   2.182 ++/* Port-write handler for the VIRTIO_PCI_* registers; opaque is the device. */
   2.183 ++static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
   2.184 ++{
   2.185 ++    VirtIODevice *vdev = to_virtio_device(opaque);
   2.186 ++    ram_addr_t pa;
   2.187 ++
   2.188 ++    addr -= vdev->addr; /* absolute port -> register offset */
   2.189 ++
   2.190 ++    switch (addr) {
   2.191 ++    case VIRTIO_PCI_GUEST_FEATURES:
   2.192 ++       if (vdev->set_features)
   2.193 ++           vdev->set_features(vdev, val);
   2.194 ++       vdev->features = val;
   2.195 ++       break;
   2.196 ++    case VIRTIO_PCI_QUEUE_PFN:
   2.197 ++       pa = (ram_addr_t)val << TARGET_PAGE_BITS;
   2.198 ++       vdev->vq[vdev->queue_sel].pfn = val;
   2.199 ++       if (pa == 0) { /* FIXME deal with device tear down */
   2.200 ++           vdev->vq[vdev->queue_sel].vring.desc = NULL;
   2.201 ++           vdev->vq[vdev->queue_sel].vring.avail = NULL;
   2.202 ++           vdev->vq[vdev->queue_sel].vring.used = NULL;
   2.203 ++       } else if (vdev->vq[vdev->queue_sel].vring.num && /* never-added queues have no handler */
   2.204 ++                  pa < (ram_size - TARGET_PAGE_SIZE)) {
   2.205 ++           virtqueue_init(&vdev->vq[vdev->queue_sel], phys_ram_base + pa);
   2.206 ++       }
   2.207 ++       break;
   2.208 ++    case VIRTIO_PCI_QUEUE_SEL:
   2.209 ++       if (val < VIRTIO_PCI_QUEUE_MAX)
   2.210 ++           vdev->queue_sel = val;
   2.211 ++       break;
   2.212 ++    case VIRTIO_PCI_QUEUE_NOTIFY: /* desc is only non-NULL for queues set up above */
   2.213 ++       if (val < VIRTIO_PCI_QUEUE_MAX && vdev->vq[val].vring.desc)
   2.214 ++           vdev->vq[val].handle_output(vdev, &vdev->vq[val]);
   2.215 ++       break;
   2.216 ++    case VIRTIO_PCI_STATUS:
   2.217 ++       vdev->status = val & 0xFF;
   2.218 ++       break;
   2.219 ++    }
   2.220 ++}
   2.221 ++/* Port-read handler for the VIRTIO_PCI_* registers; unknown offsets read as ~0. */
   2.222 ++static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
   2.223 ++{
   2.224 ++    VirtIODevice *vdev = to_virtio_device(opaque);
   2.225 ++    uint32_t ret = 0xFFFFFFFF;
   2.226 ++
   2.227 ++    addr -= vdev->addr; /* absolute port -> register offset */
   2.228 ++
   2.229 ++    switch (addr) {
   2.230 ++    case VIRTIO_PCI_HOST_FEATURES: /* optional callback, like set_features on the write side */
   2.231 ++       ret = vdev->get_features ? vdev->get_features(vdev) : 0;
   2.232 ++       break;
   2.233 ++    case VIRTIO_PCI_GUEST_FEATURES:
   2.234 ++       ret = vdev->features;
   2.235 ++       break;
   2.236 ++    case VIRTIO_PCI_QUEUE_PFN:
   2.237 ++       ret = vdev->vq[vdev->queue_sel].pfn;
   2.238 ++       break;
   2.239 ++    case VIRTIO_PCI_QUEUE_NUM:
   2.240 ++       ret = vdev->vq[vdev->queue_sel].vring.num;
   2.241 ++       break;
   2.242 ++    case VIRTIO_PCI_QUEUE_SEL:
   2.243 ++       ret = vdev->queue_sel;
   2.244 ++       break;
   2.245 ++    case VIRTIO_PCI_STATUS:
   2.246 ++       ret = vdev->status;
   2.247 ++       break;
   2.248 ++    case VIRTIO_PCI_ISR:
   2.249 ++       /* reading from the ISR also clears it. */
   2.250 ++       ret = vdev->isr;
   2.251 ++       vdev->isr = 0;
   2.252 ++       virtio_update_irq(vdev);
   2.253 ++       break;
   2.254 ++    default:
   2.255 ++       break;
   2.256 ++    }
   2.257 ++
   2.258 ++    return ret;
   2.259 ++}
   2.260 ++/* Read one byte of device config at port addr; out-of-range reads return ~0. */
   2.261 ++static uint32_t virtio_config_readb(void *opaque, uint32_t addr)
   2.262 ++{
   2.263 ++    VirtIODevice *vdev = opaque;
   2.264 ++    uint8_t val;
   2.265 ++    /* Rebase to a config offset; config_len is tested first as the subtraction is unsigned. */
   2.266 ++    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
   2.267 ++    if (vdev->config_len < sizeof(val) || addr > vdev->config_len - sizeof(val))
   2.268 ++       return (uint32_t)-1;
   2.269 ++
   2.270 ++    memcpy(&val, vdev->config + addr, sizeof(val));
   2.271 ++    return val;
   2.272 ++}
   2.273 ++/* Read a 16-bit value of device config at port addr; out-of-range reads return ~0. */
   2.274 ++static uint32_t virtio_config_readw(void *opaque, uint32_t addr)
   2.275 ++{
   2.276 ++    VirtIODevice *vdev = opaque;
   2.277 ++    uint16_t val;
   2.278 ++    /* Rebase to a config offset; config_len is tested first as the subtraction is unsigned. */
   2.279 ++    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
   2.280 ++    if (vdev->config_len < sizeof(val) || addr > vdev->config_len - sizeof(val))
   2.281 ++       return (uint32_t)-1;
   2.282 ++
   2.283 ++    memcpy(&val, vdev->config + addr, sizeof(val));
   2.284 ++    return val;
   2.285 ++}
   2.286 ++/* Read a 32-bit value of device config at port addr; out-of-range reads return ~0. */
   2.287 ++static uint32_t virtio_config_readl(void *opaque, uint32_t addr)
   2.288 ++{
   2.289 ++    VirtIODevice *vdev = opaque;
   2.290 ++    uint32_t val;
   2.291 ++    /* Rebase to a config offset; config_len is tested first as the subtraction is unsigned. */
   2.292 ++    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
   2.293 ++    if (vdev->config_len < sizeof(val) || addr > vdev->config_len - sizeof(val))
   2.294 ++       return (uint32_t)-1;
   2.295 ++
   2.296 ++    memcpy(&val, vdev->config + addr, sizeof(val));
   2.297 ++    return val;
   2.298 ++}
   2.299 ++/* Store the low byte of data into device config at port addr; out-of-range writes are dropped. */
   2.300 ++static void virtio_config_writeb(void *opaque, uint32_t addr, uint32_t data)
   2.301 ++{
   2.302 ++    VirtIODevice *vdev = opaque;
   2.303 ++    uint8_t val = data;
   2.304 ++    /* Rebase to a config offset; config_len is tested first as the subtraction is unsigned. */
   2.305 ++    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
   2.306 ++    if (vdev->config_len < sizeof(val) || addr > vdev->config_len - sizeof(val))
   2.307 ++       return;
   2.308 ++
   2.309 ++    memcpy(vdev->config + addr, &val, sizeof(val));
   2.310 ++}
   2.311 ++/* Store the low 16 bits of data into device config at port addr; out-of-range writes are dropped. */
   2.312 ++static void virtio_config_writew(void *opaque, uint32_t addr, uint32_t data)
   2.313 ++{
   2.314 ++    VirtIODevice *vdev = opaque;
   2.315 ++    uint16_t val = data;
   2.316 ++    /* Rebase to a config offset; config_len is tested first as the subtraction is unsigned. */
   2.317 ++    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
   2.318 ++    if (vdev->config_len < sizeof(val) || addr > vdev->config_len - sizeof(val))
   2.319 ++       return;
   2.320 ++
   2.321 ++    memcpy(vdev->config + addr, &val, sizeof(val));
   2.322 ++}
   2.323 ++/* Store all 32 bits of data into device config at port addr; out-of-range writes are dropped. */
   2.324 ++static void virtio_config_writel(void *opaque, uint32_t addr, uint32_t data)
   2.325 ++{
   2.326 ++    VirtIODevice *vdev = opaque;
   2.327 ++    uint32_t val = data;
   2.328 ++    /* Rebase to a config offset; config_len is tested first as the subtraction is unsigned. */
   2.329 ++    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
   2.330 ++    if (vdev->config_len < sizeof(val) || addr > vdev->config_len - sizeof(val))
   2.331 ++       return;
   2.332 ++
   2.333 ++    memcpy(vdev->config + addr, &val, sizeof(val));
   2.334 ++}
   2.335 ++/* I/O region map callback: remember the port base and install the port handlers. */
   2.336 ++static void virtio_map(PCIDevice *pci_dev, int region_num,
   2.337 ++                      uint32_t addr, uint32_t size, int type)
   2.338 ++{
   2.339 ++    VirtIODevice *vdev = to_virtio_device(pci_dev);
   2.340 ++    int i;
   2.341 ++
   2.342 ++    vdev->addr = addr; /* region_num, size and type are not used here */
   2.343 ++    for (i = 0; i < 3; i++) { /* access sizes 1, 2 and 4 (1 << i) share one handler pair */
   2.344 ++       register_ioport_write(addr, 20, 1 << i, virtio_ioport_write, vdev);
   2.345 ++       register_ioport_read(addr, 20, 1 << i, virtio_ioport_read, vdev);
   2.346 ++    }
   2.347 ++
   2.348 ++    if (vdev->config_len) { /* the literal 20 equals VIRTIO_PCI_CONFIG, the config offset */
   2.349 ++       register_ioport_write(addr + 20, vdev->config_len, 1,
   2.350 ++                             virtio_config_writeb, vdev);
   2.351 ++       register_ioport_write(addr + 20, vdev->config_len, 2,
   2.352 ++                             virtio_config_writew, vdev);
   2.353 ++       register_ioport_write(addr + 20, vdev->config_len, 4,
   2.354 ++                             virtio_config_writel, vdev);
   2.355 ++       register_ioport_read(addr + 20, vdev->config_len, 1,
   2.356 ++                            virtio_config_readb, vdev);
   2.357 ++       register_ioport_read(addr + 20, vdev->config_len, 2,
   2.358 ++                            virtio_config_readw, vdev);
   2.359 ++       register_ioport_read(addr + 20, vdev->config_len, 4,
   2.360 ++                            virtio_config_readl, vdev);
   2.361 ++       /* NOTE(review): update_config is called without a NULL check; the device must set it before mapping. */
   2.362 ++       vdev->update_config(vdev, vdev->config);
   2.363 ++    }
   2.364 ++}
   2.365 ++/* Claim the first unused queue slot of vdev for a ring of queue_size entries; abort()s on misuse. */
   2.366 ++VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
   2.367 ++                           void (*handle_output)(VirtIODevice *, VirtQueue *))
   2.368 ++{
   2.369 ++    int i;
   2.370 ++
   2.371 ++    /* A slot counts as free while its vring.num is 0, so 0 is not a usable size. */
   2.372 ++    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX && vdev->vq[i].vring.num != 0; i++)
   2.373 ++       ;
   2.374 ++
   2.375 ++    if (i == VIRTIO_PCI_QUEUE_MAX ||
   2.376 ++       queue_size <= 0 || queue_size > VIRTQUEUE_MAX_SIZE)
   2.377 ++       abort();
   2.378 ++
   2.379 ++    vdev->vq[i].vring.num = queue_size;
   2.380 ++    vdev->vq[i].handle_output = handle_output; /* invoked on VIRTIO_PCI_QUEUE_NOTIFY writes */
   2.381 ++    vdev->vq[i].index = i;
   2.382 ++
   2.383 ++    return &vdev->vq[i];
   2.384 ++}
   2.385 ++/* Raise the device interrupt for vq unless the guest set VRING_AVAIL_F_NO_INTERRUPT. */
   2.386 ++void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
   2.387 ++{
   2.388 ++    if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) /* avail must already be mapped */
   2.389 ++       return;
   2.390 ++
   2.391 ++    vdev->isr = 1; /* cleared again when the guest reads VIRTIO_PCI_ISR */
   2.392 ++    virtio_update_irq(vdev);
   2.393 ++}
   2.394 ++/* Register a PCI device of struct_size bytes and initialise its VirtIODevice part. */
   2.395 ++VirtIODevice *virtio_init_pci(PCIBus *bus, const char *name,
   2.396 ++                             uint16_t vendor, uint16_t device,
   2.397 ++                             uint16_t subvendor, uint16_t subdevice,
   2.398 ++                             uint8_t class_code, uint8_t subclass_code,
   2.399 ++                             uint8_t pif, size_t config_size,
   2.400 ++                             size_t struct_size)
   2.401 ++{
   2.402 ++    VirtIODevice *vdev;
   2.403 ++    PCIDevice *pci_dev;
   2.404 ++    uint8_t *config;
   2.405 ++    /* NOTE(review): the pci_register_device() result is used without a NULL check. */
   2.406 ++    pci_dev = pci_register_device(bus, name, struct_size,
   2.407 ++                                 -1, NULL, NULL);
   2.408 ++    vdev = to_virtio_device(pci_dev);
   2.409 ++
   2.410 ++    vdev->status = 0;
   2.411 ++    vdev->isr = 0;
   2.412 ++    vdev->queue_sel = 0;
   2.413 ++    memset(vdev->vq, 0, sizeof(vdev->vq)); /* vring.num == 0 marks every queue slot unused */
   2.414 ++
   2.415 ++    config = pci_dev->config; /* PCI config space bytes, filled little-endian below */
   2.416 ++    config[0x00] = vendor & 0xFF;
   2.417 ++    config[0x01] = (vendor >> 8) & 0xFF;
   2.418 ++    config[0x02] = device & 0xFF;
   2.419 ++    config[0x03] = (device >> 8) & 0xFF;
   2.420 ++
   2.421 ++    config[0x09] = pif;
   2.422 ++    config[0x0a] = subclass_code;
   2.423 ++    config[0x0b] = class_code;
   2.424 ++    config[0x0e] = 0x00; /* presumably the PCI header type byte -- confirm against the PCI spec */
   2.425 ++
   2.426 ++    config[0x2c] = subvendor & 0xFF;
   2.427 ++    config[0x2d] = (subvendor >> 8) & 0xFF;
   2.428 ++    config[0x2e] = subdevice & 0xFF;
   2.429 ++    config[0x2f] = (subdevice >> 8) & 0xFF;
   2.430 ++
   2.431 ++    config[0x3d] = 1; /* presumably the PCI interrupt pin byte -- confirm against the PCI spec */
   2.432 ++
   2.433 ++    vdev->name = name; /* pointer is stored, not copied */
   2.434 ++    vdev->config_len = config_size;
   2.435 ++    if (vdev->config_len)
   2.436 ++       vdev->config = qemu_mallocz(config_size);
   2.437 ++    else
   2.438 ++       vdev->config = NULL;
   2.439 ++    /* One I/O region: 20 bytes of registers followed by the device config. */
   2.440 ++    pci_register_io_region(pci_dev, 0, 20 + config_size, PCI_ADDRESS_SPACE_IO,
   2.441 ++                          virtio_map);
   2.442 ++
   2.443 ++    return vdev;
   2.444 ++}
   2.445 +Index: qemu-0.9.1/hw/virtio.h
   2.446 +===================================================================
   2.447 +--- /dev/null	1970-01-01 00:00:00.000000000 +0000
   2.448 ++++ qemu-0.9.1/hw/virtio.h	2008-02-07 13:36:23.000000000 +0000
   2.449 +@@ -0,0 +1,143 @@
   2.450 ++/*
   2.451 ++ * Virtio Support
   2.452 ++ *
   2.453 ++ * Copyright IBM, Corp. 2007
   2.454 ++ *
   2.455 ++ * Authors:
   2.456 ++ *  Anthony Liguori   <address@hidden>
   2.457 ++ *
   2.458 ++ * This work is licensed under the terms of the GNU GPL, version 2.  See
   2.459 ++ * the COPYING file in the top-level directory.
   2.460 ++ *
   2.461 ++ */
   2.462 ++
   2.463 ++#ifndef _QEMU_VIRTIO_H
   2.464 ++#define _QEMU_VIRTIO_H
   2.465 ++
   2.466 ++#include <sys/uio.h>
   2.467 ++#include "hw.h"
   2.468 ++#include "pci.h"
   2.469 ++
   2.470 ++/* from Linux's linux/virtio_config.h */
   2.471 ++
   2.472 ++/* Status byte for guest to report progress, and synchronize features. */
   2.473 ++/* We have seen device and processed generic fields (VIRTIO_CONFIG_F_VIRTIO) */
   2.474 ++#define VIRTIO_CONFIG_S_ACKNOWLEDGE    1
   2.475 ++/* We have found a driver for the device. */
   2.476 ++#define VIRTIO_CONFIG_S_DRIVER         2
   2.477 ++/* Driver has used its parts of the config, and is happy */
   2.478 ++#define VIRTIO_CONFIG_S_DRIVER_OK      4
   2.479 ++/* We've given up on this device. */
   2.480 ++#define VIRTIO_CONFIG_S_FAILED         0x80
   2.481 ++
   2.482 ++/* from Linux's linux/virtio_ring.h */
   2.483 ++
   2.484 ++/* This marks a buffer as continuing via the next field. */
   2.485 ++#define VRING_DESC_F_NEXT      1
   2.486 ++/* This marks a buffer as write-only (otherwise read-only). */
   2.487 ++#define VRING_DESC_F_WRITE     2
   2.488 ++
   2.489 ++/* This means don't notify other side when buffer added. */
   2.490 ++#define VRING_USED_F_NO_NOTIFY 1
   2.491 ++/* This means don't interrupt guest when buffer consumed. */
   2.492 ++#define VRING_AVAIL_F_NO_INTERRUPT     1
   2.493 ++
   2.494 ++typedef struct VirtQueue VirtQueue;
   2.495 ++typedef struct VirtIODevice VirtIODevice;
   2.496 ++/* One entry of the descriptor table shared with the guest. */
   2.497 ++typedef struct VRingDesc
   2.498 ++{
   2.499 ++    uint64_t addr;  /* buffer address; virtqueue_pop() adds it to phys_ram_base */
   2.500 ++    uint32_t len;   /* buffer length, copied into iov_len */
   2.501 ++    uint16_t flags; /* VRING_DESC_F_NEXT / VRING_DESC_F_WRITE bits */
   2.502 ++    uint16_t next;  /* index of the chained descriptor when F_NEXT is set */
   2.503 ++} VRingDesc;
   2.504 ++/* Ring of chain heads offered by the guest; read by virtqueue_pop(). */
   2.505 ++typedef struct VRingAvail
   2.506 ++{
   2.507 ++    uint16_t flags;   /* VRING_AVAIL_F_NO_INTERRUPT is tested in virtio_notify() */
   2.508 ++    uint16_t idx;     /* compared against VirtQueue.last_avail_idx */
   2.509 ++    uint16_t ring[0]; /* head descriptor indexes, addressed modulo vring.num */
   2.510 ++} VRingAvail;
   2.511 ++/* One completed chain as written by virtqueue_push(). */
   2.512 ++typedef struct VRingUsedElem
   2.513 ++{
   2.514 ++    uint32_t id;  /* head descriptor index of the chain (elem->index) */
   2.515 ++    uint32_t len; /* length reported by the caller of virtqueue_push() */
   2.516 ++} VRingUsedElem;
   2.517 ++/* Ring of completed chains, filled by virtqueue_push(). */
   2.518 ++typedef struct VRingUsed
   2.519 ++{
   2.520 ++    uint16_t flags;        /* not touched by the code in virtio.c */
   2.521 ++    uint16_t idx;          /* incremented once per virtqueue_push() */
   2.522 ++    VRingUsedElem ring[0]; /* entries, addressed modulo vring.num */
   2.523 ++} VRingUsed;
   2.524 ++/* Host-side view of one ring: its size and pointers to its three parts. */
   2.525 ++typedef struct VRing
   2.526 ++{
   2.527 ++    unsigned int num;  /* ring size, set by virtio_add_queue(); 0 marks an unused slot */
   2.528 ++    VRingDesc *desc;   /* NULL until the guest writes a non-zero VIRTIO_PCI_QUEUE_PFN */
   2.529 ++    VRingAvail *avail; /* set together with desc by virtqueue_init() */
   2.530 ++    VRingUsed *used;   /* set together with desc by virtqueue_init() */
   2.531 ++} VRing;
   2.532 ++/* Per-queue device state. */
   2.533 ++struct VirtQueue
   2.534 ++{
   2.535 ++    VRing vring;
   2.536 ++    uint32_t pfn;            /* last value written to VIRTIO_PCI_QUEUE_PFN for this queue */
   2.537 ++    uint16_t last_avail_idx; /* avail entries consumed so far by virtqueue_pop() */
   2.538 ++    void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq); /* called on queue notify */
   2.539 ++    int index;               /* slot number inside VirtIODevice.vq[] */
   2.540 ++};
   2.541 ++
   2.542 ++#define VIRTQUEUE_MAX_SIZE 1024
   2.543 ++/* One popped descriptor chain, split by direction (filled by virtqueue_pop()). */
   2.544 ++typedef struct VirtQueueElement
   2.545 ++{
   2.546 ++    unsigned int index;   /* head descriptor index, echoed back by virtqueue_push() */
   2.547 ++    unsigned int out_num; /* valid entries in out_sg */
   2.548 ++    unsigned int in_num;  /* valid entries in in_sg */
   2.549 ++    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];  /* descriptors flagged VRING_DESC_F_WRITE */
   2.550 ++    struct iovec out_sg[VIRTQUEUE_MAX_SIZE]; /* descriptors without VRING_DESC_F_WRITE */
   2.551 ++} VirtQueueElement;
   2.552 ++
   2.553 ++#define VIRTIO_PCI_QUEUE_MAX   16
   2.554 ++/* Common state of a virtio PCI device; device types embed it as their first member. */
   2.555 ++struct VirtIODevice
   2.556 ++{
   2.557 ++    PCIDevice pci_dev; /* must stay first: to_virtio_device() casts PCIDevice * to this type */
   2.558 ++    const char *name;
   2.559 ++    uint32_t addr;     /* I/O port base recorded by virtio_map() */
   2.560 ++    uint16_t vendor;   /* not assigned anywhere in virtio.c */
   2.561 ++    uint16_t device;   /* not assigned anywhere in virtio.c */
   2.562 ++    uint8_t status;    /* VIRTIO_PCI_STATUS register (VIRTIO_CONFIG_S_* bits) */
   2.563 ++    uint8_t isr;       /* VIRTIO_PCI_ISR register; bit 0 drives the irq line */
   2.564 ++    uint16_t queue_sel; /* queue addressed by the PFN and NUM registers */
   2.565 ++    uint32_t features; /* last value the guest wrote to VIRTIO_PCI_GUEST_FEATURES */
   2.566 ++    size_t config_len; /* size in bytes of the buffer behind config */
   2.567 ++    void *config;      /* device config bytes, NULL when config_len is 0 */
   2.568 ++    uint32_t (*get_features)(VirtIODevice *vdev);
   2.569 ++    void (*set_features)(VirtIODevice *vdev, uint32_t val); /* optional: NULL-checked by the caller */
   2.570 ++    void (*update_config)(VirtIODevice *vdev, uint8_t *config); /* called from virtio_map() */
   2.571 ++    VirtQueue vq[VIRTIO_PCI_QUEUE_MAX];
   2.572 ++};
   2.573 ++
   2.574 ++VirtIODevice *virtio_init_pci(PCIBus *bus, const char *name,
   2.575 ++                             uint16_t vendor, uint16_t device,
   2.576 ++                             uint16_t subvendor, uint16_t subdevice,
   2.577 ++                             uint8_t class_code, uint8_t subclass_code,
   2.578 ++                             uint8_t pif, size_t config_size,
   2.579 ++                             size_t struct_size);
   2.580 ++
   2.581 ++VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
   2.582 ++                           void (*handle_output)(VirtIODevice *,
   2.583 ++                                                 VirtQueue *));
   2.584 ++
   2.585 ++void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
   2.586 ++                   unsigned int len);
   2.587 ++
   2.588 ++int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem);
   2.589 ++
   2.590 ++void virtio_notify(VirtIODevice *vdev, VirtQueue *vq);
   2.591 ++
   2.592 ++#endif
   2.593 +Index: qemu-0.9.1/Makefile.target
   2.594 +===================================================================
   2.595 +--- qemu-0.9.1.orig/Makefile.target	2008-02-07 13:36:23.000000000 +0000
   2.596 ++++ qemu-0.9.1/Makefile.target	2008-02-07 13:36:37.000000000 +0000
   2.597 +@@ -437,7 +437,7 @@
   2.598 + VL_OBJS += rtl8139.o
   2.599 + 
   2.600 + # virtio devices
   2.601 +-VL_OBJS += virtio.o
   2.602 ++VL_OBJS += virtio.o virtio-net.o
   2.603 + 
   2.604 + ifeq ($(TARGET_BASE_ARCH), i386)
   2.605 + # Hardware support
   2.606 +Index: qemu-0.9.1/hw/pc.h
   2.607 +===================================================================
   2.608 +--- qemu-0.9.1.orig/hw/pc.h	2008-01-06 19:38:42.000000000 +0000
   2.609 ++++ qemu-0.9.1/hw/pc.h	2008-02-07 13:36:37.000000000 +0000
   2.610 +@@ -142,4 +142,9 @@
   2.611 + 
   2.612 + void isa_ne2000_init(int base, qemu_irq irq, NICInfo *nd);
   2.613 + 
   2.614 ++/* virtio-net.c */
   2.615 ++
   2.616 ++void *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn);
   2.617 ++
   2.618 ++
   2.619 + #endif
   2.620 +Index: qemu-0.9.1/hw/pci.c
   2.621 +===================================================================
   2.622 +--- qemu-0.9.1.orig/hw/pci.c	2008-01-06 19:38:42.000000000 +0000
   2.623 ++++ qemu-0.9.1/hw/pci.c	2008-02-07 13:36:37.000000000 +0000
   2.624 +@@ -25,6 +25,7 @@
   2.625 + #include "pci.h"
   2.626 + #include "console.h"
   2.627 + #include "net.h"
   2.628 ++#include "pc.h"
   2.629 + 
   2.630 + //#define DEBUG_PCI
   2.631 + 
   2.632 +@@ -638,9 +639,11 @@
   2.633 +         pci_rtl8139_init(bus, nd, devfn);
   2.634 +     } else if (strcmp(nd->model, "pcnet") == 0) {
   2.635 +         pci_pcnet_init(bus, nd, devfn);
   2.636 ++    } else if (strcmp(nd->model, "virtio") == 0) {
   2.637 ++       virtio_net_init(bus, nd, devfn);
   2.638 +     } else if (strcmp(nd->model, "?") == 0) {
   2.639 +         fprintf(stderr, "qemu: Supported PCI NICs: i82551 i82557b i82559er"
   2.640 +-                        " ne2k_pci pcnet rtl8139\n");
   2.641 ++                        " ne2k_pci pcnet rtl8139 virtio\n");
   2.642 +         exit (1);
   2.643 +     } else {
   2.644 +         fprintf(stderr, "qemu: Unsupported NIC: %s\n", nd->model);
   2.645 +Index: qemu-0.9.1/hw/virtio-net.c
   2.646 +===================================================================
   2.647 +--- /dev/null	1970-01-01 00:00:00.000000000 +0000
   2.648 ++++ qemu-0.9.1/hw/virtio-net.c	2008-02-07 13:36:37.000000000 +0000
   2.649 +@@ -0,0 +1,178 @@
   2.650 ++/*
   2.651 ++ * Virtio Network Device
   2.652 ++ *
   2.653 ++ * Copyright IBM, Corp. 2007
   2.654 ++ *
   2.655 ++ * Authors:
   2.656 ++ *  Anthony Liguori   <address@hidden>
   2.657 ++ *
   2.658 ++ * This work is licensed under the terms of the GNU GPL, version 2.  See
   2.659 ++ * the COPYING file in the top-level directory.
   2.660 ++ *
   2.661 ++ */
   2.662 ++
   2.663 ++#include "virtio.h"
   2.664 ++#include "net.h"
   2.665 ++#include "pc.h"
   2.666 ++
   2.667 ++/* from Linux's virtio_net.h */
   2.668 ++
   2.669 ++/* The ID for virtio_net */
   2.670 ++#define VIRTIO_ID_NET  1
   2.671 ++
   2.672 ++/* The feature bitmap for virtio net */
   2.673 ++#define VIRTIO_NET_F_NO_CSUM   0
   2.674 ++#define VIRTIO_NET_F_TSO4      1
   2.675 ++#define VIRTIO_NET_F_UFO       2
   2.676 ++#define VIRTIO_NET_F_TSO4_ECN  3
   2.677 ++#define VIRTIO_NET_F_TSO6      4
   2.678 ++#define VIRTIO_NET_F_MAC       5
   2.679 ++
   2.680 ++/* The config defining mac address (6 bytes); virtio_net_init() passes 6 as config size */
   2.681 ++struct virtio_net_config
   2.682 ++{
   2.683 ++    uint8_t mac[6];
   2.684 ++} __attribute__((packed));
   2.685 ++
   2.686 ++/* This is the first element of the scatter-gather list.  If you don't
   2.687 ++ * specify GSO or CSUM features, you can simply ignore the header. */
   2.688 ++struct virtio_net_hdr
   2.689 ++{
   2.690 ++#define VIRTIO_NET_HDR_F_NEEDS_CSUM    1       // Use csum_start, csum_offset
   2.691 ++    uint8_t flags;
   2.692 ++#define VIRTIO_NET_HDR_GSO_NONE                0       // Not a GSO frame
   2.693 ++#define VIRTIO_NET_HDR_GSO_TCPV4       1       // GSO frame, IPv4 TCP (TSO)
   2.694 ++/* FIXME: Do we need this?  If they said they can handle ECN, do they care? */
   2.695 ++#define VIRTIO_NET_HDR_GSO_TCPV4_ECN   2       // GSO frame, IPv4 TCP w/ ECN
   2.696 ++#define VIRTIO_NET_HDR_GSO_UDP         3       // GSO frame, IPv4 UDP (UFO)
   2.697 ++#define VIRTIO_NET_HDR_GSO_TCPV6       4       // GSO frame, IPv6 TCP
   2.698 ++    uint8_t gso_type;
   2.699 ++    uint16_t gso_size;
   2.700 ++    uint16_t csum_start;
   2.701 ++    uint16_t csum_offset;
   2.702 ++}; /* this file only ever writes flags and gso_type (in virtio_net_receive) */
   2.703 ++/* State of one virtio network device. */
   2.704 ++typedef struct VirtIONet
   2.705 ++{
   2.706 ++    VirtIODevice vdev; /* must stay first: to_virtio_net() casts VirtIODevice * to this type */
   2.707 ++    uint8_t mac[6];    /* copied from nd->macaddr in virtio_net_init() */
   2.708 ++    VirtQueue *rx_vq;
   2.709 ++    VirtQueue *tx_vq;
   2.710 ++    VLANClientState *vc;
   2.711 ++    int can_receive;   /* set on rx queue notify, cleared when no rx buffer is available */
   2.712 ++} VirtIONet;
   2.713 ++/* Downcast; valid because vdev is the first member of VirtIONet. */
   2.714 ++static VirtIONet *to_virtio_net(VirtIODevice *vdev)
   2.715 ++{
   2.716 ++    return (VirtIONet *)vdev;
   2.717 ++}
   2.718 ++/* update_config callback: write the device's MAC address into config. */
   2.719 ++static void virtio_net_update_config(VirtIODevice *vdev, uint8_t *config)
   2.720 ++{
   2.721 ++    VirtIONet *n = to_virtio_net(vdev);
   2.722 ++    struct virtio_net_config netcfg;
   2.723 ++
   2.724 ++    memcpy(netcfg.mac, n->mac, 6);
   2.725 ++    memcpy(config, &netcfg, sizeof(netcfg)); /* config must hold sizeof(netcfg) bytes */
   2.726 ++}
   2.727 ++/* get_features callback: the only feature bit offered is VIRTIO_NET_F_MAC. */
   2.728 ++static uint32_t virtio_net_get_features(VirtIODevice *vdev)
   2.729 ++{
   2.730 ++    return (1 << VIRTIO_NET_F_MAC);
   2.731 ++}
   2.732 ++
   2.733 ++/* RX */
   2.734 ++/* Notify handler of the rx queue: just re-enable packet delivery; vq is not used. */
   2.735 ++static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
   2.736 ++{
   2.737 ++    VirtIONet *n = to_virtio_net(vdev);
   2.738 ++    n->can_receive = 1;
   2.739 ++}
   2.740 ++/* can-receive callback: true once the guest set DRIVER_OK and can_receive is set. */
   2.741 ++static int virtio_net_can_receive(void *opaque)
   2.742 ++{
   2.743 ++    VirtIONet *n = opaque;
   2.744 ++
   2.745 ++    return (n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK) && n->can_receive;
   2.746 ++}
   2.747 ++/* Receive callback: copy one packet of size bytes from buf into the next guest rx buffer. */
   2.748 ++static void virtio_net_receive(void *opaque, const uint8_t *buf, int size)
   2.749 ++{
   2.750 ++    VirtIONet *n = opaque;
   2.751 ++    VirtQueueElement elem;
   2.752 ++    struct virtio_net_hdr *hdr;
   2.753 ++    int offset, i;
   2.754 ++
   2.755 ++    /* FIXME: the drivers really need to set their status better */
   2.756 ++    if (n->rx_vq->vring.avail == NULL) {
   2.757 ++       n->can_receive = 0;
   2.758 ++       return;
   2.759 ++    }
   2.760 ++
   2.761 ++    if (virtqueue_pop(n->rx_vq, &elem) == 0) {
   2.762 ++       /* wait until the guest adds some rx bufs */
   2.763 ++       n->can_receive = 0;
   2.764 ++       return;
   2.765 ++    }
   2.766 ++    if (elem.in_num == 0 || elem.in_sg[0].iov_len < sizeof(*hdr)) {
   2.767 ++       virtqueue_push(n->rx_vq, &elem, 0); /* no room for the header: return chain, drop packet */
   2.768 ++       return;
   2.769 ++    }
   2.770 ++    hdr = (void *)elem.in_sg[0].iov_base;
   2.771 ++    hdr->flags = 0;
   2.772 ++    hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
   2.773 ++
   2.774 ++    /* copy in packet.  ugh -- data starts at in_sg[1]; excess bytes are silently cut */
   2.775 ++    for (offset = 0, i = 1; offset < size && i < elem.in_num; i++) {
   2.776 ++       int len = MIN(elem.in_sg[i].iov_len, size - offset);
   2.777 ++       memcpy(elem.in_sg[i].iov_base, buf + offset, len);
   2.778 ++       offset += len;
   2.779 ++    }
   2.780 ++
   2.781 ++    /* signal other side */
   2.782 ++    virtqueue_push(n->rx_vq, &elem, sizeof(*hdr) + offset);
   2.783 ++    virtio_notify(&n->vdev, n->rx_vq);
   2.784 ++}
   2.785 ++
   2.786 ++/* TX: notify handler of the tx queue; drains vq and sends what the guest queued. */
   2.787 ++static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
   2.788 ++{
   2.789 ++    VirtIONet *n = to_virtio_net(vdev);
   2.790 ++    VirtQueueElement elem;
   2.791 ++
   2.792 ++    while (virtqueue_pop(vq, &elem)) {
   2.793 ++       int i;
   2.794 ++       size_t len = 0;
   2.795 ++
   2.796 ++       /* ignore the header for now: out_sg[0] is skipped.  NOTE(review): each later entry is sent by its own qemu_send_packet() call -- confirm frames never span entries */
   2.797 ++       for (i = 1; i < elem.out_num; i++) {
   2.798 ++           qemu_send_packet(n->vc, elem.out_sg[i].iov_base,
   2.799 ++                            elem.out_sg[i].iov_len);
   2.800 ++           len += elem.out_sg[i].iov_len;
   2.801 ++       }
   2.802 ++       /* Hand the chain back and interrupt the guest once per popped chain. */
   2.803 ++       virtqueue_push(vq, &elem, sizeof(struct virtio_net_hdr) + len);
   2.804 ++       virtio_notify(&n->vdev, vq);
   2.805 ++    }
   2.806 ++}
   2.807 ++
   2.808 ++void *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn)
   2.809 ++{
   2.810 ++    VirtIONet *net;
   2.811 ++
   2.812 ++    /* NOTE: devfn is not referenced here; 6900 is 0x1AF4 written in decimal */
   2.813 ++    net = (VirtIONet *)virtio_init_pci(bus, "virtio-net", 6900, 0x1000,
   2.814 ++                                      0, VIRTIO_ID_NET,
   2.815 ++                                      0x02, 0x00, 0x00,
   2.816 ++                                      6, sizeof(VirtIONet));
   2.817 ++
   2.818 ++    net->can_receive = 0;
   2.819 ++    memcpy(net->mac, nd->macaddr, 6);
   2.820 ++    net->vdev.get_features = virtio_net_get_features;
   2.821 ++    net->vdev.update_config = virtio_net_update_config;
   2.822 ++    net->rx_vq = virtio_add_queue(&net->vdev, 512, virtio_net_handle_rx);
   2.823 ++    net->tx_vq = virtio_add_queue(&net->vdev, 128, virtio_net_handle_tx);
   2.824 ++    net->vc = qemu_new_vlan_client(nd->vlan, virtio_net_receive,
   2.825 ++                                  virtio_net_can_receive, net);
   2.826 ++    return &net->vdev;
   2.827 ++}
   2.828 +Index: qemu-0.9.1/Makefile.target
   2.829 +===================================================================
   2.830 +--- qemu-0.9.1.orig/Makefile.target	2008-02-07 13:36:37.000000000 +0000
   2.831 ++++ qemu-0.9.1/Makefile.target	2008-02-07 13:38:53.000000000 +0000
   2.832 +@@ -437,7 +437,7 @@
   2.833 + VL_OBJS += rtl8139.o
   2.834 + 
   2.835 + # virtio devices
   2.836 +-VL_OBJS += virtio.o virtio-net.o
   2.837 ++VL_OBJS += virtio.o virtio-net.o virtio-blk.o
   2.838 + 
   2.839 + ifeq ($(TARGET_BASE_ARCH), i386)
   2.840 + # Hardware support
   2.841 +Index: qemu-0.9.1/hw/pc.c
   2.842 +===================================================================
   2.843 +--- qemu-0.9.1.orig/hw/pc.c	2008-01-06 19:38:42.000000000 +0000
   2.844 ++++ qemu-0.9.1/hw/pc.c	2008-02-07 13:38:53.000000000 +0000
   2.845 +@@ -1008,6 +1008,18 @@
   2.846 + 	    }
   2.847 +         }
   2.848 +     }
   2.849 ++
   2.850 ++    /* Add virtio block devices */
   2.851 ++    if (pci_enabled) {
   2.852 ++       int index;
   2.853 ++       int unit_id = 0;
   2.854 ++
   2.855 ++       while ((index = drive_get_index(IF_VIRTIO, 0, unit_id)) != -1) {
   2.856 ++           virtio_blk_init(pci_bus, 0x5002, 0x2258,
   2.857 ++                           drives_table[index].bdrv);
   2.858 ++           unit_id++;
   2.859 ++       }
   2.860 ++    }
   2.861 + }
   2.862 + 
   2.863 + static void pc_init_pci(int ram_size, int vga_ram_size,
   2.864 +Index: qemu-0.9.1/hw/pc.h
   2.865 +===================================================================
   2.866 +--- qemu-0.9.1.orig/hw/pc.h	2008-02-07 13:36:37.000000000 +0000
   2.867 ++++ qemu-0.9.1/hw/pc.h	2008-02-07 13:38:53.000000000 +0000
   2.868 +@@ -147,4 +147,8 @@
   2.869 + void *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn);
   2.870 + 
   2.871 + 
   2.872 ++/* virtio-blk.h */
   2.873 ++void *virtio_blk_init(PCIBus *bus, uint16_t vendor, uint16_t device,
   2.874 ++                     BlockDriverState *bs);
   2.875 ++
   2.876 + #endif
   2.877 +Index: qemu-0.9.1/hw/virtio-blk.c
   2.878 +===================================================================
   2.879 +--- /dev/null	1970-01-01 00:00:00.000000000 +0000
   2.880 ++++ qemu-0.9.1/hw/virtio-blk.c	2008-02-07 13:38:53.000000000 +0000
   2.881 +@@ -0,0 +1,163 @@
   2.882 ++/*
   2.883 ++ * Virtio Block Device
   2.884 ++ *
   2.885 ++ * Copyright IBM, Corp. 2007
   2.886 ++ *
   2.887 ++ * Authors:
   2.888 ++ *  Anthony Liguori   <address@hidden>
   2.889 ++ *
   2.890 ++ * This work is licensed under the terms of the GNU GPL, version 2.  See
   2.891 ++ * the COPYING file in the top-level directory.
   2.892 ++ *
   2.893 ++ */
   2.894 ++
   2.895 ++#include "virtio.h"
   2.896 ++#include "block.h"
   2.897 ++#include "pc.h"
   2.898 ++
   2.899 ++/* from Linux's linux/virtio_blk.h */
   2.900 ++
   2.901 ++/* The ID for virtio_block */
   2.902 ++#define VIRTIO_ID_BLOCK        2
   2.903 ++
   2.904 ++/* Feature bits */
   2.905 ++#define VIRTIO_BLK_F_BARRIER   0       /* Does host support barriers? */
   2.906 ++#define VIRTIO_BLK_F_SIZE_MAX  1       /* Indicates maximum segment size */
   2.907 ++#define VIRTIO_BLK_F_SEG_MAX   2       /* Indicates maximum # of segments */
   2.908 ++
   2.909 ++struct virtio_blk_config
   2.910 ++{   /* layout that virtio_blk_update_config() memcpy()s into its config buffer */
   2.911 ++    uint64_t capacity;  /* filled from bdrv_get_geometry() by virtio_blk_update_config() */
   2.912 ++    uint32_t size_max;  /* maximum segment size (cf. VIRTIO_BLK_F_SIZE_MAX) */
   2.913 ++    uint32_t seg_max;   /* maximum # of segments (cf. VIRTIO_BLK_F_SEG_MAX) */
   2.914 ++};
   2.915 ++
   2.916 ++/* These two define direction. */
   2.917 ++#define VIRTIO_BLK_T_IN                0
   2.918 ++#define VIRTIO_BLK_T_OUT       1
   2.919 ++
   2.920 ++/* This bit says it's a scsi command, not an actual read or write. */
   2.921 ++#define VIRTIO_BLK_T_SCSI_CMD  2
   2.922 ++
   2.923 ++/* Barrier before this op. */
   2.924 ++#define VIRTIO_BLK_T_BARRIER   0x80000000
   2.925 ++
   2.926 ++/* Request header: virtio_blk_handle_output() reads it from elem.out_sg[0]. */
   2.927 ++struct virtio_blk_outhdr
   2.928 ++{
   2.929 ++    /* VIRTIO_BLK_T* bits (IN/OUT direction, SCSI_CMD, BARRIER) */
   2.930 ++    uint32_t type;
   2.931 ++    /* io priority (not consulted anywhere in this file) */
   2.932 ++    uint32_t ioprio;
   2.933 ++    /* First sector of the transfer (ie. 512 byte offset) */
   2.934 ++    uint64_t sector;
   2.935 ++    /* Where to put reply (not consulted anywhere in this file). */
   2.936 ++    uint64_t id;
   2.937 ++};
   2.938 ++
   2.939 ++#define VIRTIO_BLK_S_OK                0
   2.940 ++#define VIRTIO_BLK_S_IOERR     1
   2.941 ++#define VIRTIO_BLK_S_UNSUPP    2
   2.942 ++
   2.943 ++/* Completion status; the handler writes it via the LAST element of elem.in_sg */
   2.944 ++struct virtio_blk_inhdr
   2.945 ++{
   2.946 ++    unsigned char status;   /* one of VIRTIO_BLK_S_* */
   2.947 ++};
   2.948 ++
   2.949 ++typedef struct VirtIOBlock  /* state for one virtio block device instance */
   2.950 ++{
   2.951 ++    VirtIODevice vdev;      /* kept first: to_virtio_blk() casts VirtIODevice * to VirtIOBlock * */
   2.952 ++    BlockDriverState *bs;   /* drive given to virtio_blk_init(); used by bdrv_read/bdrv_write */
   2.953 ++} VirtIOBlock;
   2.954 ++
   2.955 ++static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
   2.956 ++{
   2.957 ++    return (VirtIOBlock *)vdev; /* plain downcast; relies on vdev being VirtIOBlock's first member */
   2.958 ++}
   2.959 ++
   2.960 ++static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
   2.961 ++{
   2.962 ++    VirtIOBlock *s = to_virtio_blk(vdev);
   2.963 ++    VirtQueueElement elem;
   2.964 ++
   2.965 ++    /* Service every request popped from vq, completing each one synchronously. */
   2.966 ++    while (virtqueue_pop(vq, &elem) != 0) {
   2.967 ++       struct virtio_blk_inhdr *in;
   2.968 ++       struct virtio_blk_outhdr *out;
   2.969 ++       unsigned int wlen = sizeof(struct virtio_blk_inhdr);
   2.970 ++       int64_t off;    /* sector index; off_t may be only 32 bits wide */
   2.971 ++       int i, failed = 0;
   2.972 ++
   2.973 ++       /* out_sg[0] is the request header; the last in_sg holds the status */
   2.974 ++       out = (void *)elem.out_sg[0].iov_base;
   2.975 ++       in = (void *)elem.in_sg[elem.in_num - 1].iov_base;
   2.976 ++       off = out->sector;
   2.977 ++
   2.978 ++       if (out->type & VIRTIO_BLK_T_SCSI_CMD) {
   2.979 ++           in->status = VIRTIO_BLK_S_UNSUPP;
   2.980 ++       } else if (out->type & VIRTIO_BLK_T_OUT) {
   2.981 ++           /* write: out_sg[1..] carry the data; remember any failure */
   2.982 ++           for (i = 1; i < elem.out_num; i++) {
   2.983 ++               if (bdrv_write(s->bs, off,
   2.984 ++                              elem.out_sg[i].iov_base,
   2.985 ++                              elem.out_sg[i].iov_len / 512) < 0)
   2.986 ++                   failed = 1;
   2.987 ++               off += elem.out_sg[i].iov_len / 512;
   2.988 ++           }
   2.989 ++
   2.990 ++           in->status = failed ? VIRTIO_BLK_S_IOERR : VIRTIO_BLK_S_OK;
   2.991 ++       } else {
   2.992 ++           /* read: every in_sg except the last (status) receives data */
   2.993 ++           for (i = 0; i < elem.in_num - 1; i++) {
   2.994 ++               if (bdrv_read(s->bs, off,
   2.995 ++                             elem.in_sg[i].iov_base,
   2.996 ++                             elem.in_sg[i].iov_len / 512) < 0)
   2.997 ++                   failed = 1;
   2.998 ++               off += elem.in_sg[i].iov_len / 512;
   2.999 ++               wlen += elem.in_sg[i].iov_len;
  2.1000 ++           }
  2.1001 ++
  2.1002 ++           in->status = failed ? VIRTIO_BLK_S_IOERR : VIRTIO_BLK_S_OK;
  2.1003 ++       }
  2.1004 ++
  2.1005 ++       virtqueue_push(vq, &elem, wlen);
  2.1006 ++       virtio_notify(vdev, vq);
  2.1007 ++    }
  2.1008 ++}
  2.1009 ++
  2.1010 ++static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
  2.1011 ++{
  2.1012 ++    VirtIOBlock *s = to_virtio_blk(vdev);
  2.1013 ++    struct virtio_blk_config blkcfg = { 0 }; /* size_max must not carry stack garbage */
  2.1014 ++    int64_t capacity;
  2.1015 ++    /* Copy the drive capacity and the segment limit into the config buffer. */
  2.1016 ++    bdrv_get_geometry(s->bs, &capacity);
  2.1017 ++    blkcfg.capacity = capacity;
  2.1018 ++    blkcfg.seg_max = 128 - 2;
  2.1019 ++    memcpy(config, &blkcfg, sizeof(blkcfg));
  2.1020 ++}
  2.1021 ++
  2.1022 ++static uint32_t virtio_blk_get_features(VirtIODevice *vdev)
  2.1023 ++{
  2.1024 ++    return (uint32_t)1 << VIRTIO_BLK_F_SEG_MAX; /* seg_max is the only feature bit set */
  2.1025 ++}
  2.1026 ++
  2.1027 ++void *virtio_blk_init(PCIBus *bus, uint16_t vendor, uint16_t device,
  2.1028 ++                     BlockDriverState *bs)
  2.1029 ++{
  2.1030 ++    VirtIOBlock *blk;
  2.1031 ++
  2.1032 ++    /* NOTE: vendor/device are not used; the IDs below are hard-coded (6900 == 0x1AF4) */
  2.1033 ++    blk = (VirtIOBlock *)virtio_init_pci(bus, "virtio-blk", 6900, 0x1001,
  2.1034 ++                                        0, VIRTIO_ID_BLOCK,
  2.1035 ++                                        0x01, 0x80, 0x00,
  2.1036 ++                                        16, sizeof(VirtIOBlock));
  2.1037 ++    blk->bs = bs;
  2.1038 ++    blk->vdev.get_features = virtio_blk_get_features;
  2.1039 ++    blk->vdev.update_config = virtio_blk_update_config;
  2.1040 ++
  2.1041 ++    virtio_add_queue(&blk->vdev, 128, virtio_blk_handle_output);
  2.1042 ++
  2.1043 ++    return &blk->vdev;
  2.1044 ++}
  2.1045 +Index: qemu-0.9.1/sysemu.h
  2.1046 +===================================================================
  2.1047 +--- qemu-0.9.1.orig/sysemu.h	2008-01-06 19:38:42.000000000 +0000
  2.1048 ++++ qemu-0.9.1/sysemu.h	2008-02-07 13:38:53.000000000 +0000
  2.1049 +@@ -117,7 +117,7 @@
  2.1050 + #endif
  2.1051 + 
  2.1052 + typedef enum {
  2.1053 +-    IF_IDE, IF_SCSI, IF_FLOPPY, IF_PFLASH, IF_MTD, IF_SD
  2.1054 ++    IF_IDE, IF_SCSI, IF_FLOPPY, IF_PFLASH, IF_MTD, IF_SD, IF_VIRTIO
  2.1055 + } BlockInterfaceType;
  2.1056 + 
  2.1057 + typedef struct DriveInfo {
  2.1058 +Index: qemu-0.9.1/vl.c
  2.1059 +===================================================================
  2.1060 +--- qemu-0.9.1.orig/vl.c	2008-01-06 19:38:42.000000000 +0000
  2.1061 ++++ qemu-0.9.1/vl.c	2008-02-07 13:40:52.000000000 +0000
  2.1062 +@@ -4953,6 +4953,9 @@
  2.1063 + 	} else if (!strcmp(buf, "sd")) {
  2.1064 + 	    type = IF_SD;
  2.1065 +             max_devs = 0;
  2.1066 ++	} else if (!strcmp(buf, "virtio")) {
  2.1067 ++	    type = IF_VIRTIO;
  2.1068 ++            max_devs = 0;
  2.1069 + 	} else {
  2.1070 +             fprintf(stderr, "qemu: '%s' unsupported bus type '%s'\n", str, buf);
  2.1071 +             return -1;
  2.1072 +@@ -5141,6 +5144,7 @@
  2.1073 +         break;
  2.1074 +     case IF_PFLASH:
  2.1075 +     case IF_MTD:
  2.1076 ++    case IF_VIRTIO:
  2.1077 +         break;
  2.1078 +     }
  2.1079 +     if (!file[0])