[ipxe-devel] [PATCH 3/4] [virtio] Add virtio 1.0 PCI support

Ladi Prosek lprosek at redhat.com
Wed Mar 9 18:20:02 UTC 2016


This commit adds support for driving virtio 1.0 PCI devices.
In addition to various helpers, a number of vpm_ functions are
introduced to be used instead of their legacy vp_ counterparts
when accessing virtio 1.0 (aka modern) devices.

Signed-off-by: Ladi Prosek <lprosek at redhat.com>
---
 src/drivers/bus/virtio-pci.c   | 213 ++++++++++++++++++++++++++++++++++++++++-
 src/drivers/bus/virtio-ring.c  |  12 ++-
 src/drivers/net/virtio-net.c   |   2 +-
 src/include/ipxe/errfile.h     |   1 +
 src/include/ipxe/virtio-pci.h  | 176 ++++++++++++++++++++++++++++++++++
 src/include/ipxe/virtio-ring.h |   1 +
 6 files changed, 396 insertions(+), 9 deletions(-)

diff --git a/src/drivers/bus/virtio-pci.c b/src/drivers/bus/virtio-pci.c
index fbef067..cfa321b 100644
--- a/src/drivers/bus/virtio-pci.c
+++ b/src/drivers/bus/virtio-pci.c
@@ -11,10 +11,13 @@
  *
  */
 
+#include "errno.h"
 #include "etherboot.h"
 #include "ipxe/io.h"
-#include "ipxe/virtio-ring.h"
+#include "ipxe/iomap.h"
+#include "ipxe/pci.h"
 #include "ipxe/virtio-pci.h"
+#include "ipxe/virtio-ring.h"
 
 int vp_find_vq(unsigned int ioaddr, int queue_index,
                struct vring_virtqueue *vq)
@@ -30,19 +33,19 @@ int vp_find_vq(unsigned int ioaddr, int queue_index,
 
    num = inw(ioaddr + VIRTIO_PCI_QUEUE_NUM);
    if (!num) {
-           printf("ERROR: queue size is 0\n");
+           DBG("VIRTIO-PCI ERROR: queue size is 0\n");
            return -1;
    }
 
    if (num > MAX_QUEUE_NUM) {
-           printf("ERROR: queue size %d > %d\n", num, MAX_QUEUE_NUM);
+           DBG("VIRTIO-PCI ERROR: queue size %d > %d\n", num, MAX_QUEUE_NUM);
            return -1;
    }
 
    /* check if the queue is already active */
 
    if (inl(ioaddr + VIRTIO_PCI_QUEUE_PFN)) {
-           printf("ERROR: queue already active\n");
+           DBG("VIRTIO-PCI ERROR: queue already active\n");
            return -1;
    }
 
@@ -62,3 +65,205 @@ int vp_find_vq(unsigned int ioaddr, int queue_index,
 
    return num;
 }
+
+/*
+ * Locate the vendor-specific PCI capability whose virtio cfg_type matches.
+ * Returns the capability's config-space offset, or 0 if none is found.
+ */
+int virtio_pci_find_capability(struct pci_device *pci, uint8_t cfg_type)
+{
+    uint8_t cap_type, cap_bar;
+    int cap;
+
+    cap = pci_find_capability(pci, PCI_CAP_ID_VNDR);
+    while (cap > 0) {
+        pci_read_config_byte(pci,
+            cap + offsetof(struct virtio_pci_cap, cfg_type), &cap_type);
+        pci_read_config_byte(pci,
+            cap + offsetof(struct virtio_pci_cap, bar), &cap_bar);
+
+        /* BAR values above 0x5 are reserved; such structures are ignored */
+        if (cap_bar <= 0x5 && cap_type == cfg_type) {
+            return cap;
+        }
+
+        cap = pci_find_next_capability(pci, cap, PCI_CAP_ID_VNDR);
+    }
+    return 0;
+}
+
+/* Map the start/size window of a virtio capability's BAR region into *region.
+ * At least minlen bytes must exist past start, the resulting offset must be
+ * aligned to align (a power of two, given the mask test), and minlen bytes
+ * must fit in the BAR. Returns 0 on success or a negative errno value. */
+int virtio_pci_map_capability(struct pci_device *pci, int cap, size_t minlen,
+                              u32 align, u32 start, u32 size,
+                              struct virtio_pci_region *region)
+{
+    u8 bar;
+    u32 offset, length, base_raw;
+    unsigned long base, bar_size;
+
+    pci_read_config_byte(pci, cap + offsetof(struct virtio_pci_cap, bar), &bar);
+    pci_read_config_dword(pci, cap + offsetof(struct virtio_pci_cap, offset),
+                          &offset);
+    pci_read_config_dword(pci, cap + offsetof(struct virtio_pci_cap, length),
+                          &length);
+
+    if (length <= start) {
+        DBG("VIRTIO-PCI bad capability len %u (>%u expected)\n", length, start);
+        return -EINVAL;
+    }
+    if (length - start < minlen) {
+        DBG("VIRTIO-PCI bad capability len %u (>=%zu expected)\n", length, minlen);
+        return -EINVAL;
+    }
+    length -= start;
+    if (start + offset < offset) {
+        DBG("VIRTIO-PCI map wrap-around %u+%u\n", start, offset);
+        return -EINVAL;
+    }
+    offset += start;
+    if (offset & (align - 1)) {
+        DBG("VIRTIO-PCI offset %u not aligned to %u\n", offset, align);
+        return -EINVAL;
+    }
+    length = (length > size) ? size : length;
+
+    bar_size = pci_bar_size(pci, PCI_BASE_ADDRESS(bar));
+    if (minlen + offset < minlen || minlen + offset > bar_size) {
+        DBG("VIRTIO-PCI map virtio %zu@%u out of range on bar %i length %lu\n",
+            minlen, offset, bar, bar_size);
+        return -EINVAL;
+    }
+    base = pci_bar_start(pci, PCI_BASE_ADDRESS(bar));
+    pci_read_config_dword(pci, PCI_BASE_ADDRESS(bar), &base_raw);
+
+    region->length = length; /* region is only written once validation passed */
+    if (base_raw & PCI_BASE_ADDRESS_SPACE_IO) {
+        region->base = (void *)(base + offset);
+        region->flags = VIRTIO_PCI_REGION_PORT;
+    } else {
+        region->base = ioremap(base + offset, length);
+        region->flags = 0;
+    }
+    if (!region->base) {
+        DBG("VIRTIO-PCI unable to map virtio %u@%u on bar %i\n",
+            length, offset, bar);
+        return -ENOMEM; /* never report success with a NULL base */
+    }
+    return 0;
+}
+
+/* iounmap() a region unless it has no base, is port I/O, or is NO_UNMAP */
+void virtio_pci_unmap_capability(struct virtio_pci_region *region)
+{
+    unsigned keep = VIRTIO_PCI_REGION_PORT | VIRTIO_PCI_REGION_NO_UNMAP;
+
+    if (region->base && !(region->flags & keep)) {
+        iounmap(region->base);
+    }
+}
+
+void vpm_notify(struct vring_virtqueue *vq)
+{ /* Write the queue's index to offset 0 of its notification region */
+    vpm_iowrite16(&vq->notification, (u16)vq->queue_index, 0);
+}
+
+/* Set up the first nvqs virtqueues of a virtio 1.0 device: program each
+ * ring's addresses, locate its notification region, then enable them all.
+ * Returns 0 or a negative errno value; on failure the notification regions
+ * mapped so far are unmapped again. */
+int vpm_find_vqs(struct virtio_pci_modern_device *vdev,
+                 unsigned nvqs, struct vring_virtqueue *vqs)
+{
+    unsigned i;
+    struct vring_virtqueue *vq;
+    u16 size, off;
+    int err;
+
+    if (nvqs > vpm_ioread16(&vdev->common, COMMON_OFFSET(num_queues))) {
+        return -ENOENT;
+    }
+
+    for (i = 0; i < nvqs; i++) {
+        /* Select the queue we're interested in */
+        vpm_iowrite16(&vdev->common, (u16)i, COMMON_OFFSET(queue_select));
+
+        /* A size of 0 means the queue is not available. QEMU has a bug
+         * where queues don't revert to inactive on device reset, so the
+         * queue_enable field is not checked until that is fixed. */
+        size = vpm_ioread16(&vdev->common, COMMON_OFFSET(queue_size));
+        if (!size) {
+            err = -ENOENT;
+            goto err_map_notify;
+        }
+        if (size & (size - 1)) {
+            DBG("VIRTIO-PCI %p: bad queue size %u\n", vdev, size);
+            err = -EINVAL;
+            goto err_map_notify;
+        }
+
+        vq = &vqs[i];
+        vq->queue_index = i;
+
+        /* get offset of notification word for this vq */
+        off = vpm_ioread16(&vdev->common, COMMON_OFFSET(queue_notify_off));
+        vq->vring.num = size;
+        vring_init(&vq->vring, size, (unsigned char *)vq->queue);
+
+        /* program the queue size and the ring addresses */
+        vpm_iowrite16(&vdev->common, size, COMMON_OFFSET(queue_size));
+
+        vpm_iowrite64(&vdev->common, virt_to_phys(vq->vring.desc),
+                      COMMON_OFFSET(queue_desc_lo),
+                      COMMON_OFFSET(queue_desc_hi));
+        vpm_iowrite64(&vdev->common, virt_to_phys(vq->vring.avail),
+                      COMMON_OFFSET(queue_avail_lo),
+                      COMMON_OFFSET(queue_avail_hi));
+        vpm_iowrite64(&vdev->common, virt_to_phys(vq->vring.used),
+                      COMMON_OFFSET(queue_used_lo),
+                      COMMON_OFFSET(queue_used_hi));
+
+        if (vdev->notify_base.base) {
+            /* offset should not wrap */
+            if ((u64)off * vdev->notify_offset_multiplier + 2
+                > vdev->notify_base.length) {
+                DBG("VIRTIO-PCI %p: bad notification offset %u (x %u) "
+                    "for queue %u > %zu\n",
+                    vdev, off, vdev->notify_offset_multiplier,
+                    i, vdev->notify_base.length);
+                err = -EINVAL;
+                goto err_map_notify;
+            }
+            vq->notification.base = (void *)(vdev->notify_base.base +
+                off * vdev->notify_offset_multiplier);
+            vq->notification.length = 2;
+            vq->notification.flags = VIRTIO_PCI_REGION_NO_UNMAP;
+        } else {
+            err = virtio_pci_map_capability(vdev->pci,
+                vdev->notify_map_cap, 2, 2,
+                off * vdev->notify_offset_multiplier, 2,
+                &vq->notification);
+            /* err is assigned on this path only, so test it here */
+            if (err) {
+                goto err_map_notify;
+            }
+        }
+    }
+
+    /* Select and activate all queues. Has to be done last: once we do
+     * this, there's no way to go back except reset. */
+    for (i = 0; i < nvqs; i++) {
+        vpm_iowrite16(&vdev->common, (u16)vqs[i].queue_index,
+                      COMMON_OFFSET(queue_select));
+        vpm_iowrite16(&vdev->common, 1, COMMON_OFFSET(queue_enable));
+    }
+    return 0;
+
+err_map_notify:
+    /* Undo the virtio_pci_map_capability calls. */
+    while (i-- > 0) {
+        virtio_pci_unmap_capability(&vqs[i].notification);
+    }
+    return err;
+}
diff --git a/src/drivers/bus/virtio-ring.c b/src/drivers/bus/virtio-ring.c
index e55b6d0..93e2573 100644
--- a/src/drivers/bus/virtio-ring.c
+++ b/src/drivers/bus/virtio-ring.c
@@ -18,8 +18,8 @@ FILE_LICENCE ( GPL2_OR_LATER );
 
 #include "etherboot.h"
 #include "ipxe/io.h"
-#include "ipxe/virtio-ring.h"
 #include "ipxe/virtio-pci.h"
+#include "ipxe/virtio-ring.h"
 
 #define BUG() do { \
    printf("BUG: failure at %s:%d/%s()!\n", \
@@ -130,7 +130,11 @@ void vring_kick(unsigned int ioaddr, struct vring_virtqueue *vq, int num_added)
    vr->avail->idx += num_added;
 
    mb();
-   if (!(vr->used->flags & VRING_USED_F_NO_NOTIFY))
-           vp_notify(ioaddr, vq->queue_index);
+   if (!(vr->used->flags & VRING_USED_F_NO_NOTIFY)) {
+           if (ioaddr) {
+                   vp_notify(ioaddr, vq->queue_index);
+           } else {
+                   vpm_notify(vq);
+           }
+   }
 }
-
diff --git a/src/drivers/net/virtio-net.c b/src/drivers/net/virtio-net.c
index 533ccb0..10a9f71 100644
--- a/src/drivers/net/virtio-net.c
+++ b/src/drivers/net/virtio-net.c
@@ -30,8 +30,8 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
 #include <ipxe/pci.h>
 #include <ipxe/if_ether.h>
 #include <ipxe/ethernet.h>
-#include <ipxe/virtio-ring.h>
 #include <ipxe/virtio-pci.h>
+#include <ipxe/virtio-ring.h>
 #include "virtio-net.h"
 
 /*
diff --git a/src/include/ipxe/errfile.h b/src/include/ipxe/errfile.h
index 65b4d9c..2fb4898 100644
--- a/src/include/ipxe/errfile.h
+++ b/src/include/ipxe/errfile.h
@@ -345,6 +345,7 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
 #define ERRFILE_efi_pxe		      ( ERRFILE_OTHER | 0x004a0000 )
 #define ERRFILE_efi_usb		      ( ERRFILE_OTHER | 0x004b0000 )
 #define ERRFILE_efi_fbcon	      ( ERRFILE_OTHER | 0x004c0000 )
+#define ERRFILE_virtio_pci	      ( ERRFILE_OTHER | 0x004d0000 )
 
 /** @} */
 
diff --git a/src/include/ipxe/virtio-pci.h b/src/include/ipxe/virtio-pci.h
index 58c9bf2..5f96d61 100644
--- a/src/include/ipxe/virtio-pci.h
+++ b/src/include/ipxe/virtio-pci.h
@@ -1,6 +1,8 @@
 #ifndef _VIRTIO_PCI_H_
 # define _VIRTIO_PCI_H_
 
+#include <byteswap.h>
+
 /* A 32-bit r/o bitmask of the features supported by the host */
 #define VIRTIO_PCI_HOST_FEATURES        0
 
@@ -92,6 +94,41 @@ struct virtio_pci_common_cfg {
     __le32 queue_used_hi;         /* read-write */
 } __attribute__((packed));
 
+/* Byte offset of a field within struct virtio_pci_common_cfg */
+#define COMMON_OFFSET(field) offsetof(struct virtio_pci_common_cfg, field)
+
+/* Virtio 1.0 PCI region descriptor. Both port I/O and memory mapped I/O
+ * are supported, so base pointers carry flags to tell the two apart. */
+struct virtio_pci_region {
+    void *base;    /* port address or mapped memory address, per flags */
+    size_t length; /* size of the region in bytes */
+
+/* This is a port I/O range */
+#define VIRTIO_PCI_REGION_PORT      0x00000001
+/* No unmap call needed */
+#define VIRTIO_PCI_REGION_NO_UNMAP  0x00000002
+    unsigned flags;
+};
+
+/* Virtio 1.0 device state: a PCI device and its capability regions */
+struct virtio_pci_modern_device {
+    struct pci_device *pci;
+
+    /* VIRTIO_PCI_CAP_COMMON_CFG data, addressed with COMMON_OFFSET() */
+    struct virtio_pci_region common;
+
+    /* VIRTIO_PCI_CAP_DEVICE_CFG data, read byte-wise by vpm_get() */
+    struct virtio_pci_region device;
+
+    /* VIRTIO_PCI_CAP_ISR_CFG data, read by vpm_get_isr() */
+    struct virtio_pci_region isr;
+
+    /* VIRTIO_PCI_CAP_NOTIFY_CFG data */
+    struct virtio_pci_region notify_base; /* shared by all queues when mapped */
+    int notify_map_cap;           /* capability mapped per queue otherwise */
+    u32 notify_offset_multiplier; /* scales queue_notify_off to a byte offset */
+};
+
 static inline u32 vp_get_features(unsigned int ioaddr)
 {
    return inl(ioaddr + VIRTIO_PCI_HOST_FEATURES);
@@ -151,6 +188,145 @@ static inline void vp_del_vq(unsigned int ioaddr, int queue_index)
    outl(0, ioaddr + VIRTIO_PCI_QUEUE_PFN);
 }
 
+struct vring_virtqueue;
+
 int vp_find_vq(unsigned int ioaddr, int queue_index,
                struct vring_virtqueue *vq);
+
+/* Virtio 1.0 I/O routines with both port I/O and memory I/O support
+ * and endianness conversions */
+
+/* Write a byte at offset within region, by port I/O or MMIO as flagged. */
+static inline void vpm_iowrite8(struct virtio_pci_region *region,
+                                u8 data, size_t offset)
+{
+  if (!(region->flags & VIRTIO_PCI_REGION_PORT))
+    writeb(data, region->base + offset);
+  else
+    outb(data, region->base + offset);
+}
+
+/* Write a 16-bit value, converted to little-endian, at offset in region. */
+static inline void vpm_iowrite16(struct virtio_pci_region *region,
+                                 u16 data, size_t offset)
+{
+  if (!(region->flags & VIRTIO_PCI_REGION_PORT))
+    writew(cpu_to_le16(data), region->base + offset);
+  else
+    outw(cpu_to_le16(data), region->base + offset);
+}
+
+/* Write a 32-bit value, converted to little-endian, at offset in region. */
+static inline void vpm_iowrite32(struct virtio_pci_region *region,
+                                 u32 data, size_t offset)
+{
+  if (!(region->flags & VIRTIO_PCI_REGION_PORT))
+    writel(cpu_to_le32(data), region->base + offset);
+  else
+    outl(cpu_to_le32(data), region->base + offset);
+}
+
+/* Write a 64-bit value as two 32-bit writes: low half first, then high. */
+static inline void vpm_iowrite64(struct virtio_pci_region *region, u64 data,
+                                 size_t offset_lo, size_t offset_hi)
+{
+  u32 lo = (u32)data, hi = (u32)(data >> 32);
+  vpm_iowrite32(region, lo, offset_lo);
+  vpm_iowrite32(region, hi, offset_hi);
+}
+
+/* Read a byte at offset within region, using port I/O when the region is
+ * flagged VIRTIO_PCI_REGION_PORT and a memory mapped read otherwise. */
+static inline u8 vpm_ioread8(struct virtio_pci_region *region, size_t offset)
+{
+  void *addr = region->base + offset;
+
+  return (region->flags & VIRTIO_PCI_REGION_PORT) ? inb(addr) : readb(addr);
+}
+
+/* Read a little-endian 16-bit value at offset in region, in CPU order. */
+static inline u16 vpm_ioread16(struct virtio_pci_region *region, size_t offset)
+{
+  void *addr = region->base + offset;
+  u16 raw = (region->flags & VIRTIO_PCI_REGION_PORT) ? inw(addr) : readw(addr);
+
+  return le16_to_cpu(raw);
+}
+
+/* Read a little-endian 32-bit value at offset in region, in CPU order. */
+static inline u32 vpm_ioread32(struct virtio_pci_region *region, size_t offset)
+{
+  void *addr = region->base + offset;
+  u32 raw = (region->flags & VIRTIO_PCI_REGION_PORT) ? inl(addr) : readl(addr);
+
+  return le32_to_cpu(raw);
+}
+
+/* Virtio 1.0 device manipulation routines */
+
+static inline void vpm_reset(struct virtio_pci_modern_device *vdev)
+{ /* Write 0 to device_status, then read it back and discard the value */
+  vpm_iowrite8(&vdev->common, 0, COMMON_OFFSET(device_status));
+  (void)vpm_ioread8(&vdev->common, COMMON_OFFSET(device_status));
+}
+
+/* OR the given bits into the device_status field (read-modify-write). */
+static inline void vpm_add_status(struct virtio_pci_modern_device *vdev,
+                                  u8 status)
+{
+  status |= vpm_ioread8(&vdev->common, COMMON_OFFSET(device_status));
+  vpm_iowrite8(&vdev->common, status, COMMON_OFFSET(device_status));
+}
+
+/* Read the 64-bit device feature mask: select and read each 32-bit half. */
+static inline u64 vpm_get_features(struct virtio_pci_modern_device *vdev)
+{
+  u32 lo, hi;
+
+  vpm_iowrite32(&vdev->common, 0, COMMON_OFFSET(device_feature_select));
+  lo = vpm_ioread32(&vdev->common, COMMON_OFFSET(device_feature));
+  vpm_iowrite32(&vdev->common, 1, COMMON_OFFSET(device_feature_select));
+  hi = vpm_ioread32(&vdev->common, COMMON_OFFSET(device_feature));
+  return lo | ((u64)hi << 32);
+}
+
+/* Write the 64-bit feature mask to guest_feature as two 32-bit halves,
+ * selecting the half through guest_feature_select before each write. */
+static inline void vpm_set_features(struct virtio_pci_modern_device *vdev,
+                                    u64 features)
+{
+  vpm_iowrite32(&vdev->common, 0, COMMON_OFFSET(guest_feature_select));
+  vpm_iowrite32(&vdev->common, (u32)features, COMMON_OFFSET(guest_feature));
+
+  vpm_iowrite32(&vdev->common, 1, COMMON_OFFSET(guest_feature_select));
+  vpm_iowrite32(&vdev->common, features >> 32, COMMON_OFFSET(guest_feature));
+}
+
+/* Copy len bytes of the device config region, from offset, into buf. */
+static inline void vpm_get(struct virtio_pci_modern_device *vdev,
+                           unsigned offset, void *buf, unsigned len)
+{
+  u8 *dst = buf;
+
+  while (len--)
+    *dst++ = vpm_ioread8(&vdev->device, offset++);
+}
+
+static inline u8 vpm_get_isr(struct virtio_pci_modern_device *vdev)
+{ /* Return the byte at offset 0 of the ISR region */
+  return vpm_ioread8(&vdev->isr, 0);
+}
+
+void vpm_notify(struct vring_virtqueue *vq);
+
+int vpm_find_vqs(struct virtio_pci_modern_device *vdev,
+                 unsigned nvqs, struct vring_virtqueue *vqs);
+
+int virtio_pci_find_capability(struct pci_device *pci, uint8_t cfg_type);
+
+int virtio_pci_map_capability(struct pci_device *pci, int cap, size_t minlen,
+                              u32 align, u32 start, u32 size,
+                              struct virtio_pci_region *region);
+
+void virtio_pci_unmap_capability(struct virtio_pci_region *region);
 #endif /* _VIRTIO_PCI_H_ */
diff --git a/src/include/ipxe/virtio-ring.h b/src/include/ipxe/virtio-ring.h
index e44d13c..20a8570 100644
--- a/src/include/ipxe/virtio-ring.h
+++ b/src/include/ipxe/virtio-ring.h
@@ -79,6 +79,7 @@ struct vring_virtqueue {
    void *vdata[MAX_QUEUE_NUM];
    /* PCI */
    int queue_index;
+   struct virtio_pci_region notification;
 };
 
 struct vring_list {
-- 
2.5.0




More information about the ipxe-devel mailing list