[ipxe-devel] [PATCH 3/4] [virtio] Add virtio 1.0 PCI support
Ladi Prosek
lprosek at redhat.com
Thu Mar 10 14:41:27 UTC 2016
On Thu, Mar 10, 2016 at 2:52 PM, Michael S. Tsirkin <mst at redhat.com> wrote:
> On Wed, Mar 09, 2016 at 07:20:02PM +0100, Ladi Prosek wrote:
>> This commit adds support for driving virtio 1.0 PCI devices.
>> In addition to various helpers, a number of vpm_ functions are
>> introduced to be used instead of their legacy vp_ counterparts
>> when accessing virtio 1.0 (aka modern) devices.
>>
>> Signed-off-by: Ladi Prosek <lprosek at redhat.com>
>> ---
>> src/drivers/bus/virtio-pci.c | 213 ++++++++++++++++++++++++++++++++++++++++-
>> src/drivers/bus/virtio-ring.c | 12 ++-
>> src/drivers/net/virtio-net.c | 2 +-
>> src/include/ipxe/errfile.h | 1 +
>> src/include/ipxe/virtio-pci.h | 176 ++++++++++++++++++++++++++++++++++
>> src/include/ipxe/virtio-ring.h | 1 +
>> 6 files changed, 396 insertions(+), 9 deletions(-)
>>
>> diff --git a/src/drivers/bus/virtio-pci.c b/src/drivers/bus/virtio-pci.c
>> index fbef067..cfa321b 100644
>> --- a/src/drivers/bus/virtio-pci.c
>> +++ b/src/drivers/bus/virtio-pci.c
>> @@ -11,10 +11,13 @@
>> *
>> */
>>
>> +#include "errno.h"
>> #include "etherboot.h"
>> #include "ipxe/io.h"
>> -#include "ipxe/virtio-ring.h"
>> +#include "ipxe/iomap.h"
>> +#include "ipxe/pci.h"
>> #include "ipxe/virtio-pci.h"
>> +#include "ipxe/virtio-ring.h"
>>
>> int vp_find_vq(unsigned int ioaddr, int queue_index,
>> struct vring_virtqueue *vq)
>> @@ -30,19 +33,19 @@ int vp_find_vq(unsigned int ioaddr, int queue_index,
>>
>> num = inw(ioaddr + VIRTIO_PCI_QUEUE_NUM);
>> if (!num) {
>> - printf("ERROR: queue size is 0\n");
>> + DBG("VIRTIO-PCI ERROR: queue size is 0\n");
>> return -1;
>> }
>>
>> if (num > MAX_QUEUE_NUM) {
>> - printf("ERROR: queue size %d > %d\n", num, MAX_QUEUE_NUM);
>> + DBG("VIRTIO-PCI ERROR: queue size %d > %d\n", num, MAX_QUEUE_NUM);
>> return -1;
>> }
>>
>> /* check if the queue is already active */
>>
>> if (inl(ioaddr + VIRTIO_PCI_QUEUE_PFN)) {
>> - printf("ERROR: queue already active\n");
>> + DBG("VIRTIO-PCI ERROR: queue already active\n");
>> return -1;
>> }
>>
>> @@ -62,3 +65,205 @@ int vp_find_vq(unsigned int ioaddr, int queue_index,
>>
>> return num;
>> }
>> +
>> +int virtio_pci_find_capability(struct pci_device *pci, uint8_t cfg_type)
>> +{
>> + int pos;
>> + uint8_t type, bar;
>> +
>> + for (pos = pci_find_capability(pci, PCI_CAP_ID_VNDR);
>> + pos > 0;
>> + pos = pci_find_next_capability(pci, pos, PCI_CAP_ID_VNDR)) {
>> +
>> + pci_read_config_byte(pci, pos + offsetof(struct virtio_pci_cap,
>> + cfg_type), &type);
>> + pci_read_config_byte(pci, pos + offsetof(struct virtio_pci_cap,
>> + bar), &bar);
>> +
>> + /* Ignore structures with reserved BAR values */
>> + if (bar > 0x5) {
>> + continue;
>> + }
>> +
>> + if (type == cfg_type) {
>> + return pos;
>> + }
>> + }
>> + return 0;
>> +}
>> +
>> +int virtio_pci_map_capability(struct pci_device *pci, int cap, size_t minlen,
>> + u32 align, u32 start, u32 size,
>> + struct virtio_pci_region *region)
>> +{
>> + u8 bar;
>> + u32 offset, length, base_raw;
>> + unsigned long base;
>> +
>> + pci_read_config_byte(pci, cap + offsetof(struct virtio_pci_cap, bar), &bar);
>> + pci_read_config_dword(pci, cap + offsetof(struct virtio_pci_cap, offset),
>> + &offset);
>> + pci_read_config_dword(pci, cap + offsetof(struct virtio_pci_cap, length),
>> + &length);
>> +
>> + if (length <= start) {
>> + DBG("VIRTIO-PCI bad capability len %u (>%u expected)\n", length, start);
>> + return -EINVAL;
>> + }
>> + if (length - start < minlen) {
>> + DBG("VIRTIO-PCI bad capability len %u (>=%zu expected)\n", length, minlen);
>> + return -EINVAL;
>> + }
>> + length -= start;
>> + if (start + offset < offset) {
>> + DBG("VIRTIO-PCI map wrap-around %u+%u\n", start, offset);
>> + return -EINVAL;
>> + }
>> + offset += start;
>> + if (offset & (align - 1)) {
>> + DBG("VIRTIO-PCI offset %u not aligned to %u\n", offset, align);
>> + return -EINVAL;
>> + }
>> + if (length > size) {
>> + length = size;
>> + }
>> +
>> + region->length = length;
>> + if (minlen + offset < minlen ||
>> + minlen + offset > pci_bar_size(pci, PCI_BASE_ADDRESS(bar))) {
>> + DBG("VIRTIO-PCI map virtio %zu@%u out of range on bar %i length %lu\n",
>> + minlen, offset,
>> + bar, (unsigned long)pci_bar_size(pci, PCI_BASE_ADDRESS(bar)));
>> + return -EINVAL;
>> + }
>> +
>> + base = pci_bar_start(pci, PCI_BASE_ADDRESS(bar));
>> + pci_read_config_dword(pci, PCI_BASE_ADDRESS(bar), &base_raw);
>> +
>> + if (base_raw & PCI_BASE_ADDRESS_SPACE_IO) {
>> + region->base = (void *)(base + offset);
>> + region->flags = VIRTIO_PCI_REGION_PORT;
>> + } else {
>> + region->base = ioremap(base + offset, length);
>> + region->flags = 0;
>> + }
>> + if (!region->base) {
>> + DBG("VIRTIO-PCI unable to map virtio %u@%u on bar %i\n",
>> + length, offset, bar);
>> + }
>> + return 0;
>> +}
>> +
>> +void virtio_pci_unmap_capability(struct virtio_pci_region *region)
>> +{
>> + if (region->base) {
>> + if ((region->flags &
>> + (VIRTIO_PCI_REGION_PORT | VIRTIO_PCI_REGION_NO_UNMAP)) == 0) {
>> + iounmap(region->base);
>> + }
>> + }
>> +}
>> +
>> +void vpm_notify(struct vring_virtqueue *vq)
>> +{
>> + vpm_iowrite16(&vq->notification, (u16)vq->queue_index, 0);
>> +}
>> +
>> +int vpm_find_vqs(struct virtio_pci_modern_device *vdev,
>> + unsigned nvqs, struct vring_virtqueue *vqs)
>> +{
>> + unsigned i;
>> + struct vring_virtqueue *vq;
>> + u16 size, off;
>> + int err;
>> +
>> + if (nvqs > vpm_ioread16(&vdev->common, COMMON_OFFSET(num_queues))) {
>> + return -ENOENT;
>> + }
>> +
>> + for (i = 0; i < nvqs; i++) {
>> + /* Select the queue we're interested in */
>> + vpm_iowrite16(&vdev->common, (u16)i, COMMON_OFFSET(queue_select));
>> +
>> + /* Check if queue is either not available or already active. */
>> + size = vpm_ioread16(&vdev->common, COMMON_OFFSET(queue_size));
>> + /* QEMU has a bug where queues don't revert to inactive on device
>> + * reset. Skip checking the queue_enable field until it is fixed.
>> + */
>> + if (!size /*|| vpm_ioread16(&vdev->common.queue_enable)*/)
>> + return -ENOENT;
>> +
>> + if (size & (size - 1)) {
>> + DBG("VIRTIO-PCI %p: bad queue size %u", vdev, size);
>> + return -EINVAL;
>> + }
>> +
>> + vq = &vqs[i];
>> + vq->queue_index = i;
>> +
>> + /* get offset of notification word for this vq */
>> + off = vpm_ioread16(&vdev->common, COMMON_OFFSET(queue_notify_off));
>> + vq->vring.num = size;
>> +
>> + vring_init(&vq->vring, size, (unsigned char *)vq->queue);
>> +
>> + /* activate the queue */
>> + vpm_iowrite16(&vdev->common, size, COMMON_OFFSET(queue_size));
>> +
>> + vpm_iowrite64(&vdev->common, virt_to_phys(vq->vring.desc),
>> + COMMON_OFFSET(queue_desc_lo),
>> + COMMON_OFFSET(queue_desc_hi));
>> + vpm_iowrite64(&vdev->common, virt_to_phys(vq->vring.avail),
>> + COMMON_OFFSET(queue_avail_lo),
>> + COMMON_OFFSET(queue_avail_hi));
>> + vpm_iowrite64(&vdev->common, virt_to_phys(vq->vring.used),
>> + COMMON_OFFSET(queue_used_lo),
>> + COMMON_OFFSET(queue_used_hi));
>> +
>> + if (vdev->notify_base.base) {
>> + /* offset should not wrap */
>> + if ((u64)off * vdev->notify_offset_multiplier + 2
>> + > vdev->notify_base.length) {
>> + DBG("VIRTIO-PCI %p: bad notification offset %u (x %u)"
>> + "for queue %u > %zd",
>> + vdev,
>> + off, vdev->notify_offset_multiplier,
>> + i, vdev->notify_base.length);
>> + err = -EINVAL;
>> + goto err_map_notify;
>> + }
>> + vq->notification.base = (void *)(vdev->notify_base.base +
>> + off * vdev->notify_offset_multiplier);
>> + vq->notification.length = 2;
>> + vq->notification.flags = VIRTIO_PCI_REGION_NO_UNMAP;
>> + }
>
> where is notify_base initialized? I couldn't find it anywhere.
> Is this branch dead code?
It's initialized in virtnet_probe_modern in virtio-net.c, in the last
patch (4/4).
> I think it's easier to always map each queue separately.
>
>> + else {
>
> should be:
>
> } else {
>
Will fix, thanks!
>> + err = virtio_pci_map_capability(vdev->pci,
>> + vdev->notify_map_cap, 2, 2,
>> + off * vdev->notify_offset_multiplier, 2,
>> + &vq->notification);
>> + }
>> +
>> + if (err) {
>> + goto err_map_notify;
>> + }
>> + }
>> +
>> + /* Select and activate all queues. Has to be done last: once we do
>> + * this, there's no way to go back except reset.
>> + */
>> + for (i = 0; i < nvqs; i++) {
>> + vq = &vqs[i];
>> + vpm_iowrite16(&vdev->common, (u16)vq->queue_index,
>> + COMMON_OFFSET(queue_select));
>> + vpm_iowrite16(&vdev->common, 1, COMMON_OFFSET(queue_enable));
>> + }
>> + return 0;
>> +
>> +err_map_notify:
>> + /* Undo the virtio_pci_map_capability calls. */
>> + while (i-- > 0) {
>> + virtio_pci_unmap_capability(&vqs[i].notification);
>> + }
>> + return err;
>> +}
>> diff --git a/src/drivers/bus/virtio-ring.c b/src/drivers/bus/virtio-ring.c
>> index e55b6d0..93e2573 100644
>> --- a/src/drivers/bus/virtio-ring.c
>> +++ b/src/drivers/bus/virtio-ring.c
>> @@ -18,8 +18,8 @@ FILE_LICENCE ( GPL2_OR_LATER );
>>
>> #include "etherboot.h"
>> #include "ipxe/io.h"
>> -#include "ipxe/virtio-ring.h"
>> #include "ipxe/virtio-pci.h"
>> +#include "ipxe/virtio-ring.h"
>>
>> #define BUG() do { \
>> printf("BUG: failure at %s:%d/%s()!\n", \
>> @@ -130,7 +130,11 @@ void vring_kick(unsigned int ioaddr, struct vring_virtqueue *vq, int num_added)
>> vr->avail->idx += num_added;
>>
>> mb();
>> - if (!(vr->used->flags & VRING_USED_F_NO_NOTIFY))
>> - vp_notify(ioaddr, vq->queue_index);
>> + if (!(vr->used->flags & VRING_USED_F_NO_NOTIFY)) {
>> + if (ioaddr) {
>> + vp_notify(ioaddr, vq->queue_index);
>> + } else {
>> + vpm_notify(vq);
>> + }
>> + }
>> }
>> -
>> diff --git a/src/drivers/net/virtio-net.c b/src/drivers/net/virtio-net.c
>> index 533ccb0..10a9f71 100644
>> --- a/src/drivers/net/virtio-net.c
>> +++ b/src/drivers/net/virtio-net.c
>> @@ -30,8 +30,8 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
>> #include <ipxe/pci.h>
>> #include <ipxe/if_ether.h>
>> #include <ipxe/ethernet.h>
>> -#include <ipxe/virtio-ring.h>
>> #include <ipxe/virtio-pci.h>
>> +#include <ipxe/virtio-ring.h>
>> #include "virtio-net.h"
>>
>> /*
>> diff --git a/src/include/ipxe/errfile.h b/src/include/ipxe/errfile.h
>> index 65b4d9c..2fb4898 100644
>> --- a/src/include/ipxe/errfile.h
>> +++ b/src/include/ipxe/errfile.h
>> @@ -345,6 +345,7 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
>> #define ERRFILE_efi_pxe ( ERRFILE_OTHER | 0x004a0000 )
>> #define ERRFILE_efi_usb ( ERRFILE_OTHER | 0x004b0000 )
>> #define ERRFILE_efi_fbcon ( ERRFILE_OTHER | 0x004c0000 )
>> +#define ERRFILE_virtio_pci ( ERRFILE_OTHER | 0x004d0000 )
>>
>> /** @} */
>>
>> diff --git a/src/include/ipxe/virtio-pci.h b/src/include/ipxe/virtio-pci.h
>> index 58c9bf2..5f96d61 100644
>> --- a/src/include/ipxe/virtio-pci.h
>> +++ b/src/include/ipxe/virtio-pci.h
>> @@ -1,6 +1,8 @@
>> #ifndef _VIRTIO_PCI_H_
>> # define _VIRTIO_PCI_H_
>>
>> +#include <byteswap.h>
>> +
>> /* A 32-bit r/o bitmask of the features supported by the host */
>> #define VIRTIO_PCI_HOST_FEATURES 0
>>
>> @@ -92,6 +94,41 @@ struct virtio_pci_common_cfg {
>> __le32 queue_used_hi; /* read-write */
>> } __attribute__((packed));
>>
>> +#define COMMON_OFFSET(field) offsetof(struct virtio_pci_common_cfg, field)
>> +
>> +/* Virtio 1.0 PCI region descriptor. We support both port I/O and
>> + * memory mapped I/O so base pointers need to be accompanied by
>> + * flags to discriminate between the two cases. */
>> +struct virtio_pci_region {
>> + void *base;
>> + size_t length;
>> +
>> +/* This is a port I/O range */
>> +#define VIRTIO_PCI_REGION_PORT 0x00000001
>> +/* No unmap call needed */
>> +#define VIRTIO_PCI_REGION_NO_UNMAP 0x00000002
>> + unsigned flags;
>> +};
>> +
>> +/* Virtio 1.0 device state */
>> +struct virtio_pci_modern_device {
>> + struct pci_device *pci;
>> +
>> + /* VIRTIO_PCI_CAP_COMMON_CFG data */
>> + struct virtio_pci_region common;
>> +
>> + /* VIRTIO_PCI_CAP_DEVICE_CFG data */
>> + struct virtio_pci_region device;
>> +
>> + /* VIRTIO_PCI_CAP_ISR_CFG data */
>> + struct virtio_pci_region isr;
>> +
>> + /* VIRTIO_PCI_CAP_NOTIFY_CFG data */
>> + struct virtio_pci_region notify_base;
>> + int notify_map_cap;
>> + u32 notify_offset_multiplier;
>> +};
>> +
>> static inline u32 vp_get_features(unsigned int ioaddr)
>> {
>> return inl(ioaddr + VIRTIO_PCI_HOST_FEATURES);
>> @@ -151,6 +188,145 @@ static inline void vp_del_vq(unsigned int ioaddr, int queue_index)
>> outl(0, ioaddr + VIRTIO_PCI_QUEUE_PFN);
>> }
>>
>> +struct vring_virtqueue;
>> +
>> int vp_find_vq(unsigned int ioaddr, int queue_index,
>> struct vring_virtqueue *vq);
>> +
>> +/* Virtio 1.0 I/O routines with both port I/O and memory I/O support
>> + * and endianness conversions */
>> +
>> +static inline void vpm_iowrite8(struct virtio_pci_region *region,
>> + u8 data, size_t offset)
>> +{
>> + if (region->flags & VIRTIO_PCI_REGION_PORT) {
>> + outb(data, region->base + offset);
>> + } else {
>> + writeb(data, region->base + offset);
>> + }
>> +}
>> +
>> +static inline void vpm_iowrite16(struct virtio_pci_region *region,
>> + u16 data, size_t offset)
>> +{
>> + if (region->flags & VIRTIO_PCI_REGION_PORT) {
>> + outw(cpu_to_le16(data), region->base + offset);
>> + } else {
>> + writew(cpu_to_le16(data), region->base + offset);
>> + }
>> +}
>> +
>> +static inline void vpm_iowrite32(struct virtio_pci_region *region,
>> + u32 data, size_t offset)
>> +{
>> + if (region->flags & VIRTIO_PCI_REGION_PORT) {
>> + outl(cpu_to_le32(data), region->base + offset);
>> + } else {
>> + writel(cpu_to_le32(data), region->base + offset);
>> + }
>> +}
>> +
>> +static inline void vpm_iowrite64(struct virtio_pci_region *region,
>> + u64 data,
>> + size_t offset_lo,
>> + size_t offset_hi)
>> +{
>> + vpm_iowrite32(region, (u32)data, offset_lo);
>> + vpm_iowrite32(region, data >> 32, offset_hi);
>> +}
>> +
>> +static inline u8 vpm_ioread8(struct virtio_pci_region *region, size_t offset)
>> +{
>> + if (region->flags & VIRTIO_PCI_REGION_PORT) {
>> + return inb(region->base + offset);
>> + } else {
>> + return readb(region->base + offset);
>> + }
>> +}
>> +
>> +static inline u16 vpm_ioread16(struct virtio_pci_region *region, size_t offset)
>> +{
>> + if (region->flags & VIRTIO_PCI_REGION_PORT) {
>> + return le16_to_cpu(inw(region->base + offset));
>> + } else {
>> + return le16_to_cpu(readw(region->base + offset));
>> + }
>> +}
>> +
>> +static inline u32 vpm_ioread32(struct virtio_pci_region *region, size_t offset)
>> +{
>> + if (region->flags & VIRTIO_PCI_REGION_PORT) {
>> + return le32_to_cpu(inl(region->base + offset));
>> + } else {
>> + return le32_to_cpu(readl(region->base + offset));
>> + }
>> +}
>> +
>> +/* Virtio 1.0 device manipulation routines */
>> +
>> +static inline void vpm_reset(struct virtio_pci_modern_device *vdev)
>> +{
>> + vpm_iowrite8(&vdev->common, 0, COMMON_OFFSET(device_status));
>> + (void)vpm_ioread8(&vdev->common, COMMON_OFFSET(device_status));
>> +}
>> +
>> +static inline void vpm_add_status(struct virtio_pci_modern_device *vdev,
>> + u8 status)
>> +{
>> + u8 curr_status = vpm_ioread8(&vdev->common, COMMON_OFFSET(device_status));
>> + vpm_iowrite8(&vdev->common,
>> + curr_status | status, COMMON_OFFSET(device_status));
>> +}
>> +
>> +static inline u64 vpm_get_features(struct virtio_pci_modern_device *vdev)
>> +{
>> + u32 features_lo, features_hi;
>> +
>> + vpm_iowrite32(&vdev->common, 0, COMMON_OFFSET(device_feature_select));
>> + features_lo = vpm_ioread32(&vdev->common, COMMON_OFFSET(device_feature));
>> + vpm_iowrite32(&vdev->common, 1, COMMON_OFFSET(device_feature_select));
>> + features_hi = vpm_ioread32(&vdev->common, COMMON_OFFSET(device_feature));
>> +
>> + return ((u64)features_hi << 32) | features_lo;
>> +}
>> +
>> +static inline void vpm_set_features(struct virtio_pci_modern_device *vdev,
>> + u64 features)
>> +{
>> + u32 features_lo = (u32)features;
>> + u32 features_hi = features >> 32;
>> +
>> + vpm_iowrite32(&vdev->common, 0, COMMON_OFFSET(guest_feature_select));
>> + vpm_iowrite32(&vdev->common, features_lo, COMMON_OFFSET(guest_feature));
>> + vpm_iowrite32(&vdev->common, 1, COMMON_OFFSET(guest_feature_select));
>> + vpm_iowrite32(&vdev->common, features_hi, COMMON_OFFSET(guest_feature));
>> +}
>> +
>> +static inline void vpm_get(struct virtio_pci_modern_device *vdev,
>> + unsigned offset, void *buf, unsigned len)
>> +{
>> + u8 *ptr = buf;
>> + unsigned i;
>> +
>> + for (i = 0; i < len; i++)
>> + ptr[i] = vpm_ioread8(&vdev->device, offset + i);
>> +}
>> +
>> +static inline u8 vpm_get_isr(struct virtio_pci_modern_device *vdev)
>> +{
>> + return vpm_ioread8(&vdev->isr, 0);
>> +}
>> +
>> +void vpm_notify(struct vring_virtqueue *vq);
>> +
>> +int vpm_find_vqs(struct virtio_pci_modern_device *vdev,
>> + unsigned nvqs, struct vring_virtqueue *vqs);
>> +
>> +int virtio_pci_find_capability(struct pci_device *pci, uint8_t cfg_type);
>> +
>> +int virtio_pci_map_capability(struct pci_device *pci, int cap, size_t minlen,
>> + u32 align, u32 start, u32 size,
>> + struct virtio_pci_region *region);
>> +
>> +void virtio_pci_unmap_capability(struct virtio_pci_region *region);
>> #endif /* _VIRTIO_PCI_H_ */
>> diff --git a/src/include/ipxe/virtio-ring.h b/src/include/ipxe/virtio-ring.h
>> index e44d13c..20a8570 100644
>> --- a/src/include/ipxe/virtio-ring.h
>> +++ b/src/include/ipxe/virtio-ring.h
>> @@ -79,6 +79,7 @@ struct vring_virtqueue {
>> void *vdata[MAX_QUEUE_NUM];
>> /* PCI */
>> int queue_index;
>> + struct virtio_pci_region notification;
>> };
>>
>> struct vring_list {
>> --
>> 2.5.0
More information about the ipxe-devel
mailing list