[ipxe-devel] [PATCH ipxe v2 4/4] [af_packet] new AF_PACKET driver for Linux
David Decotigny
ddecotig at gmail.com
Tue Dec 20 23:13:30 UTC 2016
This code is largely inspired by tap.c.
It allows testing iPXE on real NICs from Linux. Example:
make -j -C src bin-x86_64-linux/af_packet.linux
valgrind ./af_packet.linux --net af_packet,if=eth3 --settings uuid=$(uuidgen)
Tested as x86_64 and i386 binary.
---
src/arch/x86/core/linux/linux_api.c | 39 +++++
src/drivers/linux/af_packet.c | 325 ++++++++++++++++++++++++++++++++++++
src/include/ipxe/errfile.h | 1 +
src/include/linux_api.h | 7 +
4 files changed, 372 insertions(+)
create mode 100644 src/drivers/linux/af_packet.c
diff --git a/src/arch/x86/core/linux/linux_api.c b/src/arch/x86/core/linux/linux_api.c
index 0bed9fd..17b1f3f 100644
--- a/src/arch/x86/core/linux/linux_api.c
+++ b/src/arch/x86/core/linux/linux_api.c
@@ -108,3 +108,42 @@ void * linux_mremap ( void *old_address, __kernel_size_t old_size,
int linux_munmap ( void *addr, __kernel_size_t length ) {
return linux_syscall ( __NR_munmap, addr, length );
}
+
+/**
+ * Create a socket
+ *
+ * Uses the dedicated socket syscall when __NR_socket is defined;
+ * otherwise the arguments are packed into an array and passed through
+ * the multiplexed __NR_socketcall entry point.
+ *
+ * @v domain	Protocol family
+ * @v type_	Socket type
+ * @v protocol	Protocol
+ * @ret fd	Value returned by linux_syscall()
+ */
+int linux_socket ( int domain, int type_, int protocol ) {
+#ifndef __NR_socket
+#ifndef SOCKOP_socket
+# define SOCKOP_socket 1
+#endif
+	/* No direct syscall: go through socketcall with packed arguments */
+	unsigned long args[] = { domain, type_, protocol };
+	return linux_syscall ( __NR_socketcall, SOCKOP_socket, args );
+#else
+	return linux_syscall ( __NR_socket, domain, type_, protocol );
+#endif
+}
+
+/**
+ * Bind a socket to an address
+ *
+ * Uses the dedicated bind syscall when __NR_bind is defined; otherwise
+ * the arguments are packed into an array and passed through the
+ * multiplexed __NR_socketcall entry point.
+ *
+ * @v fd	Socket descriptor
+ * @v addr	Address to bind to
+ * @v addrlen	Length of the address structure
+ * @ret rc	Value returned by linux_syscall()
+ */
+int linux_bind ( int fd, const struct sockaddr *addr, socklen_t addrlen ) {
+#ifndef __NR_bind
+#ifndef SOCKOP_bind
+# define SOCKOP_bind 2
+#endif
+	/* No direct syscall: go through socketcall with packed arguments */
+	unsigned long args[] = { fd, (unsigned long)addr, addrlen };
+	return linux_syscall ( __NR_socketcall, SOCKOP_bind, args );
+#else
+	return linux_syscall ( __NR_bind, fd, addr, addrlen );
+#endif
+}
+
+/**
+ * Send a message on a socket to a given destination
+ *
+ * Uses the dedicated sendto syscall when __NR_sendto is defined;
+ * otherwise the arguments are packed into an array and passed through
+ * the multiplexed __NR_socketcall entry point.
+ *
+ * @v fd	Socket descriptor
+ * @v buf	Data to send
+ * @v len	Length of data
+ * @v flags	Send flags
+ * @v daddr	Destination address
+ * @v addrlen	Length of the destination address structure
+ * @ret len	Value returned by linux_syscall()
+ */
+ssize_t linux_sendto ( int fd, const void *buf, size_t len, int flags,
+		       const struct sockaddr *daddr, socklen_t addrlen ) {
+#ifndef __NR_sendto
+#ifndef SOCKOP_sendto
+# define SOCKOP_sendto 11
+#endif
+	/* No direct syscall: go through socketcall with packed arguments */
+	unsigned long args[] = { fd, (unsigned long)buf, len,
+				 flags, (unsigned long)daddr, addrlen };
+	return linux_syscall ( __NR_socketcall, SOCKOP_sendto, args );
+#else
+	return linux_syscall ( __NR_sendto, fd, buf, len, flags,
+			       daddr, addrlen );
+#endif
+}
diff --git a/src/drivers/linux/af_packet.c b/src/drivers/linux/af_packet.c
new file mode 100644
index 0000000..1622c8c
--- /dev/null
+++ b/src/drivers/linux/af_packet.c
@@ -0,0 +1,325 @@
+/*
+ * Copyright (C) 2016 David Decotigny <ddecotig at gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <errno.h>
+#include <string.h>
+#include <stdio.h>
+#include <linux_api.h>
+#include <ipxe/list.h>
+#include <ipxe/linux.h>
+#include <ipxe/malloc.h>
+#include <ipxe/device.h>
+#include <ipxe/netdevice.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/ethernet.h>
+#include <ipxe/settings.h>
+#include <ipxe/socket.h>
+
+/* This hack prevents pre-2.6.32 headers from redefining struct sockaddr */
+#define __GLIBC__ 2
+#include <linux/socket.h>
+#include <linux/if.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#undef __GLIBC__
+#include <byteswap.h>
+
+/* linux-specific syscall params */
+#define LINUX_AF_PACKET 17
+#define LINUX_SOCK_RAW 3
+#define LINUX_SIOCGIFINDEX 0x8933
+#define LINUX_SIOCGIFHWADDR 0x8927
+
+#define RX_BUF_SIZE 1536
+
+/** @file
+ *
+ * The AF_PACKET driver.
+ *
+ * Bind to an existing linux network interface.
+ */
+
+/** Driver-private state, stored in netdev->priv */
+struct af_packet_nic {
+ /** Linux network interface name (taken from the "if" setting value
+  *  at probe time) */
+ char * ifname;
+ /** Packet socket descriptor (created when the device is opened) */
+ int fd;
+ /** Interface index of ifname, resolved via SIOCGIFINDEX on open */
+ int ifindex;
+};
+
+/**
+ * Open the linux interface
+ *
+ * Creates an AF_PACKET raw socket, looks up the interface index for
+ * nic->ifname, binds the socket to that interface and puts it into
+ * non-blocking mode.  The socket is closed again on any failure that
+ * occurs after it has been created.
+ *
+ * @v netdev	Network device
+ * @ret rc	0 on success, non-zero on failure
+ */
+static int af_packet_nic_open ( struct net_device * netdev )
+{
+	struct af_packet_nic * nic = netdev->priv;
+	struct sockaddr_ll socket_address;
+	struct ifreq if_data;
+	int ret;
+
+	/* strncpy() does not NUL-terminate on truncation, so reject names
+	 * that do not fit in ifr_name rather than passing the kernel an
+	 * unterminated or silently truncated interface name.
+	 */
+	if (strlen(nic->ifname) >= sizeof(if_data.ifr_name)) {
+		DBGC(nic, "af_packet %p interface name too long\n", nic);
+		return -EINVAL;
+	}
+
+	nic->fd = linux_socket(LINUX_AF_PACKET, LINUX_SOCK_RAW,
+			       htons(ETH_P_ALL));
+	if (nic->fd < 0) {
+		DBGC(nic, "af_packet %p socket(AF_PACKET) = %d (%s)\n",
+		     nic, nic->fd, linux_strerror(linux_errno));
+		return nic->fd;
+	}
+
+	/* resolve ifindex of ifname (length was validated above, and the
+	 * structure is zeroed, so ifr_name is always NUL-terminated) */
+	memset(&if_data, 0, sizeof(if_data));
+	strncpy(if_data.ifr_name, nic->ifname, sizeof(if_data.ifr_name));
+	ret = linux_ioctl(nic->fd, LINUX_SIOCGIFINDEX, &if_data);
+	if (ret < 0) {
+		DBGC(nic, "af_packet %p ioctl(SIOCGIFINDEX) = %d (%s)\n",
+		     nic, ret, linux_strerror(linux_errno));
+		goto err_close;
+	}
+
+	nic->ifindex = if_data.ifr_ifindex;
+
+	/* bind to interface */
+	memset(&socket_address, 0, sizeof(socket_address));
+	socket_address.sll_family = LINUX_AF_PACKET;
+	socket_address.sll_ifindex = nic->ifindex;
+	socket_address.sll_protocol = htons(ETH_P_ALL);
+	ret = linux_bind(nic->fd, (void *) &socket_address,
+			 sizeof(socket_address));
+	if (ret == -1) {
+		DBGC(nic, "af_packet %p bind() = %d (%s)\n",
+		     nic, ret, linux_strerror(linux_errno));
+		goto err_close;
+	}
+
+	/* Set nonblocking mode to make af_packet_nic_poll() easier */
+	ret = linux_fcntl(nic->fd, F_SETFL, O_NONBLOCK);
+	if (ret != 0) {
+		DBGC(nic, "af_packet %p fcntl(%d, ...) = %d (%s)\n",
+		     nic, nic->fd, ret, linux_strerror(linux_errno));
+		goto err_close;
+	}
+
+	return 0;
+
+err_close:
+	/* Single cleanup path for every failure after socket creation */
+	linux_close(nic->fd);
+	return ret;
+}
+
+/**
+ * Close the packet socket
+ *
+ * @v netdev	Network device
+ */
+static void af_packet_nic_close ( struct net_device *netdev )
+{
+	struct af_packet_nic *priv = netdev->priv;
+
+	linux_close ( priv->fd );
+}
+
+/**
+ * Transmit an ethernet packet.
+ *
+ * The packet is written to the socket synchronously, so the I/O buffer
+ * is completed before returning: successfully if the send worked, with
+ * an error if it did not.
+ *
+ * @v netdev	Network device
+ * @v iobuf	I/O buffer holding the complete ethernet frame
+ * @ret rc	Always 0; a send failure is reported through the
+ *		completion of the buffer
+ */
+static int af_packet_nic_transmit ( struct net_device *netdev,
+				    struct io_buffer *iobuf )
+{
+	struct af_packet_nic * nic = netdev->priv;
+	struct sockaddr_ll socket_address;
+	const struct ethhdr * eh;
+	int rc;
+
+	memset(&socket_address, 0, sizeof(socket_address));
+	socket_address.sll_family = LINUX_AF_PACKET;
+	socket_address.sll_ifindex = nic->ifindex;
+	socket_address.sll_halen = ETH_ALEN;
+
+	/* The destination MAC comes from the frame's own ethernet header */
+	eh = iobuf->data;
+	memcpy(socket_address.sll_addr, eh->h_dest, ETH_ALEN);
+
+	rc = linux_sendto(nic->fd, iobuf->data, iobuf->tail - iobuf->data,
+			  0, (struct sockaddr *)&socket_address,
+			  sizeof(socket_address));
+	if (rc < 0) {
+		/* Do not report a failed send as a transmitted packet */
+		DBGC(nic, "af_packet %p sendto() = %d (%s)\n",
+		     nic, rc, linux_strerror(linux_errno));
+		netdev_tx_complete_err(netdev, iobuf, -EIO);
+		return 0;
+	}
+
+	DBGC2(nic, "af_packet %p wrote %d bytes\n", nic, rc);
+	netdev_tx_complete(netdev, iobuf);
+
+	return 0;
+}
+
+/**
+ * Poll for new packets
+ *
+ * Checks the socket for readability without blocking and, if data is
+ * pending, reads packets one at a time into freshly allocated I/O
+ * buffers and hands each one to netdev_rx() until a read returns no
+ * data.
+ *
+ * @v netdev	Network device
+ */
+static void af_packet_nic_poll ( struct net_device *netdev )
+{
+	struct af_packet_nic * nic = netdev->priv;
+	struct pollfd pfd = { .fd = nic->fd, .events = POLLIN };
+	struct io_buffer * rxbuf;
+	int len;
+
+	if (linux_poll(&pfd, 1, 0) == -1) {
+		DBGC(nic, "af_packet %p poll failed (%s)\n",
+		     nic, linux_strerror(linux_errno));
+		return;
+	}
+	if (! (pfd.revents & POLLIN))
+		return;
+
+	/* At this point we know there is at least one new packet to be read */
+	for (;;) {
+		rxbuf = alloc_iob(RX_BUF_SIZE);
+		if (! rxbuf) {
+			DBGC(nic, "af_packet %p alloc_iob failed\n", nic);
+			return;
+		}
+
+		len = linux_read(nic->fd, rxbuf->data, RX_BUF_SIZE);
+		if (len <= 0) {
+			/* Nothing (more) to read: drop the unused buffer */
+			free_iob(rxbuf);
+			return;
+		}
+
+		DBGC2(nic, "af_packet %p read %d bytes\n", nic, len);
+
+		iob_put(rxbuf, len);
+		netdev_rx(netdev, rxbuf);
+	}
+}
+
+/**
+ * Enable or disable interrupts.
+ *
+ * Interrupts are not used on linux; this stub only logs the request.
+ *
+ * @v netdev	Network device
+ * @v enable	Requested interrupt state
+ */
+static void af_packet_nic_irq ( struct net_device *netdev, int enable )
+{
+	struct af_packet_nic *priv = netdev->priv;
+
+	DBGC ( priv, "af_packet %p irq enable = %d\n", priv, enable );
+}
+
+
+/**
+ * Copy the host interface's MAC address into the network device
+ *
+ * Opens a temporary AF_PACKET socket, queries the hardware address of
+ * nic->ifname with SIOCGIFHWADDR and stores it in netdev->ll_addr.
+ *
+ * @v netdev	Network device
+ * @ret rc	0 on success, negative value on failure
+ */
+static int af_packet_update_properties ( struct net_device *netdev )
+{
+	struct af_packet_nic *nic = netdev->priv;
+	struct ifreq if_data;
+	int fd;
+	int ret;
+
+	/* strncpy() does not NUL-terminate on truncation, so reject names
+	 * that do not fit in ifr_name rather than querying an unterminated
+	 * or silently truncated interface name.
+	 */
+	if (strlen(nic->ifname) >= sizeof(if_data.ifr_name)) {
+		DBGC(nic, "af_packet %p interface name too long\n", nic);
+		return -EINVAL;
+	}
+
+	/* temporary socket, used only to issue the ioctl below */
+	fd = linux_socket(LINUX_AF_PACKET, LINUX_SOCK_RAW, 0);
+	if (fd < 0) {
+		DBGC(nic, "af_packet %p cannot create raw socket (%s)\n",
+		     nic, linux_strerror(linux_errno));
+		return fd;
+	}
+
+	/* retrieve host's MAC address */
+	memset(&if_data, 0, sizeof(if_data));
+	strncpy(if_data.ifr_name, nic->ifname, sizeof(if_data.ifr_name));
+	ret = linux_ioctl(fd, LINUX_SIOCGIFHWADDR, &if_data);
+	if (ret < 0) {
+		DBGC(nic, "af_packet %p cannot get mac addr (%s)\n",
+		     nic, linux_strerror(linux_errno));
+		linux_close(fd);
+		return ret;
+	}
+
+	linux_close(fd);
+	/* struct sockaddr = { u16 family, u8 pad[14] (equiv. sa_data) }; */
+	memcpy(netdev->ll_addr, if_data.ifr_hwaddr.pad, ETH_ALEN);
+	return 0;
+}
+
+/** AF_PACKET network device operations (installed via netdev_init()
+ *  in af_packet_nic_probe()) */
+static struct net_device_operations af_packet_nic_operations = {
+ .open = af_packet_nic_open,
+ .close = af_packet_nic_close,
+ .transmit = af_packet_nic_transmit,
+ .poll = af_packet_nic_poll,
+ .irq = af_packet_nic_irq,
+};
+
+/**
+ * Handle a device request for the af_packet driver
+ *
+ * Allocates an ethernet device, takes the interface name from the
+ * mandatory "if" setting, fetches the host MAC address, applies the
+ * remaining settings and registers the network device.
+ *
+ * @v device	Linux device being probed
+ * @v request	Device request carrying the settings
+ * @ret rc	0 on success, negative error on failure
+ */
+static int af_packet_nic_probe ( struct linux_device *device,
+				 struct linux_device_request *request )
+{
+	struct linux_setting *if_setting;
+	struct net_device *netdev;
+	struct af_packet_nic *nic;
+	int rc;
+
+	netdev = alloc_etherdev(sizeof(*nic));
+	if (! netdev)
+		return -ENOMEM;
+
+	netdev_init(netdev, &af_packet_nic_operations);
+	nic = netdev->priv;
+	linux_set_drvdata(device, netdev);
+	netdev->dev = &device->dev;
+
+	memset(nic, 0, sizeof(*nic));
+
+	/* Look for the mandatory if setting */
+	if_setting = linux_find_setting("if", &request->settings);
+
+	/* No if setting */
+	if (! if_setting) {
+		printf("af_packet missing a mandatory if setting\n");
+		rc = -EINVAL;
+		goto err_settings;
+	}
+
+	nic->ifname = if_setting->value;
+	snprintf ( device->dev.name, sizeof ( device->dev.name ), "%s",
+		   nic->ifname );
+	device->dev.desc.bus_type = BUS_TYPE_TAP;
+	/* The return value is deliberately not checked here; the helper
+	 * logs its own failures via DBGC.
+	 */
+	af_packet_update_properties(netdev);
+	if_setting->applied = 1;
+
+	/* Apply rest of the settings */
+	linux_apply_settings(&request->settings, &netdev->settings.settings);
+
+	/* Register network device */
+	if ((rc = register_netdev(netdev)) != 0)
+		goto err_register;
+
+	netdev_link_up(netdev);
+
+	return 0;
+
+	/* Neither failure path has a registered device, so there is
+	 * nothing to unregister: only drop the allocation.
+	 */
+err_register:
+err_settings:
+	netdev_nullify(netdev);
+	netdev_put(netdev);
+	return rc;
+}
+
+/**
+ * Remove the device
+ *
+ * Unregisters the network device stored as the linux device's driver
+ * data and releases it.
+ *
+ * @v device	Linux device being removed
+ */
+static void af_packet_nic_remove ( struct linux_device *device )
+{
+	struct net_device *dev = linux_get_drvdata ( device );
+
+	unregister_netdev ( dev );
+	netdev_nullify ( dev );
+	netdev_put ( dev );
+}
+
+/** AF_PACKET linux_driver
+ *
+ * The name "af_packet" matches the driver name used on the command
+ * line (e.g. "--net af_packet,if=eth3").
+ */
+struct linux_driver af_packet_nic_driver __linux_driver = {
+ .name = "af_packet",
+ .probe = af_packet_nic_probe,
+ .remove = af_packet_nic_remove,
+ /* NOTE(review): presumably marks the driver as eligible for probing
+  * - confirm against the linux_driver definition */
+ .can_probe = 1,
+};
diff --git a/src/include/ipxe/errfile.h b/src/include/ipxe/errfile.h
index d0b93d0..1a037b1 100644
--- a/src/include/ipxe/errfile.h
+++ b/src/include/ipxe/errfile.h
@@ -194,6 +194,7 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
#define ERRFILE_pciea ( ERRFILE_DRIVER | 0x00c00000 )
#define ERRFILE_axge ( ERRFILE_DRIVER | 0x00c10000 )
#define ERRFILE_thunderx ( ERRFILE_DRIVER | 0x00c20000 )
+#define ERRFILE_af_packet ( ERRFILE_DRIVER | 0x00c30000 )
#define ERRFILE_aoe ( ERRFILE_NET | 0x00000000 )
#define ERRFILE_arp ( ERRFILE_NET | 0x00010000 )
diff --git a/src/include/linux_api.h b/src/include/linux_api.h
index 28a3cda..fe9fa91 100644
--- a/src/include/linux_api.h
+++ b/src/include/linux_api.h
@@ -46,6 +46,8 @@ typedef __kernel_loff_t loff_t;
#include <linux/poll.h>
typedef unsigned long nfds_t;
typedef uint32_t useconds_t;
+typedef uint32_t socklen_t;
+struct sockaddr;
#define MAP_FAILED ( ( void * ) -1 )
#define SEEK_SET 0
@@ -68,6 +70,11 @@ extern void * linux_mmap ( void *addr, __kernel_size_t length, int prot,
extern void * linux_mremap ( void *old_address, __kernel_size_t old_size,
__kernel_size_t new_size, int flags );
extern int linux_munmap ( void *addr, __kernel_size_t length );
+extern int linux_socket ( int domain, int type_, int protocol );
+extern int linux_bind ( int fd, const struct sockaddr *addr,
+ socklen_t addrlen );
+extern ssize_t linux_sendto ( int fd, const void *buf, size_t len, int flags,
+ const struct sockaddr *daddr, socklen_t addrlen );
extern const char * linux_strerror ( int errnum );
--
2.8.0.rc3.226.g39d4020
More information about the ipxe-devel
mailing list