[ovs-dev] [netdev v5 1/3] netdev: Add new "struct netdev_rx" for capturing packets from a netdev.
Ethan Jackson
ethan at nicira.com
Fri May 10 20:30:48 UTC 2013
You may disagree, but I think 'renaming it "struct netdev"' sounds
better than 'renaming it top "struct netdev"'.
In netdev_bsd_rx_open(), should we call netdev_dev_bsd_changed() for
both tap devices and non-tap devices?
This was pre-existing, but the call to pcap_dispatch() in
netdev_rx_bsd_recv_pcap() needs a space after the cast.
Acked-by: Ethan Jackson <ethan at nicira.com>
On Fri, May 10, 2013 at 11:50 AM, Ben Pfaff <blp at nicira.com> wrote:
> Separating packet capture from "struct netdev" means that there is no
> remaining per-"struct netdev" state, which will allow us to get rid of
> "struct netdev_dev" (by renaming it to "struct netdev").
>
> Signed-off-by: Ben Pfaff <blp at nicira.com>
> ---
> lib/dpif-netdev.c | 13 ++-
> lib/netdev-bsd.c | 309 ++++++++++++++++++++++++++-----------------------
> lib/netdev-dummy.c | 95 +++++++++------
> lib/netdev-linux.c | 216 ++++++++++++++++++----------------
> lib/netdev-provider.h | 93 ++++++++-------
> lib/netdev-vport.c | 5 +-
> lib/netdev.c | 106 +++++++++--------
> lib/netdev.h | 21 ++--
> 8 files changed, 469 insertions(+), 389 deletions(-)
>
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 40f59c3..78bdedb 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -107,6 +107,7 @@ struct dp_netdev_port {
> struct list node; /* Element in dp_netdev's 'port_list'. */
> struct netdev *netdev;
> struct netdev_saved_flags *sf;
> + struct netdev_rx *rx;
> char *type; /* Port type as requested by user. */
> };
>
> @@ -378,6 +379,7 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
> struct netdev_saved_flags *sf;
> struct dp_netdev_port *port;
> struct netdev *netdev;
> + struct netdev_rx *rx;
> const char *open_type;
> int mtu;
> int error;
> @@ -393,7 +395,7 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
> /* XXX reject loopback devices */
> /* XXX reject non-Ethernet devices */
>
> - error = netdev_listen(netdev);
> + error = netdev_rx_open(netdev, &rx);
> if (error
> && !(error == EOPNOTSUPP && dpif_netdev_class_is_dummy(dp->class))) {
> VLOG_ERR("%s: cannot receive packets on this network device (%s)",
> @@ -404,6 +406,7 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
>
> error = netdev_turn_flags_on(netdev, NETDEV_PROMISC, &sf);
> if (error) {
> + netdev_rx_close(rx);
> netdev_close(netdev);
> return error;
> }
> @@ -412,6 +415,7 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
> port->port_no = port_no;
> port->netdev = netdev;
> port->sf = sf;
> + port->rx = rx;
> port->type = xstrdup(type);
>
> error = netdev_get_mtu(netdev, &mtu);
> @@ -509,6 +513,7 @@ do_del_port(struct dp_netdev *dp, uint32_t port_no)
>
> netdev_close(port->netdev);
> netdev_restore_flags(port->sf);
> + netdev_rx_close(port->rx);
> free(port->type);
> free(port);
>
> @@ -1063,7 +1068,7 @@ dpif_netdev_run(struct dpif *dpif)
> ofpbuf_clear(&packet);
> ofpbuf_reserve(&packet, DP_NETDEV_HEADROOM);
>
> - error = netdev_recv(port->netdev, &packet);
> + error = port->rx ? netdev_rx_recv(port->rx, &packet) : EOPNOTSUPP;
> if (!error) {
> dp_netdev_port_input(dp, port, &packet);
> } else if (error != EAGAIN && error != EOPNOTSUPP) {
> @@ -1083,7 +1088,9 @@ dpif_netdev_wait(struct dpif *dpif)
> struct dp_netdev_port *port;
>
> LIST_FOR_EACH (port, node, &dp->port_list) {
> - netdev_recv_wait(port->netdev);
> + if (port->rx) {
> + netdev_rx_wait(port->rx);
> + }
> }
> }
>
> diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c
> index 8b384ba..2e870d4 100644
> --- a/lib/netdev-bsd.c
> +++ b/lib/netdev-bsd.c
> @@ -16,6 +16,7 @@
>
> #include <config.h>
>
> +#include "netdev-provider.h"
> #include <stdlib.h>
> #include <errno.h>
> #include <fcntl.h>
> @@ -41,7 +42,6 @@
> #include "coverage.h"
> #include "dynamic-string.h"
> #include "fatal-signal.h"
> -#include "netdev-provider.h"
> #include "ofpbuf.h"
> #include "openflow/openflow.h"
> #include "packets.h"
> @@ -57,7 +57,7 @@ VLOG_DEFINE_THIS_MODULE(netdev_bsd);
>
> /*
> * This file implements objects to access interfaces.
> - * Externally, interfaces are represented by two structures:
> + * Externally, interfaces are represented by three structures:
> * + struct netdev_dev, representing a network device,
> * containing e.g. name and a refcount;
> * We can have private variables by embedding the
> @@ -66,26 +66,36 @@ VLOG_DEFINE_THIS_MODULE(netdev_bsd);
> *
> * + struct netdev, representing an instance of an open netdev_dev.
> * The structure contains a pointer to the 'struct netdev'
> - * representing the device. Again, private information
> - * such as file descriptor etc. are stored in our
> - * own struct netdev_bsd which includes a struct netdev.
> + * representing the device.
> + *
> + * + struct netdev_rx, which represents a netdev open to capture received
> + * packets. Again, private information such as file descriptor etc. are
> + * stored in our own struct netdev_rx_bsd which includes a struct
> + * netdev_rx.
> *
> - * Both 'struct netdev' and 'struct netdev_dev' are referenced
> - * in containers which hold pointers to the data structures.
> - * We can reach our own struct netdev_XXX_bsd by putting a
> - * struct netdev_XXX within our own struct, and using CONTAINER_OF
> - * to access the parent structure.
> + * 'struct netdev', 'struct netdev_dev', and 'struct netdev_rx' are referenced
> + * in containers which hold pointers to the data structures. We can reach our
> + * own struct netdev_XXX_bsd by putting a struct netdev_XXX within our own
> + * struct, and using CONTAINER_OF to access the parent structure.
> */
> struct netdev_bsd {
> struct netdev netdev;
> +};
> +
> +struct netdev_rx_bsd {
> + struct netdev_rx up;
>
> - int netdev_fd; /* Selectable file descriptor for the network device.
> - This descriptor will be used for polling operations */
> + /* Packet capture descriptor for a system network device.
> + * For a tap device this is NULL. */
> + pcap_t *pcap_handle;
>
> - pcap_t *pcap_handle; /* Packet capture descriptor for a system network
> - device */
> + /* Selectable file descriptor for the network device.
> + * This descriptor will be used for polling operations. */
> + int fd;
> };
>
> +static const struct netdev_rx_class netdev_rx_bsd_class;
> +
> struct netdev_dev_bsd {
> struct netdev_dev netdev_dev;
> unsigned int cache_valid;
> @@ -98,8 +108,11 @@ struct netdev_dev_bsd {
> int mtu;
> int carrier;
>
> - bool tap_opened;
> - int tap_fd; /* TAP character device, if any */
> + int tap_fd; /* TAP character device, if any, otherwise -1. */
> +
> + /* Used for sending packets on non-tap devices. */
> + pcap_t *pcap;
> + int fd;
> };
>
>
> @@ -169,6 +182,13 @@ netdev_dev_bsd_cast(const struct netdev_dev *netdev_dev)
> return CONTAINER_OF(netdev_dev, struct netdev_dev_bsd, netdev_dev);
> }
>
> +static struct netdev_rx_bsd *
> +netdev_rx_bsd_cast(const struct netdev_rx *rx)
> +{
> + netdev_rx_assert_class(rx, &netdev_rx_bsd_class);
> + return CONTAINER_OF(rx, struct netdev_rx_bsd, up);
> +}
> +
> /* Initialize the AF_INET socket used for ioctl operations */
> static int
> netdev_bsd_init(void)
> @@ -299,6 +319,7 @@ netdev_bsd_create_system(const struct netdev_class *class, const char *name,
> netdev_dev = xzalloc(sizeof *netdev_dev);
> netdev_dev->change_seq = 1;
> netdev_dev_init(&netdev_dev->netdev_dev, name, class);
> + netdev_dev->tap_fd = -1;
> *netdev_devp = &netdev_dev->netdev_dev;
>
> return 0;
> @@ -388,10 +409,12 @@ netdev_bsd_destroy(struct netdev_dev *netdev_dev_)
>
> cache_notifier_unref();
>
> - if (netdev_dev->tap_fd >= 0 &&
> - !strcmp(netdev_dev_get_type(netdev_dev_), "tap")) {
> + if (netdev_dev->tap_fd >= 0) {
> destroy_tap(netdev_dev->tap_fd, netdev_dev_get_name(netdev_dev_));
> }
> + if (netdev_dev->pcap) {
> + pcap_close(netdev_dev->pcap);
> + }
> free(netdev_dev);
> }
>
> @@ -405,7 +428,6 @@ netdev_bsd_open_system(struct netdev_dev *netdev_dev_, struct netdev **netdevp)
>
> /* Allocate network device. */
> netdev = xcalloc(1, sizeof *netdev);
> - netdev->netdev_fd = -1;
> netdev_init(&netdev->netdev, netdev_dev_);
>
> /* Verify that the netdev really exists by attempting to read its flags */
> @@ -430,56 +452,35 @@ netdev_bsd_close(struct netdev *netdev_)
> {
> struct netdev_bsd *netdev = netdev_bsd_cast(netdev_);
>
> - if (netdev->netdev_fd >= 0 && strcmp(netdev_get_type(netdev_), "tap")) {
> - pcap_close(netdev->pcap_handle);
> - }
> -
> free(netdev);
> }
>
> static int
> -netdev_bsd_listen(struct netdev *netdev_)
> +netdev_bsd_open_pcap(const char *name, pcap_t **pcapp, int *fdp)
> {
> - struct netdev_bsd *netdev = netdev_bsd_cast(netdev_);
> - struct netdev_dev_bsd *netdev_dev =
> - netdev_dev_bsd_cast(netdev_get_dev(netdev_));
> -
> char errbuf[PCAP_ERRBUF_SIZE];
> - int error;
> - int fd = -1;
> + pcap_t *pcap = NULL;
> int one = 1;
> + int error;
> + int fd;
>
> - if (netdev->netdev_fd >= 0) {
> - return 0;
> - }
> -
> - if (!strcmp(netdev_get_type(netdev_), "tap") &&
> - !netdev_dev->tap_opened) {
> - netdev->netdev_fd = netdev_dev->tap_fd;
> - netdev_dev->tap_opened = true;
> - return 0;
> - }
> -
> - /* open the pcap device. The device is opened in non-promiscuous mode
> + /* Open the pcap device. The device is opened in non-promiscuous mode
> * because the interface flags are manually set by the caller. */
> errbuf[0] = '\0';
> - netdev->pcap_handle = pcap_open_live(netdev_get_name(netdev_), PCAP_SNAPLEN,
> - 0, 1000, errbuf);
> - if (netdev->pcap_handle == NULL) {
> - VLOG_ERR("%s: pcap_open_live failed: %s",
> - netdev_get_name(netdev_), errbuf);
> + pcap = pcap_open_live(name, PCAP_SNAPLEN, 0, 1000, errbuf);
> + if (!pcap) {
> + VLOG_ERR_RL(&rl, "%s: pcap_open_live failed: %s", name, errbuf);
> error = EIO;
> goto error;
> - } else if (errbuf[0] != '\0') {
> - VLOG_WARN("%s: pcap_open_live: %s",
> - netdev_get_name(netdev_), errbuf);
> + }
> + if (errbuf[0] != '\0') {
> + VLOG_WARN_RL(&rl, "%s: pcap_open_live: %s", name, errbuf);
> }
>
> - netdev_dev_bsd_changed(netdev_dev_bsd_cast(netdev_get_dev(netdev_)));
> -
> - /* initialize netdev->netdev_fd */
> - fd = pcap_get_selectable_fd(netdev->pcap_handle);
> + /* Get the underlying fd. */
> + fd = pcap_get_selectable_fd(pcap);
> if (fd == -1) {
> + VLOG_WARN_RL(&rl, "%s: no selectable file descriptor", name);
> error = errno;
> goto error;
> }
> @@ -487,39 +488,83 @@ netdev_bsd_listen(struct netdev *netdev_)
> /* Set non-blocking mode. Also the BIOCIMMEDIATE ioctl must be called
> * on the file descriptor returned by pcap_get_selectable_fd to achieve
> * a real non-blocking behaviour.*/
> - error = pcap_setnonblock(netdev->pcap_handle, 1, errbuf);
> + error = pcap_setnonblock(pcap, 1, errbuf);
> if (error == -1) {
> error = errno;
> goto error;
> }
>
> - /* This call assure that reads return immediately upon packet reception.
> - * Otherwise, a read will block until either the kernel buffer becomes
> - * full or a timeout occurs. */
> - if(ioctl(fd, BIOCIMMEDIATE, &one) < 0 ) {
> - VLOG_ERR("ioctl(BIOCIMMEDIATE) on %s device failed: %s",
> - netdev_get_name(netdev_), strerror(errno));
> + /* This call assure that reads return immediately upon packet
> + * reception. Otherwise, a read will block until either the kernel
> + * buffer becomes full or a timeout occurs. */
> + if (ioctl(fd, BIOCIMMEDIATE, &one) < 0 ) {
> + VLOG_ERR_RL(&rl, "ioctl(BIOCIMMEDIATE) on %s device failed: %s",
> + name, strerror(errno));
> error = errno;
> goto error;
> }
>
> - /* Capture only incoming packets */
> - error = pcap_setdirection(netdev->pcap_handle, PCAP_D_IN);
> + /* Capture only incoming packets. */
> + error = pcap_setdirection(pcap, PCAP_D_IN);
> if (error == -1) {
> error = errno;
> goto error;
> }
>
> - netdev->netdev_fd = fd;
> + *pcapp = pcap;
> + *fdp = fd;
> return 0;
>
> error:
> - if (fd >= 0) {
> - close(netdev->netdev_fd);
> + if (pcap) {
> + pcap_close(pcap);
> }
> + *pcapp = NULL;
> + *fdp = -1;
> return error;
> }
>
> +static int
> +netdev_bsd_rx_open(struct netdev *netdev_, struct netdev_rx **rxp)
> +{
> + struct netdev_dev_bsd *netdev_dev =
> + netdev_dev_bsd_cast(netdev_get_dev(netdev_));
> +
> + struct netdev_rx_bsd *rx;
> + pcap_t *pcap;
> + int fd;
> +
> + if (!strcmp(netdev_get_type(netdev_), "tap")) {
> + pcap = NULL;
> + fd = netdev_dev->tap_fd;
> + } else {
> + int error = netdev_bsd_open_pcap(netdev_get_name(netdev_), &pcap, &fd);
> + if (error) {
> + return error;
> + }
> +
> + netdev_dev_bsd_changed(netdev_dev);
> + }
> +
> + rx = xmalloc(sizeof *rx);
> + netdev_rx_init(&rx->up, netdev_get_dev(netdev_), &netdev_rx_bsd_class);
> + rx->pcap_handle = pcap;
> + rx->fd = fd;
> +
> + *rxp = &rx->up;
> + return 0;
> +}
> +
> +static void
> +netdev_rx_bsd_destroy(struct netdev_rx *rx_)
> +{
> + struct netdev_rx_bsd *rx = netdev_rx_bsd_cast(rx_);
> +
> + if (rx->pcap_handle) {
> + pcap_close(rx->pcap_handle);
> + }
> + free(rx);
> +}
>
> /* The recv callback of the netdev class returns the number of bytes of the
> * received packet.
> @@ -566,24 +611,20 @@ proc_pkt(u_char *args_, const struct pcap_pkthdr *hdr, const u_char *packet)
> * This function attempts to receive a packet from the specified network
> * device. It is assumed that the network device is a system device or a tap
> * device opened as a system one. In this case the read operation is performed
> - * on the 'netdev' pcap descriptor.
> + * from rx->pcap.
> */
> static int
> -netdev_bsd_recv_system(struct netdev_bsd *netdev, void *data, size_t size)
> +netdev_rx_bsd_recv_pcap(struct netdev_rx_bsd *rx, void *data, size_t size)
> {
> struct pcap_arg arg;
> int ret;
>
> - if (netdev->netdev_fd < 0) {
> - return -EAGAIN;
> - }
> -
> /* prepare the pcap argument to store the packet */
> arg.size = size;
> arg.data = data;
>
> for (;;) {
> - ret = pcap_dispatch(netdev->pcap_handle, 1, proc_pkt, (u_char *)&arg);
> + ret = pcap_dispatch(rx->pcap_handle, 1, proc_pkt, (u_char *)&arg);
>
> if (ret > 0) {
> return arg.retval; /* arg.retval < 0 is handled in the caller */
> @@ -600,25 +641,20 @@ netdev_bsd_recv_system(struct netdev_bsd *netdev, void *data, size_t size)
>
> /*
> * This function attempts to receive a packet from the specified network
> - * device. It is assumed that the network device is a tap device and the
> - * 'netdev_fd' member of the 'netdev' structure is initialized with the tap
> - * file descriptor.
> + * device. It is assumed that the network device is a tap device and
> + * 'rx->fd' is initialized with the tap file descriptor.
> */
> static int
> -netdev_bsd_recv_tap(struct netdev_bsd *netdev, void *data, size_t size)
> +netdev_rx_bsd_recv_tap(struct netdev_rx_bsd *rx, void *data, size_t size)
> {
> - if (netdev->netdev_fd < 0) {
> - return -EAGAIN;
> - }
> -
> for (;;) {
> - ssize_t retval = read(netdev->netdev_fd, data, size);
> + ssize_t retval = read(rx->fd, data, size);
> if (retval >= 0) {
> return retval;
> } else if (errno != EINTR) {
> if (errno != EAGAIN) {
> VLOG_WARN_RL(&rl, "error receiving Ethernet packet on %s: %s",
> - strerror(errno), netdev->netdev.netdev_dev->name);
> + strerror(errno), netdev_rx_get_name(&rx->up));
> }
> return -errno;
> }
> @@ -626,58 +662,39 @@ netdev_bsd_recv_tap(struct netdev_bsd *netdev, void *data, size_t size)
> }
>
>
> -/*
> - * According with the nature of the device a different function must be called.
> - * If the device is the bridge local port the 'netdev_bsd_recv_tap' function
> - * must be called, otherwise the 'netdev_bsd_recv_system' function is called.
> - *
> - * type!="tap" ---> system device.
> - * type=="tap" && netdev_fd == tap_fd ---> internal tap device
> - * type=="tap" && netdev_fd != tap_fd ---> internal tap device
> - * opened as a system
> - * device.
> - */
> static int
> -netdev_bsd_recv(struct netdev *netdev_, void* data, size_t size)
> +netdev_rx_bsd_recv(struct netdev_rx *rx_, void *data, size_t size)
> {
> - struct netdev_bsd *netdev = netdev_bsd_cast(netdev_);
> - struct netdev_dev_bsd * netdev_dev =
> - netdev_dev_bsd_cast(netdev_get_dev(netdev_));
> + struct netdev_rx_bsd *rx = netdev_rx_bsd_cast(rx_);
>
> - if (!strcmp(netdev_get_type(netdev_), "tap") &&
> - netdev->netdev_fd == netdev_dev->tap_fd) {
> - return netdev_bsd_recv_tap(netdev, data, size);
> - } else {
> - return netdev_bsd_recv_system(netdev, data, size);
> - }
> + return (rx->pcap_handle
> + ? netdev_rx_bsd_recv_pcap(rx, data, size)
> + : netdev_rx_bsd_recv_tap(rx, data, size));
> }
>
> -
> /*
> * Registers with the poll loop to wake up from the next call to poll_block()
> - * when a packet is ready to be received with netdev_recv() on 'netdev'.
> + * when a packet is ready to be received with netdev_rx_recv() on 'rx'.
> */
> static void
> -netdev_bsd_recv_wait(struct netdev *netdev_)
> +netdev_rx_bsd_wait(struct netdev_rx *rx_)
> {
> - struct netdev_bsd *netdev = netdev_bsd_cast(netdev_);
> + struct netdev_rx_bsd *rx = netdev_rx_bsd_cast(rx_);
>
> - if (netdev->netdev_fd >= 0) {
> - poll_fd_wait(netdev->netdev_fd, POLLIN);
> - }
> + poll_fd_wait(rx->fd, POLLIN);
> }
>
> -/* Discards all packets waiting to be received from 'netdev'. */
> +/* Discards all packets waiting to be received from 'rx'. */
> static int
> -netdev_bsd_drain(struct netdev *netdev_)
> +netdev_rx_bsd_drain(struct netdev_rx *rx_)
> {
> struct ifreq ifr;
> - struct netdev_bsd *netdev = netdev_bsd_cast(netdev_);
> + struct netdev_rx_bsd *rx = netdev_rx_bsd_cast(rx_);
>
> - strcpy(ifr.ifr_name, netdev_get_name(netdev_));
> - if (ioctl(netdev->netdev_fd, BIOCFLUSH, &ifr) == -1) {
> + strcpy(ifr.ifr_name, netdev_rx_get_name(rx_));
> + if (ioctl(rx->fd, BIOCFLUSH, &ifr) == -1) {
> VLOG_DBG_RL(&rl, "%s: ioctl(BIOCFLUSH) failed: %s",
> - netdev_get_name(netdev_), strerror(errno));
> + netdev_rx_get_name(rx_), strerror(errno));
> return errno;
> }
> return 0;
> @@ -690,34 +707,34 @@ netdev_bsd_drain(struct netdev *netdev_)
> static int
> netdev_bsd_send(struct netdev *netdev_, const void *data, size_t size)
> {
> - struct netdev_bsd *netdev = netdev_bsd_cast(netdev_);
> - struct netdev_dev_bsd * netdev_dev =
> - netdev_dev_bsd_cast(netdev_get_dev(netdev_));
> + struct netdev_dev_bsd *dev = netdev_dev_bsd_cast(netdev_get_dev(netdev_));
> + const char *name = netdev_get_name(netdev_);
>
> - if (netdev->netdev_fd < 0) {
> - return EPIPE;
> + if (dev->tap_fd < 0 && !dev->pcap) {
> + int error = netdev_bsd_open_pcap(name, &dev->pcap, &dev->fd);
> + if (error) {
> + return error;
> + }
> }
>
> for (;;) {
> ssize_t retval;
> - if (!strcmp(netdev_get_type(netdev_), "tap") &&
> - netdev_dev->tap_fd == netdev->netdev_fd) {
> - retval = write(netdev->netdev_fd, data, size);
> + if (dev->tap_fd >= 0) {
> + retval = write(dev->tap_fd, data, size);
> } else {
> - retval = pcap_inject(netdev->pcap_handle, data, size);
> + retval = pcap_inject(dev->pcap, data, size);
> }
> if (retval < 0) {
> if (errno == EINTR) {
> continue;
> } else if (errno != EAGAIN) {
> VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s",
> - netdev_get_name(netdev_), strerror(errno));
> + name, strerror(errno));
> }
> return errno;
> } else if (retval != size) {
> VLOG_WARN_RL(&rl, "sent partial Ethernet packet (%zd bytes of "
> - "%zu) on %s", retval, size,
> - netdev_get_name(netdev_));
> + "%zu) on %s", retval, size, name);
> return EMSGSIZE;
> } else {
> return 0;
> @@ -733,17 +750,16 @@ netdev_bsd_send(struct netdev *netdev_, const void *data, size_t size)
> static void
> netdev_bsd_send_wait(struct netdev *netdev_)
> {
> - struct netdev_bsd *netdev = netdev_bsd_cast(netdev_);
> -
> - if (netdev->netdev_fd < 0) { /* Nothing to do. */
> - return;
> - }
> + struct netdev_dev_bsd *dev = netdev_dev_bsd_cast(netdev_get_dev(netdev_));
>
> - if (strcmp(netdev_get_type(netdev_), "tap")) {
> - poll_fd_wait(netdev->netdev_fd, POLLOUT);
> - } else {
> + if (dev->tap_fd >= 0) {
> /* TAP device always accepts packets. */
> poll_immediate_wake();
> + } else if (dev->pcap) {
> + poll_fd_wait(dev->fd, POLLOUT);
> + } else {
> + /* We haven't even tried to send a packet yet. */
> + poll_immediate_wake();
> }
> }
>
> @@ -1263,11 +1279,7 @@ const struct netdev_class netdev_bsd_class = {
> netdev_bsd_open_system,
> netdev_bsd_close,
>
> - netdev_bsd_listen,
> -
> - netdev_bsd_recv,
> - netdev_bsd_recv_wait,
> - netdev_bsd_drain,
> + netdev_bsd_rx_open,
>
> netdev_bsd_send,
> netdev_bsd_send_wait,
> @@ -1324,11 +1336,7 @@ const struct netdev_class netdev_tap_class = {
> netdev_bsd_open_system,
> netdev_bsd_close,
>
> - netdev_bsd_listen,
> -
> - netdev_bsd_recv,
> - netdev_bsd_recv_wait,
> - netdev_bsd_drain,
> + netdev_bsd_rx_open,
>
> netdev_bsd_send,
> netdev_bsd_send_wait,
> @@ -1370,6 +1378,13 @@ const struct netdev_class netdev_tap_class = {
>
> netdev_bsd_change_seq
> };
> +
> +static const struct netdev_rx_class netdev_rx_bsd_class = {
> + netdev_rx_bsd_destroy,
> + netdev_rx_bsd_recv,
> + netdev_rx_bsd_wait,
> + netdev_rx_bsd_drain,
> +};
>
>
> static void
> diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
> index de04f9a..3762d5c 100644
> --- a/lib/netdev-dummy.c
> +++ b/lib/netdev-dummy.c
> @@ -49,20 +49,25 @@ struct netdev_dev_dummy {
> struct netdev_stats stats;
> enum netdev_flags flags;
> unsigned int change_seq;
> -
> - struct list devs; /* List of child "netdev_dummy"s. */
> int ifindex;
> +
> + struct list rxes; /* List of child "netdev_rx_dummy"s. */
> };
>
> struct netdev_dummy {
> struct netdev netdev;
> - struct list node; /* In netdev_dev_dummy's "devs" list. */
> +};
> +
> +struct netdev_rx_dummy {
> + struct netdev_rx up;
> + struct list node; /* In netdev_dev_dummy's "rxes" list. */
> struct list recv_queue;
> - bool listening;
> };
>
> static struct shash dummy_netdev_devs = SHASH_INITIALIZER(&dummy_netdev_devs);
>
> +static const struct netdev_rx_class netdev_rx_dummy_class;
> +
> static unixctl_cb_func netdev_dummy_set_admin_state;
> static int netdev_dummy_create(const struct netdev_class *, const char *,
> struct netdev_dev **);
> @@ -93,6 +98,13 @@ netdev_dummy_cast(const struct netdev *netdev)
> return CONTAINER_OF(netdev, struct netdev_dummy, netdev);
> }
>
> +static struct netdev_rx_dummy *
> +netdev_rx_dummy_cast(const struct netdev_rx *rx)
> +{
> + netdev_rx_assert_class(rx, &netdev_rx_dummy_class);
> + return CONTAINER_OF(rx, struct netdev_rx_dummy, up);
> +}
> +
> static int
> netdev_dummy_create(const struct netdev_class *class, const char *name,
> struct netdev_dev **netdev_devp)
> @@ -112,7 +124,7 @@ netdev_dummy_create(const struct netdev_class *class, const char *name,
> netdev_dev->flags = 0;
> netdev_dev->change_seq = 1;
> netdev_dev->ifindex = -EOPNOTSUPP;
> - list_init(&netdev_dev->devs);
> + list_init(&netdev_dev->rxes);
>
> shash_add(&dummy_netdev_devs, name, netdev_dev);
>
> @@ -157,16 +169,12 @@ netdev_dummy_set_config(struct netdev_dev *netdev_dev_,
> static int
> netdev_dummy_open(struct netdev_dev *netdev_dev_, struct netdev **netdevp)
> {
> - struct netdev_dev_dummy *netdev_dev = netdev_dev_dummy_cast(netdev_dev_);
> struct netdev_dummy *netdev;
>
> netdev = xmalloc(sizeof *netdev);
> netdev_init(&netdev->netdev, netdev_dev_);
> - list_init(&netdev->recv_queue);
> - netdev->listening = false;
>
> *netdevp = &netdev->netdev;
> - list_push_back(&netdev_dev->devs, &netdev->node);
> return 0;
> }
>
> @@ -174,31 +182,37 @@ static void
> netdev_dummy_close(struct netdev *netdev_)
> {
> struct netdev_dummy *netdev = netdev_dummy_cast(netdev_);
> - list_remove(&netdev->node);
> - ofpbuf_list_delete(&netdev->recv_queue);
> free(netdev);
> }
>
> static int
> -netdev_dummy_listen(struct netdev *netdev_)
> +netdev_dummy_rx_open(struct netdev *netdev_, struct netdev_rx **rxp)
> {
> - struct netdev_dummy *netdev = netdev_dummy_cast(netdev_);
> - netdev->listening = true;
> + struct netdev_dev_dummy *dev
> + = netdev_dev_dummy_cast(netdev_get_dev(netdev_));
> + struct netdev_rx_dummy *rx;
> +
> + rx = xmalloc(sizeof *rx);
> + netdev_rx_init(&rx->up, &dev->netdev_dev, &netdev_rx_dummy_class);
> + list_push_back(&dev->rxes, &rx->node);
> + list_init(&rx->recv_queue);
> +
> + *rxp = &rx->up;
> return 0;
> }
>
> static int
> -netdev_dummy_recv(struct netdev *netdev_, void *buffer, size_t size)
> +netdev_rx_dummy_recv(struct netdev_rx *rx_, void *buffer, size_t size)
> {
> - struct netdev_dummy *netdev = netdev_dummy_cast(netdev_);
> + struct netdev_rx_dummy *rx = netdev_rx_dummy_cast(rx_);
> struct ofpbuf *packet;
> size_t packet_size;
>
> - if (list_is_empty(&netdev->recv_queue)) {
> + if (list_is_empty(&rx->recv_queue)) {
> return -EAGAIN;
> }
>
> - packet = ofpbuf_from_list(list_pop_front(&netdev->recv_queue));
> + packet = ofpbuf_from_list(list_pop_front(&rx->recv_queue));
> if (packet->size > size) {
> return -EMSGSIZE;
> }
> @@ -211,19 +225,28 @@ netdev_dummy_recv(struct netdev *netdev_, void *buffer, size_t size)
> }
>
> static void
> -netdev_dummy_recv_wait(struct netdev *netdev_)
> +netdev_rx_dummy_destroy(struct netdev_rx *rx_)
> {
> - struct netdev_dummy *netdev = netdev_dummy_cast(netdev_);
> - if (!list_is_empty(&netdev->recv_queue)) {
> + struct netdev_rx_dummy *rx = netdev_rx_dummy_cast(rx_);
> + list_remove(&rx->node);
> + ofpbuf_list_delete(&rx->recv_queue);
> + free(rx);
> +}
> +
> +static void
> +netdev_rx_dummy_wait(struct netdev_rx *rx_)
> +{
> + struct netdev_rx_dummy *rx = netdev_rx_dummy_cast(rx_);
> + if (!list_is_empty(&rx->recv_queue)) {
> poll_immediate_wake();
> }
> }
>
> static int
> -netdev_dummy_drain(struct netdev *netdev_)
> +netdev_rx_dummy_drain(struct netdev_rx *rx_)
> {
> - struct netdev_dummy *netdev = netdev_dummy_cast(netdev_);
> - ofpbuf_list_delete(&netdev->recv_queue);
> + struct netdev_rx_dummy *rx = netdev_rx_dummy_cast(rx_);
> + ofpbuf_list_delete(&rx->recv_queue);
> return 0;
> }
>
> @@ -375,10 +398,7 @@ static const struct netdev_class dummy_class = {
> netdev_dummy_open,
> netdev_dummy_close,
>
> - netdev_dummy_listen,
> - netdev_dummy_recv,
> - netdev_dummy_recv_wait,
> - netdev_dummy_drain,
> + netdev_dummy_rx_open,
>
> netdev_dummy_send, /* send */
> NULL, /* send_wait */
> @@ -422,6 +442,13 @@ static const struct netdev_class dummy_class = {
> netdev_dummy_change_seq
> };
>
> +static const struct netdev_rx_class netdev_rx_dummy_class = {
> + netdev_rx_dummy_destroy,
> + netdev_rx_dummy_recv,
> + netdev_rx_dummy_wait,
> + netdev_rx_dummy_drain,
> +};
> +
> static struct ofpbuf *
> eth_from_packet_or_flow(const char *s)
> {
> @@ -478,7 +505,7 @@ netdev_dummy_receive(struct unixctl_conn *conn,
>
> n_listeners = 0;
> for (i = 2; i < argc; i++) {
> - struct netdev_dummy *dev;
> + struct netdev_rx_dummy *rx;
> struct ofpbuf *packet;
>
> packet = eth_from_packet_or_flow(argv[i]);
> @@ -491,12 +518,10 @@ netdev_dummy_receive(struct unixctl_conn *conn,
> dummy_dev->stats.rx_bytes += packet->size;
>
> n_listeners = 0;
> - LIST_FOR_EACH (dev, node, &dummy_dev->devs) {
> - if (dev->listening) {
> - struct ofpbuf *copy = ofpbuf_clone(packet);
> - list_push_back(&dev->recv_queue, &copy->list_node);
> - n_listeners++;
> - }
> + LIST_FOR_EACH (rx, node, &dummy_dev->rxes) {
> + struct ofpbuf *copy = ofpbuf_clone(packet);
> + list_push_back(&rx->recv_queue, &copy->list_node);
> + n_listeners++;
> }
> ofpbuf_delete(packet);
> }
> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> index 30cd0f6..2e6fedd 100644
> --- a/lib/netdev-linux.c
> +++ b/lib/netdev-linux.c
> @@ -122,7 +122,6 @@ enum {
>
> struct tap_state {
> int fd;
> - bool opened;
> };
>
> /* Traffic control. */
> @@ -400,9 +399,16 @@ struct netdev_dev_linux {
>
> struct netdev_linux {
> struct netdev netdev;
> +};
> +
> +struct netdev_rx_linux {
> + struct netdev_rx up;
> + bool is_tap;
> int fd;
> };
>
> +static const struct netdev_rx_class netdev_rx_linux_class;
> +
> /* Sockets used for ioctl operations. */
> static int af_inet_sock = -1; /* AF_INET, SOCK_DGRAM. */
>
> @@ -442,6 +448,12 @@ is_netdev_linux_class(const struct netdev_class *netdev_class)
> return netdev_class->init == netdev_linux_init;
> }
>
> +static bool
> +is_tap_netdev(const struct netdev *netdev)
> +{
> + return netdev_dev_get_class(netdev_get_dev(netdev)) == &netdev_tap_class;
> +}
> +
> static struct netdev_dev_linux *
> netdev_dev_linux_cast(const struct netdev_dev *netdev_dev)
> {
> @@ -460,6 +472,13 @@ netdev_linux_cast(const struct netdev *netdev)
>
> return CONTAINER_OF(netdev, struct netdev_linux, netdev);
> }
> +
> +static struct netdev_rx_linux *
> +netdev_rx_linux_cast(const struct netdev_rx *rx)
> +{
> + netdev_rx_assert_class(rx, &netdev_rx_linux_class);
> + return CONTAINER_OF(rx, struct netdev_rx_linux, up);
> +}
>
> static int
> netdev_linux_init(void)
> @@ -729,7 +748,6 @@ netdev_linux_open(struct netdev_dev *netdev_dev_, struct netdev **netdevp)
>
> /* Allocate network device. */
> netdev = xzalloc(sizeof *netdev);
> - netdev->fd = -1;
> netdev_init(&netdev->netdev, netdev_dev_);
>
> /* Verify that the device really exists, by attempting to read its flags.
> @@ -761,67 +779,65 @@ netdev_linux_close(struct netdev *netdev_)
> {
> struct netdev_linux *netdev = netdev_linux_cast(netdev_);
>
> - if (netdev->fd > 0 && strcmp(netdev_get_type(netdev_), "tap")) {
> - close(netdev->fd);
> - }
> free(netdev);
> }
>
> static int
> -netdev_linux_listen(struct netdev *netdev_)
> +netdev_linux_rx_open(struct netdev *netdev_, struct netdev_rx **rxp)
> {
> struct netdev_linux *netdev = netdev_linux_cast(netdev_);
> struct netdev_dev_linux *netdev_dev =
> netdev_dev_linux_cast(netdev_get_dev(netdev_));
> - struct sockaddr_ll sll;
> - int ifindex;
> + bool is_tap = is_tap_netdev(netdev_);
> + struct netdev_rx_linux *rx;
> int error;
> int fd;
>
> - if (netdev->fd >= 0) {
> - return 0;
> - }
> + if (is_tap) {
> + fd = netdev_dev->state.tap.fd;
> + } else {
> + struct sockaddr_ll sll;
> + int ifindex;
>
> - if (!strcmp(netdev_get_type(netdev_), "tap")
> - && !netdev_dev->state.tap.opened) {
> - netdev->fd = netdev_dev->state.tap.fd;
> - netdev_dev->state.tap.opened = true;
> - return 0;
> - }
> + /* Create file descriptor. */
> + fd = socket(PF_PACKET, SOCK_RAW, 0);
> + if (fd < 0) {
> + error = errno;
> + VLOG_ERR("failed to create raw socket (%s)", strerror(error));
> + goto error;
> + }
>
> - /* Create file descriptor. */
> - fd = socket(PF_PACKET, SOCK_RAW, 0);
> - if (fd < 0) {
> - error = errno;
> - VLOG_ERR("failed to create raw socket (%s)", strerror(error));
> - goto error;
> - }
> + /* Set non-blocking mode. */
> + error = set_nonblocking(fd);
> + if (error) {
> + goto error;
> + }
>
> - /* Set non-blocking mode. */
> - error = set_nonblocking(fd);
> - if (error) {
> - goto error;
> - }
> + /* Get ethernet device index. */
> + error = get_ifindex(&netdev->netdev, &ifindex);
> + if (error) {
> + goto error;
> + }
>
> - /* Get ethernet device index. */
> - error = get_ifindex(&netdev->netdev, &ifindex);
> - if (error) {
> - goto error;
> + /* Bind to specific ethernet device. */
> + memset(&sll, 0, sizeof sll);
> + sll.sll_family = AF_PACKET;
> + sll.sll_ifindex = ifindex;
> + sll.sll_protocol = (OVS_FORCE unsigned short int) htons(ETH_P_ALL);
> + if (bind(fd, (struct sockaddr *) &sll, sizeof sll) < 0) {
> + error = errno;
> + VLOG_ERR("%s: failed to bind raw socket (%s)",
> + netdev_get_name(netdev_), strerror(error));
> + goto error;
> + }
> }
>
> - /* Bind to specific ethernet device. */
> - memset(&sll, 0, sizeof sll);
> - sll.sll_family = AF_PACKET;
> - sll.sll_ifindex = ifindex;
> - sll.sll_protocol = (OVS_FORCE unsigned short int) htons(ETH_P_ALL);
> - if (bind(fd, (struct sockaddr *) &sll, sizeof sll) < 0) {
> - error = errno;
> - VLOG_ERR("%s: failed to bind raw socket (%s)",
> - netdev_get_name(netdev_), strerror(error));
> - goto error;
> - }
> + rx = xmalloc(sizeof *rx);
> + netdev_rx_init(&rx->up, netdev_get_dev(netdev_), &netdev_rx_linux_class);
> + rx->is_tap = is_tap;
> + rx->fd = fd;
>
> - netdev->fd = fd;
> + *rxp = &rx->up;
> return 0;
>
> error:
> @@ -831,63 +847,64 @@ error:
> return error;
> }
>
> -static int
> -netdev_linux_recv(struct netdev *netdev_, void *data, size_t size)
> +static void
> +netdev_rx_linux_destroy(struct netdev_rx *rx_)
> {
> - struct netdev_linux *netdev = netdev_linux_cast(netdev_);
> + struct netdev_rx_linux *rx = netdev_rx_linux_cast(rx_);
>
> - if (netdev->fd < 0) {
> - /* Device is not listening. */
> - return -EAGAIN;
> + if (!rx->is_tap) {
> + close(rx->fd);
> }
> + free(rx);
> +}
>
> - for (;;) {
> - ssize_t retval;
> +static int
> +netdev_rx_linux_recv(struct netdev_rx *rx_, void *data, size_t size)
> +{
> + struct netdev_rx_linux *rx = netdev_rx_linux_cast(rx_);
> + ssize_t retval;
>
> - retval = (netdev_->netdev_dev->netdev_class == &netdev_tap_class
> - ? read(netdev->fd, data, size)
> - : recv(netdev->fd, data, size, MSG_TRUNC));
> - if (retval >= 0) {
> - return retval <= size ? retval : -EMSGSIZE;
> - } else if (errno != EINTR) {
> - if (errno != EAGAIN) {
> - VLOG_WARN_RL(&rl, "error receiving Ethernet packet on %s: %s",
> - strerror(errno), netdev_get_name(netdev_));
> - }
> - return -errno;
> + do {
> + retval = (rx->is_tap
> + ? read(rx->fd, data, size)
> + : recv(rx->fd, data, size, MSG_TRUNC));
> + } while (retval < 0 && errno == EINTR);
> +
> + if (retval > size) {
> + return -EMSGSIZE;
> + } else if (retval >= 0) {
> + return retval;
> + } else {
> + if (errno != EAGAIN) {
> + VLOG_WARN_RL(&rl, "error receiving Ethernet packet on %s: %s",
> + netdev_rx_get_name(rx_), strerror(errno));
> }
> + return -errno;
> }
> }
>
> -/* Registers with the poll loop to wake up from the next call to poll_block()
> - * when a packet is ready to be received with netdev_recv() on 'netdev'. */
> static void
> -netdev_linux_recv_wait(struct netdev *netdev_)
> +netdev_rx_linux_wait(struct netdev_rx *rx_)
> {
> - struct netdev_linux *netdev = netdev_linux_cast(netdev_);
> - if (netdev->fd >= 0) {
> - poll_fd_wait(netdev->fd, POLLIN);
> - }
> + struct netdev_rx_linux *rx = netdev_rx_linux_cast(rx_);
> + poll_fd_wait(rx->fd, POLLIN);
> }
>
> -/* Discards all packets waiting to be received from 'netdev'. */
> static int
> -netdev_linux_drain(struct netdev *netdev_)
> +netdev_rx_linux_drain(struct netdev_rx *rx_)
> {
> - struct netdev_linux *netdev = netdev_linux_cast(netdev_);
> - if (netdev->fd < 0) {
> - return 0;
> - } else if (!strcmp(netdev_get_type(netdev_), "tap")) {
> + struct netdev_rx_linux *rx = netdev_rx_linux_cast(rx_);
> + if (rx->is_tap) {
> struct ifreq ifr;
> - int error = netdev_linux_do_ioctl(netdev_get_name(netdev_), &ifr,
> + int error = netdev_linux_do_ioctl(netdev_rx_get_name(rx_), &ifr,
> SIOCGIFTXQLEN, "SIOCGIFTXQLEN");
> if (error) {
> return error;
> }
> - drain_fd(netdev->fd, ifr.ifr_qlen);
> + drain_fd(rx->fd, ifr.ifr_qlen);
> return 0;
> } else {
> - return drain_rcvbuf(netdev->fd);
> + return drain_rcvbuf(rx->fd);
> }
> }
>
> @@ -903,11 +920,10 @@ netdev_linux_drain(struct netdev *netdev_)
> static int
> netdev_linux_send(struct netdev *netdev_, const void *data, size_t size)
> {
> - struct netdev_linux *netdev = netdev_linux_cast(netdev_);
> for (;;) {
> ssize_t retval;
>
> - if (netdev->fd < 0) {
> + if (!is_tap_netdev(netdev_)) {
> /* Use our AF_PACKET socket to send to this device. */
> struct sockaddr_ll sll;
> struct msghdr msg;
> @@ -945,11 +961,14 @@ netdev_linux_send(struct netdev *netdev_, const void *data, size_t size)
>
> retval = sendmsg(sock, &msg, 0);
> } else {
> - /* Use the netdev's own fd to send to this device. This is
> - * essential for tap devices, because packets sent to a tap device
> - * with an AF_PACKET socket will loop back to be *received* again
> - * on the tap device. */
> - retval = write(netdev->fd, data, size);
> + /* Use the tap fd to send to this device. This is essential for
> + * tap devices, because packets sent to a tap device with an
> + * AF_PACKET socket will loop back to be *received* again on the
> + * tap device. */
> + struct netdev_dev_linux *dev
> + = netdev_dev_linux_cast(netdev_get_dev(netdev_));
> +
> + retval = write(dev->state.tap.fd, data, size);
> }
>
> if (retval < 0) {
> @@ -983,14 +1002,9 @@ netdev_linux_send(struct netdev *netdev_, const void *data, size_t size)
> * expected to do additional queuing of packets. Thus, this function is
> * unlikely to ever be used. It is included for completeness. */
> static void
> -netdev_linux_send_wait(struct netdev *netdev_)
> +netdev_linux_send_wait(struct netdev *netdev)
> {
> - struct netdev_linux *netdev = netdev_linux_cast(netdev_);
> - if (netdev->fd < 0) {
> - /* Nothing to do. */
> - } else if (strcmp(netdev_get_type(netdev_), "tap")) {
> - poll_fd_wait(netdev->fd, POLLOUT);
> - } else {
> + if (is_tap_netdev(netdev)) {
> /* TAP device always accepts packets.*/
> poll_immediate_wake();
> }
> @@ -1018,7 +1032,7 @@ netdev_linux_set_etheraddr(struct netdev *netdev_,
> }
>
> /* Tap devices must be brought down before setting the address. */
> - if (!strcmp(netdev_get_type(netdev_), "tap")) {
> + if (is_tap_netdev(netdev_)) {
> enum netdev_flags flags;
>
> if (!netdev_get_flags(netdev_, &flags) && (flags & NETDEV_UP)) {
> @@ -2489,10 +2503,7 @@ netdev_linux_change_seq(const struct netdev *netdev)
> netdev_linux_open, \
> netdev_linux_close, \
> \
> - netdev_linux_listen, \
> - netdev_linux_recv, \
> - netdev_linux_recv_wait, \
> - netdev_linux_drain, \
> + netdev_linux_rx_open, \
> \
> netdev_linux_send, \
> netdev_linux_send_wait, \
> @@ -2562,6 +2573,13 @@ const struct netdev_class netdev_internal_class =
> netdev_internal_set_stats,
> NULL, /* get_features */
> netdev_internal_get_status);
> +
> +static const struct netdev_rx_class netdev_rx_linux_class = {
> + netdev_rx_linux_destroy,
> + netdev_rx_linux_recv,
> + netdev_rx_linux_wait,
> + netdev_rx_linux_drain,
> +};
>
> /* HTB traffic control class. */
>
> diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
> index 00799b1..bfdcd30 100644
> --- a/lib/netdev-provider.h
> +++ b/lib/netdev-provider.h
> @@ -148,58 +148,19 @@ struct netdev_class {
>
> /* Closes 'netdev'. */
> void (*close)(struct netdev *netdev);
> -
> -/* ## ----------------- ## */
> -/* ## Receiving Packets ## */
> -/* ## ----------------- ## */
> -
> -/* The network provider interface is mostly used for inspecting and configuring
> - * device "metadata", not for sending and receiving packets directly. It may
> - * be impractical to implement these functions on some operating systems and
> - * hardware. These functions may all be NULL in such cases.
> - *
> - * (However, the "dpif-netdev" implementation, which is the easiest way to
> - * integrate Open vSwitch with a new operating system or hardware, does require
> - * the ability to receive packets.) */
>
> - /* Attempts to set up 'netdev' for receiving packets with ->recv().
> - * Returns 0 if successful, otherwise a positive errno value. Return
> + /* Attempts to open a netdev_rx for receiving packets from 'netdev'.
> + * Returns 0 if successful, otherwise a positive errno value. Returns
> * EOPNOTSUPP to indicate that the network device does not implement packet
> * reception through this interface. This function may be set to null if
> * it would always return EOPNOTSUPP anyhow. (This will prevent the
> * network device from being usefully used by the netdev-based "userspace
> - * datapath".)*/
> - int (*listen)(struct netdev *netdev);
> -
> - /* Attempts to receive a packet from 'netdev' into the 'size' bytes in
> - * 'buffer'. If successful, returns the number of bytes in the received
> - * packet, otherwise a negative errno value. Returns -EAGAIN immediately
> - * if no packet is ready to be received.
> - *
> - * Returns -EMSGSIZE, and discards the packet, if the received packet is
> - * longer than 'size' bytes.
> - *
> - * This function can only be expected to return a packet if ->listen() has
> - * been called successfully.
> - *
> - * May be null if not needed, such as for a network device that does not
> - * implement packet reception through the 'recv' member function. */
> - int (*recv)(struct netdev *netdev, void *buffer, size_t size);
> -
> - /* Registers with the poll loop to wake up from the next call to
> - * poll_block() when a packet is ready to be received with netdev_recv() on
> - * 'netdev'.
> + * datapath".)
> *
> - * May be null if not needed, such as for a network device that does not
> - * implement packet reception through the 'recv' member function. */
> - void (*recv_wait)(struct netdev *netdev);
> + * On success, the implementation must set '*rxp' to a 'netdev_rx' for
> + * 'netdev' that it has already initialized (with netdev_rx_init()). */
> + int (*rx_open)(struct netdev *netdev, struct netdev_rx **rxp);
>
> - /* Discards all packets waiting to be received from 'netdev'.
> - *
> - * May be null if not needed, such as for a network device that does not
> - * implement packet reception through the 'recv' member function. */
> - int (*drain)(struct netdev *netdev);
> -
> /* Sends the 'size'-byte packet in 'buffer' on 'netdev'. Returns 0 if
> * successful, otherwise a positive errno value. Returns EAGAIN without
> * blocking if the packet cannot be queued immediately. Returns EMSGSIZE
> @@ -591,6 +552,48 @@ struct netdev_class {
> * change, although implementations should try to avoid this. */
> unsigned int (*change_seq)(const struct netdev *netdev);
> };
> +
> +/* A data structure for capturing packets received by a network device.
> + *
> + * This structure should be treated as opaque by network device
> + * implementations. */
> +struct netdev_rx {
> + const struct netdev_rx_class *rx_class;
> + struct netdev_dev *netdev_dev;
> +};
> +
> +void netdev_rx_init(struct netdev_rx *, struct netdev_dev *,
> + const struct netdev_rx_class *);
> +void netdev_rx_uninit(struct netdev_rx *);
> +struct netdev_dev *netdev_rx_get_dev(const struct netdev_rx *);
> +
> +struct netdev_rx_class {
> + /* Destroys 'rx'. */
> + void (*destroy)(struct netdev_rx *rx);
> +
> + /* Attempts to receive a packet from 'rx' into the 'size' bytes in
> + * 'buffer'. If successful, returns the number of bytes in the received
> + * packet, otherwise a negative errno value. Returns -EAGAIN immediately
> + * if no packet is ready to be received.
> + *
> + * Must return -EMSGSIZE, and discard the packet, if the received packet
> + * is longer than 'size' bytes. */
> + int (*recv)(struct netdev_rx *rx, void *buffer, size_t size);
> +
> + /* Registers with the poll loop to wake up from the next call to
> + * poll_block() when a packet is ready to be received with netdev_rx_recv()
> + * on 'rx'. */
> + void (*wait)(struct netdev_rx *rx);
> +
> + /* Discards all packets waiting to be received from 'rx'. */
> + int (*drain)(struct netdev_rx *rx);
> +};
> +
> +static inline void netdev_rx_assert_class(const struct netdev_rx *rx,
> + const struct netdev_rx_class *class_)
> +{
> + ovs_assert(rx->rx_class == class_);
> +}
>
> int netdev_register_provider(const struct netdev_class *);
> int netdev_unregister_provider(const char *type);
> diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
> index 0711731..c1c3cbb 100644
> --- a/lib/netdev-vport.c
> +++ b/lib/netdev-vport.c
> @@ -644,10 +644,7 @@ get_stats(const struct netdev *netdev, struct netdev_stats *stats)
> netdev_vport_open, \
> netdev_vport_close, \
> \
> - NULL, /* listen */ \
> - NULL, /* recv */ \
> - NULL, /* recv_wait */ \
> - NULL, /* drain */ \
> + NULL, /* rx_open */ \
> \
> NULL, /* send */ \
> NULL, /* send_wait */ \
> diff --git a/lib/netdev.c b/lib/netdev.c
> index 415cdb4..aa0e012 100644
> --- a/lib/netdev.c
> +++ b/lib/netdev.c
> @@ -348,49 +348,44 @@ netdev_parse_name(const char *netdev_name_, char **name, char **type)
> }
> }
>
> -/* Attempts to set up 'netdev' for receiving packets with netdev_recv().
> - * Returns 0 if successful, otherwise a positive errno value. EOPNOTSUPP
> - * indicates that the network device does not implement packet reception
> - * through this interface. */
> int
> -netdev_listen(struct netdev *netdev)
> +netdev_rx_open(struct netdev *netdev, struct netdev_rx **rxp)
> {
> - int (*listen)(struct netdev *);
> + struct netdev_dev *dev = netdev_get_dev(netdev);
> + int error;
>
> - listen = netdev_get_dev(netdev)->netdev_class->listen;
> - return listen ? (listen)(netdev) : EOPNOTSUPP;
> + error = (dev->netdev_class->rx_open
> + ? dev->netdev_class->rx_open(netdev, rxp)
> + : EOPNOTSUPP);
> + if (!error) {
> + ovs_assert((*rxp)->netdev_dev == dev);
> + dev->ref_cnt++;
> + } else {
> + *rxp = NULL;
> + }
> + return error;
> +}
> +
> +void
> +netdev_rx_close(struct netdev_rx *rx)
> +{
> + if (rx) {
> + struct netdev_dev *dev = rx->netdev_dev;
> +
> + rx->rx_class->destroy(rx);
> + netdev_dev_unref(dev);
> + }
> }
>
> -/* Attempts to receive a packet from 'netdev' into 'buffer', which the caller
> - * must have initialized with sufficient room for the packet. The space
> - * required to receive any packet is ETH_HEADER_LEN bytes, plus VLAN_HEADER_LEN
> - * bytes, plus the device's MTU (which may be retrieved via netdev_get_mtu()).
> - * (Some devices do not allow for a VLAN header, in which case VLAN_HEADER_LEN
> - * need not be included.)
> - *
> - * This function can only be expected to return a packet if ->listen() has
> - * been called successfully.
> - *
> - * If a packet is successfully retrieved, returns 0. In this case 'buffer' is
> - * guaranteed to contain at least ETH_TOTAL_MIN bytes. Otherwise, returns a
> - * positive errno value. Returns EAGAIN immediately if no packet is ready to
> - * be returned.
> - *
> - * Some network devices may not implement support for this function. In such
> - * cases this function will always return EOPNOTSUPP. */
> int
> -netdev_recv(struct netdev *netdev, struct ofpbuf *buffer)
> +netdev_rx_recv(struct netdev_rx *rx, struct ofpbuf *buffer)
> {
> - int (*recv)(struct netdev *, void *, size_t);
> int retval;
>
> ovs_assert(buffer->size == 0);
> ovs_assert(ofpbuf_tailroom(buffer) >= ETH_TOTAL_MIN);
>
> - recv = netdev_get_dev(netdev)->netdev_class->recv;
> - retval = (recv
> - ? (recv)(netdev, buffer->data, ofpbuf_tailroom(buffer))
> - : -EOPNOTSUPP);
> + retval = rx->rx_class->recv(rx, buffer->data, ofpbuf_tailroom(buffer));
> if (retval >= 0) {
> COVERAGE_INC(netdev_received);
> buffer->size += retval;
> @@ -403,27 +398,16 @@ netdev_recv(struct netdev *netdev, struct ofpbuf *buffer)
> }
> }
>
> -/* Registers with the poll loop to wake up from the next call to poll_block()
> - * when a packet is ready to be received with netdev_recv() on 'netdev'. */
> void
> -netdev_recv_wait(struct netdev *netdev)
> +netdev_rx_wait(struct netdev_rx *rx)
> {
> - void (*recv_wait)(struct netdev *);
> -
> - recv_wait = netdev_get_dev(netdev)->netdev_class->recv_wait;
> - if (recv_wait) {
> - recv_wait(netdev);
> - }
> + rx->rx_class->wait(rx);
> }
>
> -/* Discards all packets waiting to be received from 'netdev'. */
> int
> -netdev_drain(struct netdev *netdev)
> +netdev_rx_drain(struct netdev_rx *rx)
> {
> - int (*drain)(struct netdev *);
> -
> - drain = netdev_get_dev(netdev)->netdev_class->drain;
> - return drain ? drain(netdev) : 0;
> + return rx->rx_class->drain ? rx->rx_class->drain(rx) : 0;
> }
>
> /* Sends 'buffer' on 'netdev'. Returns 0 if successful, otherwise a positive
> @@ -1459,8 +1443,34 @@ netdev_get_dev(const struct netdev *netdev)
> return netdev->netdev_dev;
> }
>
> -/* Restores all flags that have been saved with netdev_save_flags() and not yet
> - * restored with netdev_restore_flags(). */
> +void
> +netdev_rx_init(struct netdev_rx *rx, struct netdev_dev *dev,
> + const struct netdev_rx_class *class)
> +{
> + ovs_assert(dev->ref_cnt > 0);
> + rx->rx_class = class;
> + rx->netdev_dev = dev;
> +}
> +
> +void
> +netdev_rx_uninit(struct netdev_rx *rx OVS_UNUSED)
> +{
> + /* Nothing to do. */
> +}
> +
> +struct netdev_dev *
> +netdev_rx_get_dev(const struct netdev_rx *rx)
> +{
> + ovs_assert(rx->netdev_dev->ref_cnt > 0);
> + return rx->netdev_dev;
> +}
> +
> +const char *
> +netdev_rx_get_name(const struct netdev_rx *rx)
> +{
> + return netdev_dev_get_name(netdev_rx_get_dev(rx));
> +}
> +
> static void
> restore_all_flags(void *aux OVS_UNUSED)
> {
> diff --git a/lib/netdev.h b/lib/netdev.h
> index 86924aa..852b75d 100644
> --- a/lib/netdev.h
> +++ b/lib/netdev.h
> @@ -33,6 +33,9 @@ extern "C" {
> * The PORTING file at the top of the source tree has more information in the
> * "Writing a netdev Provider" section. */
>
> +struct netdev;
> +struct netdev_class;
> +struct netdev_rx;
> struct netdev_saved_flags;
> struct ofpbuf;
> struct in_addr;
> @@ -99,9 +102,6 @@ struct netdev_tunnel_config {
> bool dont_fragment;
> };
>
> -struct netdev;
> -struct netdev_class;
> -
> void netdev_run(void);
> void netdev_wait(void);
>
> @@ -127,12 +127,17 @@ int netdev_get_mtu(const struct netdev *, int *mtup);
> int netdev_set_mtu(const struct netdev *, int mtu);
> int netdev_get_ifindex(const struct netdev *);
>
> -/* Packet send and receive. */
> -int netdev_listen(struct netdev *);
> -int netdev_recv(struct netdev *, struct ofpbuf *);
> -void netdev_recv_wait(struct netdev *);
> -int netdev_drain(struct netdev *);
> +/* Packet reception. */
> +int netdev_rx_open(struct netdev *, struct netdev_rx **);
> +void netdev_rx_close(struct netdev_rx *);
> +
> +const char *netdev_rx_get_name(const struct netdev_rx *);
> +
> +int netdev_rx_recv(struct netdev_rx *, struct ofpbuf *);
> +void netdev_rx_wait(struct netdev_rx *);
> +int netdev_rx_drain(struct netdev_rx *);
>
> +/* Packet transmission. */
> int netdev_send(struct netdev *, const struct ofpbuf *);
> void netdev_send_wait(struct netdev *);
>
> --
> 1.7.10.4
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
More information about the dev
mailing list