[ovs-dev] [netdev v5 1/3] netdev: Add new "struct netdev_rx" for capturing packets from a netdev.

Ethan Jackson ethan at nicira.com
Fri May 10 20:30:48 UTC 2013


You may disagree, but I think 'renaming it "struct netdev"' sounds
better than 'renaming it top "struct netdev"'

In netdev_bsd_rx_open(), should we call netdev_dev_bsd_changed() for
both tap devices and non-tap devices?

This was pre-existing, but the call to pcap_dispatch() in
netdev_rx_bsd_recv_pcap() needs a space after the cast.

Acked-by: Ethan Jackson <ethan at nicira.com>

On Fri, May 10, 2013 at 11:50 AM, Ben Pfaff <blp at nicira.com> wrote:
> Separating packet capture from "struct netdev" means that there is no
> remaining per-"struct netdev" state, which will allow us to get rid of
> "struct netdev_dev" (by renaming it to "struct netdev").
>
> Signed-off-by: Ben Pfaff <blp at nicira.com>
> ---
>  lib/dpif-netdev.c     |   13 ++-
>  lib/netdev-bsd.c      |  309 ++++++++++++++++++++++++++-----------------------
>  lib/netdev-dummy.c    |   95 +++++++++------
>  lib/netdev-linux.c    |  216 ++++++++++++++++++----------------
>  lib/netdev-provider.h |   93 ++++++++-------
>  lib/netdev-vport.c    |    5 +-
>  lib/netdev.c          |  106 +++++++++--------
>  lib/netdev.h          |   21 ++--
>  8 files changed, 469 insertions(+), 389 deletions(-)
>
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 40f59c3..78bdedb 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -107,6 +107,7 @@ struct dp_netdev_port {
>      struct list node;           /* Element in dp_netdev's 'port_list'. */
>      struct netdev *netdev;
>      struct netdev_saved_flags *sf;
> +    struct netdev_rx *rx;
>      char *type;                 /* Port type as requested by user. */
>  };
>
> @@ -378,6 +379,7 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
>      struct netdev_saved_flags *sf;
>      struct dp_netdev_port *port;
>      struct netdev *netdev;
> +    struct netdev_rx *rx;
>      const char *open_type;
>      int mtu;
>      int error;
> @@ -393,7 +395,7 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
>      /* XXX reject loopback devices */
>      /* XXX reject non-Ethernet devices */
>
> -    error = netdev_listen(netdev);
> +    error = netdev_rx_open(netdev, &rx);
>      if (error
>          && !(error == EOPNOTSUPP && dpif_netdev_class_is_dummy(dp->class))) {
>          VLOG_ERR("%s: cannot receive packets on this network device (%s)",
> @@ -404,6 +406,7 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
>
>      error = netdev_turn_flags_on(netdev, NETDEV_PROMISC, &sf);
>      if (error) {
> +        netdev_rx_close(rx);
>          netdev_close(netdev);
>          return error;
>      }
> @@ -412,6 +415,7 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
>      port->port_no = port_no;
>      port->netdev = netdev;
>      port->sf = sf;
> +    port->rx = rx;
>      port->type = xstrdup(type);
>
>      error = netdev_get_mtu(netdev, &mtu);
> @@ -509,6 +513,7 @@ do_del_port(struct dp_netdev *dp, uint32_t port_no)
>
>      netdev_close(port->netdev);
>      netdev_restore_flags(port->sf);
> +    netdev_rx_close(port->rx);
>      free(port->type);
>      free(port);
>
> @@ -1063,7 +1068,7 @@ dpif_netdev_run(struct dpif *dpif)
>          ofpbuf_clear(&packet);
>          ofpbuf_reserve(&packet, DP_NETDEV_HEADROOM);
>
> -        error = netdev_recv(port->netdev, &packet);
> +        error = port->rx ? netdev_rx_recv(port->rx, &packet) : EOPNOTSUPP;
>          if (!error) {
>              dp_netdev_port_input(dp, port, &packet);
>          } else if (error != EAGAIN && error != EOPNOTSUPP) {
> @@ -1083,7 +1088,9 @@ dpif_netdev_wait(struct dpif *dpif)
>      struct dp_netdev_port *port;
>
>      LIST_FOR_EACH (port, node, &dp->port_list) {
> -        netdev_recv_wait(port->netdev);
> +        if (port->rx) {
> +            netdev_rx_wait(port->rx);
> +        }
>      }
>  }
>
> diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c
> index 8b384ba..2e870d4 100644
> --- a/lib/netdev-bsd.c
> +++ b/lib/netdev-bsd.c
> @@ -16,6 +16,7 @@
>
>  #include <config.h>
>
> +#include "netdev-provider.h"
>  #include <stdlib.h>
>  #include <errno.h>
>  #include <fcntl.h>
> @@ -41,7 +42,6 @@
>  #include "coverage.h"
>  #include "dynamic-string.h"
>  #include "fatal-signal.h"
> -#include "netdev-provider.h"
>  #include "ofpbuf.h"
>  #include "openflow/openflow.h"
>  #include "packets.h"
> @@ -57,7 +57,7 @@ VLOG_DEFINE_THIS_MODULE(netdev_bsd);
>
>  /*
>   * This file implements objects to access interfaces.
> - * Externally, interfaces are represented by two structures:
> + * Externally, interfaces are represented by three structures:
>   *   + struct netdev_dev, representing a network device,
>   *     containing e.g. name and a refcount;
>   *     We can have private variables by embedding the
> @@ -66,26 +66,36 @@ VLOG_DEFINE_THIS_MODULE(netdev_bsd);
>   *
>   *   + struct netdev, representing an instance of an open netdev_dev.
>   *     The structure contains a pointer to the 'struct netdev'
> - *     representing the device. Again, private information
> - *     such as file descriptor etc. are stored in our
> - *     own struct netdev_bsd which includes a struct netdev.
> + *     representing the device.
> + *
> + *   + struct netdev_rx, which represents a netdev open to capture received
> + *     packets.  Again, private information such as file descriptor etc. are
> + *     stored in our own struct netdev_rx_bsd which includes a struct
> + *     netdev_rx.
>   *
> - * Both 'struct netdev' and 'struct netdev_dev' are referenced
> - * in containers which hold pointers to the data structures.
> - * We can reach our own struct netdev_XXX_bsd by putting a
> - * struct netdev_XXX within our own struct, and using CONTAINER_OF
> - * to access the parent structure.
> + * 'struct netdev', 'struct netdev_dev', and 'struct netdev_rx' are referenced
> + * in containers which hold pointers to the data structures.  We can reach our
> + * own struct netdev_XXX_bsd by putting a struct netdev_XXX within our own
> + * struct, and using CONTAINER_OF to access the parent structure.
>   */
>  struct netdev_bsd {
>      struct netdev netdev;
> +};
> +
> +struct netdev_rx_bsd {
> +    struct netdev_rx up;
>
> -    int netdev_fd;   /* Selectable file descriptor for the network device.
> -                        This descriptor will be used for polling operations */
> +    /* Packet capture descriptor for a system network device.
> +     * For a tap device this is NULL. */
> +    pcap_t *pcap_handle;
>
> -    pcap_t *pcap_handle;  /* Packet capture descriptor for a system network
> -                             device */
> +    /* Selectable file descriptor for the network device.
> +     * This descriptor will be used for polling operations. */
> +    int fd;
>  };
>
> +static const struct netdev_rx_class netdev_rx_bsd_class;
> +
>  struct netdev_dev_bsd {
>      struct netdev_dev netdev_dev;
>      unsigned int cache_valid;
> @@ -98,8 +108,11 @@ struct netdev_dev_bsd {
>      int mtu;
>      int carrier;
>
> -    bool tap_opened;
> -    int tap_fd;         /* TAP character device, if any */
> +    int tap_fd;         /* TAP character device, if any, otherwise -1. */
> +
> +    /* Used for sending packets on non-tap devices. */
> +    pcap_t *pcap;
> +    int fd;
>  };
>
>
> @@ -169,6 +182,13 @@ netdev_dev_bsd_cast(const struct netdev_dev *netdev_dev)
>      return CONTAINER_OF(netdev_dev, struct netdev_dev_bsd, netdev_dev);
>  }
>
> +static struct netdev_rx_bsd *
> +netdev_rx_bsd_cast(const struct netdev_rx *rx)
> +{
> +    netdev_rx_assert_class(rx, &netdev_rx_bsd_class);
> +    return CONTAINER_OF(rx, struct netdev_rx_bsd, up);
> +}
> +
>  /* Initialize the AF_INET socket used for ioctl operations */
>  static int
>  netdev_bsd_init(void)
> @@ -299,6 +319,7 @@ netdev_bsd_create_system(const struct netdev_class *class, const char *name,
>      netdev_dev = xzalloc(sizeof *netdev_dev);
>      netdev_dev->change_seq = 1;
>      netdev_dev_init(&netdev_dev->netdev_dev, name, class);
> +    netdev_dev->tap_fd = -1;
>      *netdev_devp = &netdev_dev->netdev_dev;
>
>      return 0;
> @@ -388,10 +409,12 @@ netdev_bsd_destroy(struct netdev_dev *netdev_dev_)
>
>      cache_notifier_unref();
>
> -    if (netdev_dev->tap_fd >= 0 &&
> -            !strcmp(netdev_dev_get_type(netdev_dev_), "tap")) {
> +    if (netdev_dev->tap_fd >= 0) {
>          destroy_tap(netdev_dev->tap_fd, netdev_dev_get_name(netdev_dev_));
>      }
> +    if (netdev_dev->pcap) {
> +        pcap_close(netdev_dev->pcap);
> +    }
>      free(netdev_dev);
>  }
>
> @@ -405,7 +428,6 @@ netdev_bsd_open_system(struct netdev_dev *netdev_dev_, struct netdev **netdevp)
>
>      /* Allocate network device. */
>      netdev = xcalloc(1, sizeof *netdev);
> -    netdev->netdev_fd = -1;
>      netdev_init(&netdev->netdev, netdev_dev_);
>
>      /* Verify that the netdev really exists by attempting to read its flags */
> @@ -430,56 +452,35 @@ netdev_bsd_close(struct netdev *netdev_)
>  {
>      struct netdev_bsd *netdev = netdev_bsd_cast(netdev_);
>
> -    if (netdev->netdev_fd >= 0 && strcmp(netdev_get_type(netdev_), "tap")) {
> -        pcap_close(netdev->pcap_handle);
> -    }
> -
>      free(netdev);
>  }
>
>  static int
> -netdev_bsd_listen(struct netdev *netdev_)
> +netdev_bsd_open_pcap(const char *name, pcap_t **pcapp, int *fdp)
>  {
> -    struct netdev_bsd *netdev = netdev_bsd_cast(netdev_);
> -    struct netdev_dev_bsd *netdev_dev =
> -                              netdev_dev_bsd_cast(netdev_get_dev(netdev_));
> -
>      char errbuf[PCAP_ERRBUF_SIZE];
> -    int error;
> -    int fd = -1;
> +    pcap_t *pcap = NULL;
>      int one = 1;
> +    int error;
> +    int fd;
>
> -    if (netdev->netdev_fd >= 0) {
> -        return 0;
> -    }
> -
> -    if (!strcmp(netdev_get_type(netdev_), "tap") &&
> -            !netdev_dev->tap_opened) {
> -        netdev->netdev_fd = netdev_dev->tap_fd;
> -        netdev_dev->tap_opened = true;
> -        return 0;
> -    }
> -
> -    /* open the pcap device. The device is opened in non-promiscuous mode
> +    /* Open the pcap device.  The device is opened in non-promiscuous mode
>       * because the interface flags are manually set by the caller. */
>      errbuf[0] = '\0';
> -    netdev->pcap_handle = pcap_open_live(netdev_get_name(netdev_), PCAP_SNAPLEN,
> -                                    0, 1000, errbuf);
> -    if (netdev->pcap_handle == NULL) {
> -        VLOG_ERR("%s: pcap_open_live failed: %s",
> -               netdev_get_name(netdev_), errbuf);
> +    pcap = pcap_open_live(name, PCAP_SNAPLEN, 0, 1000, errbuf);
> +    if (!pcap) {
> +        VLOG_ERR_RL(&rl, "%s: pcap_open_live failed: %s", name, errbuf);
>          error = EIO;
>          goto error;
> -    } else if (errbuf[0] !=  '\0') {
> -        VLOG_WARN("%s: pcap_open_live: %s",
> -               netdev_get_name(netdev_), errbuf);
> +    }
> +    if (errbuf[0] != '\0') {
> +        VLOG_WARN_RL(&rl, "%s: pcap_open_live: %s", name, errbuf);
>      }
>
> -    netdev_dev_bsd_changed(netdev_dev_bsd_cast(netdev_get_dev(netdev_)));
> -
> -    /* initialize netdev->netdev_fd */
> -    fd = pcap_get_selectable_fd(netdev->pcap_handle);
> +    /* Get the underlying fd. */
> +    fd = pcap_get_selectable_fd(pcap);
>      if (fd == -1) {
> +        VLOG_WARN_RL(&rl, "%s: no selectable file descriptor", name);
>          error = errno;
>          goto error;
>      }
> @@ -487,39 +488,83 @@ netdev_bsd_listen(struct netdev *netdev_)
>      /* Set non-blocking mode. Also the BIOCIMMEDIATE ioctl must be called
>       * on the file descriptor returned by pcap_get_selectable_fd to achieve
>       * a real non-blocking behaviour.*/
> -    error = pcap_setnonblock(netdev->pcap_handle, 1, errbuf);
> +    error = pcap_setnonblock(pcap, 1, errbuf);
>      if (error == -1) {
>          error = errno;
>          goto error;
>      }
>
> -    /* This call assure that reads return immediately upon packet reception.
> -     * Otherwise, a read will block until either the kernel buffer becomes
> -     * full or a timeout occurs. */
> -    if(ioctl(fd, BIOCIMMEDIATE, &one) < 0 ) {
> -        VLOG_ERR("ioctl(BIOCIMMEDIATE) on %s device failed: %s",
> -               netdev_get_name(netdev_), strerror(errno));
> +    /* This call ensures that reads return immediately upon packet
> +     * reception.  Otherwise, a read will block until either the kernel
> +     * buffer becomes full or a timeout occurs. */
> +    if (ioctl(fd, BIOCIMMEDIATE, &one) < 0 ) {
> +        VLOG_ERR_RL(&rl, "ioctl(BIOCIMMEDIATE) on %s device failed: %s",
> +                    name, strerror(errno));
>          error = errno;
>          goto error;
>      }
>
> -    /* Capture only incoming packets */
> -    error = pcap_setdirection(netdev->pcap_handle, PCAP_D_IN);
> +    /* Capture only incoming packets. */
> +    error = pcap_setdirection(pcap, PCAP_D_IN);
>      if (error == -1) {
>          error = errno;
>          goto error;
>      }
>
> -    netdev->netdev_fd = fd;
> +    *pcapp = pcap;
> +    *fdp = fd;
>      return 0;
>
>  error:
> -    if (fd >= 0) {
> -        close(netdev->netdev_fd);
> +    if (pcap) {
> +        pcap_close(pcap);
>      }
> +    *pcapp = NULL;
> +    *fdp = -1;
>      return error;
>  }
>
> +static int
> +netdev_bsd_rx_open(struct netdev *netdev_, struct netdev_rx **rxp)
> +{
> +    struct netdev_dev_bsd *netdev_dev =
> +                              netdev_dev_bsd_cast(netdev_get_dev(netdev_));
> +
> +    struct netdev_rx_bsd *rx;
> +    pcap_t *pcap;
> +    int fd;
> +
> +    if (!strcmp(netdev_get_type(netdev_), "tap")) {
> +        pcap = NULL;
> +        fd = netdev_dev->tap_fd;
> +    } else {
> +        int error = netdev_bsd_open_pcap(netdev_get_name(netdev_), &pcap, &fd);
> +        if (error) {
> +            return error;
> +        }
> +
> +        netdev_dev_bsd_changed(netdev_dev);
> +    }
> +
> +    rx = xmalloc(sizeof *rx);
> +    netdev_rx_init(&rx->up, netdev_get_dev(netdev_), &netdev_rx_bsd_class);
> +    rx->pcap_handle = pcap;
> +    rx->fd = fd;
> +
> +    *rxp = &rx->up;
> +    return 0;
> +}
> +
> +static void
> +netdev_rx_bsd_destroy(struct netdev_rx *rx_)
> +{
> +    struct netdev_rx_bsd *rx = netdev_rx_bsd_cast(rx_);
> +
> +    if (rx->pcap_handle) {
> +        pcap_close(rx->pcap_handle);
> +    }
> +    free(rx);
> +}
>
>  /* The recv callback of the netdev class returns the number of bytes of the
>   * received packet.
> @@ -566,24 +611,20 @@ proc_pkt(u_char *args_, const struct pcap_pkthdr *hdr, const u_char *packet)
>   * This function attempts to receive a packet from the specified network
>   * device. It is assumed that the network device is a system device or a tap
>   * device opened as a system one. In this case the read operation is performed
> - * on the 'netdev' pcap descriptor.
> + * from rx->pcap_handle.
>   */
>  static int
> -netdev_bsd_recv_system(struct netdev_bsd *netdev, void *data, size_t size)
> +netdev_rx_bsd_recv_pcap(struct netdev_rx_bsd *rx, void *data, size_t size)
>  {
>      struct pcap_arg arg;
>      int ret;
>
> -    if (netdev->netdev_fd < 0) {
> -        return -EAGAIN;
> -    }
> -
>      /* prepare the pcap argument to store the packet */
>      arg.size = size;
>      arg.data = data;
>
>      for (;;) {
> -        ret = pcap_dispatch(netdev->pcap_handle, 1, proc_pkt, (u_char *)&arg);
> +        ret = pcap_dispatch(rx->pcap_handle, 1, proc_pkt, (u_char *)&arg);
>
>          if (ret > 0) {
>              return arg.retval; /* arg.retval < 0 is handled in the caller */
> @@ -600,25 +641,20 @@ netdev_bsd_recv_system(struct netdev_bsd *netdev, void *data, size_t size)
>
>  /*
>   * This function attempts to receive a packet from the specified network
> - * device. It is assumed that the network device is a tap device and the
> - * 'netdev_fd' member of the 'netdev' structure is initialized with the tap
> - * file descriptor.
> + * device. It is assumed that the network device is a tap device and
> + * 'rx->fd' is initialized with the tap file descriptor.
>   */
>  static int
> -netdev_bsd_recv_tap(struct netdev_bsd *netdev, void *data, size_t size)
> +netdev_rx_bsd_recv_tap(struct netdev_rx_bsd *rx, void *data, size_t size)
>  {
> -    if (netdev->netdev_fd < 0) {
> -        return -EAGAIN;
> -    }
> -
>      for (;;) {
> -        ssize_t retval = read(netdev->netdev_fd, data, size);
> +        ssize_t retval = read(rx->fd, data, size);
>          if (retval >= 0) {
>              return retval;
>          } else if (errno != EINTR) {
>              if (errno != EAGAIN) {
>                  VLOG_WARN_RL(&rl, "error receiving Ethernet packet on %s: %s",
> -                             strerror(errno), netdev->netdev.netdev_dev->name);
> +                             strerror(errno), netdev_rx_get_name(&rx->up));
>              }
>              return -errno;
>          }
> @@ -626,58 +662,39 @@ netdev_bsd_recv_tap(struct netdev_bsd *netdev, void *data, size_t size)
>  }
>
>
> -/*
> - * According with the nature of the device a different function must be called.
> - * If the device is the bridge local port the 'netdev_bsd_recv_tap' function
> - * must be called, otherwise the 'netdev_bsd_recv_system' function is called.
> - *
> - * type!="tap"                                        --->  system device.
> - * type=="tap" && netdev_fd == tap_fd                 --->  internal tap device
> - * type=="tap" && netdev_fd != tap_fd                 --->  internal tap device
> - *                                                          opened as a system
> - *                                                          device.
> - */
>  static int
> -netdev_bsd_recv(struct netdev *netdev_, void* data, size_t size)
> +netdev_rx_bsd_recv(struct netdev_rx *rx_, void *data, size_t size)
>  {
> -    struct netdev_bsd *netdev = netdev_bsd_cast(netdev_);
> -    struct netdev_dev_bsd * netdev_dev =
> -        netdev_dev_bsd_cast(netdev_get_dev(netdev_));
> +    struct netdev_rx_bsd *rx = netdev_rx_bsd_cast(rx_);
>
> -    if (!strcmp(netdev_get_type(netdev_), "tap") &&
> -            netdev->netdev_fd == netdev_dev->tap_fd) {
> -        return netdev_bsd_recv_tap(netdev, data, size);
> -    } else {
> -        return netdev_bsd_recv_system(netdev, data, size);
> -    }
> +    return (rx->pcap_handle
> +            ? netdev_rx_bsd_recv_pcap(rx, data, size)
> +            : netdev_rx_bsd_recv_tap(rx, data, size));
>  }
>
> -
>  /*
>   * Registers with the poll loop to wake up from the next call to poll_block()
> - * when a packet is ready to be received with netdev_recv() on 'netdev'.
> + * when a packet is ready to be received with netdev_rx_recv() on 'rx'.
>   */
>  static void
> -netdev_bsd_recv_wait(struct netdev *netdev_)
> +netdev_rx_bsd_wait(struct netdev_rx *rx_)
>  {
> -    struct netdev_bsd *netdev = netdev_bsd_cast(netdev_);
> +    struct netdev_rx_bsd *rx = netdev_rx_bsd_cast(rx_);
>
> -    if (netdev->netdev_fd >= 0) {
> -        poll_fd_wait(netdev->netdev_fd, POLLIN);
> -    }
> +    poll_fd_wait(rx->fd, POLLIN);
>  }
>
> -/* Discards all packets waiting to be received from 'netdev'. */
> +/* Discards all packets waiting to be received from 'rx'. */
>  static int
> -netdev_bsd_drain(struct netdev *netdev_)
> +netdev_rx_bsd_drain(struct netdev_rx *rx_)
>  {
>      struct ifreq ifr;
> -    struct netdev_bsd *netdev = netdev_bsd_cast(netdev_);
> +    struct netdev_rx_bsd *rx = netdev_rx_bsd_cast(rx_);
>
> -    strcpy(ifr.ifr_name, netdev_get_name(netdev_));
> -    if (ioctl(netdev->netdev_fd, BIOCFLUSH, &ifr) == -1) {
> +    strcpy(ifr.ifr_name, netdev_rx_get_name(rx_));
> +    if (ioctl(rx->fd, BIOCFLUSH, &ifr) == -1) {
>          VLOG_DBG_RL(&rl, "%s: ioctl(BIOCFLUSH) failed: %s",
> -                    netdev_get_name(netdev_), strerror(errno));
> +                    netdev_rx_get_name(rx_), strerror(errno));
>          return errno;
>      }
>      return 0;
> @@ -690,34 +707,34 @@ netdev_bsd_drain(struct netdev *netdev_)
>  static int
>  netdev_bsd_send(struct netdev *netdev_, const void *data, size_t size)
>  {
> -    struct netdev_bsd *netdev = netdev_bsd_cast(netdev_);
> -    struct netdev_dev_bsd * netdev_dev =
> -        netdev_dev_bsd_cast(netdev_get_dev(netdev_));
> +    struct netdev_dev_bsd *dev = netdev_dev_bsd_cast(netdev_get_dev(netdev_));
> +    const char *name = netdev_get_name(netdev_);
>
> -    if (netdev->netdev_fd < 0) {
> -        return EPIPE;
> +    if (dev->tap_fd < 0 && !dev->pcap) {
> +        int error = netdev_bsd_open_pcap(name, &dev->pcap, &dev->fd);
> +        if (error) {
> +            return error;
> +        }
>      }
>
>      for (;;) {
>          ssize_t retval;
> -        if (!strcmp(netdev_get_type(netdev_), "tap") &&
> -                netdev_dev->tap_fd == netdev->netdev_fd) {
> -            retval = write(netdev->netdev_fd, data, size);
> +        if (dev->tap_fd >= 0) {
> +            retval = write(dev->tap_fd, data, size);
>          } else {
> -            retval = pcap_inject(netdev->pcap_handle, data, size);
> +            retval = pcap_inject(dev->pcap, data, size);
>          }
>          if (retval < 0) {
>              if (errno == EINTR) {
>                  continue;
>              } else if (errno != EAGAIN) {
>                  VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s",
> -                             netdev_get_name(netdev_), strerror(errno));
> +                             name, strerror(errno));
>              }
>              return errno;
>          } else if (retval != size) {
>              VLOG_WARN_RL(&rl, "sent partial Ethernet packet (%zd bytes of "
> -                         "%zu) on %s", retval, size,
> -                         netdev_get_name(netdev_));
> +                         "%zu) on %s", retval, size, name);
>             return EMSGSIZE;
>          } else {
>              return 0;
> @@ -733,17 +750,16 @@ netdev_bsd_send(struct netdev *netdev_, const void *data, size_t size)
>  static void
>  netdev_bsd_send_wait(struct netdev *netdev_)
>  {
> -    struct netdev_bsd *netdev = netdev_bsd_cast(netdev_);
> -
> -    if (netdev->netdev_fd < 0) { /* Nothing to do. */
> -        return;
> -    }
> +    struct netdev_dev_bsd *dev = netdev_dev_bsd_cast(netdev_get_dev(netdev_));
>
> -    if (strcmp(netdev_get_type(netdev_), "tap")) {
> -        poll_fd_wait(netdev->netdev_fd, POLLOUT);
> -    } else {
> +    if (dev->tap_fd >= 0) {
>          /* TAP device always accepts packets. */
>          poll_immediate_wake();
> +    } else if (dev->pcap) {
> +        poll_fd_wait(dev->fd, POLLOUT);
> +    } else {
> +        /* We haven't even tried to send a packet yet. */
> +        poll_immediate_wake();
>      }
>  }
>
> @@ -1263,11 +1279,7 @@ const struct netdev_class netdev_bsd_class = {
>      netdev_bsd_open_system,
>      netdev_bsd_close,
>
> -    netdev_bsd_listen,
> -
> -    netdev_bsd_recv,
> -    netdev_bsd_recv_wait,
> -    netdev_bsd_drain,
> +    netdev_bsd_rx_open,
>
>      netdev_bsd_send,
>      netdev_bsd_send_wait,
> @@ -1324,11 +1336,7 @@ const struct netdev_class netdev_tap_class = {
>      netdev_bsd_open_system,
>      netdev_bsd_close,
>
> -    netdev_bsd_listen,
> -
> -    netdev_bsd_recv,
> -    netdev_bsd_recv_wait,
> -    netdev_bsd_drain,
> +    netdev_bsd_rx_open,
>
>      netdev_bsd_send,
>      netdev_bsd_send_wait,
> @@ -1370,6 +1378,13 @@ const struct netdev_class netdev_tap_class = {
>
>      netdev_bsd_change_seq
>  };
> +
> +static const struct netdev_rx_class netdev_rx_bsd_class = {
> +    netdev_rx_bsd_destroy,
> +    netdev_rx_bsd_recv,
> +    netdev_rx_bsd_wait,
> +    netdev_rx_bsd_drain,
> +};
>
>
>  static void
> diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
> index de04f9a..3762d5c 100644
> --- a/lib/netdev-dummy.c
> +++ b/lib/netdev-dummy.c
> @@ -49,20 +49,25 @@ struct netdev_dev_dummy {
>      struct netdev_stats stats;
>      enum netdev_flags flags;
>      unsigned int change_seq;
> -
> -    struct list devs;           /* List of child "netdev_dummy"s. */
>      int ifindex;
> +
> +    struct list rxes;           /* List of child "netdev_rx_dummy"s. */
>  };
>
>  struct netdev_dummy {
>      struct netdev netdev;
> -    struct list node;           /* In netdev_dev_dummy's "devs" list. */
> +};
> +
> +struct netdev_rx_dummy {
> +    struct netdev_rx up;
> +    struct list node;           /* In netdev_dev_dummy's "rxes" list. */
>      struct list recv_queue;
> -    bool listening;
>  };
>
>  static struct shash dummy_netdev_devs = SHASH_INITIALIZER(&dummy_netdev_devs);
>
> +static const struct netdev_rx_class netdev_rx_dummy_class;
> +
>  static unixctl_cb_func netdev_dummy_set_admin_state;
>  static int netdev_dummy_create(const struct netdev_class *, const char *,
>                                 struct netdev_dev **);
> @@ -93,6 +98,13 @@ netdev_dummy_cast(const struct netdev *netdev)
>      return CONTAINER_OF(netdev, struct netdev_dummy, netdev);
>  }
>
> +static struct netdev_rx_dummy *
> +netdev_rx_dummy_cast(const struct netdev_rx *rx)
> +{
> +    netdev_rx_assert_class(rx, &netdev_rx_dummy_class);
> +    return CONTAINER_OF(rx, struct netdev_rx_dummy, up);
> +}
> +
>  static int
>  netdev_dummy_create(const struct netdev_class *class, const char *name,
>                      struct netdev_dev **netdev_devp)
> @@ -112,7 +124,7 @@ netdev_dummy_create(const struct netdev_class *class, const char *name,
>      netdev_dev->flags = 0;
>      netdev_dev->change_seq = 1;
>      netdev_dev->ifindex = -EOPNOTSUPP;
> -    list_init(&netdev_dev->devs);
> +    list_init(&netdev_dev->rxes);
>
>      shash_add(&dummy_netdev_devs, name, netdev_dev);
>
> @@ -157,16 +169,12 @@ netdev_dummy_set_config(struct netdev_dev *netdev_dev_,
>  static int
>  netdev_dummy_open(struct netdev_dev *netdev_dev_, struct netdev **netdevp)
>  {
> -    struct netdev_dev_dummy *netdev_dev = netdev_dev_dummy_cast(netdev_dev_);
>      struct netdev_dummy *netdev;
>
>      netdev = xmalloc(sizeof *netdev);
>      netdev_init(&netdev->netdev, netdev_dev_);
> -    list_init(&netdev->recv_queue);
> -    netdev->listening = false;
>
>      *netdevp = &netdev->netdev;
> -    list_push_back(&netdev_dev->devs, &netdev->node);
>      return 0;
>  }
>
> @@ -174,31 +182,37 @@ static void
>  netdev_dummy_close(struct netdev *netdev_)
>  {
>      struct netdev_dummy *netdev = netdev_dummy_cast(netdev_);
> -    list_remove(&netdev->node);
> -    ofpbuf_list_delete(&netdev->recv_queue);
>      free(netdev);
>  }
>
>  static int
> -netdev_dummy_listen(struct netdev *netdev_)
> +netdev_dummy_rx_open(struct netdev *netdev_, struct netdev_rx **rxp)
>  {
> -    struct netdev_dummy *netdev = netdev_dummy_cast(netdev_);
> -    netdev->listening = true;
> +    struct netdev_dev_dummy *dev
> +        = netdev_dev_dummy_cast(netdev_get_dev(netdev_));
> +    struct netdev_rx_dummy *rx;
> +
> +    rx = xmalloc(sizeof *rx);
> +    netdev_rx_init(&rx->up, &dev->netdev_dev, &netdev_rx_dummy_class);
> +    list_push_back(&dev->rxes, &rx->node);
> +    list_init(&rx->recv_queue);
> +
> +    *rxp = &rx->up;
>      return 0;
>  }
>
>  static int
> -netdev_dummy_recv(struct netdev *netdev_, void *buffer, size_t size)
> +netdev_rx_dummy_recv(struct netdev_rx *rx_, void *buffer, size_t size)
>  {
> -    struct netdev_dummy *netdev = netdev_dummy_cast(netdev_);
> +    struct netdev_rx_dummy *rx = netdev_rx_dummy_cast(rx_);
>      struct ofpbuf *packet;
>      size_t packet_size;
>
> -    if (list_is_empty(&netdev->recv_queue)) {
> +    if (list_is_empty(&rx->recv_queue)) {
>          return -EAGAIN;
>      }
>
> -    packet = ofpbuf_from_list(list_pop_front(&netdev->recv_queue));
> +    packet = ofpbuf_from_list(list_pop_front(&rx->recv_queue));
>      if (packet->size > size) {
>          return -EMSGSIZE;
>      }
> @@ -211,19 +225,28 @@ netdev_dummy_recv(struct netdev *netdev_, void *buffer, size_t size)
>  }
>
>  static void
> -netdev_dummy_recv_wait(struct netdev *netdev_)
> +netdev_rx_dummy_destroy(struct netdev_rx *rx_)
>  {
> -    struct netdev_dummy *netdev = netdev_dummy_cast(netdev_);
> -    if (!list_is_empty(&netdev->recv_queue)) {
> +    struct netdev_rx_dummy *rx = netdev_rx_dummy_cast(rx_);
> +    list_remove(&rx->node);
> +    ofpbuf_list_delete(&rx->recv_queue);
> +    free(rx);
> +}
> +
> +static void
> +netdev_rx_dummy_wait(struct netdev_rx *rx_)
> +{
> +    struct netdev_rx_dummy *rx = netdev_rx_dummy_cast(rx_);
> +    if (!list_is_empty(&rx->recv_queue)) {
>          poll_immediate_wake();
>      }
>  }
>
>  static int
> -netdev_dummy_drain(struct netdev *netdev_)
> +netdev_rx_dummy_drain(struct netdev_rx *rx_)
>  {
> -    struct netdev_dummy *netdev = netdev_dummy_cast(netdev_);
> -    ofpbuf_list_delete(&netdev->recv_queue);
> +    struct netdev_rx_dummy *rx = netdev_rx_dummy_cast(rx_);
> +    ofpbuf_list_delete(&rx->recv_queue);
>      return 0;
>  }
>
> @@ -375,10 +398,7 @@ static const struct netdev_class dummy_class = {
>      netdev_dummy_open,
>      netdev_dummy_close,
>
> -    netdev_dummy_listen,
> -    netdev_dummy_recv,
> -    netdev_dummy_recv_wait,
> -    netdev_dummy_drain,
> +    netdev_dummy_rx_open,
>
>      netdev_dummy_send,          /* send */
>      NULL,                       /* send_wait */
> @@ -422,6 +442,13 @@ static const struct netdev_class dummy_class = {
>      netdev_dummy_change_seq
>  };
>
> +static const struct netdev_rx_class netdev_rx_dummy_class = {
> +    netdev_rx_dummy_destroy,
> +    netdev_rx_dummy_recv,
> +    netdev_rx_dummy_wait,
> +    netdev_rx_dummy_drain,
> +};
> +
>  static struct ofpbuf *
>  eth_from_packet_or_flow(const char *s)
>  {
> @@ -478,7 +505,7 @@ netdev_dummy_receive(struct unixctl_conn *conn,
>
>      n_listeners = 0;
>      for (i = 2; i < argc; i++) {
> -        struct netdev_dummy *dev;
> +        struct netdev_rx_dummy *rx;
>          struct ofpbuf *packet;
>
>          packet = eth_from_packet_or_flow(argv[i]);
> @@ -491,12 +518,10 @@ netdev_dummy_receive(struct unixctl_conn *conn,
>          dummy_dev->stats.rx_bytes += packet->size;
>
>          n_listeners = 0;
> -        LIST_FOR_EACH (dev, node, &dummy_dev->devs) {
> -            if (dev->listening) {
> -                struct ofpbuf *copy = ofpbuf_clone(packet);
> -                list_push_back(&dev->recv_queue, &copy->list_node);
> -                n_listeners++;
> -            }
> +        LIST_FOR_EACH (rx, node, &dummy_dev->rxes) {
> +            struct ofpbuf *copy = ofpbuf_clone(packet);
> +            list_push_back(&rx->recv_queue, &copy->list_node);
> +            n_listeners++;
>          }
>          ofpbuf_delete(packet);
>      }
> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> index 30cd0f6..2e6fedd 100644
> --- a/lib/netdev-linux.c
> +++ b/lib/netdev-linux.c
> @@ -122,7 +122,6 @@ enum {
>
>  struct tap_state {
>      int fd;
> -    bool opened;
>  };
>
>  /* Traffic control. */
> @@ -400,9 +399,16 @@ struct netdev_dev_linux {
>
>  struct netdev_linux {
>      struct netdev netdev;
> +};
> +
> +struct netdev_rx_linux {
> +    struct netdev_rx up;
> +    bool is_tap;
>      int fd;
>  };
>
> +static const struct netdev_rx_class netdev_rx_linux_class;
> +
>  /* Sockets used for ioctl operations. */
>  static int af_inet_sock = -1;   /* AF_INET, SOCK_DGRAM. */
>
> @@ -442,6 +448,12 @@ is_netdev_linux_class(const struct netdev_class *netdev_class)
>      return netdev_class->init == netdev_linux_init;
>  }
>
> +static bool
> +is_tap_netdev(const struct netdev *netdev)
> +{
> +    return netdev_dev_get_class(netdev_get_dev(netdev)) == &netdev_tap_class;
> +}
> +
>  static struct netdev_dev_linux *
>  netdev_dev_linux_cast(const struct netdev_dev *netdev_dev)
>  {
> @@ -460,6 +472,13 @@ netdev_linux_cast(const struct netdev *netdev)
>
>      return CONTAINER_OF(netdev, struct netdev_linux, netdev);
>  }
> +
> +static struct netdev_rx_linux *
> +netdev_rx_linux_cast(const struct netdev_rx *rx)
> +{
> +    netdev_rx_assert_class(rx, &netdev_rx_linux_class);
> +    return CONTAINER_OF(rx, struct netdev_rx_linux, up);
> +}
>
>  static int
>  netdev_linux_init(void)
> @@ -729,7 +748,6 @@ netdev_linux_open(struct netdev_dev *netdev_dev_, struct netdev **netdevp)
>
>      /* Allocate network device. */
>      netdev = xzalloc(sizeof *netdev);
> -    netdev->fd = -1;
>      netdev_init(&netdev->netdev, netdev_dev_);
>
>      /* Verify that the device really exists, by attempting to read its flags.
> @@ -761,67 +779,65 @@ netdev_linux_close(struct netdev *netdev_)
>  {
>      struct netdev_linux *netdev = netdev_linux_cast(netdev_);
>
> -    if (netdev->fd > 0 && strcmp(netdev_get_type(netdev_), "tap")) {
> -        close(netdev->fd);
> -    }
>      free(netdev);
>  }
>
>  static int
> -netdev_linux_listen(struct netdev *netdev_)
> +netdev_linux_rx_open(struct netdev *netdev_, struct netdev_rx **rxp)
>  {
>      struct netdev_linux *netdev = netdev_linux_cast(netdev_);
>      struct netdev_dev_linux *netdev_dev =
>                                  netdev_dev_linux_cast(netdev_get_dev(netdev_));
> -    struct sockaddr_ll sll;
> -    int ifindex;
> +    bool is_tap = is_tap_netdev(netdev_);
> +    struct netdev_rx_linux *rx;
>      int error;
>      int fd;
>
> -    if (netdev->fd >= 0) {
> -        return 0;
> -    }
> +    if (is_tap) {
> +        fd = netdev_dev->state.tap.fd;
> +    } else {
> +        struct sockaddr_ll sll;
> +        int ifindex;
>
> -    if (!strcmp(netdev_get_type(netdev_), "tap")
> -        && !netdev_dev->state.tap.opened) {
> -        netdev->fd = netdev_dev->state.tap.fd;
> -        netdev_dev->state.tap.opened = true;
> -        return 0;
> -    }
> +        /* Create file descriptor. */
> +        fd = socket(PF_PACKET, SOCK_RAW, 0);
> +        if (fd < 0) {
> +            error = errno;
> +            VLOG_ERR("failed to create raw socket (%s)", strerror(error));
> +            goto error;
> +        }
>
> -    /* Create file descriptor. */
> -    fd = socket(PF_PACKET, SOCK_RAW, 0);
> -    if (fd < 0) {
> -        error = errno;
> -        VLOG_ERR("failed to create raw socket (%s)", strerror(error));
> -        goto error;
> -    }
> +        /* Set non-blocking mode. */
> +        error = set_nonblocking(fd);
> +        if (error) {
> +            goto error;
> +        }
>
> -    /* Set non-blocking mode. */
> -    error = set_nonblocking(fd);
> -    if (error) {
> -        goto error;
> -    }
> +        /* Get ethernet device index. */
> +        error = get_ifindex(&netdev->netdev, &ifindex);
> +        if (error) {
> +            goto error;
> +        }
>
> -    /* Get ethernet device index. */
> -    error = get_ifindex(&netdev->netdev, &ifindex);
> -    if (error) {
> -        goto error;
> +        /* Bind to specific ethernet device. */
> +        memset(&sll, 0, sizeof sll);
> +        sll.sll_family = AF_PACKET;
> +        sll.sll_ifindex = ifindex;
> +        sll.sll_protocol = (OVS_FORCE unsigned short int) htons(ETH_P_ALL);
> +        if (bind(fd, (struct sockaddr *) &sll, sizeof sll) < 0) {
> +            error = errno;
> +            VLOG_ERR("%s: failed to bind raw socket (%s)",
> +                     netdev_get_name(netdev_), strerror(error));
> +            goto error;
> +        }
>      }
>
> -    /* Bind to specific ethernet device. */
> -    memset(&sll, 0, sizeof sll);
> -    sll.sll_family = AF_PACKET;
> -    sll.sll_ifindex = ifindex;
> -    sll.sll_protocol = (OVS_FORCE unsigned short int) htons(ETH_P_ALL);
> -    if (bind(fd, (struct sockaddr *) &sll, sizeof sll) < 0) {
> -        error = errno;
> -        VLOG_ERR("%s: failed to bind raw socket (%s)",
> -                 netdev_get_name(netdev_), strerror(error));
> -        goto error;
> -    }
> +    rx = xmalloc(sizeof *rx);
> +    netdev_rx_init(&rx->up, netdev_get_dev(netdev_), &netdev_rx_linux_class);
> +    rx->is_tap = is_tap;
> +    rx->fd = fd;
>
> -    netdev->fd = fd;
> +    *rxp = &rx->up;
>      return 0;
>
>  error:
> @@ -831,63 +847,64 @@ error:
>      return error;
>  }
>
> -static int
> -netdev_linux_recv(struct netdev *netdev_, void *data, size_t size)
> +static void
> +netdev_rx_linux_destroy(struct netdev_rx *rx_)
>  {
> -    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
> +    struct netdev_rx_linux *rx = netdev_rx_linux_cast(rx_);
>
> -    if (netdev->fd < 0) {
> -        /* Device is not listening. */
> -        return -EAGAIN;
> +    if (!rx->is_tap) {
> +        close(rx->fd);
>      }
> +    free(rx);
> +}
>
> -    for (;;) {
> -        ssize_t retval;
> +static int
> +netdev_rx_linux_recv(struct netdev_rx *rx_, void *data, size_t size)
> +{
> +    struct netdev_rx_linux *rx = netdev_rx_linux_cast(rx_);
> +    ssize_t retval;
>
> -        retval = (netdev_->netdev_dev->netdev_class == &netdev_tap_class
> -                  ? read(netdev->fd, data, size)
> -                  : recv(netdev->fd, data, size, MSG_TRUNC));
> -        if (retval >= 0) {
> -            return retval <= size ? retval : -EMSGSIZE;
> -        } else if (errno != EINTR) {
> -            if (errno != EAGAIN) {
> -                VLOG_WARN_RL(&rl, "error receiving Ethernet packet on %s: %s",
> -                             strerror(errno), netdev_get_name(netdev_));
> -            }
> -            return -errno;
> +    do {
> +        retval = (rx->is_tap
> +                  ? read(rx->fd, data, size)
> +                  : recv(rx->fd, data, size, MSG_TRUNC));
> +    } while (retval < 0 && errno == EINTR);
> +
> +    if (retval > size) {
> +        return -EMSGSIZE;
> +    } else if (retval >= 0) {
> +        return retval;
> +    } else {
> +        if (errno != EAGAIN) {
> +            VLOG_WARN_RL(&rl, "error receiving Ethernet packet on %s: %s",
> +                         strerror(errno), netdev_rx_get_name(rx_));
>          }
> +        return -errno;
>      }
>  }
>
> -/* Registers with the poll loop to wake up from the next call to poll_block()
> - * when a packet is ready to be received with netdev_recv() on 'netdev'. */
>  static void
> -netdev_linux_recv_wait(struct netdev *netdev_)
> +netdev_rx_linux_wait(struct netdev_rx *rx_)
>  {
> -    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
> -    if (netdev->fd >= 0) {
> -        poll_fd_wait(netdev->fd, POLLIN);
> -    }
> +    struct netdev_rx_linux *rx = netdev_rx_linux_cast(rx_);
> +    poll_fd_wait(rx->fd, POLLIN);
>  }
>
> -/* Discards all packets waiting to be received from 'netdev'. */
>  static int
> -netdev_linux_drain(struct netdev *netdev_)
> +netdev_rx_linux_drain(struct netdev_rx *rx_)
>  {
> -    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
> -    if (netdev->fd < 0) {
> -        return 0;
> -    } else if (!strcmp(netdev_get_type(netdev_), "tap")) {
> +    struct netdev_rx_linux *rx = netdev_rx_linux_cast(rx_);
> +    if (rx->is_tap) {
>          struct ifreq ifr;
> -        int error = netdev_linux_do_ioctl(netdev_get_name(netdev_), &ifr,
> +        int error = netdev_linux_do_ioctl(netdev_rx_get_name(rx_), &ifr,
>                                            SIOCGIFTXQLEN, "SIOCGIFTXQLEN");
>          if (error) {
>              return error;
>          }
> -        drain_fd(netdev->fd, ifr.ifr_qlen);
> +        drain_fd(rx->fd, ifr.ifr_qlen);
>          return 0;
>      } else {
> -        return drain_rcvbuf(netdev->fd);
> +        return drain_rcvbuf(rx->fd);
>      }
>  }
>
> @@ -903,11 +920,10 @@ netdev_linux_drain(struct netdev *netdev_)
>  static int
>  netdev_linux_send(struct netdev *netdev_, const void *data, size_t size)
>  {
> -    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
>      for (;;) {
>          ssize_t retval;
>
> -        if (netdev->fd < 0) {
> +        if (!is_tap_netdev(netdev_)) {
>              /* Use our AF_PACKET socket to send to this device. */
>              struct sockaddr_ll sll;
>              struct msghdr msg;
> @@ -945,11 +961,14 @@ netdev_linux_send(struct netdev *netdev_, const void *data, size_t size)
>
>              retval = sendmsg(sock, &msg, 0);
>          } else {
> -            /* Use the netdev's own fd to send to this device.  This is
> -             * essential for tap devices, because packets sent to a tap device
> -             * with an AF_PACKET socket will loop back to be *received* again
> -             * on the tap device. */
> -            retval = write(netdev->fd, data, size);
> +            /* Use the tap fd to send to this device.  This is essential for
> +             * tap devices, because packets sent to a tap device with an
> +             * AF_PACKET socket will loop back to be *received* again on the
> +             * tap device. */
> +            struct netdev_dev_linux *dev
> +                = netdev_dev_linux_cast(netdev_get_dev(netdev_));
> +
> +            retval = write(dev->state.tap.fd, data, size);
>          }
>
>          if (retval < 0) {
> @@ -983,14 +1002,9 @@ netdev_linux_send(struct netdev *netdev_, const void *data, size_t size)
>   * expected to do additional queuing of packets.  Thus, this function is
>   * unlikely to ever be used.  It is included for completeness. */
>  static void
> -netdev_linux_send_wait(struct netdev *netdev_)
> +netdev_linux_send_wait(struct netdev *netdev)
>  {
> -    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
> -    if (netdev->fd < 0) {
> -        /* Nothing to do. */
> -    } else if (strcmp(netdev_get_type(netdev_), "tap")) {
> -        poll_fd_wait(netdev->fd, POLLOUT);
> -    } else {
> +    if (is_tap_netdev(netdev)) {
>          /* TAP device always accepts packets.*/
>          poll_immediate_wake();
>      }
> @@ -1018,7 +1032,7 @@ netdev_linux_set_etheraddr(struct netdev *netdev_,
>      }
>
>      /* Tap devices must be brought down before setting the address. */
> -    if (!strcmp(netdev_get_type(netdev_), "tap")) {
> +    if (is_tap_netdev(netdev_)) {
>          enum netdev_flags flags;
>
>          if (!netdev_get_flags(netdev_, &flags) && (flags & NETDEV_UP)) {
> @@ -2489,10 +2503,7 @@ netdev_linux_change_seq(const struct netdev *netdev)
>      netdev_linux_open,                                          \
>      netdev_linux_close,                                         \
>                                                                  \
> -    netdev_linux_listen,                                        \
> -    netdev_linux_recv,                                          \
> -    netdev_linux_recv_wait,                                     \
> -    netdev_linux_drain,                                         \
> +    netdev_linux_rx_open,                                       \
>                                                                  \
>      netdev_linux_send,                                          \
>      netdev_linux_send_wait,                                     \
> @@ -2562,6 +2573,13 @@ const struct netdev_class netdev_internal_class =
>          netdev_internal_set_stats,
>          NULL,                  /* get_features */
>          netdev_internal_get_status);
> +
> +static const struct netdev_rx_class netdev_rx_linux_class = {
> +    netdev_rx_linux_destroy,
> +    netdev_rx_linux_recv,
> +    netdev_rx_linux_wait,
> +    netdev_rx_linux_drain,
> +};
>
>  /* HTB traffic control class. */
>
> diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
> index 00799b1..bfdcd30 100644
> --- a/lib/netdev-provider.h
> +++ b/lib/netdev-provider.h
> @@ -148,58 +148,19 @@ struct netdev_class {
>
>      /* Closes 'netdev'. */
>      void (*close)(struct netdev *netdev);
> -
> -/* ## ----------------- ## */
> -/* ## Receiving Packets ## */
> -/* ## ----------------- ## */
> -
> -/* The network provider interface is mostly used for inspecting and configuring
> - * device "metadata", not for sending and receiving packets directly.  It may
> - * be impractical to implement these functions on some operating systems and
> - * hardware.  These functions may all be NULL in such cases.
> - *
> - * (However, the "dpif-netdev" implementation, which is the easiest way to
> - * integrate Open vSwitch with a new operating system or hardware, does require
> - * the ability to receive packets.) */
>
> -    /* Attempts to set up 'netdev' for receiving packets with ->recv().
> -     * Returns 0 if successful, otherwise a positive errno value.  Return
> +    /* Attempts to open a netdev_rx for receiving packets from 'netdev'.
> +     * Returns 0 if successful, otherwise a positive errno value.  Returns
>       * EOPNOTSUPP to indicate that the network device does not implement packet
>       * reception through this interface.  This function may be set to null if
>       * it would always return EOPNOTSUPP anyhow.  (This will prevent the
>       * network device from being usefully used by the netdev-based "userspace
> -     * datapath".)*/
> -    int (*listen)(struct netdev *netdev);
> -
> -    /* Attempts to receive a packet from 'netdev' into the 'size' bytes in
> -     * 'buffer'.  If successful, returns the number of bytes in the received
> -     * packet, otherwise a negative errno value.  Returns -EAGAIN immediately
> -     * if no packet is ready to be received.
> -     *
> -     * Returns -EMSGSIZE, and discards the packet, if the received packet is
> -     * longer than 'size' bytes.
> -     *
> -     * This function can only be expected to return a packet if ->listen() has
> -     * been called successfully.
> -     *
> -     * May be null if not needed, such as for a network device that does not
> -     * implement packet reception through the 'recv' member function. */
> -    int (*recv)(struct netdev *netdev, void *buffer, size_t size);
> -
> -    /* Registers with the poll loop to wake up from the next call to
> -     * poll_block() when a packet is ready to be received with netdev_recv() on
> -     * 'netdev'.
> +     * datapath".)
>       *
> -     * May be null if not needed, such as for a network device that does not
> -     * implement packet reception through the 'recv' member function. */
> -    void (*recv_wait)(struct netdev *netdev);
> +     * On success, the implementation must set '*rxp' to a 'netdev_rx' for
> +     * 'netdev' that it has already initialized (with netdev_rx_init()). */
> +    int (*rx_open)(struct netdev *netdev, struct netdev_rx **rxp);
>
> -    /* Discards all packets waiting to be received from 'netdev'.
> -     *
> -     * May be null if not needed, such as for a network device that does not
> -     * implement packet reception through the 'recv' member function. */
> -    int (*drain)(struct netdev *netdev);
> -
>      /* Sends the 'size'-byte packet in 'buffer' on 'netdev'.  Returns 0 if
>       * successful, otherwise a positive errno value.  Returns EAGAIN without
>       * blocking if the packet cannot be queued immediately.  Returns EMSGSIZE
> @@ -591,6 +552,48 @@ struct netdev_class {
>       * change, although implementations should try to avoid this. */
>      unsigned int (*change_seq)(const struct netdev *netdev);
>  };
> +
> +/* A data structure for capturing packets received by a network device.
> + *
> + * This structure should be treated as opaque by network device
> + * implementations. */
> +struct netdev_rx {
> +    const struct netdev_rx_class *rx_class;
> +    struct netdev_dev *netdev_dev;
> +};
> +
> +void netdev_rx_init(struct netdev_rx *, struct netdev_dev *,
> +                    const struct netdev_rx_class *);
> +void netdev_rx_uninit(struct netdev_rx *);
> +struct netdev_dev *netdev_rx_get_dev(const struct netdev_rx *);
> +
> +struct netdev_rx_class {
> +    /* Destroys 'rx'. */
> +    void (*destroy)(struct netdev_rx *rx);
> +
> +    /* Attempts to receive a packet from 'rx' into the 'size' bytes in
> +     * 'buffer'.  If successful, returns the number of bytes in the received
> +     * packet, otherwise a negative errno value.  Returns -EAGAIN immediately
> +     * if no packet is ready to be received.
> +     *
> +     * Must return -EMSGSIZE, and discard the packet, if the received packet
> +     * is longer than 'size' bytes. */
> +    int (*recv)(struct netdev_rx *rx, void *buffer, size_t size);
> +
> +    /* Registers with the poll loop to wake up from the next call to
> +     * poll_block() when a packet is ready to be received with netdev_rx_recv()
> +     * on 'rx'. */
> +    void (*wait)(struct netdev_rx *rx);
> +
> +    /* Discards all packets waiting to be received from 'rx'. */
> +    int (*drain)(struct netdev_rx *rx);
> +};
> +
> +static inline void netdev_rx_assert_class(const struct netdev_rx *rx,
> +                                          const struct netdev_rx_class *class_)
> +{
> +    ovs_assert(rx->rx_class == class_);
> +}
>
>  int netdev_register_provider(const struct netdev_class *);
>  int netdev_unregister_provider(const char *type);
> diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
> index 0711731..c1c3cbb 100644
> --- a/lib/netdev-vport.c
> +++ b/lib/netdev-vport.c
> @@ -644,10 +644,7 @@ get_stats(const struct netdev *netdev, struct netdev_stats *stats)
>      netdev_vport_open,                                      \
>      netdev_vport_close,                                     \
>                                                              \
> -    NULL,                       /* listen */                \
> -    NULL,                       /* recv */                  \
> -    NULL,                       /* recv_wait */             \
> -    NULL,                       /* drain */                 \
> +    NULL,                       /* rx_open */               \
>                                                              \
>      NULL,                       /* send */                  \
>      NULL,                       /* send_wait */             \
> diff --git a/lib/netdev.c b/lib/netdev.c
> index 415cdb4..aa0e012 100644
> --- a/lib/netdev.c
> +++ b/lib/netdev.c
> @@ -348,49 +348,44 @@ netdev_parse_name(const char *netdev_name_, char **name, char **type)
>      }
>  }
>
> -/* Attempts to set up 'netdev' for receiving packets with netdev_recv().
> - * Returns 0 if successful, otherwise a positive errno value.  EOPNOTSUPP
> - * indicates that the network device does not implement packet reception
> - * through this interface. */
>  int
> -netdev_listen(struct netdev *netdev)
> +netdev_rx_open(struct netdev *netdev, struct netdev_rx **rxp)
>  {
> -    int (*listen)(struct netdev *);
> +    struct netdev_dev *dev = netdev_get_dev(netdev);
> +    int error;
>
> -    listen = netdev_get_dev(netdev)->netdev_class->listen;
> -    return listen ? (listen)(netdev) : EOPNOTSUPP;
> +    error = (dev->netdev_class->rx_open
> +             ? dev->netdev_class->rx_open(netdev, rxp)
> +             : EOPNOTSUPP);
> +    if (!error) {
> +        ovs_assert((*rxp)->netdev_dev == dev);
> +        dev->ref_cnt++;
> +    } else {
> +        *rxp = NULL;
> +    }
> +    return error;
> +}
> +
> +void
> +netdev_rx_close(struct netdev_rx *rx)
> +{
> +    if (rx) {
> +        struct netdev_dev *dev = rx->netdev_dev;
> +
> +        rx->rx_class->destroy(rx);
> +        netdev_dev_unref(dev);
> +    }
>  }
>
> -/* Attempts to receive a packet from 'netdev' into 'buffer', which the caller
> - * must have initialized with sufficient room for the packet.  The space
> - * required to receive any packet is ETH_HEADER_LEN bytes, plus VLAN_HEADER_LEN
> - * bytes, plus the device's MTU (which may be retrieved via netdev_get_mtu()).
> - * (Some devices do not allow for a VLAN header, in which case VLAN_HEADER_LEN
> - * need not be included.)
> - *
> - * This function can only be expected to return a packet if ->listen() has
> - * been called successfully.
> - *
> - * If a packet is successfully retrieved, returns 0.  In this case 'buffer' is
> - * guaranteed to contain at least ETH_TOTAL_MIN bytes.  Otherwise, returns a
> - * positive errno value.  Returns EAGAIN immediately if no packet is ready to
> - * be returned.
> - *
> - * Some network devices may not implement support for this function.  In such
> - * cases this function will always return EOPNOTSUPP. */
>  int
> -netdev_recv(struct netdev *netdev, struct ofpbuf *buffer)
> +netdev_rx_recv(struct netdev_rx *rx, struct ofpbuf *buffer)
>  {
> -    int (*recv)(struct netdev *, void *, size_t);
>      int retval;
>
>      ovs_assert(buffer->size == 0);
>      ovs_assert(ofpbuf_tailroom(buffer) >= ETH_TOTAL_MIN);
>
> -    recv = netdev_get_dev(netdev)->netdev_class->recv;
> -    retval = (recv
> -              ? (recv)(netdev, buffer->data, ofpbuf_tailroom(buffer))
> -              : -EOPNOTSUPP);
> +    retval = rx->rx_class->recv(rx, buffer->data, ofpbuf_tailroom(buffer));
>      if (retval >= 0) {
>          COVERAGE_INC(netdev_received);
>          buffer->size += retval;
> @@ -403,27 +398,16 @@ netdev_recv(struct netdev *netdev, struct ofpbuf *buffer)
>      }
>  }
>
> -/* Registers with the poll loop to wake up from the next call to poll_block()
> - * when a packet is ready to be received with netdev_recv() on 'netdev'. */
>  void
> -netdev_recv_wait(struct netdev *netdev)
> +netdev_rx_wait(struct netdev_rx *rx)
>  {
> -    void (*recv_wait)(struct netdev *);
> -
> -    recv_wait = netdev_get_dev(netdev)->netdev_class->recv_wait;
> -    if (recv_wait) {
> -        recv_wait(netdev);
> -    }
> +    rx->rx_class->wait(rx);
>  }
>
> -/* Discards all packets waiting to be received from 'netdev'. */
>  int
> -netdev_drain(struct netdev *netdev)
> +netdev_rx_drain(struct netdev_rx *rx)
>  {
> -    int (*drain)(struct netdev *);
> -
> -    drain = netdev_get_dev(netdev)->netdev_class->drain;
> -    return drain ? drain(netdev) : 0;
> +    return rx->rx_class->drain ? rx->rx_class->drain(rx) : 0;
>  }
>
>  /* Sends 'buffer' on 'netdev'.  Returns 0 if successful, otherwise a positive
> @@ -1459,8 +1443,34 @@ netdev_get_dev(const struct netdev *netdev)
>      return netdev->netdev_dev;
>  }
>
> -/* Restores all flags that have been saved with netdev_save_flags() and not yet
> - * restored with netdev_restore_flags(). */
> +void
> +netdev_rx_init(struct netdev_rx *rx, struct netdev_dev *dev,
> +               const struct netdev_rx_class *class)
> +{
> +    ovs_assert(dev->ref_cnt > 0);
> +    rx->rx_class = class;
> +    rx->netdev_dev = dev;
> +}
> +
> +void
> +netdev_rx_uninit(struct netdev_rx *rx OVS_UNUSED)
> +{
> +    /* Nothing to do. */
> +}
> +
> +struct netdev_dev *
> +netdev_rx_get_dev(const struct netdev_rx *rx)
> +{
> +    ovs_assert(rx->netdev_dev->ref_cnt > 0);
> +    return rx->netdev_dev;
> +}
> +
> +const char *
> +netdev_rx_get_name(const struct netdev_rx *rx)
> +{
> +    return netdev_dev_get_name(netdev_rx_get_dev(rx));
> +}
> +
>  static void
>  restore_all_flags(void *aux OVS_UNUSED)
>  {
> diff --git a/lib/netdev.h b/lib/netdev.h
> index 86924aa..852b75d 100644
> --- a/lib/netdev.h
> +++ b/lib/netdev.h
> @@ -33,6 +33,9 @@ extern "C" {
>   * The PORTING file at the top of the source tree has more information in the
>   * "Writing a netdev Provider" section. */
>
> +struct netdev;
> +struct netdev_class;
> +struct netdev_rx;
>  struct netdev_saved_flags;
>  struct ofpbuf;
>  struct in_addr;
> @@ -99,9 +102,6 @@ struct netdev_tunnel_config {
>      bool dont_fragment;
>  };
>
> -struct netdev;
> -struct netdev_class;
> -
>  void netdev_run(void);
>  void netdev_wait(void);
>
> @@ -127,12 +127,17 @@ int netdev_get_mtu(const struct netdev *, int *mtup);
>  int netdev_set_mtu(const struct netdev *, int mtu);
>  int netdev_get_ifindex(const struct netdev *);
>
> -/* Packet send and receive. */
> -int netdev_listen(struct netdev *);
> -int netdev_recv(struct netdev *, struct ofpbuf *);
> -void netdev_recv_wait(struct netdev *);
> -int netdev_drain(struct netdev *);
> +/* Packet reception. */
> +int netdev_rx_open(struct netdev *, struct netdev_rx **);
> +void netdev_rx_close(struct netdev_rx *);
> +
> +const char *netdev_rx_get_name(const struct netdev_rx *);
> +
> +int netdev_rx_recv(struct netdev_rx *, struct ofpbuf *);
> +void netdev_rx_wait(struct netdev_rx *);
> +int netdev_rx_drain(struct netdev_rx *);
>
> +/* Packet transmission. */
>  int netdev_send(struct netdev *, const struct ofpbuf *);
>  void netdev_send_wait(struct netdev *);
>
> --
> 1.7.10.4
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev



More information about the dev mailing list