[ovs-dev] [netdev v3 2/4] netdev: Add new "struct netdev_rx" for capturing packets from a netdev.

Ben Pfaff blp at nicira.com
Thu May 9 22:35:07 UTC 2013


Separating packet capture from "struct netdev" means that there is no
remaining per-"struct netdev" state, which will allow us to get rid of
"struct netdev_dev" (by renaming it to "struct netdev").

Signed-off-by: Ben Pfaff <blp at nicira.com>
---
 lib/dpif-netdev.c     |   13 ++-
 lib/netdev-bsd.c      |  309 ++++++++++++++++++++++++++-----------------------
 lib/netdev-dummy.c    |   95 ++++++++++------
 lib/netdev-linux.c    |  216 +++++++++++++++++++----------------
 lib/netdev-provider.h |   93 ++++++++-------
 lib/netdev-vport.c    |    5 +-
 lib/netdev.c          |  107 +++++++++--------
 lib/netdev.h          |   21 ++--
 8 files changed, 469 insertions(+), 390 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 40f59c3..78bdedb 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -107,6 +107,7 @@ struct dp_netdev_port {
     struct list node;           /* Element in dp_netdev's 'port_list'. */
     struct netdev *netdev;
     struct netdev_saved_flags *sf;
+    struct netdev_rx *rx;
     char *type;                 /* Port type as requested by user. */
 };
 
@@ -378,6 +379,7 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
     struct netdev_saved_flags *sf;
     struct dp_netdev_port *port;
     struct netdev *netdev;
+    struct netdev_rx *rx;
     const char *open_type;
     int mtu;
     int error;
@@ -393,7 +395,7 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
     /* XXX reject loopback devices */
     /* XXX reject non-Ethernet devices */
 
-    error = netdev_listen(netdev);
+    error = netdev_rx_open(netdev, &rx);
     if (error
         && !(error == EOPNOTSUPP && dpif_netdev_class_is_dummy(dp->class))) {
         VLOG_ERR("%s: cannot receive packets on this network device (%s)",
@@ -404,6 +406,7 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
 
     error = netdev_turn_flags_on(netdev, NETDEV_PROMISC, &sf);
     if (error) {
+        netdev_rx_close(rx);
         netdev_close(netdev);
         return error;
     }
@@ -412,6 +415,7 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
     port->port_no = port_no;
     port->netdev = netdev;
     port->sf = sf;
+    port->rx = rx;
     port->type = xstrdup(type);
 
     error = netdev_get_mtu(netdev, &mtu);
@@ -509,6 +513,7 @@ do_del_port(struct dp_netdev *dp, uint32_t port_no)
 
     netdev_close(port->netdev);
     netdev_restore_flags(port->sf);
+    netdev_rx_close(port->rx);
     free(port->type);
     free(port);
 
@@ -1063,7 +1068,7 @@ dpif_netdev_run(struct dpif *dpif)
         ofpbuf_clear(&packet);
         ofpbuf_reserve(&packet, DP_NETDEV_HEADROOM);
 
-        error = netdev_recv(port->netdev, &packet);
+        error = port->rx ? netdev_rx_recv(port->rx, &packet) : EOPNOTSUPP;
         if (!error) {
             dp_netdev_port_input(dp, port, &packet);
         } else if (error != EAGAIN && error != EOPNOTSUPP) {
@@ -1083,7 +1088,9 @@ dpif_netdev_wait(struct dpif *dpif)
     struct dp_netdev_port *port;
 
     LIST_FOR_EACH (port, node, &dp->port_list) {
-        netdev_recv_wait(port->netdev);
+        if (port->rx) {
+            netdev_rx_wait(port->rx);
+        }
     }
 }
 
diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c
index 7ab9d3e..8950a4c 100644
--- a/lib/netdev-bsd.c
+++ b/lib/netdev-bsd.c
@@ -16,6 +16,7 @@
 
 #include <config.h>
 
+#include "netdev-provider.h"
 #include <stdlib.h>
 #include <errno.h>
 #include <fcntl.h>
@@ -41,7 +42,6 @@
 #include "coverage.h"
 #include "dynamic-string.h"
 #include "fatal-signal.h"
-#include "netdev-provider.h"
 #include "ofpbuf.h"
 #include "openflow/openflow.h"
 #include "packets.h"
@@ -57,7 +57,7 @@ VLOG_DEFINE_THIS_MODULE(netdev_bsd);
 
 /*
  * This file implements objects to access interfaces.
- * Externally, interfaces are represented by two structures:
+ * Externally, interfaces are represented by three structures:
  *   + struct netdev_dev, representing a network device,
  *     containing e.g. name and a refcount;
  *     We can have private variables by embedding the
@@ -66,26 +66,36 @@ VLOG_DEFINE_THIS_MODULE(netdev_bsd);
  *
  *   + struct netdev, representing an instance of an open netdev_dev.
  *     The structure contains a pointer to the 'struct netdev'
- *     representing the device. Again, private information
- *     such as file descriptor etc. are stored in our
- *     own struct netdev_bsd which includes a struct netdev.
+ *     representing the device.
+ *
+ *   + struct netdev_rx, which represents a netdev open to capture received
+ *     packets.  Again, private information, such as the file descriptor,
+ *     is stored in our own struct netdev_rx_bsd, which embeds a struct
+ *     netdev_rx.
  *
- * Both 'struct netdev' and 'struct netdev_dev' are referenced
- * in containers which hold pointers to the data structures.
- * We can reach our own struct netdev_XXX_bsd by putting a
- * struct netdev_XXX within our own struct, and using CONTAINER_OF
- * to access the parent structure.
+ * 'struct netdev', 'struct netdev_dev', and 'struct netdev_rx' are referenced
+ * in containers which hold pointers to the data structures.  We can reach our
+ * own struct netdev_XXX_bsd by putting a struct netdev_XXX within our own
+ * struct, and using CONTAINER_OF to access the parent structure.
  */
 struct netdev_bsd {
     struct netdev netdev;
+};
+
+struct netdev_rx_bsd {
+    struct netdev_rx up;
 
-    int netdev_fd;   /* Selectable file descriptor for the network device.
-                        This descriptor will be used for polling operations */
+    /* Packet capture descriptor for a system network device.
+     * For a tap device this is NULL. */
+    pcap_t *pcap_handle;
 
-    pcap_t *pcap_handle;  /* Packet capture descriptor for a system network
-                             device */
+    /* Selectable file descriptor for the network device.
+     * This descriptor will be used for polling operations. */
+    int fd;
 };
 
+static const struct netdev_rx_class netdev_rx_bsd_class;
+
 struct netdev_dev_bsd {
     struct netdev_dev netdev_dev;
     unsigned int cache_valid;
@@ -98,8 +108,11 @@ struct netdev_dev_bsd {
     int mtu;
     int carrier;
 
-    bool tap_opened;
-    int tap_fd;         /* TAP character device, if any */
+    int tap_fd;         /* TAP character device, if any, otherwise -1. */
+
+    /* Used for sending packets on non-tap devices. */
+    pcap_t *pcap;
+    int fd;
 };
 
 
@@ -169,6 +182,13 @@ netdev_dev_bsd_cast(const struct netdev_dev *netdev_dev)
     return CONTAINER_OF(netdev_dev, struct netdev_dev_bsd, netdev_dev);
 }
 
+static struct netdev_rx_bsd *
+netdev_rx_bsd_cast(const struct netdev_rx *rx)
+{
+    netdev_rx_assert_class(rx, &netdev_rx_bsd_class);
+    return CONTAINER_OF(rx, struct netdev_rx_bsd, up);
+}
+
 /* Initialize the AF_INET socket used for ioctl operations */
 static int
 netdev_bsd_init(void)
@@ -299,6 +319,7 @@ netdev_bsd_create_system(const struct netdev_class *class, const char *name,
     netdev_dev = xzalloc(sizeof *netdev_dev);
     netdev_dev->change_seq = 1;
     netdev_dev_init(&netdev_dev->netdev_dev, name, class);
+    netdev_dev->tap_fd = -1;
     *netdev_devp = &netdev_dev->netdev_dev;
 
     return 0;
@@ -388,10 +409,12 @@ netdev_bsd_destroy(struct netdev_dev *netdev_dev_)
 
     cache_notifier_unref();
 
-    if (netdev_dev->tap_fd >= 0 &&
-            !strcmp(netdev_dev_get_type(netdev_dev_), "tap")) {
+    if (netdev_dev->tap_fd >= 0) {
         destroy_tap(netdev_dev->tap_fd, netdev_dev_get_name(netdev_dev_));
     }
+    if (netdev_dev->pcap) {
+        pcap_close(netdev_dev->pcap);
+    }
     free(netdev_dev);
 }
 
@@ -405,7 +428,6 @@ netdev_bsd_open_system(struct netdev_dev *netdev_dev_, struct netdev **netdevp)
 
     /* Allocate network device. */
     netdev = xcalloc(1, sizeof *netdev);
-    netdev->netdev_fd = -1;
     netdev_init(&netdev->netdev, netdev_dev_);
 
     /* Verify that the netdev really exists by attempting to read its flags */
@@ -430,56 +452,35 @@ netdev_bsd_close(struct netdev *netdev_)
 {
     struct netdev_bsd *netdev = netdev_bsd_cast(netdev_);
 
-    if (netdev->netdev_fd >= 0 && strcmp(netdev_get_type(netdev_), "tap")) {
-        pcap_close(netdev->pcap_handle);
-    }
-
     free(netdev);
 }
 
 static int
-netdev_bsd_listen(struct netdev *netdev_)
+netdev_bsd_open_pcap(const char *name, pcap_t **pcapp, int *fdp)
 {
-    struct netdev_bsd *netdev = netdev_bsd_cast(netdev_);
-    struct netdev_dev_bsd *netdev_dev =
-                              netdev_dev_bsd_cast(netdev_get_dev(netdev_));
-
     char errbuf[PCAP_ERRBUF_SIZE];
-    int error;
-    int fd = -1;
+    pcap_t *pcap = NULL;
     int one = 1;
+    int error;
+    int fd;
 
-    if (netdev->netdev_fd >= 0) {
-        return 0;
-    }
-
-    if (!strcmp(netdev_get_type(netdev_), "tap") &&
-            !netdev_dev->tap_opened) {
-        netdev->netdev_fd = netdev_dev->tap_fd;
-        netdev_dev->tap_opened = true;
-        return 0;
-    }
-
-    /* open the pcap device. The device is opened in non-promiscuous mode
+    /* Open the pcap device.  The device is opened in non-promiscuous mode
      * because the interface flags are manually set by the caller. */
     errbuf[0] = '\0';
-    netdev->pcap_handle = pcap_open_live(netdev_get_name(netdev_), PCAP_SNAPLEN,
-				     0, 1000, errbuf);
-    if (netdev->pcap_handle == NULL) {
-        VLOG_ERR("%s: pcap_open_live failed: %s",
-		netdev_get_name(netdev_), errbuf);
+    pcap = pcap_open_live(name, PCAP_SNAPLEN, 0, 1000, errbuf);
+    if (!pcap) {
+        VLOG_ERR_RL(&rl, "%s: pcap_open_live failed: %s", name, errbuf);
         error = EIO;
         goto error;
-    } else if (errbuf[0] !=  '\0') {
-        VLOG_WARN("%s: pcap_open_live: %s",
-		netdev_get_name(netdev_), errbuf);
+    }
+    if (errbuf[0] != '\0') {
+        VLOG_WARN_RL(&rl, "%s: pcap_open_live: %s", name, errbuf);
     }
 
-    netdev_dev_bsd_changed(netdev_dev_bsd_cast(netdev_get_dev(netdev_)));
-
-    /* initialize netdev->netdev_fd */
-    fd = pcap_get_selectable_fd(netdev->pcap_handle);
+    /* Get the underlying fd. */
+    fd = pcap_get_selectable_fd(pcap);
     if (fd == -1) {
+        VLOG_WARN_RL(&rl, "%s: no selectable file descriptor", name);
         error = errno;
         goto error;
     }
@@ -487,39 +488,83 @@ netdev_bsd_listen(struct netdev *netdev_)
     /* Set non-blocking mode. Also the BIOCIMMEDIATE ioctl must be called
      * on the file descriptor returned by pcap_get_selectable_fd to achieve
      * a real non-blocking behaviour.*/
-    error = pcap_setnonblock(netdev->pcap_handle, 1, errbuf);
+    error = pcap_setnonblock(pcap, 1, errbuf);
     if (error == -1) {
         error = errno;
         goto error;
     }
 
-    /* This call assure that reads return immediately upon packet reception.
-     * Otherwise, a read will block until either the kernel buffer becomes
-     * full or a timeout occurs. */
-    if(ioctl(fd, BIOCIMMEDIATE, &one) < 0 ) {
-        VLOG_ERR("ioctl(BIOCIMMEDIATE) on %s device failed: %s",
-		netdev_get_name(netdev_), strerror(errno));
+    /* This call ensures that reads return immediately upon packet
+     * reception.  Otherwise, a read will block until either the kernel
+     * buffer becomes full or a timeout occurs. */
+    if (ioctl(fd, BIOCIMMEDIATE, &one) < 0 ) {
+        VLOG_ERR_RL(&rl, "ioctl(BIOCIMMEDIATE) on %s device failed: %s",
+                    name, strerror(errno));
         error = errno;
         goto error;
     }
 
-    /* Capture only incoming packets */
-    error = pcap_setdirection(netdev->pcap_handle, PCAP_D_IN);
+    /* Capture only incoming packets. */
+    error = pcap_setdirection(pcap, PCAP_D_IN);
     if (error == -1) {
         error = errno;
         goto error;
     }
 
-    netdev->netdev_fd = fd;
+    *pcapp = pcap;
+    *fdp = fd;
     return 0;
 
 error:
-    if (fd >= 0) {
-        close(netdev->netdev_fd);
+    if (pcap) {
+        pcap_close(pcap);
     }
+    *pcapp = NULL;
+    *fdp = -1;
     return error;
 }
 
+static int
+netdev_bsd_rx_open(struct netdev *netdev_, struct netdev_rx **rxp)
+{
+    struct netdev_dev_bsd *netdev_dev =
+                              netdev_dev_bsd_cast(netdev_get_dev(netdev_));
+
+    struct netdev_rx_bsd *rx;
+    pcap_t *pcap;
+    int fd;
+
+    if (!strcmp(netdev_get_type(netdev_), "tap")) {
+        pcap = NULL;
+        fd = netdev_dev->tap_fd;
+    } else {
+        int error = netdev_bsd_open_pcap(netdev_get_name(netdev_), &pcap, &fd);
+        if (error) {
+            return error;
+        }
+
+        netdev_dev_bsd_changed(netdev_dev);
+    }
+
+    rx = xmalloc(sizeof *rx);
+    netdev_rx_init(&rx->up, netdev_get_dev(netdev_), &netdev_rx_bsd_class);
+    rx->pcap_handle = pcap;
+    rx->fd = fd;
+
+    *rxp = &rx->up;
+    return 0;
+}
+
+static void
+netdev_rx_bsd_destroy(struct netdev_rx *rx_)
+{
+    struct netdev_rx_bsd *rx = netdev_rx_bsd_cast(rx_);
+
+    if (rx->pcap_handle) {
+        pcap_close(rx->pcap_handle);
+    }
+    free(rx);
+}
 
 /* The recv callback of the netdev class returns the number of bytes of the
  * received packet.
@@ -566,24 +611,20 @@ proc_pkt(u_char *args_, const struct pcap_pkthdr *hdr, const u_char *packet)
  * This function attempts to receive a packet from the specified network
  * device. It is assumed that the network device is a system device or a tap
  * device opened as a system one. In this case the read operation is performed
- * on the 'netdev' pcap descriptor.
+ * on 'rx->pcap_handle'.
  */
 static int
-netdev_bsd_recv_system(struct netdev_bsd *netdev, void *data, size_t size)
+netdev_rx_bsd_recv_pcap(struct netdev_rx_bsd *rx, void *data, size_t size)
 {
     struct pcap_arg arg;
     int ret;
 
-    if (netdev->netdev_fd < 0) {
-        return -EAGAIN;
-    }
-
     /* prepare the pcap argument to store the packet */
     arg.size = size;
     arg.data = data;
 
     for (;;) {
-        ret = pcap_dispatch(netdev->pcap_handle, 1, proc_pkt, (u_char *)&arg);
+        ret = pcap_dispatch(rx->pcap_handle, 1, proc_pkt, (u_char *)&arg);
 
         if (ret > 0) {
             return arg.retval;	/* arg.retval < 0 is handled in the caller */
@@ -600,25 +641,20 @@ netdev_bsd_recv_system(struct netdev_bsd *netdev, void *data, size_t size)
 
 /*
  * This function attempts to receive a packet from the specified network
- * device. It is assumed that the network device is a tap device and the
- * 'netdev_fd' member of the 'netdev' structure is initialized with the tap
- * file descriptor.
+ * device. It is assumed that the network device is a tap device and
+ * 'rx->fd' is initialized with the tap file descriptor.
  */
 static int
-netdev_bsd_recv_tap(struct netdev_bsd *netdev, void *data, size_t size)
+netdev_rx_bsd_recv_tap(struct netdev_rx_bsd *rx, void *data, size_t size)
 {
-    if (netdev->netdev_fd < 0) {
-        return -EAGAIN;
-    }
-
     for (;;) {
-        ssize_t retval = read(netdev->netdev_fd, data, size);
+        ssize_t retval = read(rx->fd, data, size);
         if (retval >= 0) {
             return retval;
         } else if (errno != EINTR) {
             if (errno != EAGAIN) {
                 VLOG_WARN_RL(&rl, "error receiving Ethernet packet on %s: %s",
-                             strerror(errno), netdev->netdev.netdev_dev->name);
+                             strerror(errno), netdev_rx_get_name(&rx->up));
             }
             return -errno;
         }
@@ -626,58 +662,39 @@ netdev_bsd_recv_tap(struct netdev_bsd *netdev, void *data, size_t size)
 }
 
 
-/*
- * According with the nature of the device a different function must be called.
- * If the device is the bridge local port the 'netdev_bsd_recv_tap' function
- * must be called, otherwise the 'netdev_bsd_recv_system' function is called.
- *
- * type!="tap"                                        --->  system device.
- * type=="tap" && netdev_fd == tap_fd                 --->  internal tap device
- * type=="tap" && netdev_fd != tap_fd                 --->  internal tap device
- *                                                          opened as a system
- *                                                          device.
- */
 static int
-netdev_bsd_recv(struct netdev *netdev_, void* data, size_t size)
+netdev_rx_bsd_recv(struct netdev_rx *rx_, void *data, size_t size)
 {
-    struct netdev_bsd *netdev = netdev_bsd_cast(netdev_);
-    struct netdev_dev_bsd * netdev_dev =
-        netdev_dev_bsd_cast(netdev_get_dev(netdev_));
+    struct netdev_rx_bsd *rx = netdev_rx_bsd_cast(rx_);
 
-    if (!strcmp(netdev_get_type(netdev_), "tap") &&
-            netdev->netdev_fd == netdev_dev->tap_fd) {
-        return netdev_bsd_recv_tap(netdev, data, size);
-    } else {
-        return netdev_bsd_recv_system(netdev, data, size);
-    }
+    return (rx->pcap_handle
+            ? netdev_rx_bsd_recv_pcap(rx, data, size)
+            : netdev_rx_bsd_recv_tap(rx, data, size));
 }
 
-
 /*
  * Registers with the poll loop to wake up from the next call to poll_block()
- * when a packet is ready to be received with netdev_recv() on 'netdev'.
+ * when a packet is ready to be received with netdev_rx_recv() on 'rx'.
  */
 static void
-netdev_bsd_recv_wait(struct netdev *netdev_)
+netdev_rx_bsd_wait(struct netdev_rx *rx_)
 {
-    struct netdev_bsd *netdev = netdev_bsd_cast(netdev_);
+    struct netdev_rx_bsd *rx = netdev_rx_bsd_cast(rx_);
 
-    if (netdev->netdev_fd >= 0) {
-        poll_fd_wait(netdev->netdev_fd, POLLIN);
-    }
+    poll_fd_wait(rx->fd, POLLIN);
 }
 
-/* Discards all packets waiting to be received from 'netdev'. */
+/* Discards all packets waiting to be received from 'rx'. */
 static int
-netdev_bsd_drain(struct netdev *netdev_)
+netdev_rx_bsd_drain(struct netdev_rx *rx_)
 {
     struct ifreq ifr;
-    struct netdev_bsd *netdev = netdev_bsd_cast(netdev_);
+    struct netdev_rx_bsd *rx = netdev_rx_bsd_cast(rx_);
 
-    strcpy(ifr.ifr_name, netdev_get_name(netdev_));
-    if (ioctl(netdev->netdev_fd, BIOCFLUSH, &ifr) == -1) {
+    strcpy(ifr.ifr_name, netdev_rx_get_name(rx_));
+    if (ioctl(rx->fd, BIOCFLUSH, &ifr) == -1) {
         VLOG_DBG_RL(&rl, "%s: ioctl(BIOCFLUSH) failed: %s",
-                    netdev_get_name(netdev_), strerror(errno));
+                    netdev_rx_get_name(rx_), strerror(errno));
         return errno;
     }
     return 0;
@@ -690,34 +707,34 @@ netdev_bsd_drain(struct netdev *netdev_)
 static int
 netdev_bsd_send(struct netdev *netdev_, const void *data, size_t size)
 {
-    struct netdev_bsd *netdev = netdev_bsd_cast(netdev_);
-    struct netdev_dev_bsd * netdev_dev =
-        netdev_dev_bsd_cast(netdev_get_dev(netdev_));
+    struct netdev_dev_bsd *dev = netdev_dev_bsd_cast(netdev_get_dev(netdev_));
+    const char *name = netdev_get_name(netdev_);
 
-    if (netdev->netdev_fd < 0) {
-        return EPIPE;
+    if (dev->tap_fd < 0 && !dev->pcap) {
+        int error = netdev_bsd_open_pcap(name, &dev->pcap, &dev->fd);
+        if (error) {
+            return error;
+        }
     }
 
     for (;;) {
         ssize_t retval;
-        if (!strcmp(netdev_get_type(netdev_), "tap") &&
-                netdev_dev->tap_fd == netdev->netdev_fd) {
-            retval = write(netdev->netdev_fd, data, size);
+        if (dev->tap_fd >= 0) {
+            retval = write(dev->tap_fd, data, size);
         } else {
-            retval = pcap_inject(netdev->pcap_handle, data, size);
+            retval = pcap_inject(dev->pcap, data, size);
         }
         if (retval < 0) {
             if (errno == EINTR) {
                 continue;
             } else if (errno != EAGAIN) {
                 VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s",
-                             netdev_get_name(netdev_), strerror(errno));
+                             name, strerror(errno));
             }
             return errno;
         } else if (retval != size) {
             VLOG_WARN_RL(&rl, "sent partial Ethernet packet (%zd bytes of "
-                         "%zu) on %s", retval, size,
-                         netdev_get_name(netdev_));
+                         "%zu) on %s", retval, size, name);
            return EMSGSIZE;
         } else {
             return 0;
@@ -733,17 +750,16 @@ netdev_bsd_send(struct netdev *netdev_, const void *data, size_t size)
 static void
 netdev_bsd_send_wait(struct netdev *netdev_)
 {
-    struct netdev_bsd *netdev = netdev_bsd_cast(netdev_);
-
-    if (netdev->netdev_fd < 0) { /* Nothing to do. */
-        return;
-    }
+    struct netdev_dev_bsd *dev = netdev_dev_bsd_cast(netdev_get_dev(netdev_));
 
-    if (strcmp(netdev_get_type(netdev_), "tap")) {
-        poll_fd_wait(netdev->netdev_fd, POLLOUT);
-    } else {
+    if (dev->tap_fd >= 0) {
         /* TAP device always accepts packets. */
         poll_immediate_wake();
+    } else if (dev->pcap) {
+        poll_fd_wait(dev->fd, POLLOUT);
+    } else {
+        /* We haven't even tried to send a packet yet. */
+        poll_immediate_wake();
     }
 }
 
@@ -1260,11 +1276,7 @@ const struct netdev_class netdev_bsd_class = {
     netdev_bsd_open_system,
     netdev_bsd_close,
 
-    netdev_bsd_listen,
-
-    netdev_bsd_recv,
-    netdev_bsd_recv_wait,
-    netdev_bsd_drain,
+    netdev_bsd_rx_open,
 
     netdev_bsd_send,
     netdev_bsd_send_wait,
@@ -1321,11 +1333,7 @@ const struct netdev_class netdev_tap_class = {
     netdev_bsd_open_system,
     netdev_bsd_close,
 
-    netdev_bsd_listen,
-
-    netdev_bsd_recv,
-    netdev_bsd_recv_wait,
-    netdev_bsd_drain,
+    netdev_bsd_rx_open,
 
     netdev_bsd_send,
     netdev_bsd_send_wait,
@@ -1367,6 +1375,13 @@ const struct netdev_class netdev_tap_class = {
 
     netdev_bsd_change_seq
 };
+
+static const struct netdev_rx_class netdev_rx_bsd_class = {
+    netdev_rx_bsd_destroy,
+    netdev_rx_bsd_recv,
+    netdev_rx_bsd_wait,
+    netdev_rx_bsd_drain,
+};
 
 
 static void
diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
index de04f9a..3762d5c 100644
--- a/lib/netdev-dummy.c
+++ b/lib/netdev-dummy.c
@@ -49,20 +49,25 @@ struct netdev_dev_dummy {
     struct netdev_stats stats;
     enum netdev_flags flags;
     unsigned int change_seq;
-
-    struct list devs;           /* List of child "netdev_dummy"s. */
     int ifindex;
+
+    struct list rxes;           /* List of child "netdev_rx_dummy"s. */
 };
 
 struct netdev_dummy {
     struct netdev netdev;
-    struct list node;           /* In netdev_dev_dummy's "devs" list. */
+};
+
+struct netdev_rx_dummy {
+    struct netdev_rx up;
+    struct list node;           /* In netdev_dev_dummy's "rxes" list. */
     struct list recv_queue;
-    bool listening;
 };
 
 static struct shash dummy_netdev_devs = SHASH_INITIALIZER(&dummy_netdev_devs);
 
+static const struct netdev_rx_class netdev_rx_dummy_class;
+
 static unixctl_cb_func netdev_dummy_set_admin_state;
 static int netdev_dummy_create(const struct netdev_class *, const char *,
                                struct netdev_dev **);
@@ -93,6 +98,13 @@ netdev_dummy_cast(const struct netdev *netdev)
     return CONTAINER_OF(netdev, struct netdev_dummy, netdev);
 }
 
+static struct netdev_rx_dummy *
+netdev_rx_dummy_cast(const struct netdev_rx *rx)
+{
+    netdev_rx_assert_class(rx, &netdev_rx_dummy_class);
+    return CONTAINER_OF(rx, struct netdev_rx_dummy, up);
+}
+
 static int
 netdev_dummy_create(const struct netdev_class *class, const char *name,
                     struct netdev_dev **netdev_devp)
@@ -112,7 +124,7 @@ netdev_dummy_create(const struct netdev_class *class, const char *name,
     netdev_dev->flags = 0;
     netdev_dev->change_seq = 1;
     netdev_dev->ifindex = -EOPNOTSUPP;
-    list_init(&netdev_dev->devs);
+    list_init(&netdev_dev->rxes);
 
     shash_add(&dummy_netdev_devs, name, netdev_dev);
 
@@ -157,16 +169,12 @@ netdev_dummy_set_config(struct netdev_dev *netdev_dev_,
 static int
 netdev_dummy_open(struct netdev_dev *netdev_dev_, struct netdev **netdevp)
 {
-    struct netdev_dev_dummy *netdev_dev = netdev_dev_dummy_cast(netdev_dev_);
     struct netdev_dummy *netdev;
 
     netdev = xmalloc(sizeof *netdev);
     netdev_init(&netdev->netdev, netdev_dev_);
-    list_init(&netdev->recv_queue);
-    netdev->listening = false;
 
     *netdevp = &netdev->netdev;
-    list_push_back(&netdev_dev->devs, &netdev->node);
     return 0;
 }
 
@@ -174,31 +182,37 @@ static void
 netdev_dummy_close(struct netdev *netdev_)
 {
     struct netdev_dummy *netdev = netdev_dummy_cast(netdev_);
-    list_remove(&netdev->node);
-    ofpbuf_list_delete(&netdev->recv_queue);
     free(netdev);
 }
 
 static int
-netdev_dummy_listen(struct netdev *netdev_)
+netdev_dummy_rx_open(struct netdev *netdev_, struct netdev_rx **rxp)
 {
-    struct netdev_dummy *netdev = netdev_dummy_cast(netdev_);
-    netdev->listening = true;
+    struct netdev_dev_dummy *dev
+        = netdev_dev_dummy_cast(netdev_get_dev(netdev_));
+    struct netdev_rx_dummy *rx;
+
+    rx = xmalloc(sizeof *rx);
+    netdev_rx_init(&rx->up, &dev->netdev_dev, &netdev_rx_dummy_class);
+    list_push_back(&dev->rxes, &rx->node);
+    list_init(&rx->recv_queue);
+
+    *rxp = &rx->up;
     return 0;
 }
 
 static int
-netdev_dummy_recv(struct netdev *netdev_, void *buffer, size_t size)
+netdev_rx_dummy_recv(struct netdev_rx *rx_, void *buffer, size_t size)
 {
-    struct netdev_dummy *netdev = netdev_dummy_cast(netdev_);
+    struct netdev_rx_dummy *rx = netdev_rx_dummy_cast(rx_);
     struct ofpbuf *packet;
     size_t packet_size;
 
-    if (list_is_empty(&netdev->recv_queue)) {
+    if (list_is_empty(&rx->recv_queue)) {
         return -EAGAIN;
     }
 
-    packet = ofpbuf_from_list(list_pop_front(&netdev->recv_queue));
+    packet = ofpbuf_from_list(list_pop_front(&rx->recv_queue));
     if (packet->size > size) {
         return -EMSGSIZE;
     }
@@ -211,19 +225,28 @@ netdev_dummy_recv(struct netdev *netdev_, void *buffer, size_t size)
 }
 
 static void
-netdev_dummy_recv_wait(struct netdev *netdev_)
+netdev_rx_dummy_destroy(struct netdev_rx *rx_)
 {
-    struct netdev_dummy *netdev = netdev_dummy_cast(netdev_);
-    if (!list_is_empty(&netdev->recv_queue)) {
+    struct netdev_rx_dummy *rx = netdev_rx_dummy_cast(rx_);
+    list_remove(&rx->node);
+    ofpbuf_list_delete(&rx->recv_queue);
+    free(rx);
+}
+
+static void
+netdev_rx_dummy_wait(struct netdev_rx *rx_)
+{
+    struct netdev_rx_dummy *rx = netdev_rx_dummy_cast(rx_);
+    if (!list_is_empty(&rx->recv_queue)) {
         poll_immediate_wake();
     }
 }
 
 static int
-netdev_dummy_drain(struct netdev *netdev_)
+netdev_rx_dummy_drain(struct netdev_rx *rx_)
 {
-    struct netdev_dummy *netdev = netdev_dummy_cast(netdev_);
-    ofpbuf_list_delete(&netdev->recv_queue);
+    struct netdev_rx_dummy *rx = netdev_rx_dummy_cast(rx_);
+    ofpbuf_list_delete(&rx->recv_queue);
     return 0;
 }
 
@@ -375,10 +398,7 @@ static const struct netdev_class dummy_class = {
     netdev_dummy_open,
     netdev_dummy_close,
 
-    netdev_dummy_listen,
-    netdev_dummy_recv,
-    netdev_dummy_recv_wait,
-    netdev_dummy_drain,
+    netdev_dummy_rx_open,
 
     netdev_dummy_send,          /* send */
     NULL,                       /* send_wait */
@@ -422,6 +442,13 @@ static const struct netdev_class dummy_class = {
     netdev_dummy_change_seq
 };
 
+static const struct netdev_rx_class netdev_rx_dummy_class = {
+    netdev_rx_dummy_destroy,
+    netdev_rx_dummy_recv,
+    netdev_rx_dummy_wait,
+    netdev_rx_dummy_drain,
+};
+
 static struct ofpbuf *
 eth_from_packet_or_flow(const char *s)
 {
@@ -478,7 +505,7 @@ netdev_dummy_receive(struct unixctl_conn *conn,
 
     n_listeners = 0;
     for (i = 2; i < argc; i++) {
-        struct netdev_dummy *dev;
+        struct netdev_rx_dummy *rx;
         struct ofpbuf *packet;
 
         packet = eth_from_packet_or_flow(argv[i]);
@@ -491,12 +518,10 @@ netdev_dummy_receive(struct unixctl_conn *conn,
         dummy_dev->stats.rx_bytes += packet->size;
 
         n_listeners = 0;
-        LIST_FOR_EACH (dev, node, &dummy_dev->devs) {
-            if (dev->listening) {
-                struct ofpbuf *copy = ofpbuf_clone(packet);
-                list_push_back(&dev->recv_queue, &copy->list_node);
-                n_listeners++;
-            }
+        LIST_FOR_EACH (rx, node, &dummy_dev->rxes) {
+            struct ofpbuf *copy = ofpbuf_clone(packet);
+            list_push_back(&rx->recv_queue, &copy->list_node);
+            n_listeners++;
         }
         ofpbuf_delete(packet);
     }
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 30cd0f6..2e6fedd 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -122,7 +122,6 @@ enum {
 
 struct tap_state {
     int fd;
-    bool opened;
 };
 
 /* Traffic control. */
@@ -400,9 +399,16 @@ struct netdev_dev_linux {
 
 struct netdev_linux {
     struct netdev netdev;
+};
+
+struct netdev_rx_linux {
+    struct netdev_rx up;
+    bool is_tap;
     int fd;
 };
 
+static const struct netdev_rx_class netdev_rx_linux_class;
+
 /* Sockets used for ioctl operations. */
 static int af_inet_sock = -1;   /* AF_INET, SOCK_DGRAM. */
 
@@ -442,6 +448,12 @@ is_netdev_linux_class(const struct netdev_class *netdev_class)
     return netdev_class->init == netdev_linux_init;
 }
 
+static bool
+is_tap_netdev(const struct netdev *netdev)
+{
+    return netdev_dev_get_class(netdev_get_dev(netdev)) == &netdev_tap_class;
+}
+
 static struct netdev_dev_linux *
 netdev_dev_linux_cast(const struct netdev_dev *netdev_dev)
 {
@@ -460,6 +472,13 @@ netdev_linux_cast(const struct netdev *netdev)
 
     return CONTAINER_OF(netdev, struct netdev_linux, netdev);
 }
+
+static struct netdev_rx_linux *
+netdev_rx_linux_cast(const struct netdev_rx *rx)
+{
+    netdev_rx_assert_class(rx, &netdev_rx_linux_class);
+    return CONTAINER_OF(rx, struct netdev_rx_linux, up);
+}
 
 static int
 netdev_linux_init(void)
@@ -729,7 +748,6 @@ netdev_linux_open(struct netdev_dev *netdev_dev_, struct netdev **netdevp)
 
     /* Allocate network device. */
     netdev = xzalloc(sizeof *netdev);
-    netdev->fd = -1;
     netdev_init(&netdev->netdev, netdev_dev_);
 
     /* Verify that the device really exists, by attempting to read its flags.
@@ -761,67 +779,65 @@ netdev_linux_close(struct netdev *netdev_)
 {
     struct netdev_linux *netdev = netdev_linux_cast(netdev_);
 
-    if (netdev->fd > 0 && strcmp(netdev_get_type(netdev_), "tap")) {
-        close(netdev->fd);
-    }
     free(netdev);
 }
 
 static int
-netdev_linux_listen(struct netdev *netdev_)
+netdev_linux_rx_open(struct netdev *netdev_, struct netdev_rx **rxp)
 {
     struct netdev_linux *netdev = netdev_linux_cast(netdev_);
     struct netdev_dev_linux *netdev_dev =
                                 netdev_dev_linux_cast(netdev_get_dev(netdev_));
-    struct sockaddr_ll sll;
-    int ifindex;
+    bool is_tap = is_tap_netdev(netdev_);
+    struct netdev_rx_linux *rx;
     int error;
     int fd;
 
-    if (netdev->fd >= 0) {
-        return 0;
-    }
+    if (is_tap) {
+        fd = netdev_dev->state.tap.fd;
+    } else {
+        struct sockaddr_ll sll;
+        int ifindex;
 
-    if (!strcmp(netdev_get_type(netdev_), "tap")
-        && !netdev_dev->state.tap.opened) {
-        netdev->fd = netdev_dev->state.tap.fd;
-        netdev_dev->state.tap.opened = true;
-        return 0;
-    }
+        /* Create file descriptor. */
+        fd = socket(PF_PACKET, SOCK_RAW, 0);
+        if (fd < 0) {
+            error = errno;
+            VLOG_ERR("failed to create raw socket (%s)", strerror(error));
+            goto error;
+        }
 
-    /* Create file descriptor. */
-    fd = socket(PF_PACKET, SOCK_RAW, 0);
-    if (fd < 0) {
-        error = errno;
-        VLOG_ERR("failed to create raw socket (%s)", strerror(error));
-        goto error;
-    }
+        /* Set non-blocking mode. */
+        error = set_nonblocking(fd);
+        if (error) {
+            goto error;
+        }
 
-    /* Set non-blocking mode. */
-    error = set_nonblocking(fd);
-    if (error) {
-        goto error;
-    }
+        /* Get ethernet device index. */
+        error = get_ifindex(&netdev->netdev, &ifindex);
+        if (error) {
+            goto error;
+        }
 
-    /* Get ethernet device index. */
-    error = get_ifindex(&netdev->netdev, &ifindex);
-    if (error) {
-        goto error;
+        /* Bind to specific ethernet device. */
+        memset(&sll, 0, sizeof sll);
+        sll.sll_family = AF_PACKET;
+        sll.sll_ifindex = ifindex;
+        sll.sll_protocol = (OVS_FORCE unsigned short int) htons(ETH_P_ALL);
+        if (bind(fd, (struct sockaddr *) &sll, sizeof sll) < 0) {
+            error = errno;
+            VLOG_ERR("%s: failed to bind raw socket (%s)",
+                     netdev_get_name(netdev_), strerror(error));
+            goto error;
+        }
     }
 
-    /* Bind to specific ethernet device. */
-    memset(&sll, 0, sizeof sll);
-    sll.sll_family = AF_PACKET;
-    sll.sll_ifindex = ifindex;
-    sll.sll_protocol = (OVS_FORCE unsigned short int) htons(ETH_P_ALL);
-    if (bind(fd, (struct sockaddr *) &sll, sizeof sll) < 0) {
-        error = errno;
-        VLOG_ERR("%s: failed to bind raw socket (%s)",
-                 netdev_get_name(netdev_), strerror(error));
-        goto error;
-    }
+    rx = xmalloc(sizeof *rx);
+    netdev_rx_init(&rx->up, netdev_get_dev(netdev_), &netdev_rx_linux_class);
+    rx->is_tap = is_tap;
+    rx->fd = fd;
 
-    netdev->fd = fd;
+    *rxp = &rx->up;
     return 0;
 
 error:
@@ -831,63 +847,64 @@ error:
     return error;
 }
 
-static int
-netdev_linux_recv(struct netdev *netdev_, void *data, size_t size)
+static void
+netdev_rx_linux_destroy(struct netdev_rx *rx_)
 {
-    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
+    struct netdev_rx_linux *rx = netdev_rx_linux_cast(rx_);
 
-    if (netdev->fd < 0) {
-        /* Device is not listening. */
-        return -EAGAIN;
+    if (!rx->is_tap) {          /* A tap fd is the device's own; keep it. */
+        close(rx->fd);
     }
+    free(rx);
+}
 
-    for (;;) {
-        ssize_t retval;
+static int
+netdev_rx_linux_recv(struct netdev_rx *rx_, void *data, size_t size)
+{
+    struct netdev_rx_linux *rx = netdev_rx_linux_cast(rx_);
+    ssize_t retval;
 
-        retval = (netdev_->netdev_dev->netdev_class == &netdev_tap_class
-                  ? read(netdev->fd, data, size)
-                  : recv(netdev->fd, data, size, MSG_TRUNC));
-        if (retval >= 0) {
-            return retval <= size ? retval : -EMSGSIZE;
-        } else if (errno != EINTR) {
-            if (errno != EAGAIN) {
-                VLOG_WARN_RL(&rl, "error receiving Ethernet packet on %s: %s",
-                             strerror(errno), netdev_get_name(netdev_));
-            }
-            return -errno;
+    do {
+        retval = (rx->is_tap
+                  ? read(rx->fd, data, size)
+                  : recv(rx->fd, data, size, MSG_TRUNC));
+    } while (retval < 0 && errno == EINTR);
+
+    if (retval >= 0) {
+        /* Test the sign first: -1 compared to size_t 'size' looks huge. */
+        return retval > size ? -EMSGSIZE : retval;
+    } else {
+        int error = errno;      /* Logging below may clobber errno. */
+        if (error != EAGAIN) {
+            VLOG_WARN_RL(&rl, "error receiving Ethernet packet on %s: %s",
+                         netdev_rx_get_name(rx_), strerror(error));
         }
+        return -error;
     }
 }
 
-/* Registers with the poll loop to wake up from the next call to poll_block()
- * when a packet is ready to be received with netdev_recv() on 'netdev'. */
 static void
-netdev_linux_recv_wait(struct netdev *netdev_)
+netdev_rx_linux_wait(struct netdev_rx *rx_)
 {
-    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
-    if (netdev->fd >= 0) {
-        poll_fd_wait(netdev->fd, POLLIN);
-    }
+    struct netdev_rx_linux *rx = netdev_rx_linux_cast(rx_);
+    poll_fd_wait(rx->fd, POLLIN);
 }
 
-/* Discards all packets waiting to be received from 'netdev'. */
 static int
-netdev_linux_drain(struct netdev *netdev_)
+netdev_rx_linux_drain(struct netdev_rx *rx_)
 {
-    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
-    if (netdev->fd < 0) {
-        return 0;
-    } else if (!strcmp(netdev_get_type(netdev_), "tap")) {
+    struct netdev_rx_linux *rx = netdev_rx_linux_cast(rx_);
+    if (rx->is_tap) {
         struct ifreq ifr;
-        int error = netdev_linux_do_ioctl(netdev_get_name(netdev_), &ifr,
+        int error = netdev_linux_do_ioctl(netdev_rx_get_name(rx_), &ifr,
                                           SIOCGIFTXQLEN, "SIOCGIFTXQLEN");
         if (error) {
             return error;
         }
-        drain_fd(netdev->fd, ifr.ifr_qlen);
+        drain_fd(rx->fd, ifr.ifr_qlen);
         return 0;
     } else {
-        return drain_rcvbuf(netdev->fd);
+        return drain_rcvbuf(rx->fd);
     }
 }
 
@@ -903,11 +920,10 @@ netdev_linux_drain(struct netdev *netdev_)
 static int
 netdev_linux_send(struct netdev *netdev_, const void *data, size_t size)
 {
-    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
     for (;;) {
         ssize_t retval;
 
-        if (netdev->fd < 0) {
+        if (!is_tap_netdev(netdev_)) {
             /* Use our AF_PACKET socket to send to this device. */
             struct sockaddr_ll sll;
             struct msghdr msg;
@@ -945,11 +961,14 @@ netdev_linux_send(struct netdev *netdev_, const void *data, size_t size)
 
             retval = sendmsg(sock, &msg, 0);
         } else {
-            /* Use the netdev's own fd to send to this device.  This is
-             * essential for tap devices, because packets sent to a tap device
-             * with an AF_PACKET socket will loop back to be *received* again
-             * on the tap device. */
-            retval = write(netdev->fd, data, size);
+            /* Use the tap fd to send to this device.  This is essential for
+             * tap devices, because packets sent to a tap device with an
+             * AF_PACKET socket will loop back to be *received* again on the
+             * tap device. */
+            struct netdev_dev_linux *dev
+                = netdev_dev_linux_cast(netdev_get_dev(netdev_));
+
+            retval = write(dev->state.tap.fd, data, size);
         }
 
         if (retval < 0) {
@@ -983,14 +1002,9 @@ netdev_linux_send(struct netdev *netdev_, const void *data, size_t size)
  * expected to do additional queuing of packets.  Thus, this function is
  * unlikely to ever be used.  It is included for completeness. */
 static void
-netdev_linux_send_wait(struct netdev *netdev_)
+netdev_linux_send_wait(struct netdev *netdev)
 {
-    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
-    if (netdev->fd < 0) {
-        /* Nothing to do. */
-    } else if (strcmp(netdev_get_type(netdev_), "tap")) {
-        poll_fd_wait(netdev->fd, POLLOUT);
-    } else {
+    if (is_tap_netdev(netdev)) {
         /* TAP device always accepts packets.*/
         poll_immediate_wake();
     }
@@ -1018,7 +1032,7 @@ netdev_linux_set_etheraddr(struct netdev *netdev_,
     }
 
     /* Tap devices must be brought down before setting the address. */
-    if (!strcmp(netdev_get_type(netdev_), "tap")) {
+    if (is_tap_netdev(netdev_)) {
         enum netdev_flags flags;
 
         if (!netdev_get_flags(netdev_, &flags) && (flags & NETDEV_UP)) {
@@ -2489,10 +2503,7 @@ netdev_linux_change_seq(const struct netdev *netdev)
     netdev_linux_open,                                          \
     netdev_linux_close,                                         \
                                                                 \
-    netdev_linux_listen,                                        \
-    netdev_linux_recv,                                          \
-    netdev_linux_recv_wait,                                     \
-    netdev_linux_drain,                                         \
+    netdev_linux_rx_open,                                       \
                                                                 \
     netdev_linux_send,                                          \
     netdev_linux_send_wait,                                     \
@@ -2562,6 +2573,13 @@ const struct netdev_class netdev_internal_class =
         netdev_internal_set_stats,
         NULL,                  /* get_features */
         netdev_internal_get_status);
+
+static const struct netdev_rx_class netdev_rx_linux_class = {
+    netdev_rx_linux_destroy,
+    netdev_rx_linux_recv,
+    netdev_rx_linux_wait,
+    netdev_rx_linux_drain,
+};
 
 /* HTB traffic control class. */
 
diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
index 00799b1..bfdcd30 100644
--- a/lib/netdev-provider.h
+++ b/lib/netdev-provider.h
@@ -148,58 +148,19 @@ struct netdev_class {
 
     /* Closes 'netdev'. */
     void (*close)(struct netdev *netdev);
-
-/* ## ----------------- ## */
-/* ## Receiving Packets ## */
-/* ## ----------------- ## */
-
-/* The network provider interface is mostly used for inspecting and configuring
- * device "metadata", not for sending and receiving packets directly.  It may
- * be impractical to implement these functions on some operating systems and
- * hardware.  These functions may all be NULL in such cases.
- *
- * (However, the "dpif-netdev" implementation, which is the easiest way to
- * integrate Open vSwitch with a new operating system or hardware, does require
- * the ability to receive packets.) */
 
-    /* Attempts to set up 'netdev' for receiving packets with ->recv().
-     * Returns 0 if successful, otherwise a positive errno value.  Return
+    /* Attempts to open a netdev_rx for receiving packets from 'netdev'.
+     * Returns 0 if successful, otherwise a positive errno value.  Returns
      * EOPNOTSUPP to indicate that the network device does not implement packet
      * reception through this interface.  This function may be set to null if
      * it would always return EOPNOTSUPP anyhow.  (This will prevent the
      * network device from being usefully used by the netdev-based "userspace
-     * datapath".)*/
-    int (*listen)(struct netdev *netdev);
-
-    /* Attempts to receive a packet from 'netdev' into the 'size' bytes in
-     * 'buffer'.  If successful, returns the number of bytes in the received
-     * packet, otherwise a negative errno value.  Returns -EAGAIN immediately
-     * if no packet is ready to be received.
-     *
-     * Returns -EMSGSIZE, and discards the packet, if the received packet is
-     * longer than 'size' bytes.
-     *
-     * This function can only be expected to return a packet if ->listen() has
-     * been called successfully.
-     *
-     * May be null if not needed, such as for a network device that does not
-     * implement packet reception through the 'recv' member function. */
-    int (*recv)(struct netdev *netdev, void *buffer, size_t size);
-
-    /* Registers with the poll loop to wake up from the next call to
-     * poll_block() when a packet is ready to be received with netdev_recv() on
-     * 'netdev'.
+     * datapath".)
      *
-     * May be null if not needed, such as for a network device that does not
-     * implement packet reception through the 'recv' member function. */
-    void (*recv_wait)(struct netdev *netdev);
+     * On success, the implementation must set '*rxp' to a 'netdev_rx' for
+     * 'netdev' that it has already initialized (with netdev_rx_init()). */
+    int (*rx_open)(struct netdev *netdev, struct netdev_rx **rxp);
 
-    /* Discards all packets waiting to be received from 'netdev'.
-     *
-     * May be null if not needed, such as for a network device that does not
-     * implement packet reception through the 'recv' member function. */
-    int (*drain)(struct netdev *netdev);
-
     /* Sends the 'size'-byte packet in 'buffer' on 'netdev'.  Returns 0 if
      * successful, otherwise a positive errno value.  Returns EAGAIN without
      * blocking if the packet cannot be queued immediately.  Returns EMSGSIZE
@@ -591,6 +552,48 @@ struct netdev_class {
      * change, although implementations should try to avoid this. */
     unsigned int (*change_seq)(const struct netdev *netdev);
 };
+
+/* A data structure for capturing packets received by a network device.
+ *
+ * This structure should be treated as opaque by network device
+ * implementations. */
+struct netdev_rx {
+    const struct netdev_rx_class *rx_class;
+    struct netdev_dev *netdev_dev;
+};
+
+void netdev_rx_init(struct netdev_rx *, struct netdev_dev *,
+                    const struct netdev_rx_class *);
+void netdev_rx_uninit(struct netdev_rx *);
+struct netdev_dev *netdev_rx_get_dev(const struct netdev_rx *);
+
+struct netdev_rx_class {
+    /* Destroys 'rx'. */
+    void (*destroy)(struct netdev_rx *rx);
+
+    /* Attempts to receive a packet from 'rx' into the 'size' bytes in
+     * 'buffer'.  If successful, returns the number of bytes in the received
+     * packet, otherwise a negative errno value.  Returns -EAGAIN immediately
+     * if no packet is ready to be received.
+     *
+     * Must return -EMSGSIZE, and discard the packet, if the received packet
+     * is longer than 'size' bytes. */
+    int (*recv)(struct netdev_rx *rx, void *buffer, size_t size);
+
+    /* Registers with the poll loop to wake up from the next call to
+     * poll_block() when a packet is ready to be received with netdev_rx_recv()
+     * on 'rx'. */
+    void (*wait)(struct netdev_rx *rx);
+
+    /* Discards all packets waiting to be received from 'rx'. */
+    int (*drain)(struct netdev_rx *rx);
+};
+
+static inline void netdev_rx_assert_class(const struct netdev_rx *rx,
+                                          const struct netdev_rx_class *class_)
+{
+    ovs_assert(rx->rx_class == class_);
+}
 
 int netdev_register_provider(const struct netdev_class *);
 int netdev_unregister_provider(const char *type);
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index 2dd8a93..9fa5f33 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -629,10 +629,7 @@ get_stats(const struct netdev *netdev, struct netdev_stats *stats)
     netdev_vport_open,                                      \
     netdev_vport_close,                                     \
                                                             \
-    NULL,                       /* listen */                \
-    NULL,                       /* recv */                  \
-    NULL,                       /* recv_wait */             \
-    NULL,                       /* drain */                 \
+    NULL,                       /* rx_open */               \
                                                             \
     NULL,                       /* send */                  \
     NULL,                       /* send_wait */             \
diff --git a/lib/netdev.c b/lib/netdev.c
index 9590f83..628169e 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -346,49 +346,44 @@ netdev_parse_name(const char *netdev_name_, char **name, char **type)
     }
 }
 
-/* Attempts to set up 'netdev' for receiving packets with netdev_recv().
- * Returns 0 if successful, otherwise a positive errno value.  EOPNOTSUPP
- * indicates that the network device does not implement packet reception
- * through this interface. */
 int
-netdev_listen(struct netdev *netdev)
+netdev_rx_open(struct netdev *netdev, struct netdev_rx **rxp)
 {
-    int (*listen)(struct netdev *);
+    struct netdev_dev *dev = netdev_get_dev(netdev);
+    int error;
 
-    listen = netdev_get_dev(netdev)->netdev_class->listen;
-    return listen ? (listen)(netdev) : EOPNOTSUPP;
+    error = (dev->netdev_class->rx_open
+             ? dev->netdev_class->rx_open(netdev, rxp)
+             : EOPNOTSUPP);
+    if (!error) {
+        ovs_assert((*rxp)->netdev_dev == dev);
+        dev->ref_cnt++;
+    } else {
+        *rxp = NULL;
+    }
+    return error;
+}
+
+void
+netdev_rx_close(struct netdev_rx *rx)
+{
+    if (rx) {
+        struct netdev_dev *dev = rx->netdev_dev;
+
+        rx->rx_class->destroy(rx);
+        netdev_dev_unref(dev);
+    }
 }
 
-/* Attempts to receive a packet from 'netdev' into 'buffer', which the caller
- * must have initialized with sufficient room for the packet.  The space
- * required to receive any packet is ETH_HEADER_LEN bytes, plus VLAN_HEADER_LEN
- * bytes, plus the device's MTU (which may be retrieved via netdev_get_mtu()).
- * (Some devices do not allow for a VLAN header, in which case VLAN_HEADER_LEN
- * need not be included.)
- *
- * This function can only be expected to return a packet if ->listen() has
- * been called successfully.
- *
- * If a packet is successfully retrieved, returns 0.  In this case 'buffer' is
- * guaranteed to contain at least ETH_TOTAL_MIN bytes.  Otherwise, returns a
- * positive errno value.  Returns EAGAIN immediately if no packet is ready to
- * be returned.
- *
- * Some network devices may not implement support for this function.  In such
- * cases this function will always return EOPNOTSUPP. */
 int
-netdev_recv(struct netdev *netdev, struct ofpbuf *buffer)
+netdev_rx_recv(struct netdev_rx *rx, struct ofpbuf *buffer)
 {
-    int (*recv)(struct netdev *, void *, size_t);
     int retval;
 
     ovs_assert(buffer->size == 0);
     ovs_assert(ofpbuf_tailroom(buffer) >= ETH_TOTAL_MIN);
 
-    recv = netdev_get_dev(netdev)->netdev_class->recv;
-    retval = (recv
-              ? (recv)(netdev, buffer->data, ofpbuf_tailroom(buffer))
-              : -EOPNOTSUPP);
+    retval = rx->rx_class->recv(rx, buffer->data, ofpbuf_tailroom(buffer));
     if (retval >= 0) {
         COVERAGE_INC(netdev_received);
         buffer->size += retval;
@@ -401,27 +396,16 @@ netdev_recv(struct netdev *netdev, struct ofpbuf *buffer)
     }
 }
 
-/* Registers with the poll loop to wake up from the next call to poll_block()
- * when a packet is ready to be received with netdev_recv() on 'netdev'. */
 void
-netdev_recv_wait(struct netdev *netdev)
+netdev_rx_wait(struct netdev_rx *rx)
 {
-    void (*recv_wait)(struct netdev *);
-
-    recv_wait = netdev_get_dev(netdev)->netdev_class->recv_wait;
-    if (recv_wait) {
-        recv_wait(netdev);
-    }
+    rx->rx_class->wait(rx);
 }
 
-/* Discards all packets waiting to be received from 'netdev'. */
 int
-netdev_drain(struct netdev *netdev)
+netdev_rx_drain(struct netdev_rx *rx)
 {
-    int (*drain)(struct netdev *);
-
-    drain = netdev_get_dev(netdev)->netdev_class->drain;
-    return drain ? drain(netdev) : 0;
+    return rx->rx_class->drain ? rx->rx_class->drain(rx) : 0;
 }
 
 /* Sends 'buffer' on 'netdev'.  Returns 0 if successful, otherwise a positive
@@ -1435,7 +1419,6 @@ netdev_uninit(struct netdev *netdev, bool close)
     }
 }
 
-
 /* Returns the class type of 'netdev'.
  *
  * The caller must not free the returned value. */
@@ -1459,8 +1442,34 @@ netdev_get_dev(const struct netdev *netdev)
     return netdev->netdev_dev;
 }
 
-/* Restores all flags that have been saved with netdev_save_flags() and not yet
- * restored with netdev_restore_flags(). */
+void
+netdev_rx_init(struct netdev_rx *rx, struct netdev_dev *dev,
+               const struct netdev_rx_class *class)
+{
+    ovs_assert(dev->ref_cnt > 0);
+    rx->rx_class = class;
+    rx->netdev_dev = dev;
+}
+
+void
+netdev_rx_uninit(struct netdev_rx *rx OVS_UNUSED)
+{
+    /* Nothing to do. */
+}
+
+struct netdev_dev *
+netdev_rx_get_dev(const struct netdev_rx *rx)
+{
+    ovs_assert(rx->netdev_dev->ref_cnt > 0);
+    return rx->netdev_dev;
+}
+
+const char *
+netdev_rx_get_name(const struct netdev_rx *rx)
+{
+    return netdev_dev_get_name(netdev_rx_get_dev(rx));
+}
+
 static void
 restore_all_flags(void *aux OVS_UNUSED)
 {
diff --git a/lib/netdev.h b/lib/netdev.h
index ffa831d..2453882 100644
--- a/lib/netdev.h
+++ b/lib/netdev.h
@@ -33,6 +33,9 @@ extern "C" {
  * The PORTING file at the top of the source tree has more information in the
  * "Writing a netdev Provider" section. */
 
+struct netdev;
+struct netdev_class;
+struct netdev_rx;
 struct netdev_saved_flags;
 struct ofpbuf;
 struct in_addr;
@@ -97,9 +100,6 @@ struct netdev_tunnel_config {
     bool dont_fragment;
 };
 
-struct netdev;
-struct netdev_class;
-
 void netdev_run(void);
 void netdev_wait(void);
 
@@ -125,12 +125,17 @@ int netdev_get_mtu(const struct netdev *, int *mtup);
 int netdev_set_mtu(const struct netdev *, int mtu);
 int netdev_get_ifindex(const struct netdev *);
 
-/* Packet send and receive. */
-int netdev_listen(struct netdev *);
-int netdev_recv(struct netdev *, struct ofpbuf *);
-void netdev_recv_wait(struct netdev *);
-int netdev_drain(struct netdev *);
+/* Packet reception. */
+int netdev_rx_open(struct netdev *, struct netdev_rx **);
+void netdev_rx_close(struct netdev_rx *);
+
+const char *netdev_rx_get_name(const struct netdev_rx *);
+
+int netdev_rx_recv(struct netdev_rx *, struct ofpbuf *);
+void netdev_rx_wait(struct netdev_rx *);
+int netdev_rx_drain(struct netdev_rx *);
 
+/* Packet transmission. */
 int netdev_send(struct netdev *, const struct ofpbuf *);
 void netdev_send_wait(struct netdev *);
 
-- 
1.7.2.5




More information about the dev mailing list