[ovs-dev] [netdevs 2/4] netdev: Clean up and refactor packet receive interface.

Ben Pfaff blp at nicira.com
Fri Aug 5 21:42:53 UTC 2011


The Open vSwitch tree only has one user of the ability for a netdev to
receive packets from a network device.  Thus, this commit simplifies the
common-case use of the netdev interface by replacing the "ethertype" option
from "struct netdev_options" by a new netdev_listen() call.

The only user of netdev_listen() wants to receive all packets from a
network device, so this commit also removes the ability to restrict the
received packets to a particular protocol.  (This ability was once used by
the Open vSwitch integrated DHCP client, but that code has been removed.)

This commit also simplifies and improves the implementation of the code
in netdev-linux that started listening to a network device.  Before, I had
not figured out how to avoid receiving all packets on all devices before
binding to a particular device, but I took a closer look at the kernel code
and figured it out.

I've tested that the userspace datapath (dpif-netdev), the only user of
netdev_recv(), still works after this change.
---
 lib/dpif-netdev.c     |    9 ++++-
 lib/netdev-dummy.c    |   24 +++++++++--
 lib/netdev-linux.c    |  109 ++++++++++++++++++++++++++-----------------------
 lib/netdev-provider.h |   44 +++++++++++++------
 lib/netdev-vport.c    |    4 +-
 lib/netdev.c          |   28 +++++++++----
 lib/netdev.h          |    8 +---
 ofproto/ofproto.c     |    1 -
 utilities/ovs-dpctl.c |    2 -
 vswitchd/bridge.c     |    2 -
 10 files changed, 138 insertions(+), 93 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 14b9192..fa6b549 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -350,7 +350,6 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
     /* Open and validate network device. */
     memset(&netdev_options, 0, sizeof netdev_options);
     netdev_options.name = devname;
-    netdev_options.ethertype = NETDEV_ETH_TYPE_ANY;
     if (dp->class == &dpif_dummy_class) {
         netdev_options.type = "dummy";
     } else if (internal) {
@@ -364,6 +363,14 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
     /* XXX reject loopback devices */
     /* XXX reject non-Ethernet devices */
 
+    error = netdev_listen(netdev);
+    if (error) {
+        VLOG_ERR("%s: cannot receive packets on this network device (%s)",
+                 devname, strerror(errno));
+        netdev_close(netdev);
+        return error;
+    }
+
     error = netdev_turn_flags_on(netdev, NETDEV_PROMISC, false);
     if (error) {
         netdev_close(netdev);
diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
index 9cd06f1..15d97cf 100644
--- a/lib/netdev-dummy.c
+++ b/lib/netdev-dummy.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010 Nicira Networks.
+ * Copyright (c) 2010, 2011 Nicira Networks.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -102,8 +102,7 @@ netdev_dummy_destroy(struct netdev_dev *netdev_dev_)
 }
 
 static int
-netdev_dummy_open(struct netdev_dev *netdev_dev_, int ethertype OVS_UNUSED,
-                  struct netdev **netdevp)
+netdev_dummy_open(struct netdev_dev *netdev_dev_, struct netdev **netdevp)
 {
     struct netdev_dummy *netdev;
 
@@ -122,6 +121,22 @@ netdev_dummy_close(struct netdev *netdev_)
 }
 
 static int
+netdev_dummy_listen(struct netdev *netdev_ OVS_UNUSED)
+{
+    /* It's OK to listen on a dummy device.  It just never receives any
+     * packets. */
+    return 0;
+}
+
+static int
+netdev_dummy_recv(struct netdev *netdev_ OVS_UNUSED,
+                  void *buffer OVS_UNUSED, size_t size OVS_UNUSED)
+{
+    /* A dummy device never receives any packets. */
+    return -EAGAIN;
+}
+
+static int
 netdev_dummy_set_etheraddr(struct netdev *netdev,
                            const uint8_t mac[ETH_ADDR_LEN])
 {
@@ -234,7 +249,8 @@ static const struct netdev_class dummy_class = {
 
     NULL,                       /* enumerate */
 
-    NULL,                       /* recv */
+    netdev_dummy_listen,        /* listen */
+    netdev_dummy_recv,          /* recv */
     NULL,                       /* recv_wait */
     NULL,                       /* drain */
 
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 385c0b8..ac511f0 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -639,8 +639,7 @@ netdev_linux_destroy(struct netdev_dev *netdev_dev_)
 }
 
 static int
-netdev_linux_open(struct netdev_dev *netdev_dev_, int ethertype,
-                  struct netdev **netdevp)
+netdev_linux_open(struct netdev_dev *netdev_dev_, struct netdev **netdevp)
 {
     struct netdev_dev_linux *netdev_dev = netdev_dev_linux_cast(netdev_dev_);
     struct netdev_linux *netdev;
@@ -676,54 +675,6 @@ netdev_linux_open(struct netdev_dev *netdev_dev_, int ethertype,
          * directions appearing to be reversed. */
         netdev->fd = netdev_dev->state.tap.fd;
         netdev_dev->state.tap.opened = true;
-    } else if (ethertype != NETDEV_ETH_TYPE_NONE) {
-        struct sockaddr_ll sll;
-        int protocol;
-        int ifindex;
-
-        /* Create file descriptor. */
-        protocol = (ethertype == NETDEV_ETH_TYPE_ANY ? ETH_P_ALL
-                    : ethertype == NETDEV_ETH_TYPE_802_2 ? ETH_P_802_2
-                    : ethertype);
-        netdev->fd = socket(PF_PACKET, SOCK_RAW,
-                            (OVS_FORCE int) htons(protocol));
-        if (netdev->fd < 0) {
-            error = errno;
-            goto error;
-        }
-
-        /* Set non-blocking mode. */
-        error = set_nonblocking(netdev->fd);
-        if (error) {
-            goto error;
-        }
-
-        /* Get ethernet device index. */
-        error = get_ifindex(&netdev->netdev, &ifindex);
-        if (error) {
-            goto error;
-        }
-
-        /* Bind to specific ethernet device. */
-        memset(&sll, 0, sizeof sll);
-        sll.sll_family = AF_PACKET;
-        sll.sll_ifindex = ifindex;
-        if (bind(netdev->fd,
-                 (struct sockaddr *) &sll, sizeof sll) < 0) {
-            error = errno;
-            VLOG_ERR("bind to %s failed: %s", netdev_dev_get_name(netdev_dev_),
-                     strerror(error));
-            goto error;
-        }
-
-        /* Between the socket() and bind() calls above, the socket receives all
-         * packets of the requested type on all system interfaces.  We do not
-         * want to receive that data, but there is no way to avoid it.  So we
-         * must now drain out the receive queue. */
-        error = drain_rcvbuf(netdev->fd);
-        if (error) {
-            goto error;
-        }
     }
 
     *netdevp = &netdev->netdev;
@@ -769,12 +720,67 @@ netdev_linux_enumerate(struct sset *sset)
 }
 
 static int
+netdev_linux_listen(struct netdev *netdev_)
+{
+    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
+    struct sockaddr_ll sll;
+    int ifindex;
+    int error;
+    int fd;
+
+    if (netdev->fd >= 0) {
+        return 0;
+    }
+
+    /* Create file descriptor. */
+    fd = socket(PF_PACKET, SOCK_RAW, 0);
+    if (fd < 0) {
+        error = errno;
+        VLOG_ERR("failed to create raw socket (%s)", strerror(error));
+        goto error;
+    }
+
+    /* Set non-blocking mode. */
+    error = set_nonblocking(fd);
+    if (error) {
+        goto error;
+    }
+
+    /* Get ethernet device index. */
+    error = get_ifindex(&netdev->netdev, &ifindex);
+    if (error) {
+        goto error;
+    }
+
+    /* Bind to specific ethernet device. */
+    memset(&sll, 0, sizeof sll);
+    sll.sll_family = AF_PACKET;
+    sll.sll_ifindex = ifindex;
+    sll.sll_protocol = (OVS_FORCE unsigned short int) htons(ETH_P_ALL);
+    if (bind(fd, (struct sockaddr *) &sll, sizeof sll) < 0) {
+        error = errno;
+        VLOG_ERR("%s: failed to bind raw socket (%s)",
+                 netdev_get_name(netdev_), strerror(error));
+        goto error;
+    }
+
+    netdev->fd = fd;
+    return 0;
+
+error:
+    if (fd >= 0) {
+        close(fd);
+    }
+    return error;
+}
+
+static int
 netdev_linux_recv(struct netdev *netdev_, void *data, size_t size)
 {
     struct netdev_linux *netdev = netdev_linux_cast(netdev_);
 
     if (netdev->fd < 0) {
-        /* Device was opened with NETDEV_ETH_TYPE_NONE. */
+        /* Device is not listening. */
         return -EAGAIN;
     }
 
@@ -2254,6 +2260,7 @@ netdev_linux_change_seq(const struct netdev *netdev)
                                                                 \
     ENUMERATE,                                                  \
                                                                 \
+    netdev_linux_listen,                                        \
     netdev_linux_recv,                                          \
     netdev_linux_recv_wait,                                     \
     netdev_linux_drain,                                         \
diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
index 7bb4eac..093a25d 100644
--- a/lib/netdev-provider.h
+++ b/lib/netdev-provider.h
@@ -147,14 +147,8 @@ struct netdev_class {
                          const struct shash *args);
 
     /* Attempts to open a network device.  On success, sets 'netdevp'
-     * to the new network device.
-     *
-     * 'ethertype' may be a 16-bit Ethernet protocol value in host byte order
-     * to capture frames of that type received on the device.  It may also be
-     * one of the 'enum netdev_pseudo_ethertype' values to receive frames in
-     * one of those categories. */
-    int (*open)(struct netdev_dev *netdev_dev, int ethertype,
-                struct netdev **netdevp);
+     * to the new network device. */
+    int (*open)(struct netdev_dev *netdev_dev, struct netdev **netdevp);
 
     /* Closes 'netdev'. */
     void (*close)(struct netdev *netdev);
@@ -168,17 +162,39 @@ struct netdev_class {
      * If this netdev class does not support enumeration, this may be a null
      * pointer. */
     int (*enumerate)(struct sset *all_names);
+
+/* ## ----------------- ## */
+/* ## Receiving Packets ## */
+/* ## ----------------- ## */
+
+/* The network provider interface is mostly used for inspecting and configuring
+ * device "metadata", not for sending and receiving packets directly.  It may
+ * be impractical to implement these functions on some operating systems and
+ * hardware.  These functions may all be NULL in such cases.
+ *
+ * (However, the "dpif-netdev" implementation, which is the easiest way to
+ * integrate Open vSwitch with a new operating system or hardware, does require
+ * the ability to receive packets.) */
+
+    /* Attempts to set up 'netdev' for receiving packets with ->recv().
+     * Returns 0 if successful, otherwise a positive errno value.  Return
+     * EOPNOTSUPP to indicate that the network device does not implement packet
+     * reception through this interface.  This function may be set to null if
+     * it would always return EOPNOTSUPP anyhow.  (This will prevent the
+     * network device from being usefully used by the netdev-based "userspace
+     * datapath".)*/
+    int (*listen)(struct netdev *netdev);
 
     /* Attempts to receive a packet from 'netdev' into the 'size' bytes in
      * 'buffer'.  If successful, returns the number of bytes in the received
      * packet, otherwise a negative errno value.  Returns -EAGAIN immediately
      * if no packet is ready to be received.
      *
-     * May return -EOPNOTSUPP if a network device does not implement packet
-     * reception through this interface.  This function may be set to null if
-     * it would always return -EOPNOTSUPP anyhow.  (This will prevent the
-     * network device from being usefully used by the netdev-based "userspace
-     * datapath".) */
+     * This function can only be expected to return a packet if ->listen() has
+     * been called successfully.
+     *
+     * May be null if not needed, such as for a network device that does not
+     * implement packet reception through the 'recv' member function. */
     int (*recv)(struct netdev *netdev, void *buffer, size_t size);
 
     /* Registers with the poll loop to wake up from the next call to
@@ -194,7 +210,7 @@ struct netdev_class {
      * May be null if not needed, such as for a network device that does not
      * implement packet reception through the 'recv' member function. */
     int (*drain)(struct netdev *netdev);
-
+
     /* Sends the 'size'-byte packet in 'buffer' on 'netdev'.  Returns 0 if
      * successful, otherwise a positive errno value.  Returns EAGAIN without
      * blocking if the packet cannot be queued immediately.  Returns EMSGSIZE
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index b9c1bfe..e3e480d 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -258,8 +258,7 @@ netdev_vport_destroy(struct netdev_dev *netdev_dev_)
 }
 
 static int
-netdev_vport_open(struct netdev_dev *netdev_dev_, int ethertype OVS_UNUSED,
-                struct netdev **netdevp)
+netdev_vport_open(struct netdev_dev *netdev_dev_, struct netdev **netdevp)
 {
     struct netdev_vport *netdev;
 
@@ -937,6 +936,7 @@ config_equal_ipsec(const struct shash *nd_args, const struct shash *args)
                                                             \
     NULL,                       /* enumerate */             \
                                                             \
+    NULL,                       /* listen */                \
     NULL,                       /* recv */                  \
     NULL,                       /* recv_wait */             \
     NULL,                       /* drain */                 \
diff --git a/lib/netdev.c b/lib/netdev.c
index b8592c1..9954929 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -204,12 +204,8 @@ update_device_args(struct netdev_dev *dev, const struct shash *args)
  * to the new network device, otherwise to null.
  *
  * If this is the first time the device has been opened, then create is called
- * before opening.  The device is created using the given type and arguments.
- *
- * 'ethertype' may be a 16-bit Ethernet protocol value in host byte order to
- * capture frames of that type received on the device.  It may also be one of
- * the 'enum netdev_pseudo_ethertype' values to receive frames in one of those
- * categories. */
+ * before opening.  The device is created using the given type and
+ * arguments. */
 int
 netdev_open(struct netdev_options *options, struct netdev **netdevp)
 {
@@ -250,8 +246,7 @@ netdev_open(struct netdev_options *options, struct netdev **netdevp)
         return EINVAL;
     }
 
-    error = netdev_dev->netdev_class->open(netdev_dev, options->ethertype,
-                netdevp);
+    error = netdev_dev->netdev_class->open(netdev_dev, netdevp);
 
     if (!error) {
         netdev_dev->ref_cnt++;
@@ -271,7 +266,6 @@ netdev_open_default(const char *name, struct netdev **netdevp)
 
     memset(&options, 0, sizeof options);
     options.name = name;
-    options.ethertype = NETDEV_ETH_TYPE_NONE;
 
     return netdev_open(&options, netdevp);
 }
@@ -387,6 +381,19 @@ netdev_enumerate(struct sset *sset)
     return error;
 }
 
+/* Attempts to set up 'netdev' for receiving packets with netdev_recv().
+ * Returns 0 if successful, otherwise a positive errno value.  EOPNOTSUPP
+ * indicates that the network device does not implement packet reception
+ * through this interface. */
+int
+netdev_listen(struct netdev *netdev)
+{
+    int (*listen)(struct netdev *);
+
+    listen = netdev_get_dev(netdev)->netdev_class->listen;
+    return listen ? (listen)(netdev) : EOPNOTSUPP;
+}
+
 /* Attempts to receive a packet from 'netdev' into 'buffer', which the caller
  * must have initialized with sufficient room for the packet.  The space
  * required to receive any packet is ETH_HEADER_LEN bytes, plus VLAN_HEADER_LEN
@@ -394,6 +401,9 @@ netdev_enumerate(struct sset *sset)
  * (Some devices do not allow for a VLAN header, in which case VLAN_HEADER_LEN
  * need not be included.)
  *
+ * This function can only be expected to return a packet if ->listen() has
+ * been called successfully.
+ *
  * If a packet is successfully retrieved, returns 0.  In this case 'buffer' is
  * guaranteed to contain at least ETH_TOTAL_MIN bytes.  Otherwise, returns a
  * positive errno value.  Returns EAGAIN immediately if no packet is ready to
diff --git a/lib/netdev.h b/lib/netdev.h
index ba4a8e3..7e16bd3 100644
--- a/lib/netdev.h
+++ b/lib/netdev.h
@@ -44,12 +44,6 @@ enum netdev_flags {
     NETDEV_LOOPBACK = 0x0004    /* This is a loopback device. */
 };
 
-enum netdev_pseudo_ethertype {
-    NETDEV_ETH_TYPE_NONE = -128, /* Receive no frames. */
-    NETDEV_ETH_TYPE_ANY,         /* Receive all frames. */
-    NETDEV_ETH_TYPE_802_2        /* Receive all IEEE 802.2 frames. */
-};
-
 /* Network device statistics.
  *
  * Values of unsupported statistics are set to all-1-bits (UINT64_MAX). */
@@ -85,7 +79,6 @@ struct netdev_options {
     const char *name;
     const char *type;
     const struct shash *args;
-    int ethertype;
 };
 
 struct netdev;
@@ -117,6 +110,7 @@ int netdev_get_mtu(const struct netdev *, int *mtup);
 int netdev_get_ifindex(const struct netdev *);
 
 /* Packet send and receive. */
+int netdev_listen(struct netdev *);
 int netdev_recv(struct netdev *, struct ofpbuf *);
 void netdev_recv_wait(struct netdev *);
 int netdev_drain(struct netdev *);
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index f40f995..8054d05 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -1115,7 +1115,6 @@ ofport_open(const struct ofproto_port *ofproto_port, struct ofp_phy_port *opp)
     memset(&netdev_options, 0, sizeof netdev_options);
     netdev_options.name = ofproto_port->name;
     netdev_options.type = ofproto_port->type;
-    netdev_options.ethertype = NETDEV_ETH_TYPE_NONE;
 
     error = netdev_open(&netdev_options, &netdev);
     if (error) {
diff --git a/utilities/ovs-dpctl.c b/utilities/ovs-dpctl.c
index 7962c7a..1c31c71 100644
--- a/utilities/ovs-dpctl.c
+++ b/utilities/ovs-dpctl.c
@@ -232,7 +232,6 @@ do_add_if(int argc OVS_UNUSED, char *argv[])
         options.name = strtok_r(argv[i], ",", &save_ptr);
         options.type = "system";
         options.args = &args;
-        options.ethertype = NETDEV_ETH_TYPE_NONE;
 
         if (!options.name) {
             ovs_error(0, "%s is not a valid network device name", argv[i]);
@@ -384,7 +383,6 @@ show_dpif(struct dpif *dpif)
             netdev_options.name = dpif_port.name;
             netdev_options.type = dpif_port.type;
             netdev_options.args = NULL;
-            netdev_options.ethertype = NETDEV_ETH_TYPE_NONE;
             error = netdev_open(&netdev_options, &netdev);
             if (!error) {
                 const struct shash_node **nodes;
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index 9b30791..f43902b 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -859,7 +859,6 @@ bridge_add_ofproto_ports(struct bridge *br)
                 options.name = iface->name;
                 options.type = iface->type;
                 options.args = &args;
-                options.ethertype = NETDEV_ETH_TYPE_NONE;
                 error = netdev_open(&options, &iface->netdev);
             } else {
                 error = netdev_set_config(iface->netdev, &args);
@@ -925,7 +924,6 @@ bridge_add_ofproto_ports(struct bridge *br)
                 options.name = port->name;
                 options.type = "internal";
                 options.args = NULL;
-                options.ethertype = NETDEV_ETH_TYPE_NONE;
                 error = netdev_open(&options, &netdev);
                 if (!error) {
                     ofproto_port_add(br->ofproto, netdev, NULL);
-- 
1.7.4.4




More information about the dev mailing list