[ovs-dev] [PATCHv2 1/5] netdev: Globally track port status changes

Joe Stringer joestringer at nicira.com
Thu Nov 14 23:28:25 UTC 2013


Previously, we tracked status changes for ofports on a per-device basis.
Each time in the main thread's loop, we would loop through all ofports
and manually check whether the status has changed for corresponding
devices.

This patch shifts change_seq above the netdevice layer, with one atomic
variable tracking status change for all ports. In the average case where
ports are not constantly going up or down, this allows us to check
change_seq once per loop and not poll any ports. In the worst case,
execution is expected to be similar to how it is currently.

As change_seq is no longer tracked per-device, it doesn't make sense to
cache this status in each ofport struct. As such, we shift this into the
ofproto struct.

In a test environment of 5000 internal ports and 50 tunnel ports with
bfd, this reduces CPU usage from about 45% to about 35%.

Signed-off-by: Joe Stringer <joestringer at nicira.com>
---
v2: Use 'struct seq' to track changes rather than atomic_int
    Document how netdev implementations should report changes
---
 lib/bond.c                 |    4 ++--
 lib/netdev-bsd.c           |   22 ++++---------------
 lib/netdev-dummy.c         |   25 +++------------------
 lib/netdev-linux.c         |   23 ++------------------
 lib/netdev-provider.h      |   23 ++++++++++----------
 lib/netdev-vport.c         |   20 +++++------------
 lib/netdev.c               |   38 +++++++++++++++++++++++---------
 lib/netdev.h               |    5 ++++-
 ofproto/ofproto-provider.h |    2 +-
 ofproto/ofproto.c          |   52 ++++++++++++++++++++------------------------
 ofproto/tunnel.c           |    4 ++--
 11 files changed, 86 insertions(+), 132 deletions(-)

diff --git a/lib/bond.c b/lib/bond.c
index c202840..fc22a42 100644
--- a/lib/bond.c
+++ b/lib/bond.c
@@ -441,7 +441,7 @@ bond_run(struct bond *bond, enum lacp_status lacp_status)
     /* Enable slaves based on link status and LACP feedback. */
     HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) {
         bond_link_status_update(slave);
-        slave->change_seq = netdev_change_seq(slave->netdev);
+        slave->change_seq = netdev_change_seq();
     }
     if (!bond->active_slave || !bond->active_slave->enabled) {
         bond_choose_active_slave(bond);
@@ -472,7 +472,7 @@ bond_wait(struct bond *bond)
             poll_timer_wait_until(slave->delay_expires);
         }
 
-        if (slave->change_seq != netdev_change_seq(slave->netdev)) {
+        if (slave->change_seq != netdev_change_seq()) {
             poll_immediate_wake();
         }
     }
diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c
index 3eb29ea..ae611fe 100644
--- a/lib/netdev-bsd.c
+++ b/lib/netdev-bsd.c
@@ -86,7 +86,6 @@ struct netdev_bsd {
     struct ovs_mutex mutex;
 
     unsigned int cache_valid;
-    unsigned int change_seq;
 
     int ifindex;
     uint8_t etheraddr[ETH_ADDR_LEN];
@@ -198,12 +197,9 @@ netdev_bsd_wait(void)
 }
 
 static void
-netdev_bsd_changed(struct netdev_bsd *dev)
+netdev_bsd_changed(struct netdev_bsd *dev OVS_UNUSED)
 {
-    dev->change_seq++;
-    if (!dev->change_seq) {
-        dev->change_seq++;
-    }
+    netdev_notify_change();
 }
 
 /* Invalidate cache in case of interface status change. */
@@ -294,7 +290,7 @@ netdev_bsd_construct_system(struct netdev *netdev_)
     }
 
     ovs_mutex_init(&netdev->mutex);
-    netdev->change_seq = 1;
+    netdev_notify_change();
     netdev->tap_fd = -1;
     netdev->kernel_name = xstrdup(netdev_->name);
 
@@ -329,7 +325,7 @@ netdev_bsd_construct_tap(struct netdev *netdev_)
      * to retrieve the name of the tap device. */
     ovs_mutex_init(&netdev->mutex);
     netdev->tap_fd = open("/dev/tap", O_RDWR);
-    netdev->change_seq = 1;
+    netdev_notify_change();
     if (netdev->tap_fd < 0) {
         error = errno;
         VLOG_WARN("opening \"/dev/tap\" failed: %s", ovs_strerror(error));
@@ -1470,12 +1466,6 @@ netdev_bsd_update_flags(struct netdev *netdev_, enum netdev_flags off,
     return error;
 }
 
-static unsigned int
-netdev_bsd_change_seq(const struct netdev *netdev)
-{
-    return netdev_bsd_cast(netdev)->change_seq;
-}
-
 
 const struct netdev_class netdev_bsd_class = {
     "system",
@@ -1531,8 +1521,6 @@ const struct netdev_class netdev_bsd_class = {
 
     netdev_bsd_update_flags,
 
-    netdev_bsd_change_seq,
-
     netdev_bsd_rx_alloc,
     netdev_bsd_rx_construct,
     netdev_bsd_rx_destruct,
@@ -1596,8 +1584,6 @@ const struct netdev_class netdev_tap_class = {
 
     netdev_bsd_update_flags,
 
-    netdev_bsd_change_seq,
-
     netdev_bsd_rx_alloc,
     netdev_bsd_rx_construct,
     netdev_bsd_rx_destruct,
diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
index fd30454..d008961 100644
--- a/lib/netdev-dummy.c
+++ b/lib/netdev-dummy.c
@@ -66,7 +66,6 @@ struct netdev_dummy {
     int mtu OVS_GUARDED;
     struct netdev_stats stats OVS_GUARDED;
     enum netdev_flags flags OVS_GUARDED;
-    unsigned int change_seq OVS_GUARDED;
     int ifindex OVS_GUARDED;
 
     struct pstream *pstream OVS_GUARDED;
@@ -285,7 +284,7 @@ netdev_dummy_construct(struct netdev *netdev_)
     netdev->hwaddr[5] = n;
     netdev->mtu = 1500;
     netdev->flags = 0;
-    netdev->change_seq = 1;
+    netdev_dummy_changed(netdev);
     netdev->ifindex = -EOPNOTSUPP;
 
     netdev->pstream = NULL;
@@ -686,29 +685,13 @@ netdev_dummy_update_flags(struct netdev *netdev_,
 
     return error;
 }
-
-static unsigned int
-netdev_dummy_change_seq(const struct netdev *netdev_)
-{
-    struct netdev_dummy *netdev = netdev_dummy_cast(netdev_);
-    unsigned int change_seq;
-
-    ovs_mutex_lock(&netdev->mutex);
-    change_seq = netdev->change_seq;
-    ovs_mutex_unlock(&netdev->mutex);
-
-    return change_seq;
-}
 
 /* Helper functions. */
 
 static void
-netdev_dummy_changed(struct netdev_dummy *dev)
+netdev_dummy_changed(struct netdev_dummy *dev OVS_UNUSED)
 {
-    dev->change_seq++;
-    if (!dev->change_seq) {
-        dev->change_seq++;
-    }
+    netdev_notify();
 }
 
 static const struct netdev_class dummy_class = {
@@ -766,8 +749,6 @@ static const struct netdev_class dummy_class = {
 
     netdev_dummy_update_flags,
 
-    netdev_dummy_change_seq,
-
     netdev_dummy_rx_alloc,
     netdev_dummy_rx_construct,
     netdev_dummy_rx_destruct,
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 7e75144..9106355 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -357,7 +357,6 @@ struct netdev_linux {
     struct ovs_mutex mutex;
 
     unsigned int cache_valid;
-    unsigned int change_seq;
 
     bool miimon;                    /* Link status of last poll. */
     long long int miimon_interval;  /* Miimon Poll rate. Disabled if <= 0. */
@@ -585,10 +584,7 @@ netdev_linux_changed(struct netdev_linux *dev,
                      unsigned int ifi_flags, unsigned int mask)
     OVS_REQUIRES(dev->mutex)
 {
-    dev->change_seq++;
-    if (!dev->change_seq) {
-        dev->change_seq++;
-    }
+    netdev_notify();
 
     if ((dev->ifi_flags ^ ifi_flags) & IFF_RUNNING) {
         dev->carrier_resets++;
@@ -640,7 +636,7 @@ static void
 netdev_linux_common_construct(struct netdev_linux *netdev)
 {
     ovs_mutex_init(&netdev->mutex);
-    netdev->change_seq = 1;
+    netdev_notify();
 }
 
 /* Creates system and internal devices. */
@@ -2598,19 +2594,6 @@ netdev_linux_update_flags(struct netdev *netdev_, enum netdev_flags off,
     return error;
 }
 
-static unsigned int
-netdev_linux_change_seq(const struct netdev *netdev_)
-{
-    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
-    unsigned int change_seq;
-
-    ovs_mutex_lock(&netdev->mutex);
-    change_seq = netdev->change_seq;
-    ovs_mutex_unlock(&netdev->mutex);
-
-    return change_seq;
-}
-
 #define NETDEV_LINUX_CLASS(NAME, CONSTRUCT, GET_STATS, SET_STATS,  \
                            GET_FEATURES, GET_STATUS)            \
 {                                                               \
@@ -2669,8 +2652,6 @@ netdev_linux_change_seq(const struct netdev *netdev_)
                                                                 \
     netdev_linux_update_flags,                                  \
                                                                 \
-    netdev_linux_change_seq,                                    \
-                                                                \
     netdev_linux_rx_alloc,                                      \
     netdev_linux_rx_construct,                                  \
     netdev_linux_rx_destruct,                                   \
diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
index 9ab58fb..618b7bd 100644
--- a/lib/netdev-provider.h
+++ b/lib/netdev-provider.h
@@ -148,7 +148,17 @@ struct netdev *netdev_rx_get_netdev(const struct netdev_rx *);
  * Each "dealloc" function frees raw memory that was allocated by the the
  * "alloc" function.  The memory's base and derived members might not have ever
  * been initialized (but if "construct" returned successfully, then it has been
- * "destruct"ed already).  The "dealloc" function is not allowed to fail. */
+ * "destruct"ed already).  The "dealloc" function is not allowed to fail.
+ *
+ *
+ * Change Notification
+ * ===================
+ *
+ * Minimally, implementations are required to report changes to netdev flags,
+ * features, ethernet address or carrier through netdev_notify(). Changes to
+ * other properties are allowed to cause notification through this interface,
+ * although implementations should try to avoid this. netdev.c contains more
+ * information about this notification interface. */
 struct netdev_class {
     /* Type of netdevs in this class, e.g. "system", "tap", "gre", etc.
      *
@@ -609,17 +619,6 @@ struct netdev_class {
     int (*update_flags)(struct netdev *netdev, enum netdev_flags off,
                         enum netdev_flags on, enum netdev_flags *old_flags);
 
-    /* Returns a sequence number which indicates changes in one of 'netdev''s
-     * properties.  The returned sequence number must be nonzero so that
-     * callers have a value which they may use as a reset when tracking
-     * 'netdev'.
-     *
-     * Minimally, the returned sequence number is required to change whenever
-     * 'netdev''s flags, features, ethernet address, or carrier changes.  The
-     * returned sequence number is allowed to change even when 'netdev' doesn't
-     * change, although implementations should try to avoid this. */
-    unsigned int (*change_seq)(const struct netdev *netdev);
-
 /* ## ------------------- ## */
 /* ## netdev_rx Functions ## */
 /* ## ------------------- ## */
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index ae4f626..9d4e1cb 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -52,7 +52,6 @@ struct netdev_vport {
     /* Protects all members below. */
     struct ovs_mutex mutex;
 
-    unsigned int change_seq;
     uint8_t etheraddr[ETH_ADDR_LEN];
     struct netdev_stats stats;
 
@@ -172,8 +171,10 @@ netdev_vport_construct(struct netdev *netdev_)
     struct netdev_vport *netdev = netdev_vport_cast(netdev_);
 
     ovs_mutex_init(&netdev->mutex);
-    netdev->change_seq = 1;
+    ovs_mutex_lock(&netdev->mutex);
+    netdev_vport_changed(netdev);
     eth_addr_random(netdev->etheraddr);
+    ovs_mutex_unlock(&netdev->mutex);
 
     route_table_register();
 
@@ -264,12 +265,6 @@ netdev_vport_update_flags(struct netdev *netdev OVS_UNUSED,
     return 0;
 }
 
-static unsigned int
-netdev_vport_change_seq(const struct netdev *netdev)
-{
-    return netdev_vport_cast(netdev)->change_seq;
-}
-
 static void
 netdev_vport_run(void)
 {
@@ -285,12 +280,9 @@ netdev_vport_wait(void)
 /* Helper functions. */
 
 static void
-netdev_vport_changed(struct netdev_vport *ndv)
+netdev_vport_changed(struct netdev_vport *ndv OVS_UNUSED)
 {
-    ndv->change_seq++;
-    if (!ndv->change_seq) {
-        ndv->change_seq++;
-    }
+    netdev_notify();
 }
 
 /* Code specific to tunnel types. */
@@ -742,8 +734,6 @@ get_stats(const struct netdev *netdev, struct netdev_stats *stats)
                                                             \
     netdev_vport_update_flags,                              \
                                                             \
-    netdev_vport_change_seq,                                \
-                                                            \
     NULL,                   /* rx_alloc */                  \
     NULL,                   /* rx_construct */              \
     NULL,                   /* rx_destruct */               \
diff --git a/lib/netdev.c b/lib/netdev.c
index 5ed6062..8a08704 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -36,6 +36,7 @@
 #include "openflow/openflow.h"
 #include "packets.h"
 #include "poll-loop.h"
+#include "seq.h"
 #include "shash.h"
 #include "smap.h"
 #include "sset.h"
@@ -82,6 +83,8 @@ struct netdev_registered_class {
     atomic_int ref_cnt;         /* Number of 'struct netdev's of this class. */
 };
 
+static struct seq *netdev_seq;
+
 /* This is set pretty low because we probably won't learn anything from the
  * additional log messages. */
 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
@@ -110,6 +113,7 @@ netdev_initialize(void)
         netdev_register_provider(&netdev_bsd_class);
 #endif
 
+        netdev_seq = seq_create();
         ovsthread_once_done(&once);
     }
 }
@@ -1494,18 +1498,32 @@ netdev_dump_queue_stats(const struct netdev *netdev,
             : EOPNOTSUPP);
 }
 
-/* Returns a sequence number which indicates changes in one of 'netdev''s
- * properties.  The returned sequence will be nonzero so that callers have a
- * value which they may use as a reset when tracking 'netdev'.
- *
- * The returned sequence number will change whenever 'netdev''s flags,
- * features, ethernet address, or carrier changes.  It may change for other
- * reasons as well, or no reason at all. */
-unsigned int
-netdev_change_seq(const struct netdev *netdev)
+/* Returns a sequence number which indicates changes in port properties.
+ * The returned sequence number tracks all port changes globally, and will
+ * change whenever a netdev's flags, features, ethernet address or carrier
+ * changes, or whenever a port's bfd, cfm, lacp or stp status changes. It
+ * may change for other reasons as well, or no reason at all. */
+uint64_t
+netdev_change_seq(void)
+{
+    return seq_read(netdev_seq);
+}
+
+/* Causes the following poll_block() to wake up on any netdev changes that
+ * happen after 'seq'. This should be used in the manner described in seq.h. */
+void
+netdev_seq_wait(uint64_t seq)
+{
+    seq_wait(netdev_seq, seq);
+}
+
+/* Notify interested parties that changes have occurred to a port. */
+void
+netdev_notify(void)
 {
-    return netdev->netdev_class->change_seq(netdev);
+    seq_change(netdev_seq);
 }
+
 
 /* Returns the class type of 'netdev'.
  *
diff --git a/lib/netdev.h b/lib/netdev.h
index bafa50e..d412bb6 100644
--- a/lib/netdev.h
+++ b/lib/netdev.h
@@ -66,6 +66,7 @@ struct netdev_saved_flags;
 struct ofpbuf;
 struct in_addr;
 struct in6_addr;
+struct seq;
 struct smap;
 struct sset;
 
@@ -313,7 +314,9 @@ typedef void netdev_dump_queue_stats_cb(unsigned int queue_id,
 int netdev_dump_queue_stats(const struct netdev *,
                             netdev_dump_queue_stats_cb *, void *aux);
 
-unsigned int netdev_change_seq(const struct netdev *netdev);
+uint64_t netdev_change_seq(void);
+void netdev_seq_wait(uint64_t seq);
+void netdev_notify(void);
 
 #ifdef  __cplusplus
 }
diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index 2844e4c..cad12ef 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -85,6 +85,7 @@ struct ofproto {
     uint16_t alloc_port_no;     /* Last allocated OpenFlow port number. */
     uint16_t max_ports;         /* Max possible OpenFlow port num, plus one. */
     struct hmap ofport_usage;   /* Map ofport to last used time. */
+    uint64_t change_seq;        /* Change sequence for netdev status. */
 
     /* Flow tables. */
     long long int eviction_group_timer; /* For rate limited reheapification. */
@@ -170,7 +171,6 @@ struct ofport {
     struct netdev *netdev;
     struct ofputil_phy_port pp;
     ofp_port_t ofp_port;        /* OpenFlow port number. */
-    unsigned int change_seq;
     long long int created;      /* Time created, in msec. */
     int mtu;
 };
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index 2ccbcee..a45e0db 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -1406,10 +1406,8 @@ any_pending_ops(const struct ofproto *p)
 int
 ofproto_run(struct ofproto *p)
 {
-    struct sset changed_netdevs;
-    const char *changed_netdev;
-    struct ofport *ofport;
     int error;
+    uint64_t new_seq;
 
     error = p->ofproto_class->run(p);
     if (error && error != EAGAIN) {
@@ -1460,24 +1458,29 @@ ofproto_run(struct ofproto *p)
         }
     }
 
-    /* Update OpenFlow port status for any port whose netdev has changed.
-     *
-     * Refreshing a given 'ofport' can cause an arbitrary ofport to be
-     * destroyed, so it's not safe to update ports directly from the
-     * HMAP_FOR_EACH loop, or even to use HMAP_FOR_EACH_SAFE.  Instead, we
-     * need this two-phase approach. */
-    sset_init(&changed_netdevs);
-    HMAP_FOR_EACH (ofport, hmap_node, &p->ports) {
-        unsigned int change_seq = netdev_change_seq(ofport->netdev);
-        if (ofport->change_seq != change_seq) {
-            ofport->change_seq = change_seq;
-            sset_add(&changed_netdevs, netdev_get_name(ofport->netdev));
+    new_seq = netdev_change_seq();
+    if (new_seq != p->change_seq) {
+        struct sset devnames;
+        const char *devname;
+        struct ofport *ofport;
+
+        /* Update OpenFlow port status for any port whose netdev has changed.
+         *
+         * Refreshing a given 'ofport' can cause an arbitrary ofport to be
+         * destroyed, so it's not safe to update ports directly from the
+         * HMAP_FOR_EACH loop, or even to use HMAP_FOR_EACH_SAFE.  Instead, we
+         * need this two-phase approach. */
+        sset_init(&devnames);
+        HMAP_FOR_EACH (ofport, hmap_node, &p->ports) {
+            sset_add(&devnames, netdev_get_name(ofport->netdev));
         }
+        SSET_FOR_EACH (devname, &devnames) {
+            update_port(p, devname);
+        }
+        sset_destroy(&devnames);
+
+        p->change_seq = new_seq;
     }
-    SSET_FOR_EACH (changed_netdev, &changed_netdevs) {
-        update_port(p, changed_netdev);
-    }
-    sset_destroy(&changed_netdevs);
 
     switch (p->state) {
     case S_OPENFLOW:
@@ -1567,18 +1570,13 @@ ofproto_run_fast(struct ofproto *p)
 void
 ofproto_wait(struct ofproto *p)
 {
-    struct ofport *ofport;
-
     p->ofproto_class->wait(p);
     if (p->ofproto_class->port_poll_wait) {
         p->ofproto_class->port_poll_wait(p);
     }
 
-    HMAP_FOR_EACH (ofport, hmap_node, &p->ports) {
-        if (ofport->change_seq != netdev_change_seq(ofport->netdev)) {
-            poll_immediate_wake();
-        }
-    }
+    /* Wake on device changes. */
+    netdev_seq_wait(p->change_seq);
 
     switch (p->state) {
     case S_OPENFLOW:
@@ -2138,7 +2136,6 @@ ofport_install(struct ofproto *p,
     }
     ofport->ofproto = p;
     ofport->netdev = netdev;
-    ofport->change_seq = netdev_change_seq(netdev);
     ofport->pp = *pp;
     ofport->ofp_port = pp->port_no;
     ofport->created = time_msec();
@@ -2373,7 +2370,6 @@ update_port(struct ofproto *ofproto, const char *name)
              * Don't close the old netdev yet in case port_modified has to
              * remove a retained reference to it.*/
             port->netdev = netdev;
-            port->change_seq = netdev_change_seq(netdev);
 
             if (port->ofproto->ofproto_class->port_modified) {
                 port->ofproto->ofproto_class->port_modified(port);
diff --git a/ofproto/tunnel.c b/ofproto/tunnel.c
index b238cd0..7e39ea7 100644
--- a/ofproto/tunnel.c
+++ b/ofproto/tunnel.c
@@ -97,7 +97,7 @@ tnl_port_add__(const struct ofport_dpif *ofport, const struct netdev *netdev,
     tnl_port = xzalloc(sizeof *tnl_port);
     tnl_port->ofport = ofport;
     tnl_port->netdev = netdev_ref(netdev);
-    tnl_port->netdev_seq = netdev_change_seq(tnl_port->netdev);
+    tnl_port->netdev_seq = netdev_change_seq();
 
     tnl_port->match.in_key = cfg->in_key;
     tnl_port->match.ip_src = cfg->ip_src;
@@ -159,7 +159,7 @@ tnl_port_reconfigure(const struct ofport_dpif *ofport,
         changed = tnl_port_add__(ofport, netdev, odp_port, false);
     } else if (tnl_port->netdev != netdev
                || tnl_port->match.odp_port != odp_port
-               || tnl_port->netdev_seq != netdev_change_seq(netdev)) {
+               || tnl_port->netdev_seq != netdev_change_seq()) {
         VLOG_DBG("reconfiguring %s", tnl_port_get_name(tnl_port));
         tnl_port_del__(ofport);
         tnl_port_add__(ofport, netdev, odp_port, true);
-- 
1.7.9.5




More information about the dev mailing list