[ovs-dev] [RFC] bridge: Keep bond active slave selection across OVS restart

Andy Zhou azhou at nicira.com
Fri Oct 3 08:28:07 UTC 2014


Whenever OVS restarts, it pseudo-randomly picks an interface
of a bond port to be the active slave. This can cause traffic
disruption in case the upstream switch does not support LACP, or
in case of multi-chassis switches that do not support mLACP.

This patch helps the situation by always record the last active
slave into ovsdb. When OVS restarts, the stored last active slave
has the highest priority to be selected again. In case this interface
is available, due to configuration changes or being offline, OVS then
consider other interfaces with the bond as it does today.

In a nutshell, this patch makes the active slave selection stickier
across OVS restart.

VMware-BZ:  1332235
---
 NEWS                       |  1 +
 ofproto/bond.c             | 64 ++++++++++++++++++++++++++++++++++++++++++++++
 ofproto/bond.h             |  6 +++++
 vswitchd/bridge.c          | 13 ++++++++++
 vswitchd/vswitch.ovsschema |  8 ++++--
 vswitchd/vswitch.xml       |  5 ++++
 6 files changed, 95 insertions(+), 2 deletions(-)

diff --git a/NEWS b/NEWS
index a8bd45b..74984a7 100644
--- a/NEWS
+++ b/NEWS
@@ -35,6 +35,7 @@ Post-v2.3.0
      (IEEE 802.1D-2004).  More conformance and interoperability testing is
      still needed, so this should not be enabled on production environments.
    - Stats are no longer updated on fake bond interface.
+   - Keep active bond slave selection across OVS restart.
 
 
 v2.3.0 - 14 Aug 2014
diff --git a/ofproto/bond.c b/ofproto/bond.c
index cb6b895..cab9477 100644
--- a/ofproto/bond.c
+++ b/ofproto/bond.c
@@ -46,6 +46,7 @@
 #include "timeval.h"
 #include "unixctl.h"
 #include "vlog.h"
+#include "lib/vswitch-idl.h"
 
 VLOG_DEFINE_THIS_MODULE(bond);
 
@@ -131,6 +132,11 @@ struct bond {
     uint32_t recirc_id;          /* Non zero if recirculation can be used.*/
     struct hmap pr_rule_ops;     /* Helps to maintain post recirculation rules.*/
 
+    uint8_t last_active_iface[6]; /* The MAC address of the last active
+                                     interface. */
+    const struct ovsrec_port *port_cfg; /* OVSDB table to record the last
+                                           active slave. */
+
     /* Legacy compatibility. */
     bool lacp_fallback_ab; /* Fallback to active-backup on LACP failure. */
 
@@ -446,10 +452,52 @@ bond_reconfigure(struct bond *bond, const struct bond_settings *s)
         bond_entry_reset(bond);
     }
 
+    /* copy the default active inerface MAC address from OVSDB. */
+    memcpy(bond->last_active_iface, s->last_active_iface,
+           sizeof s->last_active_iface);
+
+    bond->port_cfg = s->port_cfg;
+
     ovs_rwlock_unlock(&rwlock);
     return revalidate;
 }
 
+static struct bond_slave *
+bond_find_slave_by_mac(const struct bond *bond, const uint8_t mac[6])
+{
+    struct bond_slave *slave;
+
+    /* Find the last active slave */
+    HMAP_FOR_EACH(slave, hmap_node, &bond->slaves) {
+        uint8_t slave_mac[6];
+
+        if (netdev_get_etheraddr(slave->netdev, slave_mac))
+            continue;
+
+        if (!memcmp(slave_mac, mac, sizeof(slave_mac))) {
+            return slave;
+        }
+    }
+
+    return NULL;
+}
+
+static void
+bond_record_active_slave_change(struct bond *bond)
+{
+    uint8_t mac[6];
+    struct ds mac_s;
+
+    netdev_get_etheraddr(bond->active_slave->netdev, mac);
+    memcpy(bond->last_active_iface, mac, sizeof mac);
+
+    ds_init(&mac_s);
+    ds_put_format(&mac_s, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac));
+    ovsrec_port_set_bond_last_active_iface(bond->port_cfg,
+                                           ds_cstr(&mac_s));
+    ds_destroy(&mac_s);
+}
+
 static void
 bond_slave_set_netdev__(struct bond_slave *slave, struct netdev *netdev)
     OVS_REQ_WRLOCK(rwlock)
@@ -1270,6 +1318,12 @@ bond_print_details(struct ds *ds, const struct bond *bond)
         break;
     }
 
+    ds_put_cstr(ds, "last active slave: ");
+    ds_put_format(ds, ETH_ADDR_FMT,
+                  ETH_ADDR_ARGS(bond->last_active_iface));
+    slave = bond_find_slave_by_mac(bond, bond->last_active_iface);
+    ds_put_format(ds,"(%s)\n", slave ? slave->name : "none");
+
     HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) {
         shash_add(&slave_shash, slave->name, slave);
     }
@@ -1440,6 +1494,7 @@ bond_unixctl_set_active_slave(struct unixctl_conn *conn,
                   bond->name, slave->name);
         bond->send_learning_packets = true;
         unixctl_command_reply(conn, "done");
+        bond_record_active_slave_change(bond);
     } else {
         unixctl_command_reply(conn, "no change");
     }
@@ -1747,6 +1802,11 @@ bond_choose_slave(const struct bond *bond)
 {
     struct bond_slave *slave, *best;
 
+    /* Find the last active slave. */
+    slave = bond_find_slave_by_mac(bond, bond->last_active_iface);
+    if (slave && slave->enabled)
+        return slave;
+
     /* Find an enabled slave. */
     HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) {
         if (slave->enabled) {
@@ -1787,6 +1847,10 @@ bond_choose_active_slave(struct bond *bond)
         }
 
         bond->send_learning_packets = true;
+
+        if (bond->active_slave != old_active_slave) {
+            bond_record_active_slave_change(bond);
+        }
     } else if (old_active_slave) {
         VLOG_INFO_RL(&rl, "bond %s: all interfaces disabled", bond->name);
     }
diff --git a/ofproto/bond.h b/ofproto/bond.h
index c487ec2..a6ac8a6 100644
--- a/ofproto/bond.h
+++ b/ofproto/bond.h
@@ -53,6 +53,12 @@ struct bond_settings {
     int down_delay;             /* ms before disabling a down slave. */
 
     bool lacp_fallback_ab_cfg;  /* Fallback to active-backup on LACP failure. */
+
+    uint8_t last_active_iface[6];   /* The MAC address of the interface
+                                   that was active during the last
+                                   ovs run. */
+    const struct ovsrec_port *port_cfg; /* OVSDB table to record the
+                                           last_active_slave. */
 };
 
 /* Program startup. */
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index c5c6096..6aeef3d 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -62,6 +62,7 @@
 #include "vlog.h"
 #include "sflow_api.h"
 #include "vlan-bitmap.h"
+#include "packets.h"
 
 VLOG_DEFINE_THIS_MODULE(bridge);
 
@@ -389,6 +390,7 @@ bridge_init(const char *remote)
     ovsdb_idl_omit_alert(idl, &ovsrec_port_col_rstp_status);
     ovsdb_idl_omit_alert(idl, &ovsrec_port_col_rstp_statistics);
     ovsdb_idl_omit_alert(idl, &ovsrec_port_col_statistics);
+    ovsdb_idl_omit_alert(idl, &ovsrec_port_col_bond_last_active_iface);
     ovsdb_idl_omit(idl, &ovsrec_port_col_external_ids);
 
     ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_admin_state);
@@ -3766,6 +3768,7 @@ port_configure_bond(struct port *port, struct bond_settings *s)
 {
     const char *detect_s;
     struct iface *iface;
+    const char *mac_s;
     int miimon_interval;
 
     s->name = port->name;
@@ -3822,6 +3825,16 @@ port_configure_bond(struct port *port, struct bond_settings *s)
     LIST_FOR_EACH (iface, port_elem, &port->ifaces) {
         netdev_set_miimon_interval(iface->netdev, miimon_interval);
     }
+
+    mac_s = port->cfg->bond_last_active_iface;
+    if (!mac_s || !ovs_scan(mac_s, ETH_ADDR_SCAN_FMT,
+                            ETH_ADDR_SCAN_ARGS(s->last_active_iface))) {
+
+        /* OVSDB did not store the last active interface */
+        memset(s->last_active_iface, 0, sizeof(s->last_active_iface));
+    }
+
+    s->port_cfg = port->cfg;
 }
 
 /* Returns true if 'port' is synthetic, that is, if we constructed it locally
diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema
index 3d78f1d..474b795 100644
--- a/vswitchd/vswitch.ovsschema
+++ b/vswitchd/vswitch.ovsschema
@@ -1,6 +1,6 @@
 {"name": "Open_vSwitch",
- "version": "7.9.0",
- "cksum": "2301439325 21345",
+ "version": "8.0.0",
+ "cksum": "1237897135 21502",
  "tables": {
    "Open_vSwitch": {
      "columns": {
@@ -160,6 +160,10 @@
          "type": "integer"},
        "bond_downdelay": {
          "type": "integer"},
+       "bond_last_active_iface": {
+         "type": {"key": {"type": "string"},
+                  "min": 0, "max": 1},
+                  "ephemeral": true},
        "bond_fake_iface": {
          "type": "boolean"},
        "fake_bridge": {
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index dca5439..f0e01d7 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -1257,6 +1257,11 @@
         </column>
       </group>
 
+      <column name="bond_last_active_iface">
+          For a bonded port, record the mac address of the last active slave.
+          Openvswitch internal use only.
+      </column>
+
       <column name="bond_fake_iface">
         For a bonded port, whether to create a fake internal interface with the
         name of the port.  Use only for compatibility with legacy software that
-- 
1.9.1




More information about the dev mailing list