[ovs-dev] [PATCH v3 1/4] ovs-vswitchd: An option to wait for userspace flow restore to complete.

Gurucharan Shetty shettyg at nicira.com
Thu May 30 09:19:35 UTC 2013


While upgrading openvswitch, it helps to restore openflow flows before
starting packet processing.  Typically we want to restart openvswitch,
add the openflow flows and then start packet processing.

To do this, we look for the other_config:flow-restore-wait column
in the Open_vSwitch table during startup. If set as true, we disable
receiving packets from the datapath, expiring or flushing flows and
running any periodic ofproto activities. This option does not prevent
the addition and deletion of ports. Once this option is set to false,
we return to normal processing.

An upcoming commit will use this feature in Open vSwitch startup scripts.

Bug #16086.
Signed-off-by: Gurucharan Shetty <gshetty at nicira.com>
---
 ofproto/ofproto-dpif.c |   49 +++++++++++++++++++++++++++++++++++++++++++++---
 ofproto/ofproto.c      |   16 ++++++++++++++++
 ofproto/ofproto.h      |    2 ++
 vswitchd/bridge.c      |    7 +++++++
 vswitchd/vswitch.xml   |   44 +++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 115 insertions(+), 3 deletions(-)

diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 633dc5a..06b47fa 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -659,6 +659,7 @@ struct dpif_backer {
     struct tag_set revalidate_set; /* Revalidate only matching facets. */
 
     struct hmap drop_keys; /* Set of dropped odp keys. */
+    bool recv_set_enable; /* Enables or disables receiving packets. */
 };
 
 /* All existing ofproto_backer instances, indexed by ofproto->up.type. */
@@ -932,6 +933,21 @@ type_run(const char *type)
         push_all_stats();
     }
 
+    /* If vswitchd started with other_config:flow_restore_wait set as "true",
+     * and the configuration has now changed to "false", enable receiving
+     * packets from the datapath. */
+    if (!backer->recv_set_enable && !ofproto_get_flow_restore_wait()) {
+        backer->recv_set_enable = true;
+
+        error = dpif_recv_set(backer->dpif, backer->recv_set_enable);
+        if (error) {
+            VLOG_ERR("Failed to enable receiving packets in dpif.");
+            return error;
+        }
+        dpif_flow_flush(backer->dpif);
+        backer->need_revalidate = REV_RECONFIGURE;
+    }
+
     if (backer->need_revalidate
         || !tag_set_is_empty(&backer->revalidate_set)) {
         struct tag_set revalidate_set = backer->revalidate_set;
@@ -1025,7 +1041,10 @@ type_run(const char *type)
         }
     }
 
-    if (timer_expired(&backer->next_expiration)) {
+    if (!backer->recv_set_enable) {
+        /* A minimum of 1000 ms delay. */
+        timer_set_duration(&backer->next_expiration, 1000);
+    } else if (timer_expired(&backer->next_expiration)) {
         int delay = expire(backer);
         timer_set_duration(&backer->next_expiration, delay);
     }
@@ -1091,6 +1110,11 @@ dpif_backer_run_fast(struct dpif_backer *backer, int max_batch)
 {
     unsigned int work;
 
+    /* If recv_set_enable is false, we should not handle upcalls. */
+    if (!backer->recv_set_enable) {
+        return 0;
+    }
+
     /* Handle one or more batches of upcalls, until there's nothing left to do
      * or until we do a fixed total amount of work.
      *
@@ -1290,9 +1314,12 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp)
     backer->need_revalidate = 0;
     simap_init(&backer->tnl_backers);
     tag_set_init(&backer->revalidate_set);
+    backer->recv_set_enable = !ofproto_get_flow_restore_wait();
     *backerp = backer;
 
-    dpif_flow_flush(backer->dpif);
+    if (backer->recv_set_enable) {
+        dpif_flow_flush(backer->dpif);
+    }
 
     /* Loop through the ports already on the datapath and remove any
      * that we don't need anymore. */
@@ -1316,7 +1343,7 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp)
 
     shash_add(&all_dpif_backers, type, backer);
 
-    error = dpif_recv_set(backer->dpif, true);
+    error = dpif_recv_set(backer->dpif, backer->recv_set_enable);
     if (error) {
         VLOG_ERR("failed to listen on datapath of type %s: %s",
                  type, strerror(error));
@@ -1552,6 +1579,12 @@ run_fast(struct ofproto *ofproto_)
     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
     struct ofport_dpif *ofport;
 
+    /* Do not perform any periodic activity required by 'ofproto' while
+     * waiting for flow restore to complete. */
+    if (ofproto_get_flow_restore_wait()) {
+        return 0;
+    }
+
     HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
         port_run_fast(ofport);
     }
@@ -1567,6 +1600,12 @@ run(struct ofproto *ofproto_)
     struct ofbundle *bundle;
     int error;
 
+    /* Do not perform any periodic activity required by 'ofproto' while
+     * waiting for flow restore to complete. */
+    if (ofproto_get_flow_restore_wait()) {
+        return 0;
+    }
+
     if (!clogged) {
         complete_operations(ofproto);
     }
@@ -1641,6 +1680,10 @@ wait(struct ofproto *ofproto_)
     struct ofport_dpif *ofport;
     struct ofbundle *bundle;
 
+    if (ofproto_get_flow_restore_wait()) {
+        return;
+    }
+
     if (!clogged && !list_is_empty(&ofproto->completions)) {
         poll_immediate_wake();
     }
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index ca1dc89..b34e213 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -226,6 +226,9 @@ static struct shash init_ofp_ports = SHASH_INITIALIZER(&init_ofp_ports);
 
 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
 
+/* The default value of true waits for flow restore. */
+static bool flow_restore_wait = true;
+
 /* Must be called to initialize the ofproto library.
  *
  * The caller may pass in 'iface_hints', which contains an shash of
@@ -653,6 +656,19 @@ ofproto_set_ipfix(struct ofproto *ofproto,
         return (bo || fo) ? EOPNOTSUPP : 0;
     }
 }
+
+void
+ofproto_set_flow_restore_wait(bool flow_restore_wait_db)
+{
+    flow_restore_wait = flow_restore_wait_db;
+}
+
+bool
+ofproto_get_flow_restore_wait(void)
+{
+    return flow_restore_wait;
+}
+
 
 /* Spanning Tree Protocol (STP) configuration. */
 
diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h
index acb1790..18241e7 100644
--- a/ofproto/ofproto.h
+++ b/ofproto/ofproto.h
@@ -249,6 +249,8 @@ int ofproto_set_ipfix(struct ofproto *,
                       const struct ofproto_ipfix_bridge_exporter_options *,
                       const struct ofproto_ipfix_flow_exporter_options *,
                       size_t);
+void ofproto_set_flow_restore_wait(bool flow_restore_wait_db);
+bool ofproto_get_flow_restore_wait(void);
 int ofproto_set_stp(struct ofproto *, const struct ofproto_stp_settings *);
 int ofproto_get_stp_status(struct ofproto *, struct ofproto_stp_status *);
 
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index f067c9d..6daa108 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -2321,6 +2321,13 @@ bridge_run(void)
      * returns immediately. */
     bridge_init_ofproto(cfg);
 
+    /* Once the value of flow-restore-wait is false, we no longer should
+     * check its value from the database. */
+    if (ofproto_get_flow_restore_wait()) {
+        ofproto_set_flow_restore_wait(smap_get_bool(&cfg->other_config,
+                                        "flow-restore-wait", false));
+    }
+
     /* Let each datapath type do the work that it needs to do. */
     sset_init(&types);
     ofproto_enumerate_types(&types);
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 47b13d2..d5df6fa 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -71,6 +71,50 @@
         The Citrix XenServer universally unique identifier for the physical
         host as displayed by <code>xe host-list</code>.
       </column>
+
+      <column name="other_config" key="flow-restore-wait"
+              type='{"type": "boolean"}'>
+        When <code>ovs-vswitchd</code> starts up, it has an empty flow table
+        and therefore it handles all arriving packets in its default fashion
+        according to its configuration, by dropping them or sending them to
+        an OpenFlow controller or switching them as a standalone switch.
+        This behavior is ordinarily desirable.  However, if
+        <code>ovs-vswitchd</code> is restarting as part of a
+        ``hot-upgrade,'' then this leads to a relatively long period during
+        which packets are mishandled.
+
+        This option allows for improvement.  When <code>ovs-vswitchd</code>
+        starts with this value set as <code>true</code>, it will neither
+        flush or expire previously set datapath flows nor will it send and
+        receive any packets to or from the datapath.  When this value is
+        later set to <code>false</code>, <code>ovs-vswitchd</code> will
+        start receiving packets from the datapath and re-setup the flows.
+
+        Thus, with this option, the procedure for a hot-upgrade of
+        <code>ovs-vswitchd</code> becomes roughly the following:
+
+        <ol>
+          <li>
+            Stop <code>ovs-vswitchd</code>.
+          </li>
+          <li>
+            Set <ref column="other_config" key="flow-restore-wait"/>
+            to <code>true</code>.
+          </li>
+          <li>
+            Start <code>ovs-vswitchd</code>.
+          </li>
+          <li>
+            Use <code>ovs-ofctl</code> (or some other program, such as an
+            OpenFlow controller) to restore the OpenFlow flow table
+            to the desired state.
+          </li>
+          <li>
+            Set <ref column="other_config" key="flow-restore-wait"/>
+            to <code>false</code> (or remove it entirely from the database).
+          </li>
+        </ol>
+      </column>
     </group>
 
     <group title="Status">
-- 
1.7.9.5




More information about the dev mailing list