[ovs-dev] [PATCH 2/2] secchan: Better tolerate failing controller admission control in fail-open.

Ben Pfaff blp at nicira.com
Mon Sep 14 22:18:20 UTC 2009


When the switch is configured to connect to a controller that accepts
connections, waits a few seconds, and then disconnects without setting up
flows, currently this causes "fail-open" to stop flush the flow table and
stop setting up new flows during the connection duration.  This is OK if
it happens once, but it can easily happen every 8 seconds with typical
backoff settings, and that isn't so great.

This commit changes fail-open to only flush the flow table once the switch
appears to have been admitted by the controller, which prevents these
frequent network interruptions.

Bug #1695.
---
 lib/rconn.c                   |   21 +++++---
 lib/rconn.h                   |    1 +
 secchan/fail-open.c           |  107 ++++++++++++++++++++++++++++++++++-------
 secchan/fail-open.h           |    8 +++
 secchan/ofproto.c             |   39 ++++++++++++---
 secchan/pktbuf.c              |   67 +++++++++++++++++++++++++-
 secchan/pktbuf.h              |    1 +
 utilities/ovs-controller.8.in |    7 +++
 utilities/ovs-controller.c    |   14 +++++-
 9 files changed, 230 insertions(+), 35 deletions(-)

diff --git a/lib/rconn.c b/lib/rconn.c
index be030d4..c0bc95d 100644
--- a/lib/rconn.c
+++ b/lib/rconn.c
@@ -499,7 +499,7 @@ rconn_recv(struct rconn *rc)
         int error = vconn_recv(rc->vconn, &buffer);
         if (!error) {
             copy_to_monitor(rc, buffer);
-            if (is_admitted_msg(buffer)
+            if (rc->probably_admitted || is_admitted_msg(buffer)
                 || time_now() - rc->last_connected >= 30) {
                 rc->probably_admitted = true;
                 rc->last_admitted = time_now();
@@ -637,15 +637,22 @@ rconn_is_connected(const struct rconn *rconn)
     return is_connected_state(rconn->state);
 }
 
-/* Returns 0 if 'rconn' is connected.  Otherwise, if 'rconn' is in a "failure
- * mode" (that is, it is not connected), returns the number of seconds that it
- * has been in failure mode, ignoring any times that it connected but the
- * controller's admission control policy caused it to be quickly
- * disconnected. */
+/* Returns true if 'rconn' is connected and thought to have been accepted by
+ * the peer's admission-control policy. */
+bool
+rconn_is_admitted(const struct rconn *rconn)
+{
+    return (rconn_is_connected(rconn)
+            && rconn->last_admitted >= rconn->last_connected);
+}
+
+/* Returns 0 if 'rconn' is currently connected and considered to have been
+ * accepted by the peer's admission-control policy, otherwise the number of
+ * seconds since 'rconn' was last in such a state. */
 int
 rconn_failure_duration(const struct rconn *rconn)
 {
-    return rconn_is_connected(rconn) ? 0 : time_now() - rconn->last_admitted;
+    return rconn_is_admitted(rconn) ? 0 : time_now() - rconn->last_admitted;
 }
 
 /* Returns the IP address of the peer, or 0 if the peer's IP address is not
diff --git a/lib/rconn.h b/lib/rconn.h
index ed0780a..ef4e16c 100644
--- a/lib/rconn.h
+++ b/lib/rconn.h
@@ -69,6 +69,7 @@ void rconn_add_monitor(struct rconn *, struct vconn *);
 const char *rconn_get_name(const struct rconn *);
 bool rconn_is_alive(const struct rconn *);
 bool rconn_is_connected(const struct rconn *);
+bool rconn_is_admitted(const struct rconn *);
 int rconn_failure_duration(const struct rconn *);
 bool rconn_is_connectivity_questionable(struct rconn *);
 
diff --git a/secchan/fail-open.c b/secchan/fail-open.c
index 60890d4..749b182 100644
--- a/secchan/fail-open.c
+++ b/secchan/fail-open.c
@@ -29,30 +29,64 @@
 #define THIS_MODULE VLM_fail_open
 #include "vlog.h"
 
+/*
+ * Fail-open mode.
+ *
+ * In fail-open mode, the switch detects when the controller cannot be
+ * contacted or when the controller is dropping switch connections because the
+ * switch does not pass its admission control policy.  In those situations the
+ * switch sets up flows itself using the "normal" action.
+ *
+ * There is a little subtlety to implementation, to properly handle the case
+ * where the controller allows switch connections but drops them a few seconds
+ * later for admission control reasons.  Because of this case, we don't want to
+ * just stop setting up flows when we connect to the controller: if we did,
+ * then new flow setup and existing flows would stop during the duration of
+ * connection to the controller, and thus the whole network would go down for
+ * that period of time.  So, instead, we add some special cases:
+ *
+ *    - When we are connected to a controller, but not yet sure that it has
+ *      admitted us, we set up flows immediately ourselves, but simultaneously
+ *      send out an OFPT_PACKET_IN to the controller.  (The OFPT_PACKET_IN
+ *      contains a special bogus buffer-id so that duplicate packets don't get
+ *      sent out to the network when the controller replies.)
+ *
+ *    - At the time we connect to the controller, we flush the flow table.
+ *      See the comment in fail_open_run() for details.
+ */
+
 struct fail_open {
     struct ofproto *ofproto;
     struct rconn *controller;
     int trigger_duration;
     int last_disconn_secs;
     struct status_category *ss_cat;
+    unsigned int n_conns;
 };
 
-/* Causes the switch to enter or leave fail-open mode, if appropriate. */
-void
-fail_open_run(struct fail_open *fo)
+/* Returns true if 'fo' should be in fail-open mode, otherwise false. */
+static inline bool
+should_fail_open(const struct fail_open *fo)
 {
-    int disconn_secs = rconn_failure_duration(fo->controller);
-    bool open = disconn_secs >= fo->trigger_duration;
-    if (open != (fo->last_disconn_secs != 0)) {
-        if (!open) {
-            flow_t flow;
+    return rconn_failure_duration(fo->controller) >= fo->trigger_duration;
+}
 
-            VLOG_WARN("No longer in fail-open mode");
-            fo->last_disconn_secs = 0;
+/* Returns true if 'fo' is currently in fail-open mode, otherwise false. */
+bool
+fail_open_is_active(const struct fail_open *fo)
+{
+    return fo->last_disconn_secs != 0;
+}
 
-            memset(&flow, 0, sizeof flow);
-            ofproto_delete_flow(fo->ofproto, &flow, OFPFW_ALL, 70000);
-        } else {
+/* Enter fail-open mode if we should be in it.  Handle reconnecting to a
+ * controller from fail-open mode. */
+void
+fail_open_run(struct fail_open *fo)
+{
+    /* Enter fail-open mode if 'fo' is not in it but should be.  */
+    if (should_fail_open(fo)) {
+        int disconn_secs = rconn_failure_duration(fo->controller);
+        if (!fail_open_is_active(fo)) {
             VLOG_WARN("Could not connect to controller (or switch failed "
                       "controller's post-connection admission control "
                       "policy) for %d seconds, failing open", disconn_secs);
@@ -62,11 +96,47 @@ fail_open_run(struct fail_open *fo)
              * fail-open rule from fail_open_flushed() when
              * ofproto_flush_flows() calls back to us. */
             ofproto_flush_flows(fo->ofproto);
+        } else if (disconn_secs > fo->last_disconn_secs + 60) {
+            VLOG_INFO("Still in fail-open mode after %d seconds disconnected "
+                      "from controller", disconn_secs);
+            fo->last_disconn_secs = disconn_secs;
+        }
+    }
+
+    /*
+     * If we're in fail-open mode and we just connected to a controller, blow
+     * away the whole flow table, to force sending the controller a
+     * OFPT_PACKET_IN message for each active flow.  If we don't do that, then
+     * we won't figure out that the controller has admitted us until a new flow
+     * arrives, which might take a while in a quiet or stable network.
+     *
+     * In a switch with lots of flows or very high-bandwidth flows this could
+     * cause network stuttering anyhow, since lots of packets will suddenly be
+     * thrown down to userspace.  If that causes a problem, we could start a
+     * timer here and only flush flows if fail-open is still active after a few
+     * seconds.
+     */
+    if (fo->n_conns != rconn_get_successful_connections(fo->controller)) {
+        fo->n_conns = rconn_get_successful_connections(fo->controller);
+        if (fail_open_is_active(fo) && rconn_is_connected(fo->controller)) {
+            ofproto_flush_flows(fo->ofproto);
         }
-    } else if (open && disconn_secs > fo->last_disconn_secs + 60) {
-        VLOG_INFO("Still in fail-open mode after %d seconds disconnected "
-                  "from controller", disconn_secs);
-        fo->last_disconn_secs = disconn_secs;
+    }
+}
+
+/* If 'fo' is currently in fail-open mode and its rconn has connected to the
+ * controller, exits fail open mode. */
+void
+fail_open_maybe_recover(struct fail_open *fo)
+{
+    if (fail_open_is_active(fo) && rconn_is_admitted(fo->controller)) {
+        flow_t flow;
+
+        VLOG_WARN("No longer in fail-open mode");
+        fo->last_disconn_secs = 0;
+
+        memset(&flow, 0, sizeof flow);
+        ofproto_delete_flow(fo->ofproto, &flow, OFPFW_ALL, FAIL_OPEN_PRIORITY);
     }
 }
 
@@ -92,7 +162,7 @@ fail_open_flushed(struct fail_open *fo)
         action.output.len = htons(sizeof action);
         action.output.port = htons(OFPP_NORMAL);
         memset(&flow, 0, sizeof flow);
-        ofproto_add_flow(fo->ofproto, &flow, OFPFW_ALL, 70000,
+        ofproto_add_flow(fo->ofproto, &flow, OFPFW_ALL, FAIL_OPEN_PRIORITY,
                          &action, 1, 0);
     }
 }
@@ -121,6 +191,7 @@ fail_open_create(struct ofproto *ofproto,
     fo->last_disconn_secs = 0;
     fo->ss_cat = switch_status_register(switch_status, "fail-open",
                                         fail_open_status_cb, fo);
+    fo->n_conns = 0;
     return fo;
 }
 
diff --git a/secchan/fail-open.h b/secchan/fail-open.h
index c0ada2e..900d587 100644
--- a/secchan/fail-open.h
+++ b/secchan/fail-open.h
@@ -26,13 +26,21 @@ struct ofproto;
 struct rconn;
 struct switch_status;
 
+/* Priority of the rule added by the fail-open subsystem when a switch enters
+ * fail-open mode.  This priority value uniquely identifies a fail-open flow
+ * (OpenFlow priorities max out at 65535 and nothing else in Open vSwitch
+ * creates flows with this priority). */
+#define FAIL_OPEN_PRIORITY 70000
+
 struct fail_open *fail_open_create(struct ofproto *, int trigger_duration,
                                    struct switch_status *,
                                    struct rconn *controller);
 void fail_open_set_trigger_duration(struct fail_open *, int trigger_duration);
 void fail_open_destroy(struct fail_open *);
 void fail_open_wait(struct fail_open *);
+bool fail_open_is_active(const struct fail_open *);
 void fail_open_run(struct fail_open *);
+void fail_open_maybe_recover(struct fail_open *);
 void fail_open_flushed(struct fail_open *);
 
 #endif /* fail-open.h */
diff --git a/secchan/ofproto.c b/secchan/ofproto.c
index 105e43e..dca59ee 100644
--- a/secchan/ofproto.c
+++ b/secchan/ofproto.c
@@ -811,9 +811,6 @@ ofproto_run1(struct ofproto *p)
             }
         }
     }
-    if (p->fail_open) {
-        fail_open_run(p->fail_open);
-    }
     pinsched_run(p->miss_sched, send_packet_in_miss, p);
     pinsched_run(p->action_sched, send_packet_in_action, p);
     if (p->executer) {
@@ -825,6 +822,12 @@ ofproto_run1(struct ofproto *p)
         ofconn_run(ofconn, p);
     }
 
+    /* Fail-open maintenance.  Do this after processing the ofconns since
+     * fail-open checks the status of the controller rconn. */
+    if (p->fail_open) {
+        fail_open_run(p->fail_open);
+    }
+
     for (i = 0; i < p->n_listeners; i++) {
         struct vconn *vconn;
         int retval;
@@ -1352,6 +1355,9 @@ ofconn_run(struct ofconn *ofconn, struct ofproto *p)
             if (!of_msg) {
                 break;
             }
+            if (p->fail_open) {
+                fail_open_maybe_recover(p->fail_open);
+            }
             handle_openflow(ofconn, p, of_msg);
             ofpbuf_delete(of_msg);
         }
@@ -2162,7 +2168,7 @@ handle_packet_out(struct ofproto *p, struct ofconn *ofconn,
     if (opo->buffer_id != htonl(UINT32_MAX)) {
         error = pktbuf_retrieve(ofconn->pktbuf, ntohl(opo->buffer_id),
                                 &buffer, &in_port);
-        if (error) {
+        if (error || !buffer) {
             return error;
         }
         payload = *buffer;
@@ -3078,7 +3084,23 @@ handle_odp_msg(struct ofproto *p, struct ofpbuf *packet)
 
     rule_execute(p, rule, &payload, &flow);
     rule_reinstall(p, rule);
-    ofpbuf_delete(packet);
+
+    if (rule->super && rule->super->cr.priority == FAIL_OPEN_PRIORITY
+        && rconn_is_connected(p->controller->rconn)) {
+        /*
+         * Extra-special case for fail-open mode.
+         *
+         * We are in fail-open mode and the packet matched the fail-open rule,
+         * but we are connected to a controller too.  We should send the packet
+         * up to the controller in the hope that it will try to set up a flow
+         * and thereby allow us to exit fail-open.
+         *
+         * See the top-level comment in fail-open.c for more information.
+         */
+        pinsched_send(p->miss_sched, in_port, packet, send_packet_in_miss, p);
+    } else {
+        ofpbuf_delete(packet);
+    }
 }
 
 static void
@@ -3305,6 +3327,7 @@ static void
 send_packet_in_miss(struct ofpbuf *packet, void *p_)
 {
     struct ofproto *p = p_;
+    bool in_fail_open = p->fail_open && fail_open_is_active(p->fail_open);
     struct ofconn *ofconn;
     struct ofpbuf payload;
     struct odp_msg *msg;
@@ -3314,8 +3337,10 @@ send_packet_in_miss(struct ofpbuf *packet, void *p_)
     payload.size = msg->length - sizeof *msg;
     LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) {
         if (ofconn->miss_send_len) {
-            uint32_t buffer_id = pktbuf_save(ofconn->pktbuf, &payload,
-                                             msg->port);
+            struct pktbuf *pb = ofconn->pktbuf;
+            uint32_t buffer_id = (in_fail_open
+                                  ? pktbuf_get_null(pb)
+                                  : pktbuf_save(pb, &payload, msg->port));
             int send_len = (buffer_id != UINT32_MAX ? ofconn->miss_send_len
                             : UINT32_MAX);
             do_send_packet_in(ofconn, buffer_id, packet, send_len);
diff --git a/secchan/pktbuf.c b/secchan/pktbuf.c
index b4198a8..e08f6a4 100644
--- a/secchan/pktbuf.c
+++ b/secchan/pktbuf.c
@@ -51,6 +51,7 @@ struct packet {
 struct pktbuf {
     struct packet packets[PKTBUF_CNT];
     unsigned int buffer_idx;
+    unsigned int null_idx;
 };
 
 int
@@ -78,6 +79,22 @@ pktbuf_destroy(struct pktbuf *pb)
     }
 }
 
+static unsigned int
+make_id(unsigned int buffer_idx, unsigned int cookie)
+{
+    return buffer_idx | (cookie << PKTBUF_BITS);
+}
+
+/* Attempts to allocate an OpenFlow packet buffer id within 'pb'.  The packet
+ * buffer will store a copy of 'buffer' and the port number 'in_port', which
+ * should be the datapath port number on which 'buffer' was received.
+ *
+ * If successful, returns the packet buffer id (a number other than
+ * UINT32_MAX).  pktbuf_retrieve() can later be used to retrieve the buffer and
+ * its input port number (buffers do expire after a time, so this is not
+ * guaranteed to be true forever).  On failure, returns UINT32_MAX.
+ *
+ * The caller retains ownership of 'buffer'. */
 uint32_t
 pktbuf_save(struct pktbuf *pb, struct ofpbuf *buffer, uint16_t in_port)
 {
@@ -97,9 +114,50 @@ pktbuf_save(struct pktbuf *pb, struct ofpbuf *buffer, uint16_t in_port)
     p->buffer = ofpbuf_clone(buffer);
     p->timeout = time_msec() + OVERWRITE_MSECS;
     p->in_port = in_port;
-    return (p - pb->packets) | (p->cookie << PKTBUF_BITS);
+    return make_id(p - pb->packets, p->cookie);
 }
 
+/*
+ * Allocates and returns a "null" packet buffer id.  The returned packet buffer
+ * id is considered valid by pktbuf_retrieve(), but it is not associated with
+ * actual buffered data.
+ *
+ * This function is always successful.
+ *
+ * This is useful in one special case: with the current OpenFlow design, the
+ * "fail-open" code cannot always know whether a connection to a controller is
+ * actually valid until it receives a OFPT_PACKET_OUT or OFPT_FLOW_MOD request,
+ * but at that point the packet in question has already been forwarded (since
+ * we are still in "fail-open" mode).  If the packet was buffered in the usual
+ * way, then the OFPT_PACKET_OUT or OFPT_FLOW_MOD would cause a duplicate
+ * packet in the network.  Null packet buffer ids identify such a packet that
+ * has already been forwarded, so that Open vSwitch can quietly ignore the
+ * request to re-send it.  (After that happens, the switch exits fail-open
+ * mode.)
+ *
+ * See the top-level comment in fail-open.c for an overview.
+ */
+uint32_t
+pktbuf_get_null(struct pktbuf *pb)
+{
+    unsigned int id = make_id(pb->null_idx, COOKIE_MAX);
+    if (++pb->null_idx >= PKTBUF_CNT) {
+        pb->null_idx = 0;
+    }
+    return id;
+}
+
+/* Attempts to retrieve a saved packet with the given 'id' from 'pb'.  Returns
+ * 0 if successful, otherwise an OpenFlow error code constructed with
+ * ofp_mkerr().
+ *
+ * On success, ordinarily stores the buffered packet in '*bufferp' and the
+ * datapath port number on which the packet was received in '*in_port'.  The
+ * caller becomes responsible for freeing the buffer.  However, if 'id'
+ * identifies a "null" packet buffer (created with pktbuf_get_null()), stores
+ * NULL in '*bufferp' and -1 in '*in_port'.
+ *
+ * On failure, stores NULL in in '*bufferp' and -1 in '*in_port'. */
 int
 pktbuf_retrieve(struct pktbuf *pb, uint32_t id, struct ofpbuf **bufferp,
                 uint16_t *in_port)
@@ -128,11 +186,16 @@ pktbuf_retrieve(struct pktbuf *pb, uint32_t id, struct ofpbuf **bufferp,
             VLOG_WARN_RL(&rl, "attempt to reuse buffer %08"PRIx32, id);
             error = ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BUFFER_EMPTY);
         }
-    } else {
+    } else if (id >> PKTBUF_BITS != COOKIE_MAX) {
         COVERAGE_INC(pktbuf_bad_cookie);
         VLOG_WARN_RL(&rl, "cookie mismatch: %08"PRIx32" != %08"PRIx32,
                      id, (id & PKTBUF_MASK) | (p->cookie << PKTBUF_BITS));
         error = ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_COOKIE);
+    } else {
+        COVERAGE_INC(pktbuf_null_cookie);
+        VLOG_INFO_RL(&rl, "Received null cookie %08"PRIx32" (this is normal "
+                     "if the switch was recently in fail-open mode)", id);
+        error = 0;
     }
     *bufferp = NULL;
     *in_port = -1;
diff --git a/secchan/pktbuf.h b/secchan/pktbuf.h
index b27b749..fea8b3c 100644
--- a/secchan/pktbuf.h
+++ b/secchan/pktbuf.h
@@ -27,6 +27,7 @@ int pktbuf_capacity(void);
 struct pktbuf *pktbuf_create(void);
 void pktbuf_destroy(struct pktbuf *);
 uint32_t pktbuf_save(struct pktbuf *, struct ofpbuf *buffer, uint16_t in_port);
+uint32_t pktbuf_get_null(struct pktbuf *);
 int pktbuf_retrieve(struct pktbuf *, uint32_t id, struct ofpbuf **bufferp,
                     uint16_t *in_port);
 void pktbuf_discard(struct pktbuf *, uint32_t id);
diff --git a/utilities/ovs-controller.8.in b/utilities/ovs-controller.8.in
index 750fcea..43bc43b 100644
--- a/utilities/ovs-controller.8.in
+++ b/utilities/ovs-controller.8.in
@@ -113,6 +113,13 @@ through the controller and every packet is flooded.
 This option is most useful for debugging.  It reduces switching
 performance, so it should not be used in production.
 
+.IP "\fB--mute\fR"
+Prevents ovs\-controller from replying to any OpenFlow messages sent
+to it by switches.
+.IP
+This option is only for debugging the Open vSwitch implementation of
+``fail open'' mode.  It must not be used in production.
+
 .so lib/daemon.man
 .so lib/vlog.man
 .so lib/common.man
diff --git a/utilities/ovs-controller.c b/utilities/ovs-controller.c
index 010cad7..314da18 100644
--- a/utilities/ovs-controller.c
+++ b/utilities/ovs-controller.c
@@ -58,6 +58,10 @@ static bool setup_flows = true;
 /* --max-idle: Maximum idle time, in seconds, before flows expire. */
 static int max_idle = 60;
 
+/* --mute: If true, accept connections from switches but do not reply to any
+ * of their messages (for debugging fail-open mode). */
+static bool mute = false;
+
 static int do_switching(struct switch_ *);
 static void new_switch(struct switch_ *, struct vconn *, const char *name);
 static void parse_options(int argc, char *argv[]);
@@ -211,7 +215,9 @@ do_switching(struct switch_ *sw)
 
     msg = rconn_recv(sw->rconn);
     if (msg) {
-        lswitch_process_packet(sw->lswitch, sw->rconn, msg);
+        if (!mute) {
+            lswitch_process_packet(sw->lswitch, sw->rconn, msg);
+        }
         ofpbuf_delete(msg);
     }
     rconn_run(sw->rconn);
@@ -227,12 +233,14 @@ parse_options(int argc, char *argv[])
     enum {
         OPT_MAX_IDLE = UCHAR_MAX + 1,
         OPT_PEER_CA_CERT,
+        OPT_MUTE,
         VLOG_OPTION_ENUMS
     };
     static struct option long_options[] = {
         {"hub",         no_argument, 0, 'H'},
         {"noflow",      no_argument, 0, 'n'},
         {"max-idle",    required_argument, 0, OPT_MAX_IDLE},
+        {"mute",        no_argument, 0, OPT_MUTE},
         {"help",        no_argument, 0, 'h'},
         {"version",     no_argument, 0, 'V'},
         DAEMON_LONG_OPTIONS,
@@ -263,6 +271,10 @@ parse_options(int argc, char *argv[])
             setup_flows = false;
             break;
 
+        case OPT_MUTE:
+            mute = true;
+            break;
+
         case OPT_MAX_IDLE:
             if (!strcmp(optarg, "permanent")) {
                 max_idle = OFP_FLOW_PERMANENT;
-- 
1.6.3.3





More information about the dev mailing list