[ovs-dev] [PATCH 2/2] secchan: Better tolerate failing controller admission control in fail-open.
Ben Pfaff
blp at nicira.com
Mon Sep 14 22:18:20 UTC 2009
When the switch is configured to connect to a controller that accepts
connections, waits a few seconds, and then disconnects without setting up
flows, currently this causes "fail-open" to stop flush the flow table and
stop setting up new flows during the connection duration. This is OK if
it happens once, but it can easily happen every 8 seconds with typical
backoff settings, and that isn't so great.
This commit changes fail-open to only flush the flow table once the switch
appears to have been admitted by the controller, which prevents these
frequent network interruptions.
Bug #1695.
---
lib/rconn.c | 21 +++++---
lib/rconn.h | 1 +
secchan/fail-open.c | 107 ++++++++++++++++++++++++++++++++++-------
secchan/fail-open.h | 8 +++
secchan/ofproto.c | 39 ++++++++++++---
secchan/pktbuf.c | 67 +++++++++++++++++++++++++-
secchan/pktbuf.h | 1 +
utilities/ovs-controller.8.in | 7 +++
utilities/ovs-controller.c | 14 +++++-
9 files changed, 230 insertions(+), 35 deletions(-)
diff --git a/lib/rconn.c b/lib/rconn.c
index be030d4..c0bc95d 100644
--- a/lib/rconn.c
+++ b/lib/rconn.c
@@ -499,7 +499,7 @@ rconn_recv(struct rconn *rc)
int error = vconn_recv(rc->vconn, &buffer);
if (!error) {
copy_to_monitor(rc, buffer);
- if (is_admitted_msg(buffer)
+ if (rc->probably_admitted || is_admitted_msg(buffer)
|| time_now() - rc->last_connected >= 30) {
rc->probably_admitted = true;
rc->last_admitted = time_now();
@@ -637,15 +637,22 @@ rconn_is_connected(const struct rconn *rconn)
return is_connected_state(rconn->state);
}
-/* Returns 0 if 'rconn' is connected. Otherwise, if 'rconn' is in a "failure
- * mode" (that is, it is not connected), returns the number of seconds that it
- * has been in failure mode, ignoring any times that it connected but the
- * controller's admission control policy caused it to be quickly
- * disconnected. */
+/* Returns true if 'rconn' is connected and thought to have been accepted by
+ * the peer's admission-control policy. */
+bool
+rconn_is_admitted(const struct rconn *rconn)
+{
+ return (rconn_is_connected(rconn)
+ && rconn->last_admitted >= rconn->last_connected);
+}
+
+/* Returns 0 if 'rconn' is currently connected and considered to have been
+ * accepted by the peer's admission-control policy, otherwise the number of
+ * seconds since 'rconn' was last in such a state. */
int
rconn_failure_duration(const struct rconn *rconn)
{
- return rconn_is_connected(rconn) ? 0 : time_now() - rconn->last_admitted;
+ return rconn_is_admitted(rconn) ? 0 : time_now() - rconn->last_admitted;
}
/* Returns the IP address of the peer, or 0 if the peer's IP address is not
diff --git a/lib/rconn.h b/lib/rconn.h
index ed0780a..ef4e16c 100644
--- a/lib/rconn.h
+++ b/lib/rconn.h
@@ -69,6 +69,7 @@ void rconn_add_monitor(struct rconn *, struct vconn *);
const char *rconn_get_name(const struct rconn *);
bool rconn_is_alive(const struct rconn *);
bool rconn_is_connected(const struct rconn *);
+bool rconn_is_admitted(const struct rconn *);
int rconn_failure_duration(const struct rconn *);
bool rconn_is_connectivity_questionable(struct rconn *);
diff --git a/secchan/fail-open.c b/secchan/fail-open.c
index 60890d4..749b182 100644
--- a/secchan/fail-open.c
+++ b/secchan/fail-open.c
@@ -29,30 +29,64 @@
#define THIS_MODULE VLM_fail_open
#include "vlog.h"
+/*
+ * Fail-open mode.
+ *
+ * In fail-open mode, the switch detects when the controller cannot be
+ * contacted or when the controller is dropping switch connections because the
+ * switch does not pass its admission control policy. In those situations the
+ * switch sets up flows itself using the "normal" action.
+ *
+ * There is a little subtlety to implementation, to properly handle the case
+ * where the controller allows switch connections but drops them a few seconds
+ * later for admission control reasons. Because of this case, we don't want to
+ * just stop setting up flows when we connect to the controller: if we did,
+ * then new flow setup and existing flows would stop during the duration of
+ * connection to the controller, and thus the whole network would go down for
+ * that period of time. So, instead, we add some special cases:
+ *
+ * - When we are connected to a controller, but not yet sure that it has
+ * admitted us, we set up flows immediately ourselves, but simultaneously
+ * send out an OFPT_PACKET_IN to the controller. (The OFPT_PACKET_IN
+ * contains a special bogus buffer-id so that duplicate packets don't get
+ * sent out to the network when the controller replies.)
+ *
+ * - At the time we connect to the controller, we flush the flow table.
+ * See the comment in fail_open_run() for details.
+ */
+
struct fail_open {
struct ofproto *ofproto;
struct rconn *controller;
int trigger_duration;
int last_disconn_secs;
struct status_category *ss_cat;
+ unsigned int n_conns;
};
-/* Causes the switch to enter or leave fail-open mode, if appropriate. */
-void
-fail_open_run(struct fail_open *fo)
+/* Returns true if 'fo' should be in fail-open mode, otherwise false. */
+static inline bool
+should_fail_open(const struct fail_open *fo)
{
- int disconn_secs = rconn_failure_duration(fo->controller);
- bool open = disconn_secs >= fo->trigger_duration;
- if (open != (fo->last_disconn_secs != 0)) {
- if (!open) {
- flow_t flow;
+ return rconn_failure_duration(fo->controller) >= fo->trigger_duration;
+}
- VLOG_WARN("No longer in fail-open mode");
- fo->last_disconn_secs = 0;
+/* Returns true if 'fo' is currently in fail-open mode, otherwise false. */
+bool
+fail_open_is_active(const struct fail_open *fo)
+{
+ return fo->last_disconn_secs != 0;
+}
- memset(&flow, 0, sizeof flow);
- ofproto_delete_flow(fo->ofproto, &flow, OFPFW_ALL, 70000);
- } else {
+/* Enter fail-open mode if we should be in it. Handle reconnecting to a
+ * controller from fail-open mode. */
+void
+fail_open_run(struct fail_open *fo)
+{
+ /* Enter fail-open mode if 'fo' is not in it but should be. */
+ if (should_fail_open(fo)) {
+ int disconn_secs = rconn_failure_duration(fo->controller);
+ if (!fail_open_is_active(fo)) {
VLOG_WARN("Could not connect to controller (or switch failed "
"controller's post-connection admission control "
"policy) for %d seconds, failing open", disconn_secs);
@@ -62,11 +96,47 @@ fail_open_run(struct fail_open *fo)
* fail-open rule from fail_open_flushed() when
* ofproto_flush_flows() calls back to us. */
ofproto_flush_flows(fo->ofproto);
+ } else if (disconn_secs > fo->last_disconn_secs + 60) {
+ VLOG_INFO("Still in fail-open mode after %d seconds disconnected "
+ "from controller", disconn_secs);
+ fo->last_disconn_secs = disconn_secs;
+ }
+ }
+
+ /*
+ * If we're in fail-open mode and we just connected to a controller, blow
+ * away the whole flow table, to force sending the controller a
+ * OFPT_PACKET_IN message for each active flow. If we don't do that, then
+ * we won't figure out that the controller has admitted us until a new flow
+ * arrives, which might take a while in a quiet or stable network.
+ *
+ * In a switch with lots of flows or very high-bandwidth flows this could
+ * cause network stuttering anyhow, since lots of packets will suddenly be
+ * thrown down to userspace. If that causes a problem, we could start a
+ * timer here and only flush flows if fail-open is still active after a few
+ * seconds.
+ */
+ if (fo->n_conns != rconn_get_successful_connections(fo->controller)) {
+ fo->n_conns = rconn_get_successful_connections(fo->controller);
+ if (fail_open_is_active(fo) && rconn_is_connected(fo->controller)) {
+ ofproto_flush_flows(fo->ofproto);
}
- } else if (open && disconn_secs > fo->last_disconn_secs + 60) {
- VLOG_INFO("Still in fail-open mode after %d seconds disconnected "
- "from controller", disconn_secs);
- fo->last_disconn_secs = disconn_secs;
+ }
+}
+
+/* If 'fo' is currently in fail-open mode and its rconn has connected to the
+ * controller, exits fail open mode. */
+void
+fail_open_maybe_recover(struct fail_open *fo)
+{
+ if (fail_open_is_active(fo) && rconn_is_admitted(fo->controller)) {
+ flow_t flow;
+
+ VLOG_WARN("No longer in fail-open mode");
+ fo->last_disconn_secs = 0;
+
+ memset(&flow, 0, sizeof flow);
+ ofproto_delete_flow(fo->ofproto, &flow, OFPFW_ALL, FAIL_OPEN_PRIORITY);
}
}
@@ -92,7 +162,7 @@ fail_open_flushed(struct fail_open *fo)
action.output.len = htons(sizeof action);
action.output.port = htons(OFPP_NORMAL);
memset(&flow, 0, sizeof flow);
- ofproto_add_flow(fo->ofproto, &flow, OFPFW_ALL, 70000,
+ ofproto_add_flow(fo->ofproto, &flow, OFPFW_ALL, FAIL_OPEN_PRIORITY,
&action, 1, 0);
}
}
@@ -121,6 +191,7 @@ fail_open_create(struct ofproto *ofproto,
fo->last_disconn_secs = 0;
fo->ss_cat = switch_status_register(switch_status, "fail-open",
fail_open_status_cb, fo);
+ fo->n_conns = 0;
return fo;
}
diff --git a/secchan/fail-open.h b/secchan/fail-open.h
index c0ada2e..900d587 100644
--- a/secchan/fail-open.h
+++ b/secchan/fail-open.h
@@ -26,13 +26,21 @@ struct ofproto;
struct rconn;
struct switch_status;
+/* Priority of the rule added by the fail-open subsystem when a switch enters
+ * fail-open mode. This priority value uniquely identifies a fail-open flow
+ * (OpenFlow priorities max out at 65535 and nothing else in Open vSwitch
+ * creates flows with this priority). */
+#define FAIL_OPEN_PRIORITY 70000
+
struct fail_open *fail_open_create(struct ofproto *, int trigger_duration,
struct switch_status *,
struct rconn *controller);
void fail_open_set_trigger_duration(struct fail_open *, int trigger_duration);
void fail_open_destroy(struct fail_open *);
void fail_open_wait(struct fail_open *);
+bool fail_open_is_active(const struct fail_open *);
void fail_open_run(struct fail_open *);
+void fail_open_maybe_recover(struct fail_open *);
void fail_open_flushed(struct fail_open *);
#endif /* fail-open.h */
diff --git a/secchan/ofproto.c b/secchan/ofproto.c
index 105e43e..dca59ee 100644
--- a/secchan/ofproto.c
+++ b/secchan/ofproto.c
@@ -811,9 +811,6 @@ ofproto_run1(struct ofproto *p)
}
}
}
- if (p->fail_open) {
- fail_open_run(p->fail_open);
- }
pinsched_run(p->miss_sched, send_packet_in_miss, p);
pinsched_run(p->action_sched, send_packet_in_action, p);
if (p->executer) {
@@ -825,6 +822,12 @@ ofproto_run1(struct ofproto *p)
ofconn_run(ofconn, p);
}
+ /* Fail-open maintenance. Do this after processing the ofconns since
+ * fail-open checks the status of the controller rconn. */
+ if (p->fail_open) {
+ fail_open_run(p->fail_open);
+ }
+
for (i = 0; i < p->n_listeners; i++) {
struct vconn *vconn;
int retval;
@@ -1352,6 +1355,9 @@ ofconn_run(struct ofconn *ofconn, struct ofproto *p)
if (!of_msg) {
break;
}
+ if (p->fail_open) {
+ fail_open_maybe_recover(p->fail_open);
+ }
handle_openflow(ofconn, p, of_msg);
ofpbuf_delete(of_msg);
}
@@ -2162,7 +2168,7 @@ handle_packet_out(struct ofproto *p, struct ofconn *ofconn,
if (opo->buffer_id != htonl(UINT32_MAX)) {
error = pktbuf_retrieve(ofconn->pktbuf, ntohl(opo->buffer_id),
&buffer, &in_port);
- if (error) {
+ if (error || !buffer) {
return error;
}
payload = *buffer;
@@ -3078,7 +3084,23 @@ handle_odp_msg(struct ofproto *p, struct ofpbuf *packet)
rule_execute(p, rule, &payload, &flow);
rule_reinstall(p, rule);
- ofpbuf_delete(packet);
+
+ if (rule->super && rule->super->cr.priority == FAIL_OPEN_PRIORITY
+ && rconn_is_connected(p->controller->rconn)) {
+ /*
+ * Extra-special case for fail-open mode.
+ *
+ * We are in fail-open mode and the packet matched the fail-open rule,
+ * but we are connected to a controller too. We should send the packet
+ * up to the controller in the hope that it will try to set up a flow
+ * and thereby allow us to exit fail-open.
+ *
+ * See the top-level comment in fail-open.c for more information.
+ */
+ pinsched_send(p->miss_sched, in_port, packet, send_packet_in_miss, p);
+ } else {
+ ofpbuf_delete(packet);
+ }
}
static void
@@ -3305,6 +3327,7 @@ static void
send_packet_in_miss(struct ofpbuf *packet, void *p_)
{
struct ofproto *p = p_;
+ bool in_fail_open = p->fail_open && fail_open_is_active(p->fail_open);
struct ofconn *ofconn;
struct ofpbuf payload;
struct odp_msg *msg;
@@ -3314,8 +3337,10 @@ send_packet_in_miss(struct ofpbuf *packet, void *p_)
payload.size = msg->length - sizeof *msg;
LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) {
if (ofconn->miss_send_len) {
- uint32_t buffer_id = pktbuf_save(ofconn->pktbuf, &payload,
- msg->port);
+ struct pktbuf *pb = ofconn->pktbuf;
+ uint32_t buffer_id = (in_fail_open
+ ? pktbuf_get_null(pb)
+ : pktbuf_save(pb, &payload, msg->port));
int send_len = (buffer_id != UINT32_MAX ? ofconn->miss_send_len
: UINT32_MAX);
do_send_packet_in(ofconn, buffer_id, packet, send_len);
diff --git a/secchan/pktbuf.c b/secchan/pktbuf.c
index b4198a8..e08f6a4 100644
--- a/secchan/pktbuf.c
+++ b/secchan/pktbuf.c
@@ -51,6 +51,7 @@ struct packet {
struct pktbuf {
struct packet packets[PKTBUF_CNT];
unsigned int buffer_idx;
+ unsigned int null_idx;
};
int
@@ -78,6 +79,22 @@ pktbuf_destroy(struct pktbuf *pb)
}
}
+static unsigned int
+make_id(unsigned int buffer_idx, unsigned int cookie)
+{
+ return buffer_idx | (cookie << PKTBUF_BITS);
+}
+
+/* Attempts to allocate an OpenFlow packet buffer id within 'pb'. The packet
+ * buffer will store a copy of 'buffer' and the port number 'in_port', which
+ * should be the datapath port number on which 'buffer' was received.
+ *
+ * If successful, returns the packet buffer id (a number other than
+ * UINT32_MAX). pktbuf_retrieve() can later be used to retrieve the buffer and
+ * its input port number (buffers do expire after a time, so this is not
+ * guaranteed to be true forever). On failure, returns UINT32_MAX.
+ *
+ * The caller retains ownership of 'buffer'. */
uint32_t
pktbuf_save(struct pktbuf *pb, struct ofpbuf *buffer, uint16_t in_port)
{
@@ -97,9 +114,50 @@ pktbuf_save(struct pktbuf *pb, struct ofpbuf *buffer, uint16_t in_port)
p->buffer = ofpbuf_clone(buffer);
p->timeout = time_msec() + OVERWRITE_MSECS;
p->in_port = in_port;
- return (p - pb->packets) | (p->cookie << PKTBUF_BITS);
+ return make_id(p - pb->packets, p->cookie);
}
+/*
+ * Allocates and returns a "null" packet buffer id. The returned packet buffer
+ * id is considered valid by pktbuf_retrieve(), but it is not associated with
+ * actual buffered data.
+ *
+ * This function is always successful.
+ *
+ * This is useful in one special case: with the current OpenFlow design, the
+ * "fail-open" code cannot always know whether a connection to a controller is
+ * actually valid until it receives a OFPT_PACKET_OUT or OFPT_FLOW_MOD request,
+ * but at that point the packet in question has already been forwarded (since
+ * we are still in "fail-open" mode). If the packet was buffered in the usual
+ * way, then the OFPT_PACKET_OUT or OFPT_FLOW_MOD would cause a duplicate
+ * packet in the network. Null packet buffer ids identify such a packet that
+ * has already been forwarded, so that Open vSwitch can quietly ignore the
+ * request to re-send it. (After that happens, the switch exits fail-open
+ * mode.)
+ *
+ * See the top-level comment in fail-open.c for an overview.
+ */
+uint32_t
+pktbuf_get_null(struct pktbuf *pb)
+{
+ unsigned int id = make_id(pb->null_idx, COOKIE_MAX);
+ if (++pb->null_idx >= PKTBUF_CNT) {
+ pb->null_idx = 0;
+ }
+ return id;
+}
+
+/* Attempts to retrieve a saved packet with the given 'id' from 'pb'. Returns
+ * 0 if successful, otherwise an OpenFlow error code constructed with
+ * ofp_mkerr().
+ *
+ * On success, ordinarily stores the buffered packet in '*bufferp' and the
+ * datapath port number on which the packet was received in '*in_port'. The
+ * caller becomes responsible for freeing the buffer. However, if 'id'
+ * identifies a "null" packet buffer (created with pktbuf_get_null()), stores
+ * NULL in '*bufferp' and -1 in '*in_port'.
+ *
+ * On failure, stores NULL in in '*bufferp' and -1 in '*in_port'. */
int
pktbuf_retrieve(struct pktbuf *pb, uint32_t id, struct ofpbuf **bufferp,
uint16_t *in_port)
@@ -128,11 +186,16 @@ pktbuf_retrieve(struct pktbuf *pb, uint32_t id, struct ofpbuf **bufferp,
VLOG_WARN_RL(&rl, "attempt to reuse buffer %08"PRIx32, id);
error = ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BUFFER_EMPTY);
}
- } else {
+ } else if (id >> PKTBUF_BITS != COOKIE_MAX) {
COVERAGE_INC(pktbuf_bad_cookie);
VLOG_WARN_RL(&rl, "cookie mismatch: %08"PRIx32" != %08"PRIx32,
id, (id & PKTBUF_MASK) | (p->cookie << PKTBUF_BITS));
error = ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_COOKIE);
+ } else {
+ COVERAGE_INC(pktbuf_null_cookie);
+ VLOG_INFO_RL(&rl, "Received null cookie %08"PRIx32" (this is normal "
+ "if the switch was recently in fail-open mode)", id);
+ error = 0;
}
*bufferp = NULL;
*in_port = -1;
diff --git a/secchan/pktbuf.h b/secchan/pktbuf.h
index b27b749..fea8b3c 100644
--- a/secchan/pktbuf.h
+++ b/secchan/pktbuf.h
@@ -27,6 +27,7 @@ int pktbuf_capacity(void);
struct pktbuf *pktbuf_create(void);
void pktbuf_destroy(struct pktbuf *);
uint32_t pktbuf_save(struct pktbuf *, struct ofpbuf *buffer, uint16_t in_port);
+uint32_t pktbuf_get_null(struct pktbuf *);
int pktbuf_retrieve(struct pktbuf *, uint32_t id, struct ofpbuf **bufferp,
uint16_t *in_port);
void pktbuf_discard(struct pktbuf *, uint32_t id);
diff --git a/utilities/ovs-controller.8.in b/utilities/ovs-controller.8.in
index 750fcea..43bc43b 100644
--- a/utilities/ovs-controller.8.in
+++ b/utilities/ovs-controller.8.in
@@ -113,6 +113,13 @@ through the controller and every packet is flooded.
This option is most useful for debugging. It reduces switching
performance, so it should not be used in production.
+.IP "\fB--mute\fR"
+Prevents ovs\-controller from replying to any OpenFlow messages sent
+to it by switches.
+.IP
+This option is only for debugging the Open vSwitch implementation of
+``fail open'' mode. It must not be used in production.
+
.so lib/daemon.man
.so lib/vlog.man
.so lib/common.man
diff --git a/utilities/ovs-controller.c b/utilities/ovs-controller.c
index 010cad7..314da18 100644
--- a/utilities/ovs-controller.c
+++ b/utilities/ovs-controller.c
@@ -58,6 +58,10 @@ static bool setup_flows = true;
/* --max-idle: Maximum idle time, in seconds, before flows expire. */
static int max_idle = 60;
+/* --mute: If true, accept connections from switches but do not reply to any
+ * of their messages (for debugging fail-open mode). */
+static bool mute = false;
+
static int do_switching(struct switch_ *);
static void new_switch(struct switch_ *, struct vconn *, const char *name);
static void parse_options(int argc, char *argv[]);
@@ -211,7 +215,9 @@ do_switching(struct switch_ *sw)
msg = rconn_recv(sw->rconn);
if (msg) {
- lswitch_process_packet(sw->lswitch, sw->rconn, msg);
+ if (!mute) {
+ lswitch_process_packet(sw->lswitch, sw->rconn, msg);
+ }
ofpbuf_delete(msg);
}
rconn_run(sw->rconn);
@@ -227,12 +233,14 @@ parse_options(int argc, char *argv[])
enum {
OPT_MAX_IDLE = UCHAR_MAX + 1,
OPT_PEER_CA_CERT,
+ OPT_MUTE,
VLOG_OPTION_ENUMS
};
static struct option long_options[] = {
{"hub", no_argument, 0, 'H'},
{"noflow", no_argument, 0, 'n'},
{"max-idle", required_argument, 0, OPT_MAX_IDLE},
+ {"mute", no_argument, 0, OPT_MUTE},
{"help", no_argument, 0, 'h'},
{"version", no_argument, 0, 'V'},
DAEMON_LONG_OPTIONS,
@@ -263,6 +271,10 @@ parse_options(int argc, char *argv[])
setup_flows = false;
break;
+ case OPT_MUTE:
+ mute = true;
+ break;
+
case OPT_MAX_IDLE:
if (!strcmp(optarg, "permanent")) {
max_idle = OFP_FLOW_PERMANENT;
--
1.6.3.3
More information about the dev
mailing list