[ovs-dev] [bond megaflow v5 4/4] ofproto/bond: Implement bond megaflow using recirculation

Andy Zhou azhou at nicira.com
Fri Apr 4 23:03:02 UTC 2014


Infrastructure to enable megaflow support for bond ports using
recirculation. This patch adds the following features:
* Generate RECIRC action when bond can benefit from recirculation.
* Populate post recirculation rules in a hidden table. Currently table 254.
* Uses post recirculation rules for bond rebalancing
* A recirculation implementation in dpif-netdev.

The goal of this patch is to be able to megaflow bond outputs and
thus greatly improve performance. However, this patch does not
actually improve the megaflow generation. It is left for a later commit.

Signed-off-by: Andy Zhou <azhou at nicira.com>

---
v1->v2:  Rewritten

V2->V3:
        Address Ben's review feedback. Rebased to master.

        Remove force recirculation off. It is always on for the user
        space datapath. Off for the kernel datapath, via the built-in
        detection logic.

        Remove dpif_netdev's enable_recirc. It is always enabled.

        When in use, make sure dp_hash != 0. Netlink logic is now
        simpler: if recirc_id or dp_hash is zero, it will not
        be part of the serialization.

        Add a drop rule to catch any runaway recirc rule lookup.

        Allow flow_mod with OFPUTIL_FF_NO_READONLY to change rules
              in the READONLY tables.  (bypass readonly check).
              READONLY table remains read only to open flow controller.

        Allow OFPUTIL_FF_HIDDEN_FIELDS to set recirc_id and dp_hash.
              Currently only OVS internal logic sets this bit when
              installing internal rules. OpenFlow controllers cannot
              set them.

        ofproto_dpif_add_internal_flow() now returns a rule instead
                of a rule_dpif.

        Recirculation depth limit.

        Added unit test case that tests recirculation

        Bug and style fixes.

V3->V4:
        rebase.

V4->v5:
	Improve match_has_default_recirc_id()
	Style fixes based on Ben's feedback.
	rebase.

---
 lib/dpif-netdev.c            |   62 +++++++---
 lib/flow.c                   |    2 +
 lib/match.c                  |   28 +++++
 lib/match.h                  |    3 +
 lib/odp-execute.c            |    7 +-
 lib/ofp-parse.c              |    3 +
 lib/ofp-print.c              |    6 +
 lib/ofp-util.h               |    7 ++
 ofproto/bond.c               |  274 +++++++++++++++++++++++++++++++++++++++--
 ofproto/bond.h               |   33 ++++-
 ofproto/ofproto-dpif-xlate.c |   57 +++++++--
 ofproto/ofproto-dpif-xlate.h |   16 ++-
 ofproto/ofproto-dpif.c       |  276 +++++++++++++++++++++++++++++++++---------
 ofproto/ofproto-dpif.h       |   14 ++-
 ofproto/ofproto-provider.h   |    3 +-
 ofproto/ofproto.c            |   69 +++++++----
 ofproto/ofproto.h            |    3 +-
 tests/classifier.at          |   16 +--
 tests/lacp.at                |   24 +++-
 tests/ofproto-dpif.at        |  144 +++++++++++++++++++++-
 20 files changed, 907 insertions(+), 140 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 5b9bfdb..c6cf6e7 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -68,6 +68,9 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev);
 #define NETDEV_RULE_PRIORITY 0x8000
 
 #define NR_THREADS 1
+/* Use per thread recirc_depth to prevent recirculation loop. */
+#define MAX_RECIRC_DEPTH 5
+DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
 
 /* Configuration parameters. */
 enum { MAX_FLOWS = 65536 };     /* Maximum number of flows in flow table. */
@@ -1991,8 +1994,9 @@ dp_netdev_count_packet(struct dp_netdev *dp, enum dp_stat_type type)
 }
 
 static void
-dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet,
-                     struct pkt_metadata *md)
+dp_netdev_input(struct dp_netdev *dp, struct ofpbuf *packet,
+                struct pkt_metadata *md)
+    OVS_REQ_RDLOCK(dp->port_rwlock)
 {
     struct dp_netdev_flow *netdev_flow;
     struct flow key;
@@ -2021,6 +2025,17 @@ dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet,
     }
 }
 
+static void
+dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet,
+                     struct pkt_metadata *md)
+    OVS_REQ_RDLOCK(dp->port_rwlock)
+{
+    uint32_t *recirc_depth = recirc_depth_get();
+
+    *recirc_depth = 0;
+    dp_netdev_input(dp, packet, md);
+}
+
 static int
 dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet,
                            int queue_no, int type, const struct flow *flow,
@@ -2090,6 +2105,7 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet,
     struct dp_netdev_execute_aux *aux = aux_;
     int type = nl_attr_type(a);
     struct dp_netdev_port *p;
+    uint32_t *depth = recirc_depth_get();
 
     switch ((enum ovs_action_attr)type) {
     case OVS_ACTION_ATTR_OUTPUT:
@@ -2116,23 +2132,40 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet,
         break;
     }
 
-    case OVS_ACTION_ATTR_RECIRC: {
-        const struct ovs_action_recirc *act;
+    case OVS_ACTION_ATTR_RECIRC:
+        if (*depth < MAX_RECIRC_DEPTH) {
+            uint32_t old_recirc_id = md->recirc_id;
+            uint32_t old_dp_hash = md->dp_hash;
+            const struct ovs_action_recirc *act;
+            struct ofpbuf *recirc_packet;
 
-        act = nl_attr_get(a);
-        md->recirc_id = act->recirc_id;
-        md->dp_hash = 0;
+            recirc_packet = may_steal ? packet : ofpbuf_clone(packet);
 
-        if (act->hash_alg == OVS_RECIRC_HASH_ALG_L4) {
-            struct flow flow;
+            act = nl_attr_get(a);
+            md->recirc_id = act->recirc_id;
+            md->dp_hash = 0;
 
-            flow_extract(packet, md, &flow);
-            md->dp_hash = flow_hash_symmetric_l4(&flow, act->hash_bias);
-        }
+            if (act->hash_alg == OVS_RECIRC_HASH_ALG_L4) {
+                struct flow flow;
+
+                flow_extract(recirc_packet, md, &flow);
+                md->dp_hash = flow_hash_symmetric_l4(&flow, act->hash_bias);
+                if (!md->dp_hash) {
+                    md->dp_hash = 1;  /* 0 is not valid */
+                }
+            }
 
-        dp_netdev_port_input(aux->dp, packet, md);
+            (*depth)++;
+            dp_netdev_input(aux->dp, recirc_packet, md);
+            (*depth)--;
+
+            md->recirc_id = old_recirc_id;
+            md->dp_hash = old_dp_hash;
+            break;
+        } else {
+            VLOG_WARN("Packet dropped. Max recirculation depth exceeded.");
+        }
         break;
-    }
 
     case OVS_ACTION_ATTR_PUSH_VLAN:
     case OVS_ACTION_ATTR_POP_VLAN:
@@ -2144,7 +2177,6 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet,
     case __OVS_ACTION_ATTR_MAX:
         OVS_NOT_REACHED();
     }
-
 }
 
 static void
diff --git a/lib/flow.c b/lib/flow.c
index e1ea75e..adb894b 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -369,6 +369,8 @@ flow_extract(struct ofpbuf *packet, const struct pkt_metadata *md,
         flow->in_port = md->in_port;
         flow->skb_priority = md->skb_priority;
         flow->pkt_mark = md->pkt_mark;
+        flow->recirc_id = md->recirc_id;
+        flow->dp_hash = md->dp_hash;
     }
 
     ofpbuf_set_frame(packet, ofpbuf_data(packet));
diff --git a/lib/match.c b/lib/match.c
index 2969972..514e7f9 100644
--- a/lib/match.c
+++ b/lib/match.c
@@ -788,6 +788,34 @@ match_hash(const struct match *match, uint32_t basis)
     return flow_wildcards_hash(&match->wc, flow_hash(&match->flow, basis));
 }
 
+static bool
+match_has_default_recirc_id(const struct match *m)
+{
+    return m->flow.recirc_id == 0 && (m->wc.masks.recirc_id == UINT32_MAX ||
+                                      m->wc.masks.recirc_id == 0);
+}
+
+static bool
+match_has_default_dp_hash(const struct match *m)
+{
+    return ((m->flow.dp_hash | m->wc.masks.dp_hash) == 0);
+}
+
+/* Returns true if the hidden fields of 'm' are set to their default values.
+ * The default values are those set up by match_init_hidden_fields(). */
+bool
+match_has_default_hidden_fields(const struct match *m)
+{
+    return match_has_default_recirc_id(m) && match_has_default_dp_hash(m);
+}
+
+void
+match_init_hidden_fields(struct match *m)
+{
+    match_set_recirc_id(m, 0);
+    match_set_dp_hash_masked(m, 0, 0);
+}
+
 static void
 format_eth_masked(struct ds *s, const char *name, const uint8_t eth[6],
                   const uint8_t mask[6])
diff --git a/lib/match.h b/lib/match.h
index 95c8e67..2422fb1 100644
--- a/lib/match.h
+++ b/lib/match.h
@@ -134,6 +134,9 @@ void match_set_nd_target_masked(struct match *, const struct in6_addr *,
 bool match_equal(const struct match *, const struct match *);
 uint32_t match_hash(const struct match *, uint32_t basis);
 
+void match_init_hidden_fields(struct match *);
+bool match_has_default_hidden_fields(const struct match *);
+
 void match_format(const struct match *, struct ds *, unsigned int priority);
 char *match_to_string(const struct match *, unsigned int priority);
 void match_print(const struct match *);
diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index ac0dac0..37e44e3 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
+ * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
  * Copyright (c) 2013 Simon Horman
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -209,10 +209,11 @@ odp_execute_actions__(void *dp, struct ofpbuf *packet, bool steal,
         case OVS_ACTION_ATTR_USERSPACE:
         case OVS_ACTION_ATTR_RECIRC:
             if (dp_execute_action) {
-                bool may_steal;
                 /* Allow 'dp_execute_action' to steal the packet data if we do
                  * not need it any more. */
-                may_steal = steal && (!more_actions && left <= NLA_ALIGN(a->nla_len));
+                bool may_steal = steal && (!more_actions
+                                           && left <= NLA_ALIGN(a->nla_len)
+                                           && type != OVS_ACTION_ATTR_RECIRC);
                 dp_execute_action(dp, packet, md, a, may_steal);
             }
             break;
diff --git a/lib/ofp-parse.c b/lib/ofp-parse.c
index 92fb40f..7116d67 100644
--- a/lib/ofp-parse.c
+++ b/lib/ofp-parse.c
@@ -1323,6 +1323,9 @@ parse_ofp_str__(struct ofputil_flow_mod *fm, int command, char *string,
         } else if (fields & F_FLAGS && !strcmp(name, "no_byte_counts")) {
             fm->flags |= OFPUTIL_FF_NO_BYT_COUNTS;
             *usable_protocols &= OFPUTIL_P_OF13_UP;
+        } else if (!strcmp(name, "no_readonly_table")
+                   || !strcmp(name, "allow_hidden_fields")) {
+             /* ignore these fields. */
         } else {
             char *value;
 
diff --git a/lib/ofp-print.c b/lib/ofp-print.c
index c8c331e..95fa6d5 100644
--- a/lib/ofp-print.c
+++ b/lib/ofp-print.c
@@ -742,6 +742,12 @@ ofp_print_flow_flags(struct ds *s, enum ofputil_flow_mod_flags flags)
     if (flags & OFPUTIL_FF_NO_BYT_COUNTS) {
         ds_put_cstr(s, "no_byte_counts ");
     }
+    if (flags & OFPUTIL_FF_HIDDEN_FIELDS) {
+        ds_put_cstr(s, "allow_hidden_fields ");
+    }
+    if (flags & OFPUTIL_FF_NO_READONLY) {
+        ds_put_cstr(s, "no_readonly_table ");
+    }
 }
 
 static void
diff --git a/lib/ofp-util.h b/lib/ofp-util.h
index 298d595..245cc4e 100644
--- a/lib/ofp-util.h
+++ b/lib/ofp-util.h
@@ -246,6 +246,13 @@ enum ofputil_flow_mod_flags {
     OFPUTIL_FF_CHECK_OVERLAP = 1 << 3, /* All versions. */
     OFPUTIL_FF_EMERG         = 1 << 4, /* OpenFlow 1.0 only. */
     OFPUTIL_FF_RESET_COUNTS  = 1 << 5, /* OpenFlow 1.2+. */
+
+    /* Flags that are only set by OVS for its internal use.  Cannot be set via
+     * OpenFlow. */
+    OFPUTIL_FF_HIDDEN_FIELDS = 1 << 6, /* Allow hidden match fields to be
+                                          set or modified. */
+    OFPUTIL_FF_NO_READONLY   = 1 << 7, /* Allow rules within read only tables
+                                          to be modified */
 };
 
 /* Protocol-independent flow_mod.
diff --git a/ofproto/bond.c b/ofproto/bond.c
index 6812330..49dd49e 100644
--- a/ofproto/bond.c
+++ b/ofproto/bond.c
@@ -23,6 +23,11 @@
 #include <stdlib.h>
 #include <math.h>
 
+#include "ofp-util.h"
+#include "ofp-actions.h"
+#include "ofpbuf.h"
+#include "ofproto/ofproto-provider.h"
+#include "ofproto/ofproto-dpif.h"
 #include "connectivity.h"
 #include "coverage.h"
 #include "dynamic-string.h"
@@ -36,6 +41,7 @@
 #include "packets.h"
 #include "poll-loop.h"
 #include "seq.h"
+#include "match.h"
 #include "shash.h"
 #include "timeval.h"
 #include "unixctl.h"
@@ -50,6 +56,7 @@ static struct hmap *const all_bonds OVS_GUARDED_BY(rwlock) = &all_bonds__;
 /* Bit-mask for hashing a flow down to a bucket.
  * There are (BOND_MASK + 1) buckets. */
 #define BOND_MASK 0xff
+#define RECIRC_RULE_PRIORITY 20   /* Priority level for internal rules */
 
 /* A hash bucket for mapping a flow to a slave.
  * "struct bond" has an array of (BOND_MASK + 1) of these. */
@@ -57,6 +64,12 @@ struct bond_entry {
     struct bond_slave *slave;   /* Assigned slave, NULL if unassigned. */
     uint64_t tx_bytes;          /* Count of bytes recently transmitted. */
     struct list list_node;      /* In bond_slave's 'entries' list. */
+
+    /* Recirculation. */
+    struct rule *pr_rule;       /* Post recirculation rule for this entry.*/
+    uint64_t pr_tx_bytes;       /* Record the rule tx_bytes to figure out
+                                   the delta to update the tx_bytes entry
+                                   above.*/
 };
 
 /* A bond slave, that is, one of the links comprising a bond. */
@@ -68,6 +81,7 @@ struct bond_slave {
 
     struct netdev *netdev;      /* Network device, owned by the client. */
     unsigned int change_seq;    /* Tracks changes in 'netdev'. */
+    ofp_port_t  ofp_port;       /* Open flow port number */
     char *name;                 /* Name (a copy of netdev_get_name(netdev)). */
 
     /* Link status. */
@@ -86,6 +100,7 @@ struct bond_slave {
 struct bond {
     struct hmap_node hmap_node; /* In 'all_bonds' hmap. */
     char *name;                 /* Name provided by client. */
+    struct ofproto_dpif *ofproto; /* The bridge this bond belongs to. */
 
     /* Slaves. */
     struct hmap slaves;
@@ -111,6 +126,8 @@ struct bond {
     int rebalance_interval;      /* Interval between rebalances, in ms. */
     long long int next_rebalance; /* Next rebalancing time. */
     bool send_learning_packets;
+    uint32_t recirc_id;          /* Non zero if recirculation can be used.*/
+    struct hmap pr_rule_ops;     /* Helps to maintain post recirculation rules.*/
 
     /* Legacy compatibility. */
     long long int next_fake_iface_update; /* LLONG_MAX if disabled. */
@@ -119,6 +136,21 @@ struct bond {
     struct ovs_refcount ref_cnt;
 };
 
+/* What to do with a bond_pr_rule_op. */
+enum bond_op {
+    ADD,        /* Add the rule to ofproto's flow table. */
+    DEL,        /* Delete the rule from the ofproto's flow table. */
+};
+
+/* A rule to add to or delete from ofproto's internal flow table. */
+struct bond_pr_rule_op {
+    struct hmap_node hmap_node;
+    struct match match;
+    ofp_port_t out_ofport;
+    enum bond_op op;
+    struct rule *pr_rule;
+};
+
 static void bond_entry_reset(struct bond *) OVS_REQ_WRLOCK(rwlock);
 static struct bond_slave *bond_slave_lookup(struct bond *, const void *slave_)
     OVS_REQ_RDLOCK(rwlock);
@@ -185,17 +217,21 @@ bond_mode_to_string(enum bond_mode balance) {
  * The caller should register each slave on the new bond by calling
  * bond_slave_register().  */
 struct bond *
-bond_create(const struct bond_settings *s)
+bond_create(const struct bond_settings *s, struct ofproto_dpif *ofproto)
 {
     struct bond *bond;
 
     bond = xzalloc(sizeof *bond);
+    bond->ofproto = ofproto;
     hmap_init(&bond->slaves);
     list_init(&bond->enabled_slaves);
     ovs_mutex_init(&bond->mutex);
     bond->next_fake_iface_update = LLONG_MAX;
     ovs_refcount_init(&bond->ref_cnt);
 
+    bond->recirc_id = 0;
+    hmap_init(&bond->pr_rule_ops);
+
     bond_reconfigure(bond, s);
     return bond;
 }
@@ -216,6 +252,7 @@ void
 bond_unref(struct bond *bond)
 {
     struct bond_slave *slave, *next_slave;
+    struct bond_pr_rule_op *pr_op, *next_op;
 
     if (!bond || ovs_refcount_unref(&bond->ref_cnt) != 1) {
         return;
@@ -236,9 +273,124 @@ bond_unref(struct bond *bond)
     ovs_mutex_destroy(&bond->mutex);
     free(bond->hash);
     free(bond->name);
+
+    HMAP_FOR_EACH_SAFE(pr_op, next_op, hmap_node, &bond->pr_rule_ops) {
+        hmap_remove(&bond->pr_rule_ops, &pr_op->hmap_node);
+        free(pr_op);
+    }
+    hmap_destroy(&bond->pr_rule_ops);
+
+    if (bond->recirc_id) {
+        ofproto_dpif_free_recirc_id(bond->ofproto, bond->recirc_id);
+    }
+
     free(bond);
 }
 
+static void
+add_pr_rule(struct bond *bond, const struct match *match,
+            ofp_port_t out_ofport, struct rule *rule)
+{
+    uint32_t hash = match_hash(match, 0);
+    struct bond_pr_rule_op *pr_op;
+
+    HMAP_FOR_EACH_WITH_HASH(pr_op, hmap_node, hash, &bond->pr_rule_ops) {
+        if (match_equal(&pr_op->match, match)) {
+            pr_op->op = ADD;
+            pr_op->out_ofport = out_ofport;
+            pr_op->pr_rule = rule;
+            return;
+        }
+    }
+
+    pr_op = xmalloc(sizeof *pr_op);
+    pr_op->match = *match;
+    pr_op->op = ADD;
+    pr_op->out_ofport = out_ofport;
+    pr_op->pr_rule = rule;
+    hmap_insert(&bond->pr_rule_ops, &pr_op->hmap_node, hash);
+}
+
+static void
+update_recirc_rules(struct bond *bond)
+{
+    struct match match;
+    struct bond_pr_rule_op *pr_op, *next_op;
+    uint64_t ofpacts_stub[128 / 8];
+    struct ofpbuf ofpacts;
+    int i;
+
+    ofpbuf_use_stub(&ofpacts, ofpacts_stub, sizeof ofpacts_stub);
+
+    HMAP_FOR_EACH(pr_op, hmap_node, &bond->pr_rule_ops) {
+        pr_op->op = DEL;
+    }
+
+    if ((bond->hash == NULL) || (!bond->recirc_id)) {
+        return;
+    }
+
+    for (i = 0; i < BOND_MASK + 1; i++) {
+        struct bond_slave *slave = bond->hash[i].slave;
+
+        if (slave) {
+            match_init_catchall(&match);
+            match_set_recirc_id(&match, bond->recirc_id);
+            /* recirc_id -> metadata to speed up look ups. */
+            match_set_metadata(&match, htonll(bond->recirc_id));
+            match_set_dp_hash_masked(&match, i, BOND_MASK);
+
+            add_pr_rule(bond, &match, slave->ofp_port,
+                            bond->hash[i].pr_rule);
+        }
+    }
+
+    HMAP_FOR_EACH_SAFE(pr_op, next_op, hmap_node, &bond->pr_rule_ops) {
+        int error;
+        struct rule *rule;
+        switch (pr_op->op) {
+        case ADD:
+            ofpbuf_clear(&ofpacts);
+            ofpact_put_OUTPUT(&ofpacts)->port = pr_op->out_ofport;
+            error = ofproto_dpif_add_internal_flow(bond->ofproto,
+                                                   &pr_op->match,
+                                                   RECIRC_RULE_PRIORITY,
+                                                   &ofpacts, &rule);
+            if (error) {
+                char *err_s = match_to_string(&pr_op->match,
+                                              RECIRC_RULE_PRIORITY);
+
+                VLOG_ERR("failed to add post recirculation flow %s", err_s);
+                free(err_s);
+                pr_op->pr_rule = NULL;
+            } else {
+                pr_op->pr_rule = rule;
+            }
+            break;
+
+        case DEL:
+            error = ofproto_dpif_delete_internal_flow(bond->ofproto,
+                                                      &pr_op->match,
+                                                      RECIRC_RULE_PRIORITY);
+            if (error) {
+                char *err_s = match_to_string(&pr_op->match,
+                                              RECIRC_RULE_PRIORITY);
+
+                VLOG_ERR("failed to remove post recirculation flow %s", err_s);
+                free(err_s);
+            }
+
+            hmap_remove(&bond->pr_rule_ops, &pr_op->hmap_node);
+            pr_op->pr_rule = NULL;
+            free(pr_op);
+            break;
+        }
+    }
+
+    ofpbuf_uninit(&ofpacts);
+}
+
+
 /* Updates 'bond''s overall configuration to 's'.
  *
  * The caller should register each slave on 'bond' by calling
@@ -299,6 +451,15 @@ bond_reconfigure(struct bond *bond, const struct bond_settings *s)
         bond->bond_revalidate = false;
     }
 
+    if (bond->balance != BM_AB) {
+        if (!bond->recirc_id) {
+            bond->recirc_id = ofproto_dpif_alloc_recirc_id(bond->ofproto);
+        }
+    } else if (bond->recirc_id) {
+        ofproto_dpif_free_recirc_id(bond->ofproto, bond->recirc_id);
+        bond->recirc_id = 0;
+    }
+
     if (bond->balance == BM_AB || !bond->hash || revalidate) {
         bond_entry_reset(bond);
     }
@@ -327,7 +488,8 @@ bond_slave_set_netdev__(struct bond_slave *slave, struct netdev *netdev)
  * 'slave_' or destroying 'bond'.
  */
 void
-bond_slave_register(struct bond *bond, void *slave_, struct netdev *netdev)
+bond_slave_register(struct bond *bond, void *slave_,
+                    ofp_port_t ofport, struct netdev *netdev)
 {
     struct bond_slave *slave;
 
@@ -339,6 +501,7 @@ bond_slave_register(struct bond *bond, void *slave_, struct netdev *netdev)
         hmap_insert(&bond->slaves, &slave->hmap_node, hash_pointer(slave_, 0));
         slave->bond = bond;
         slave->aux = slave_;
+        slave->ofp_port = ofport;
         slave->delay_expires = LLONG_MAX;
         slave->name = xstrdup(netdev_get_name(netdev));
         bond->bond_revalidate = true;
@@ -688,6 +851,84 @@ bond_choose_output_slave(struct bond *bond, const struct flow *flow,
     return aux;
 }
 
+/* Recirculation. */
+static void
+bond_entry_account(struct bond_entry *entry, uint64_t rule_tx_bytes)
+    OVS_REQ_RDLOCK(rwlock)
+{
+    if (entry->slave) {
+        uint64_t delta;
+
+        delta = rule_tx_bytes - entry->pr_tx_bytes;
+        entry->tx_bytes += delta;
+        entry->pr_tx_bytes = rule_tx_bytes;
+    }
+}
+
+/* Maintain bond stats using post recirculation rule byte counters.*/
+void
+bond_recirculation_account(struct bond *bond)
+{
+    int i;
+
+    ovs_rwlock_rdlock(&rwlock);
+    for (i=0; i<=BOND_MASK; i++) {
+        struct bond_entry *entry = &bond->hash[i];
+        struct rule *rule = entry->pr_rule;
+
+        if (rule) {
+            uint64_t n_packets OVS_UNUSED;
+            long long int used OVS_UNUSED;
+            uint64_t n_bytes;
+
+            rule->ofproto->ofproto_class->rule_get_stats(
+                rule, &n_packets, &n_bytes, &used);
+            bond_entry_account(entry, n_bytes);
+        }
+    }
+    ovs_rwlock_unlock(&rwlock);
+}
+
+bool
+bond_may_recirc(const struct bond *bond, uint32_t *recirc_id,
+                uint32_t *hash_bias)
+{
+    if (bond->balance == BM_TCP) {
+        if (recirc_id) {
+            *recirc_id = bond->recirc_id;
+        }
+        if (hash_bias) {
+            *hash_bias = bond->basis;
+        }
+        return true;
+    } else {
+        return false;
+    }
+}
+
+void
+bond_update_post_recirc_rules(struct bond* bond, const bool force)
+{
+   struct bond_entry *e;
+   bool update_rules = force;  /* Always update rules if caller forces it. */
+
+   /* Make sure all bond entries are populated */
+   for (e = bond->hash; e <= &bond->hash[BOND_MASK]; e++) {
+       if (!e->slave || !e->slave->enabled) {
+            update_rules = true;
+            e->slave = CONTAINER_OF(hmap_random_node(&bond->slaves),
+                                    struct bond_slave, hmap_node);
+            if (!e->slave->enabled) {
+                e->slave = bond->active_slave;
+            }
+        }
+   }
+
+   if (update_rules) {
+        update_recirc_rules(bond);
+   }
+}
+
 /* Rebalancing. */
 
 static bool
@@ -845,19 +1086,22 @@ reinsert_bal(struct list *bals, struct bond_slave *slave)
 
 /* If 'bond' needs rebalancing, does so.
  *
- * The caller should have called bond_account() for each active flow, to ensure
- * that flow data is consistently accounted at this point. */
-void
+ * The caller should have called bond_account() for each active flow, or, if
+ * recirculation is in use, have called bond_recirculation_account(bond),
+ * to ensure that flow data is consistently accounted at this point.
+ *
+ * Returns whether rebalancing took place. */
+bool
 bond_rebalance(struct bond *bond)
 {
     struct bond_slave *slave;
     struct bond_entry *e;
     struct list bals;
+    bool rebalanced = false;
 
     ovs_rwlock_wrlock(&rwlock);
     if (!bond_is_balanced(bond) || time_msec() < bond->next_rebalance) {
-        ovs_rwlock_unlock(&rwlock);
-        return;
+        goto done;
     }
     bond->next_rebalance = time_msec() + bond->rebalance_interval;
 
@@ -916,6 +1160,7 @@ bond_rebalance(struct bond *bond)
             /* Re-sort 'bals'. */
             reinsert_bal(&bals, from);
             reinsert_bal(&bals, to);
+	    rebalanced = true;
         } else {
             /* Can't usefully migrate anything away from 'from'.
              * Don't reconsider it. */
@@ -932,7 +1177,10 @@ bond_rebalance(struct bond *bond)
             e->slave = NULL;
         }
     }
+
+done:
     ovs_rwlock_unlock(&rwlock);
+    return rebalanced;
 }
 
 /* Bonding unixctl user interface functions. */
@@ -972,15 +1220,15 @@ bond_unixctl_list(struct unixctl_conn *conn,
     struct ds ds = DS_EMPTY_INITIALIZER;
     const struct bond *bond;
 
-    ds_put_cstr(&ds, "bond\ttype\tslaves\n");
+    ds_put_cstr(&ds, "bond\ttype\trecircID\tslaves\n");
 
     ovs_rwlock_rdlock(&rwlock);
     HMAP_FOR_EACH (bond, hmap_node, all_bonds) {
         const struct bond_slave *slave;
         size_t i;
 
-        ds_put_format(&ds, "%s\t%s\t",
-                      bond->name, bond_mode_to_string(bond->balance));
+        ds_put_format(&ds, "%s\t%s\t%d\t", bond->name,
+                      bond_mode_to_string(bond->balance), bond->recirc_id);
 
         i = 0;
         HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) {
@@ -1003,12 +1251,18 @@ bond_print_details(struct ds *ds, const struct bond *bond)
     struct shash slave_shash = SHASH_INITIALIZER(&slave_shash);
     const struct shash_node **sorted_slaves = NULL;
     const struct bond_slave *slave;
+    bool may_recirc;
+    uint32_t recirc_id;
     int i;
 
     ds_put_format(ds, "---- %s ----\n", bond->name);
     ds_put_format(ds, "bond_mode: %s\n",
                   bond_mode_to_string(bond->balance));
 
+    may_recirc = bond_may_recirc(bond, &recirc_id, NULL);
+    ds_put_format(ds, "bond may use recirculation: %s, Recirc-ID : %d\n",
+                  may_recirc ? "yes" : "no", may_recirc ? recirc_id: -1);
+
     ds_put_format(ds, "bond-hash-basis: %"PRIu32"\n", bond->basis);
 
     ds_put_format(ds, "updelay: %d ms\n", bond->updelay);
diff --git a/ofproto/bond.h b/ofproto/bond.h
index 5b3814e..e5ceb45 100644
--- a/ofproto/bond.h
+++ b/ofproto/bond.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, 2009, 2010, 2011 Nicira, Inc.
+ * Copyright (c) 2008, 2009, 2010, 2011, 2014 Nicira, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -19,12 +19,13 @@
 
 #include <stdbool.h>
 #include <stdint.h>
-
+#include "ofproto-provider.h"
 #include "packets.h"
 
 struct flow;
 struct netdev;
 struct ofpbuf;
+struct ofproto_dpif;
 enum lacp_status;
 
 /* How flows are balanced among bond slaves. */
@@ -60,12 +61,13 @@ struct bond_settings {
 void bond_init(void);
 
 /* Basics. */
-struct bond *bond_create(const struct bond_settings *);
+struct bond *bond_create(const struct bond_settings *,
+                         struct ofproto_dpif *ofproto);
 void bond_unref(struct bond *);
 struct bond *bond_ref(const struct bond *);
 
 bool bond_reconfigure(struct bond *, const struct bond_settings *);
-void bond_slave_register(struct bond *, void *slave_, struct netdev *);
+void bond_slave_register(struct bond *, void *slave_, ofp_port_t ofport, struct netdev *);
 void bond_slave_set_netdev(struct bond *, void *slave_, struct netdev *);
 void bond_slave_unregister(struct bond *, const void *slave);
 
@@ -94,6 +96,27 @@ void *bond_choose_output_slave(struct bond *, const struct flow *,
 /* Rebalancing. */
 void bond_account(struct bond *, const struct flow *, uint16_t vlan,
                   uint64_t n_bytes);
-void bond_rebalance(struct bond *);
+bool bond_rebalance(struct bond *);
 
+/* Recirculation
+ *
+ * Only balance_tcp mode uses recirculation.
+ *
+ * When recirculation is used, each bond port is assigned a unique
+ * recirc_id. The output action to the bond port will be replaced by
+ * a RECIRC action.
+ *
+ *   ... actions= ... RECIRC(L4_HASH, recirc_id) ....
+ *
+ * On the first output packet, 256 post recirculation flows are installed:
+ *
+ *  recirc_id=<bond_recirc_id>, dp_hash=<[0..255]>/0xff, actions: output<slave>
+ *
+ * Bond module pulls stats from those post recirculation rules. If rebalancing
+ * is needed, those rules are updated with new output actions.
+*/
+void bond_update_post_recirc_rules(struct bond *, const bool force);
+bool bond_may_recirc(const struct bond *, uint32_t *recirc_id,
+                     uint32_t *hash_bias);
+void bond_recirculation_account(struct bond *);
 #endif /* bond.h */
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index b8e8084..c3040d7 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -58,6 +58,8 @@ VLOG_DEFINE_THIS_MODULE(ofproto_dpif_xlate);
 /* Maximum depth of flow table recursion (due to resubmit actions) in a
  * flow translation. */
 #define MAX_RESUBMIT_RECURSION 64
+#define MAX_INTERNAL_RESUBMITS 1   /* Max resubmits allowed using rules in
+                                      internal table. */
 
 /* Maximum number of resubmit actions in a flow translation, whether they are
  * recursive or not. */
@@ -89,6 +91,9 @@ struct xbridge {
     bool has_in_band;             /* Bridge has in band control? */
     bool forward_bpdu;            /* Bridge forwards STP BPDUs? */
 
+    /* True if the datapath supports recirculation. */
+    bool enable_recirc;
+
     /* True if the datapath supports variable-length
      * OVS_USERSPACE_ATTR_USERDATA in OVS_ACTION_ATTR_USERSPACE actions.
      * False if the datapath supports only 8-byte (or shorter) userdata. */
@@ -226,8 +231,8 @@ static void do_xlate_actions(const struct ofpact *, size_t ofpacts_len,
                              struct xlate_ctx *);
 static void xlate_actions__(struct xlate_in *, struct xlate_out *)
     OVS_REQ_RDLOCK(xlate_rwlock);
-    static void xlate_normal(struct xlate_ctx *);
-    static void xlate_report(struct xlate_ctx *, const char *);
+static void xlate_normal(struct xlate_ctx *);
+static void xlate_report(struct xlate_ctx *, const char *);
 static void xlate_table_action(struct xlate_ctx *, ofp_port_t in_port,
                                uint8_t table_id, bool may_packet_in,
                                bool honor_table_miss);
@@ -257,6 +262,7 @@ xlate_ofproto_set(struct ofproto_dpif *ofproto, const char *name,
                   const struct dpif_ipfix *ipfix,
                   const struct netflow *netflow, enum ofp_config_flags frag,
                   bool forward_bpdu, bool has_in_band,
+                  bool enable_recirc,
                   bool variable_length_userdata,
                   size_t max_mpls_depth)
 {
@@ -310,6 +316,7 @@ xlate_ofproto_set(struct ofproto_dpif *ofproto, const char *name,
     xbridge->frag = frag;
     xbridge->miss_rule = miss_rule;
     xbridge->no_packet_in_rule = no_packet_in_rule;
+    xbridge->enable_recirc = enable_recirc;
     xbridge->variable_length_userdata = variable_length_userdata;
     xbridge->max_mpls_depth = max_mpls_depth;
 }
@@ -1131,10 +1138,23 @@ output_normal(struct xlate_ctx *ctx, const struct xbundle *out_xbundle,
         /* Partially configured bundle with no slaves.  Drop the packet. */
         return;
     } else if (!out_xbundle->bond) {
+        ctx->xout->use_recirc = false;
         xport = CONTAINER_OF(list_front(&out_xbundle->xports), struct xport,
                              bundle_node);
     } else {
         struct ofport_dpif *ofport;
+        struct xlate_recirc *xr = &ctx->xout->recirc;
+
+        if (ctx->xbridge->enable_recirc) {
+            ctx->xout->use_recirc = bond_may_recirc(
+                out_xbundle->bond, &xr->recirc_id, &xr->hash_bias);
+
+            if (ctx->xout->use_recirc) {
+                /* Only TCP mode uses recirculation. */
+                xr->hash_alg = OVS_RECIRC_HASH_ALG_L4;
+                bond_update_post_recirc_rules(out_xbundle->bond, false);
+            }
+        }
 
         ofport = bond_choose_output_slave(out_xbundle->bond, &ctx->xin->flow,
                                           &ctx->xout->wc, vid);
@@ -1817,8 +1837,20 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
         ctx->xout->slow |= commit_odp_actions(flow, &ctx->base_flow,
                                               &ctx->xout->odp_actions,
                                               &ctx->xout->wc);
-        nl_msg_put_odp_port(&ctx->xout->odp_actions, OVS_ACTION_ATTR_OUTPUT,
-                            out_port);
+
+        if (ctx->xout->use_recirc) {
+            struct ovs_action_recirc *act_recirc;
+            struct xlate_recirc *xr = &ctx->xout->recirc;
+
+            act_recirc = nl_msg_put_unspec_uninit(&ctx->xout->odp_actions,
+                               OVS_ACTION_ATTR_RECIRC, sizeof *act_recirc);
+            act_recirc->recirc_id = xr->recirc_id;
+            act_recirc->hash_alg = xr->hash_alg;
+            act_recirc->hash_bias = xr->hash_bias;
+        } else {
+            nl_msg_put_odp_port(&ctx->xout->odp_actions, OVS_ACTION_ATTR_OUTPUT,
+                                out_port);
+        }
 
         ctx->sflow_odp_port = odp_port;
         ctx->sflow_n_outputs++;
@@ -1862,10 +1894,10 @@ xlate_resubmit_resource_check(struct xlate_ctx *ctx)
 {
     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
 
-    if (ctx->recurse >= MAX_RESUBMIT_RECURSION) {
+    if (ctx->recurse >= MAX_RESUBMIT_RECURSION + MAX_INTERNAL_RESUBMITS) {
         VLOG_ERR_RL(&rl, "resubmit actions recursed over %d times",
                     MAX_RESUBMIT_RECURSION);
-    } else if (ctx->resubmits >= MAX_RESUBMITS) {
+    } else if (ctx->resubmits >= MAX_RESUBMITS + MAX_INTERNAL_RESUBMITS) {
         VLOG_ERR_RL(&rl, "over %d resubmit actions", MAX_RESUBMITS);
     } else if (ofpbuf_size(&ctx->xout->odp_actions) > UINT16_MAX) {
         VLOG_ERR_RL(&rl, "resubmits yielded over 64 kB of actions");
@@ -2086,6 +2118,15 @@ xlate_ofpact_resubmit(struct xlate_ctx *ctx,
 {
     ofp_port_t in_port;
     uint8_t table_id;
+    bool may_packet_in = false;
+    bool honor_table_miss = false;
+
+    if (ctx->rule && rule_dpif_is_internal(ctx->rule)) {
+        /* Still allow missed packets to be sent to the controller
+         * if resubmitting from an internal table. */
+        may_packet_in = true;
+        honor_table_miss = true;
+    }
 
     in_port = resubmit->in_port;
     if (in_port == OFPP_IN_PORT) {
@@ -2097,7 +2138,8 @@ xlate_ofpact_resubmit(struct xlate_ctx *ctx,
         table_id = ctx->table_id;
     }
 
-    xlate_table_action(ctx, in_port, table_id, false, false);
+    xlate_table_action(ctx, in_port, table_id, may_packet_in,
+                       honor_table_miss);
 }
 
 static void
@@ -3069,6 +3111,7 @@ xlate_actions__(struct xlate_in *xin, struct xlate_out *xout)
         ctx.rule = rule;
     }
     xout->fail_open = ctx.rule && rule_dpif_is_fail_open(ctx.rule);
+    xout->use_recirc = false;
 
     if (xin->ofpacts) {
         ofpacts = xin->ofpacts;
diff --git a/ofproto/ofproto-dpif-xlate.h b/ofproto/ofproto-dpif-xlate.h
index 8b01d4e..8b53e10 100644
--- a/ofproto/ofproto-dpif-xlate.h
+++ b/ofproto/ofproto-dpif-xlate.h
@@ -32,6 +32,12 @@ struct dpif_ipfix;
 struct dpif_sflow;
 struct mac_learning;
 
+struct xlate_recirc {
+    uint32_t recirc_id;  /* !0 Use recirculation instead of output. */
+    uint8_t  hash_alg;   /* !0 Compute hash for recirc before. */
+    uint32_t hash_bias;  /* Compute hash for recirc before. */
+};
+
 struct xlate_out {
     /* Wildcards relevant in translation.  Any fields that were used to
      * calculate the action must be set for caching and kernel
@@ -50,6 +56,9 @@ struct xlate_out {
     ofp_port_t nf_output_iface; /* Output interface index for NetFlow. */
     mirror_mask_t mirrors;      /* Bitmap of associated mirrors. */
 
+    bool use_recirc;            /* Should generate recirc? */
+    struct xlate_recirc recirc; /* Information used for generating
+                                 * recirculation actions */
     uint64_t odp_actions_stub[256 / 8];
     struct ofpbuf odp_actions;
 };
@@ -129,7 +138,8 @@ void xlate_ofproto_set(struct ofproto_dpif *, const char *name,
                        const struct mbridge *, const struct dpif_sflow *,
                        const struct dpif_ipfix *, const struct netflow *,
                        enum ofp_config_flags, bool forward_bpdu,
-                       bool has_in_band, bool variable_length_userdata,
+                       bool has_in_band, bool enable_recirc,
+                       bool variable_length_userdata,
                        size_t mpls_label_stack_length)
     OVS_REQ_WRLOCK(xlate_rwlock);
 void xlate_remove_ofproto(struct ofproto_dpif *) OVS_REQ_WRLOCK(xlate_rwlock);
@@ -161,8 +171,8 @@ int xlate_receive(const struct dpif_backer *, struct ofpbuf *packet,
 void xlate_actions(struct xlate_in *, struct xlate_out *)
     OVS_EXCLUDED(xlate_rwlock);
 void xlate_in_init(struct xlate_in *, struct ofproto_dpif *,
-                   const struct flow *, struct rule_dpif *, uint16_t tcp_flags,
-                   const struct ofpbuf *packet);
+                   const struct flow *, struct rule_dpif *,
+                   uint16_t tcp_flags, const struct ofpbuf *packet);
 void xlate_out_uninit(struct xlate_out *);
 void xlate_actions_for_side_effects(struct xlate_in *);
 void xlate_out_copy(struct xlate_out *dst, const struct xlate_out *src);
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index f72d53e..cb01516 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -253,7 +253,9 @@ struct dpif_backer {
 
     bool recv_set_enable; /* Enables or disables receiving packets. */
 
+    /* Recirculation. */
     struct recirc_id_pool *rid_pool;       /* Recirculation ID pool. */
+    bool enable_recirc;   /* True if the datapath supports recirculation */
 
     /* True if the datapath supports variable-length
      * OVS_USERSPACE_ATTR_USERDATA in OVS_ACTION_ATTR_USERSPACE actions.
@@ -332,9 +334,15 @@ ofproto_dpif_get_max_mpls_depth(const struct ofproto_dpif *ofproto)
     return ofproto->backer->max_mpls_depth;
 }
 
+bool
+ofproto_dpif_get_enable_recirc(const struct ofproto_dpif *ofproto)
+{
+    return ofproto->backer->enable_recirc;
+}
+
 static struct ofport_dpif *get_ofp_port(const struct ofproto_dpif *ofproto,
                                         ofp_port_t ofp_port);
-static void ofproto_trace(struct ofproto_dpif *, const struct flow *,
+static void ofproto_trace(struct ofproto_dpif *, struct flow *,
                           const struct ofpbuf *packet,
                           const struct ofpact[], size_t ofpacts_len,
                           struct ds *);
@@ -583,6 +591,7 @@ type_run(const char *type)
                               ofproto->netflow, ofproto->up.frag_handling,
                               ofproto->up.forward_bpdu,
                               connmgr_has_in_band(ofproto->up.connmgr),
+                              ofproto->backer->enable_recirc,
                               ofproto->backer->variable_length_userdata,
                               ofproto->backer->max_mpls_depth);
 
@@ -808,6 +817,7 @@ struct odp_garbage {
 
 static bool check_variable_length_userdata(struct dpif_backer *backer);
 static size_t check_max_mpls_depth(struct dpif_backer *backer);
+static bool check_recirc(struct dpif_backer *backer);
 
 static int
 open_dpif_backer(const char *type, struct dpif_backer **backerp)
@@ -908,6 +918,7 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp)
         close_dpif_backer(backer);
         return error;
     }
+    backer->enable_recirc = check_recirc(backer);
     backer->variable_length_userdata = check_variable_length_userdata(backer);
     backer->max_mpls_depth = check_max_mpls_depth(backer);
     backer->rid_pool = recirc_id_pool_create();
@@ -919,6 +930,59 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp)
     return error;
 }
 
+/* Tests whether 'backer''s datapath supports recirculation.  Only newer
+ * datapaths accept nonzero recirc_id and dp_hash fields in a flow key.  We need
+ * to disable some features on older datapaths that don't support this feature.
+ *
+ * Returns false if 'backer' definitely does not support recirculation, true if
+ * it seems to support recirculation or if at least the error we get is
+ * ambiguous. */
+static bool
+check_recirc(struct dpif_backer *backer)
+{
+    struct flow flow;
+    struct odputil_keybuf keybuf;
+    struct ofpbuf key;
+    int error;
+    bool enable_recirc = false;
+
+    memset(&flow, 0, sizeof flow);
+    flow.recirc_id = 1;
+    flow.dp_hash = 1;
+
+    ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
+    odp_flow_key_from_flow(&key, &flow, 0);
+
+    error = dpif_flow_put(backer->dpif, DPIF_FP_CREATE | DPIF_FP_MODIFY,
+                          key.data, key.size, NULL, 0, NULL, 0, NULL);
+    if (error && error != EEXIST) {
+        if (error != EINVAL) {
+            VLOG_WARN("%s: Reciculation flow probe failed (%s)",
+                      dpif_name(backer->dpif), ovs_strerror(error));
+        }
+        goto done;
+    }
+
+    error = dpif_flow_del(backer->dpif, key.data, key.size, NULL);
+    if (error) {
+        VLOG_WARN("%s: failed to delete recirculation feature probe flow",
+                  dpif_name(backer->dpif));
+    }
+
+    enable_recirc = true;
+
+done:
+    if (enable_recirc) {
+        VLOG_INFO("%s: Datapath supports recirculation",
+                  dpif_name(backer->dpif));
+    } else {
+        VLOG_INFO("%s: Datapath does not support recirculation",
+                  dpif_name(backer->dpif));
+    }
+
+    return enable_recirc;
+}
+
 /* Tests whether 'backer''s datapath supports variable-length
  * OVS_USERSPACE_ATTR_USERDATA in OVS_ACTION_ATTR_USERSPACE actions.  We need
  * to disable some features on older datapaths that don't support this
@@ -1102,51 +1166,27 @@ construct(struct ofproto *ofproto_)
 
     ofproto_init_tables(ofproto_, N_TABLES);
     error = add_internal_flows(ofproto);
+
     ofproto->up.tables[TBL_INTERNAL].flags = OFTABLE_HIDDEN | OFTABLE_READONLY;
 
     return error;
 }
 
 static int
-add_internal_flow(struct ofproto_dpif *ofproto, int id,
+add_internal_miss_flow(struct ofproto_dpif *ofproto, int id,
                   const struct ofpbuf *ofpacts, struct rule_dpif **rulep)
 {
-    struct ofputil_flow_mod fm;
-    struct classifier *cls;
+    struct match match;
     int error;
+    struct rule *rule;
 
-    match_init_catchall(&fm.match);
-    fm.priority = 0;
-    match_set_reg(&fm.match, 0, id);
-    fm.new_cookie = htonll(0);
-    fm.cookie = htonll(0);
-    fm.cookie_mask = htonll(0);
-    fm.modify_cookie = false;
-    fm.table_id = TBL_INTERNAL;
-    fm.command = OFPFC_ADD;
-    fm.idle_timeout = 0;
-    fm.hard_timeout = 0;
-    fm.buffer_id = 0;
-    fm.out_port = 0;
-    fm.flags = 0;
-    fm.ofpacts = ofpbuf_data(ofpacts);
-    fm.ofpacts_len = ofpbuf_size(ofpacts);
-
-    error = ofproto_flow_mod(&ofproto->up, &fm);
-    if (error) {
-        VLOG_ERR_RL(&rl, "failed to add internal flow %d (%s)",
-                    id, ofperr_to_string(error));
-        return error;
-    }
+    match_init_catchall(&match);
+    match_set_reg(&match, 0, id);
 
-    cls = &ofproto->up.tables[TBL_INTERNAL].cls;
-    fat_rwlock_rdlock(&cls->rwlock);
-    *rulep = rule_dpif_cast(rule_from_cls_rule(
-                                classifier_lookup(cls, &fm.match.flow, NULL)));
-    ovs_assert(*rulep != NULL);
-    fat_rwlock_unlock(&cls->rwlock);
+    error = ofproto_dpif_add_internal_flow(ofproto, &match, 0, ofpacts, &rule);
+    *rulep = error ? NULL : rule_dpif_cast(rule);
 
-    return 0;
+    return error;
 }
 
 static int
@@ -1155,6 +1195,9 @@ add_internal_flows(struct ofproto_dpif *ofproto)
     struct ofpact_controller *controller;
     uint64_t ofpacts_stub[128 / 8];
     struct ofpbuf ofpacts;
+    struct rule *unused_rulep OVS_UNUSED;
+    struct ofpact_resubmit *resubmit;
+    struct match match;
     int error;
     int id;
 
@@ -1167,20 +1210,53 @@ add_internal_flows(struct ofproto_dpif *ofproto)
     controller->reason = OFPR_NO_MATCH;
     ofpact_pad(&ofpacts);
 
-    error = add_internal_flow(ofproto, id++, &ofpacts, &ofproto->miss_rule);
+    error = add_internal_miss_flow(ofproto, id++, &ofpacts,
+                                   &ofproto->miss_rule);
     if (error) {
         return error;
     }
 
     ofpbuf_clear(&ofpacts);
-    error = add_internal_flow(ofproto, id++, &ofpacts,
+    error = add_internal_miss_flow(ofproto, id++, &ofpacts,
                               &ofproto->no_packet_in_rule);
     if (error) {
         return error;
     }
 
-    error = add_internal_flow(ofproto, id++, &ofpacts,
+    error = add_internal_miss_flow(ofproto, id++, &ofpacts,
                               &ofproto->drop_frags_rule);
+    if (error) {
+        return error;
+    }
+
+    /* Continue non-recirculation rule lookups from table 0.
+     *
+     * (priority=2), recirc=0, actions=resubmit(, 0)
+     */
+    resubmit = ofpact_put_RESUBMIT(&ofpacts);
+    resubmit->ofpact.compat = 0;
+    resubmit->in_port = OFPP_IN_PORT;
+    resubmit->table_id = 0;
+
+    match_init_catchall(&match);
+    match_set_recirc_id(&match, 0);
+
+    error = ofproto_dpif_add_internal_flow(ofproto, &match, 2,  &ofpacts,
+                                           &unused_rulep);
+    if (error) {
+        return error;
+    }
+
+    /* Drop any runaway recirc rule lookups.  Recirc_id has to be
+     * non-zero when reaching this rule.
+     *
+     * (priority=1), *, actions=drop
+     */
+    ofpbuf_clear(&ofpacts);
+    match_init_catchall(&match);
+    error = ofproto_dpif_add_internal_flow(ofproto, &match, 1,  &ofpacts,
+                                           &unused_rulep);
+
     return error;
 }
 
@@ -1248,6 +1324,7 @@ run(struct ofproto *ofproto_)
 {
     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
     uint64_t new_seq, new_dump_seq;
+    const bool enable_recirc = ofproto_dpif_get_enable_recirc(ofproto);
 
     if (mbridge_need_revalidate(ofproto->mbridge)) {
         ofproto->backer->need_revalidate = REV_RECONFIGURE;
@@ -1325,12 +1402,17 @@ run(struct ofproto *ofproto_)
 
         /* All outstanding data in existing flows has been accounted, so it's a
          * good time to do bond rebalancing. */
-        if (ofproto->has_bonded_bundles) {
+        if (enable_recirc && ofproto->has_bonded_bundles) {
             struct ofbundle *bundle;
 
             HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
-                if (bundle->bond) {
-                    bond_rebalance(bundle->bond);
+                struct bond *bond = bundle->bond;
+
+                if (bond && bond_may_recirc(bond, NULL, NULL)) {
+                    bond_recirculation_account(bond);
+                    if (bond_rebalance(bundle->bond)) {
+                        bond_update_post_recirc_rules(bond, true);
+                    }
                 }
             }
         }
@@ -2348,12 +2430,13 @@ bundle_set(struct ofproto *ofproto_, void *aux,
                 ofproto->backer->need_revalidate = REV_RECONFIGURE;
             }
         } else {
-            bundle->bond = bond_create(s->bond);
+            bundle->bond = bond_create(s->bond, ofproto);
             ofproto->backer->need_revalidate = REV_RECONFIGURE;
         }
 
         LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
-            bond_slave_register(bundle->bond, port, port->up.netdev);
+            bond_slave_register(bundle->bond, port,
+                                port->up.ofp_port, port->up.netdev);
         }
     } else {
         bond_unref(bundle->bond);
@@ -3003,6 +3086,7 @@ ofproto_dpif_execute_actions(struct ofproto_dpif *ofproto,
     ovs_assert((rule != NULL) != (ofpacts != NULL));
 
     dpif_flow_stats_extract(flow, packet, time_msec(), &stats);
+
     if (rule) {
         rule_dpif_credit_stats(rule, &stats);
     }
@@ -3085,20 +3169,13 @@ rule_dpif_get_actions(const struct rule_dpif *rule)
     return rule_get_actions(&rule->up);
 }
 
-/* Lookup 'flow' in table 0 of 'ofproto''s classifier.
- * If 'wc' is non-null, sets the fields that were relevant as part of
- * the lookup. Returns the table_id where a match or miss occurred.
- *
- * The return value will be zero unless there was a miss and
- * OFPTC_TABLE_MISS_CONTINUE is in effect for the sequence of tables
- * where misses occur. */
-uint8_t
-rule_dpif_lookup(struct ofproto_dpif *ofproto, const struct flow *flow,
-                 struct flow_wildcards *wc, struct rule_dpif **rule)
+static uint8_t
+rule_dpif_lookup__ (struct ofproto_dpif *ofproto, const struct flow *flow,
+                    struct flow_wildcards *wc, struct rule_dpif **rule)
 {
     enum rule_dpif_lookup_verdict verdict;
     enum ofputil_port_config config = 0;
-    uint8_t table_id = 0;
+    uint8_t table_id = TBL_INTERNAL;
 
     verdict = rule_dpif_lookup_from_table(ofproto, flow, wc, true,
                                           &table_id, rule);
@@ -3134,6 +3211,23 @@ rule_dpif_lookup(struct ofproto_dpif *ofproto, const struct flow *flow,
     return table_id;
 }
 
+/* Lookup 'flow' in table 0 of 'ofproto''s classifier.
+ * If 'wc' is non-null, sets the fields that were relevant as part of
+ * the lookup. Returns the table_id where a match or miss occurred.
+ *
+ * The return value will be zero unless there was a miss and
+ * OFPTC_TABLE_MISS_CONTINUE is in effect for the sequence of tables
+ * where misses occur. */
+uint8_t
+rule_dpif_lookup(struct ofproto_dpif *ofproto, struct flow *flow,
+                 struct flow_wildcards *wc, struct rule_dpif **rule)
+{
+    /* Set metadata to the value of recirc_id to speed up internal
+     * rule lookup. */
+    flow->metadata = htonll(flow->recirc_id);
+    return rule_dpif_lookup__(ofproto, flow, wc, rule);
+}
+
 static struct rule_dpif *
 rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto, uint8_t table_id,
                           const struct flow *flow, struct flow_wildcards *wc)
@@ -4058,7 +4152,7 @@ exit:
  * If 'ofpacts' is nonnull then its 'ofpacts_len' bytes specify the actions to
  * trace, otherwise the actions are determined by a flow table lookup. */
 static void
-ofproto_trace(struct ofproto_dpif *ofproto, const struct flow *flow,
+ofproto_trace(struct ofproto_dpif *ofproto, struct flow *flow,
               const struct ofpbuf *packet,
               const struct ofpact ofpacts[], size_t ofpacts_len,
               struct ds *ds)
@@ -4410,7 +4504,7 @@ set_realdev(struct ofport *ofport_, ofp_port_t realdev_ofp_port, int vid)
     if (realdev_ofp_port && ofport->bundle) {
         /* vlandevs are enslaved to their realdevs, so they are not allowed to
          * themselves be part of a bundle. */
-        bundle_set(ofport->up.ofproto, ofport->bundle, NULL);
+        bundle_set(ofport_->ofproto, ofport->bundle, NULL);
     }
 
     ofport->realdev_ofp_port = realdev_ofp_port;
@@ -4661,6 +4755,78 @@ ofproto_dpif_free_recirc_id(struct ofproto_dpif *ofproto, uint32_t recirc_id)
     recirc_id_free(backer->rid_pool, recirc_id);
 }
 
+int
+ofproto_dpif_add_internal_flow(struct ofproto_dpif *ofproto,
+                               struct match *match, int priority,
+                               const struct ofpbuf *ofpacts,
+                               struct rule **rulep)
+{
+    struct ofputil_flow_mod fm;
+    struct rule_dpif *rule;
+    int error;
+
+    fm.match = *match;
+    fm.priority = priority;
+    fm.new_cookie = htonll(0);
+    fm.cookie = htonll(0);
+    fm.cookie_mask = htonll(0);
+    fm.modify_cookie = false;
+    fm.table_id = TBL_INTERNAL;
+    fm.command = OFPFC_ADD;
+    fm.idle_timeout = 0;
+    fm.hard_timeout = 0;
+    fm.buffer_id = 0;
+    fm.out_port = 0;
+    fm.flags = OFPUTIL_FF_HIDDEN_FIELDS | OFPUTIL_FF_NO_READONLY;
+    fm.ofpacts = ofpacts->data;
+    fm.ofpacts_len = ofpacts->size;
+
+    error = ofproto_flow_mod(&ofproto->up, &fm);
+    if (error) {
+        VLOG_ERR_RL(&rl, "failed to add internal flow (%s)",
+                    ofperr_to_string(error));
+        *rulep = NULL;
+        return error;
+    }
+
+    rule = rule_dpif_lookup_in_table(ofproto, TBL_INTERNAL, &match->flow,
+                                     &match->wc);
+    if (rule) {
+        rule_dpif_unref(rule);
+        *rulep = &rule->up;
+    } else {
+        OVS_NOT_REACHED();
+    }
+    return 0;
+}
+
+int
+ofproto_dpif_delete_internal_flow(struct ofproto_dpif *ofproto,
+                                  struct match *match, int priority)
+{
+    struct ofputil_flow_mod fm;
+    int error;
+
+    fm.match = *match;
+    fm.priority = priority;
+    fm.new_cookie = htonll(0);
+    fm.cookie = htonll(0);
+    fm.cookie_mask = htonll(0);
+    fm.modify_cookie = false;
+    fm.table_id = TBL_INTERNAL;
+    fm.flags = OFPUTIL_FF_HIDDEN_FIELDS | OFPUTIL_FF_NO_READONLY;
+    fm.command = OFPFC_DELETE_STRICT;
+
+    error = ofproto_flow_mod(&ofproto->up, &fm);
+    if (error) {
+        VLOG_ERR_RL(&rl, "failed to delete internal flow (%s)",
+                    ofperr_to_string(error));
+        return error;
+    }
+
+    return 0;
+}
+
 const struct ofproto_class ofproto_dpif_class = {
     init,
     enumerate_types,
diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h
index ae6f9b7..ed0aa90 100644
--- a/ofproto/ofproto-dpif.h
+++ b/ofproto/ofproto-dpif.h
@@ -21,6 +21,7 @@
 #include "odp-util.h"
 #include "ofp-util.h"
 #include "ovs-thread.h"
+#include "ofproto-provider.h"
 #include "timer.h"
 #include "util.h"
 #include "ovs-thread.h"
@@ -83,9 +84,10 @@ extern struct ovs_rwlock xlate_rwlock;
  *   actions into datapath actions. */
 
 size_t ofproto_dpif_get_max_mpls_depth(const struct ofproto_dpif *);
+bool ofproto_dpif_get_enable_recirc(const struct ofproto_dpif *);
 
-uint8_t rule_dpif_lookup(struct ofproto_dpif *, const struct flow *,
-                         struct flow_wildcards *, struct rule_dpif **rule);
+uint8_t rule_dpif_lookup(struct ofproto_dpif *, struct flow *,
+                      struct flow_wildcards *, struct rule_dpif **rule);
 
 enum rule_dpif_lookup_verdict rule_dpif_lookup_from_table(struct ofproto_dpif *,
                                                           const struct flow *,
@@ -103,6 +105,7 @@ void rule_dpif_credit_stats(struct rule_dpif *rule ,
 bool rule_dpif_is_fail_open(const struct rule_dpif *);
 bool rule_dpif_is_table_miss(const struct rule_dpif *);
 bool rule_dpif_is_internal(const struct rule_dpif *);
+uint8_t rule_dpif_get_table(const struct rule_dpif *);
 
 struct rule_actions *rule_dpif_get_actions(const struct rule_dpif *);
 
@@ -207,4 +210,11 @@ struct ofport_dpif *odp_port_to_ofport(const struct dpif_backer *, odp_port_t);
 
 uint32_t ofproto_dpif_alloc_recirc_id(struct ofproto_dpif *ofproto);
 void ofproto_dpif_free_recirc_id(struct ofproto_dpif *ofproto, uint32_t recirc_id);
+int ofproto_dpif_add_internal_flow(struct ofproto_dpif *,
+                                   struct match *, int priority,
+                                   const struct ofpbuf *ofpacts,
+                                   struct rule **rulep);
+int ofproto_dpif_delete_internal_flow(struct ofproto_dpif *, struct match *,
+                                      int priority);
+
 #endif /* ofproto-dpif.h */
diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index 9f37f71..bfa0235 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -205,7 +205,8 @@ void ofproto_port_set_state(struct ofport *, enum ofputil_port_state);
  */
 enum oftable_flags {
     OFTABLE_HIDDEN = 1 << 0,   /* Hide from most OpenFlow operations. */
-    OFTABLE_READONLY = 1 << 1  /* Don't allow OpenFlow to change this table. */
+    OFTABLE_READONLY = 1 << 1  /* Don't allow OpenFlow controller to change
+                                  this table. */
 };
 
 /* A flow table within a "struct ofproto".
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index 677da8c..a517264 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -261,7 +261,8 @@ struct ofport_usage {
 /* rule. */
 static void ofproto_rule_destroy__(struct rule *);
 static void ofproto_rule_send_removed(struct rule *, uint8_t reason);
-static bool rule_is_modifiable(const struct rule *);
+static bool rule_is_modifiable(const struct rule *rule,
+                               enum ofputil_flow_mod_flags flag);
 
 /* OpenFlow. */
 static enum ofperr add_flow(struct ofproto *, struct ofconn *,
@@ -1143,6 +1144,24 @@ ofproto_get_n_tables(const struct ofproto *ofproto)
     return ofproto->n_tables;
 }
 
+/* Returns the number of Controller visible OpenFlow tables
+ * in 'ofproto'. This number will exclude Hidden tables.
+ * This function's return value should be less than or equal to that of
+ * ofproto_get_n_tables() . */
+uint8_t
+ofproto_get_n_visible_tables(const struct ofproto *ofproto)
+{
+    uint8_t n = ofproto->n_tables;
+
+    /* Count only non-hidden tables in the number of tables.  (Hidden tables,
+     * if present, are always at the end.) */
+    while(n && (ofproto->tables[n - 1].flags & OFTABLE_HIDDEN)) {
+        n--;
+    }
+
+    return n;
+}
+
 /* Configures the OpenFlow table in 'ofproto' with id 'table_id' with the
  * settings from 's'.  'table_id' must be in the range 0 through the number of
  * OpenFlow tables in 'ofproto' minus 1, inclusive.
@@ -2741,19 +2760,27 @@ destroy_rule_executes(struct ofproto *ofproto)
 static bool
 ofproto_rule_is_hidden(const struct rule *rule)
 {
-    return rule->cr.priority > UINT16_MAX;
+    return (rule->cr.priority > UINT16_MAX);
 }
 
-static enum oftable_flags
-rule_get_flags(const struct rule *rule)
+static bool
+oftable_is_modifiable(const struct oftable *table,
+                      enum ofputil_flow_mod_flags flags)
 {
-    return rule->ofproto->tables[rule->table_id].flags;
+    if (flags & OFPUTIL_FF_NO_READONLY) {
+        return true;
+    }
+
+    return !(table->flags & OFTABLE_READONLY);
 }
 
 static bool
-rule_is_modifiable(const struct rule *rule)
+rule_is_modifiable(const struct rule *rule, enum ofputil_flow_mod_flags flags)
 {
-    return !(rule_get_flags(rule) & OFTABLE_READONLY);
+    const struct oftable *rule_table;
+
+    rule_table = &rule->ofproto->tables[rule->table_id];
+    return oftable_is_modifiable(rule_table, flags);
 }
 
 static enum ofperr
@@ -2771,26 +2798,14 @@ handle_features_request(struct ofconn *ofconn, const struct ofp_header *oh)
     struct ofport *port;
     bool arp_match_ip;
     struct ofpbuf *b;
-    int n_tables;
-    int i;
 
     ofproto->ofproto_class->get_features(ofproto, &arp_match_ip,
                                          &features.actions);
     ovs_assert(features.actions & OFPUTIL_A_OUTPUT); /* sanity check */
 
-    /* Count only non-hidden tables in the number of tables.  (Hidden tables,
-     * if present, are always at the end.) */
-    n_tables = ofproto->n_tables;
-    for (i = 0; i < ofproto->n_tables; i++) {
-        if (ofproto->tables[i].flags & OFTABLE_HIDDEN) {
-            n_tables = i;
-            break;
-        }
-    }
-
     features.datapath_id = ofproto->datapath_id;
     features.n_buffers = pktbuf_capacity();
-    features.n_tables = n_tables;
+    features.n_tables = ofproto_get_n_visible_tables(ofproto);
     features.capabilities = (OFPUTIL_C_FLOW_STATS | OFPUTIL_C_TABLE_STATS |
                              OFPUTIL_C_PORT_STATS | OFPUTIL_C_QUEUE_STATS);
     if (arp_match_ip) {
@@ -3968,10 +3983,18 @@ add_flow(struct ofproto *ofproto, struct ofconn *ofconn,
 
     table = &ofproto->tables[table_id];
 
-    if (table->flags & OFTABLE_READONLY) {
+    if (!oftable_is_modifiable(table, fm->flags)) {
         return OFPERR_OFPBRC_EPERM;
     }
 
+    if (!(fm->flags & OFPUTIL_FF_HIDDEN_FIELDS)) {
+        if (!match_has_default_hidden_fields(&fm->match)) {
+            VLOG_WARN_RL(&rl, "%s: (add_flow) only internal flows can set "
+                         "non-default values to hidden fields", ofproto->name);
+            return OFPERR_OFPBRC_EPERM;
+        }
+    }
+
     cls_rule_init(&cr, &fm->match, fm->priority);
 
     /* Transform "add" into "modify" if there's an existing identical flow. */
@@ -3980,7 +4003,7 @@ add_flow(struct ofproto *ofproto, struct ofconn *ofconn,
     fat_rwlock_unlock(&table->cls.rwlock);
     if (rule) {
         cls_rule_destroy(&cr);
-        if (!rule_is_modifiable(rule)) {
+        if (!rule_is_modifiable(rule, fm->flags)) {
             return OFPERR_OFPBRC_EPERM;
         } else if (rule->pending) {
             return OFPROTO_POSTPONE;
@@ -4108,7 +4131,7 @@ modify_flows__(struct ofproto *ofproto, struct ofconn *ofconn,
 
         /* FIXME: Implement OFPFUTIL_FF_RESET_COUNTS */
 
-        if (rule_is_modifiable(rule)) {
+        if (rule_is_modifiable(rule, fm->flags)) {
             /* At least one rule is modifiable, don't report EPERM error. */
             error = 0;
         } else {
diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h
index 3f3557c..ab51365 100644
--- a/ofproto/ofproto.h
+++ b/ofproto/ofproto.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
+ * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -382,6 +382,7 @@ struct ofproto_table_settings {
 };
 
 int ofproto_get_n_tables(const struct ofproto *);
+uint8_t ofproto_get_n_visible_tables(const struct ofproto *);
 void ofproto_configure_table(struct ofproto *, int table_id,
                              const struct ofproto_table_settings *);
 
diff --git a/tests/classifier.at b/tests/classifier.at
index 45146ba..b6c9352 100644
--- a/tests/classifier.at
+++ b/tests/classifier.at
@@ -40,22 +40,22 @@ table=0 in_port=3 priority=0,ip,action=drop
 AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
 AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=2,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout])
 AT_CHECK([tail -2 stdout], [0],
-  [Megaflow: skb_priority=0,tcp,in_port=2,nw_dst=192.168.0.0/16,nw_frag=no
+  [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=2,nw_dst=192.168.0.0/16,nw_frag=no
 Datapath actions: 1
 ])
 AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout])
 AT_CHECK([tail -2 stdout], [0],
-  [Megaflow: skb_priority=0,tcp,in_port=1,nw_dst=192.168.0.2,nw_frag=no
+  [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=192.168.0.2,nw_frag=no
 Datapath actions: drop
 ])
 AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout])
 AT_CHECK([tail -2 stdout], [0],
-  [Megaflow: skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=80
+  [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=80
 Datapath actions: drop
 ])
 AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=79'], [0], [stdout])
 AT_CHECK([tail -2 stdout], [0],
-  [Megaflow: skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=79
+  [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=79
 Datapath actions: 2
 ])
 OVS_VSWITCHD_STOP
@@ -87,22 +87,22 @@ table=0 in_port=3 priority=0,ip,action=drop
 AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
 AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout])
 AT_CHECK([tail -2 stdout], [0],
-  [Megaflow: skb_priority=0,tcp,in_port=1,nw_dst=192.168.0.0/16,nw_frag=no
+  [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=192.168.0.0/16,nw_frag=no
 Datapath actions: drop
 ])
 AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=2,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout])
 AT_CHECK([tail -2 stdout], [0],
-  [Megaflow: skb_priority=0,tcp,in_port=2,nw_dst=192.168.0.0/16,nw_frag=no
+  [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=2,nw_dst=192.168.0.0/16,nw_frag=no
 Datapath actions: 1
 ])
 AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout])
 AT_CHECK([tail -2 stdout], [0],
-  [Megaflow: skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=80
+  [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=80
 Datapath actions: drop
 ])
 AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=79'], [0], [stdout])
 AT_CHECK([tail -2 stdout], [0],
-  [Megaflow: skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_src=8,tp_dst=79
+  [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_src=8,tp_dst=79
 Datapath actions: 3
 ])
 OVS_VSWITCHD_STOP(["/'prefixes' with incompatible field: ipv6_label/d"])
diff --git a/tests/lacp.at b/tests/lacp.at
index d44bee0..0db2077 100644
--- a/tests/lacp.at
+++ b/tests/lacp.at
@@ -1,5 +1,10 @@
 AT_BANNER([lacp])
 
+# Strips out Recirculation ID information since it may change over time.
+m4_define([STRIP_RECIRC_ID], [[sed '
+    s/Recirc-ID.*$/<del>/
+' ]])
+
 AT_SETUP([lacp - config])
 OVS_VSWITCHD_START([\
         add-port br0 p1 --\
@@ -113,6 +118,7 @@ slave: p2: expired attached
 AT_CHECK([ovs-appctl bond/show], [0], [dnl
 ---- bond ----
 bond_mode: active-backup
+bond may use recirculation: no, Recirc-ID : -1
 bond-hash-basis: 0
 updelay: 0 ms
 downdelay: 0 ms
@@ -182,8 +188,8 @@ done
 AT_CHECK(
   [ovs-appctl lacp/show bond0
 ovs-appctl lacp/show bond1
-ovs-appctl bond/show bond0
-ovs-appctl bond/show bond1], [0], [stdout])
+ovs-appctl bond/show bond0 | STRIP_RECIRC_ID
+ovs-appctl bond/show bond1 | STRIP_RECIRC_ID ], [0], [stdout])
 AT_CHECK([sed '/active slave/d' stdout], [0], [dnl
 ---- bond0 ----
 	status: active negotiated
@@ -275,6 +281,7 @@ slave: p3: current attached
 	partner state: activity timeout aggregation synchronized collecting distributing
 ---- bond0 ----
 bond_mode: balance-tcp
+bond may use recirculation: yes, <del>
 bond-hash-basis: 0
 updelay: 0 ms
 downdelay: 0 ms
@@ -288,6 +295,7 @@ slave p1: enabled
 
 ---- bond1 ----
 bond_mode: balance-tcp
+bond may use recirculation: yes, <del>
 bond-hash-basis: 0
 updelay: 0 ms
 downdelay: 0 ms
@@ -316,8 +324,8 @@ for i in `seq 0 40`; do ovs-appctl time/warp 100; done
 AT_CHECK(
   [ovs-appctl lacp/show bond0
 ovs-appctl lacp/show bond1
-ovs-appctl bond/show bond0
-ovs-appctl bond/show bond1], [0], [dnl
+ovs-appctl bond/show bond0 | STRIP_RECIRC_ID
+ovs-appctl bond/show bond1 | STRIP_RECIRC_ID ], [0], [dnl
 ---- bond0 ----
 	status: active negotiated
 	sys_id: aa:55:aa:55:00:00
@@ -408,6 +416,7 @@ slave: p3: current attached
 	partner state: activity timeout aggregation synchronized collecting distributing
 ---- bond0 ----
 bond_mode: balance-tcp
+bond may use recirculation: yes, <del>
 bond-hash-basis: 0
 updelay: 0 ms
 downdelay: 0 ms
@@ -422,6 +431,7 @@ slave p1: enabled
 
 ---- bond1 ----
 bond_mode: balance-tcp
+bond may use recirculation: yes, <del>
 bond-hash-basis: 0
 updelay: 0 ms
 downdelay: 0 ms
@@ -442,8 +452,8 @@ for i in `seq 0 40`; do ovs-appctl time/warp 100; done
 AT_CHECK(
   [ovs-appctl lacp/show bond0
 ovs-appctl lacp/show bond1
-ovs-appctl bond/show bond0
-ovs-appctl bond/show bond1], [0], [dnl
+ovs-appctl bond/show bond0 | STRIP_RECIRC_ID
+ovs-appctl bond/show bond1 | STRIP_RECIRC_ID ], [0], [dnl
 ---- bond0 ----
 	status: active negotiated
 	sys_id: aa:55:aa:55:00:00
@@ -534,6 +544,7 @@ slave: p3: current attached
 	partner state: activity timeout aggregation synchronized collecting distributing
 ---- bond0 ----
 bond_mode: balance-tcp
+bond may use recirculation: yes, <del>
 bond-hash-basis: 0
 updelay: 0 ms
 downdelay: 0 ms
@@ -548,6 +559,7 @@ slave p1: enabled
 
 ---- bond1 ----
 bond_mode: balance-tcp
+bond may use recirculation: yes, <del>
 bond-hash-basis: 0
 updelay: 0 ms
 downdelay: 0 ms
diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
index e78788a..551fd1c 100644
--- a/tests/ofproto-dpif.at
+++ b/tests/ofproto-dpif.at
@@ -47,6 +47,148 @@ skb_priority(0),in_port(8),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_
 OVS_VSWITCHD_STOP
 AT_CLEANUP
 
+AT_SETUP([ofproto-dpif, active-backup bonding])
+# Create br0 with interfaces p1, p2 and p7, creating bond0 with p1 and p2
+#    and br1 with interfaces p3, p4 and p8.
+# toggle p1,p2 of bond0 up and down to test bonding in active-backup mode.
+OVS_VSWITCHD_START(
+  [add-bond br0 bond0 p1 p2 bond_mode=active-backup --\
+   set interface p1 type=dummy options:pstream=punix:$OVS_RUNDIR/p1.sock ofport_request=1 -- \
+   set interface p2 type=dummy options:pstream=punix:$OVS_RUNDIR/p2.sock ofport_request=2 -- \
+   add-port br0 p7 -- set interface p7 ofport_request=7 type=dummy -- \
+   add-br br1 -- \
+   set bridge br1 other-config:hwaddr=aa:66:aa:66:00:00 -- \
+   set bridge br1 datapath-type=dummy other-config:datapath-id=1234 \
+                  fail-mode=secure -- \
+   add-port br1 p3 -- set interface p3 type=dummy options:stream=unix:$OVS_RUNDIR/p1.sock ofport_request=3 -- \
+   add-port br1 p4 -- set interface p4 type=dummy options:stream=unix:$OVS_RUNDIR/p2.sock ofport_request=4 -- \
+   add-port br1 p8 -- set interface p8 ofport_request=8 type=dummy --])
+
+AT_CHECK([ovs-ofctl add-flow br0 action=normal])
+AT_CHECK([ovs-ofctl add-flow br1 action=normal])
+ovs-appctl netdev-dummy/set-admin-state up
+ovs-appctl time/warp 100
+ovs-appctl netdev-dummy/set-admin-state p2 down
+ovs-appctl time/stop
+ovs-appctl time/warp 100
+AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(7),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
+AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(7),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.3,dst=10.0.0.4,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
+ovs-appctl time/warp 100
+ovs-appctl netdev-dummy/set-admin-state p2 up
+ovs-appctl netdev-dummy/set-admin-state p1 down
+ovs-appctl time/warp 100
+AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(7),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0d),eth_type(0x0800),ipv4(src=10.0.0.5,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
+AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(7),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0e),eth_type(0x0800),ipv4(src=10.0.0.6,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
+ovs-appctl time/warp 100
+ovs-appctl time/warp 100
+AT_CHECK([ovs-appctl dpif/dump-flows br1 | STRIP_XOUT], [0], [dnl
+skb_priority(0),in_port(3),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff), packets:0, bytes:0, used:never, actions: <del>
+skb_priority(0),in_port(3),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.3/0.0.0.0,dst=10.0.0.4/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff), packets:0, bytes:0, used:never, actions: <del>
+skb_priority(0),in_port(4),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0d),eth_type(0x0800),ipv4(src=10.0.0.5/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff), packets:0, bytes:0, used:never, actions: <del>
+skb_priority(0),in_port(4),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0e),eth_type(0x0800),ipv4(src=10.0.0.6/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff), packets:0, bytes:0, used:never, actions: <del>
+skb_priority(0),in_port(4),eth(src=50:54:00:00:00:09,dst=ff:ff:ff:ff:ff:ff),eth_type(0x8035), packets:0, bytes:0, used:never, actions: <del>
+skb_priority(0),in_port(4),eth(src=50:54:00:00:00:0b,dst=ff:ff:ff:ff:ff:ff),eth_type(0x8035), packets:0, bytes:0, used:never, actions: <del>
+])
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([ofproto-dpif, balance-slb bonding])
+# Create br0 with interfaces bond0(p1, p2, p3) and p7,
+#    and br1 with interfaces p4, p5, p6 and p8.
+#    p1 <-> p4, p2 <-> p5, p3 <-> p6
+# Send some traffic, make sure the traffic is spread based on source MAC.
+OVS_VSWITCHD_START(
+  [add-bond br0 bond0 p1 p2 p3 bond_mode=balance-slb --\
+   set interface p1 type=dummy options:pstream=punix:$OVS_RUNDIR/p1.sock ofport_request=1 -- \
+   set interface p2 type=dummy options:pstream=punix:$OVS_RUNDIR/p2.sock ofport_request=2 -- \
+   set interface p3 type=dummy options:pstream=punix:$OVS_RUNDIR/p3.sock ofport_request=3 -- \
+   add-port br0 p7 -- set interface p7 ofport_request=7 type=dummy -- \
+   add-br br1 -- \
+   set bridge br1 other-config:hwaddr=aa:66:aa:66:00:00 -- \
+   set bridge br1 datapath-type=dummy other-config:datapath-id=1234 \
+                  fail-mode=secure -- \
+   add-port br1 p4 -- set interface p4 type=dummy options:stream=unix:$OVS_RUNDIR/p1.sock ofport_request=4 -- \
+   add-port br1 p5 -- set interface p5 type=dummy options:stream=unix:$OVS_RUNDIR/p2.sock ofport_request=5 -- \
+   add-port br1 p6 -- set interface p6 type=dummy options:stream=unix:$OVS_RUNDIR/p3.sock ofport_request=6 -- \
+   add-port br1 p8 -- set interface p8 ofport_request=8 type=dummy --])
+
+AT_CHECK([ovs-ofctl add-flow br0 action=normal])
+AT_CHECK([ovs-ofctl add-flow br1 action=normal])
+AT_CHECK([ovs-appctl netdev-dummy/set-admin-state up], 0, [OK
+])
+ovs-appctl netdev-dummy/set-admin-state up
+ovs-appctl time/stop
+ovs-appctl time/warp 100
+(
+for i in `seq 0 100 |xargs printf '%02x\n'`;
+    do
+    pkt="in_port(7),eth(src=50:54:00:00:00:$i,dst=50:54:00:00:01:00),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)"
+    AT_CHECK([ovs-appctl netdev-dummy/receive p7 $pkt])
+    done
+)
+ovs-appctl time/warp 100
+AT_CHECK([ovs-appctl dpif/dump-flows br1 > br1_flows.txt])
+# Make sure there is a reasonable distribution to all three ports.
+# We don't want to make this check precise, in case the hash function changes.
+AT_CHECK([test `egrep 'in_port\(4\)' br1_flows.txt |wc -l` -gt 3])
+AT_CHECK([test `egrep 'in_port\(5\)' br1_flows.txt |wc -l` -gt 3])
+AT_CHECK([test `egrep 'in_port\(6\)' br1_flows.txt |wc -l` -gt 3])
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([ofproto-dpif, balance-tcp bonding])
+# Create br0 with interfaces bond0(p1, p2, p3) and p7,
+#    and br1 with interfaces bond1(p4, p5, p6) and p8.
+#    bond0 <-> bond1
+# Send some traffic, make sure the traffic is spread based on L4 headers.
+OVS_VSWITCHD_START(
+  [add-bond br0 bond0 p1 p2 p3 bond_mode=balance-tcp lacp=active \
+        other-config:lacp-time=fast other-config:bond-rebalance-interval=0 --\
+   set interface p1 type=dummy options:pstream=punix:$OVS_RUNDIR/p1.sock ofport_request=1 -- \
+   set interface p2 type=dummy options:pstream=punix:$OVS_RUNDIR/p2.sock ofport_request=2 -- \
+   set interface p3 type=dummy options:pstream=punix:$OVS_RUNDIR/p3.sock ofport_request=3 -- \
+   add-port br0 p7 -- set interface p7 ofport_request=7 type=dummy -- \
+   add-br br1 -- \
+   set bridge br1 other-config:hwaddr=aa:66:aa:66:00:00 -- \
+   set bridge br1 datapath-type=dummy other-config:datapath-id=1234 \
+                  fail-mode=secure -- \
+   add-bond br1 bond1 p4 p5 p6 bond_mode=balance-tcp lacp=active \
+        other-config:lacp-time=fast other-config:bond-rebalance-interval=0 --\
+   set interface p4 type=dummy options:stream=unix:$OVS_RUNDIR/p1.sock ofport_request=4 -- \
+   set interface p5 type=dummy options:stream=unix:$OVS_RUNDIR/p2.sock ofport_request=5 -- \
+   set interface p6 type=dummy options:stream=unix:$OVS_RUNDIR/p3.sock ofport_request=6 -- \
+   add-port br1 p8 -- set interface p8 ofport_request=8 type=dummy --])
+AT_CHECK([ovs-appctl netdev-dummy/set-admin-state up], 0, [OK
+])
+AT_CHECK([ovs-ofctl add-flow br0 action=normal])
+AT_CHECK([ovs-ofctl add-flow br1 action=normal])
+AT_CHECK([ovs-appctl upcall/disable-megaflows], [0], [megaflows disabled
+], [])
+sleep 1;
+ovs-appctl time/stop
+ovs-appctl time/warp 100
+ovs-appctl lacp/show > lacp.txt
+ovs-appctl bond/show > bond.txt
+(
+for i in `seq 10 100` ;
+    do
+    pkt="in_port(7),eth(src=50:54:00:00:00:05,dst=50:54:00:00:01:00),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=$i),tcp_flags(0x010)"
+    AT_CHECK([ovs-appctl netdev-dummy/receive p7 $pkt])
+    done
+)
+ovs-appctl time/warp 100
+ovs-appctl time/warp 100
+ovs-appctl time/warp 100
+AT_CHECK([ovs-appctl dpif/dump-flows br0 |grep tcp > br0_flows.txt])
+AT_CHECK([ovs-appctl dpif/dump-flows br1 |grep tcp > br1_flows.txt])
+# Make sure there is a reasonable distribution to all three ports.
+# We don't want to make this check precise, in case the hash function changes.
+AT_CHECK([test `grep in_port.4 br1_flows.txt |wc -l` -gt 7])
+AT_CHECK([test `grep in_port.5 br1_flows.txt |wc -l` -gt 7])
+AT_CHECK([test `grep in_port.6 br1_flows.txt |wc -l` -gt 7])
+OVS_VSWITCHD_STOP()
+AT_CLEANUP
+
 AT_SETUP([ofproto-dpif - resubmit])
 OVS_VSWITCHD_START
 ADD_OF_PORTS([br0], [1], [10], [11], [12], [13], [14], [15],
@@ -3859,7 +4001,7 @@ ovs-appctl time/stop
 ovs-appctl time/warp 5000
 AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
 AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
-sleep 1
+
 AT_CHECK([ovs-appctl dpif/dump-flows br0 | STRIP_XOUT], [0], [dnl
 skb_priority(0),in_port(7),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2/255.255.255.255,dst=10.0.0.1/255.255.255.255,proto=1/0xff,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8,code=0), packets:0, bytes:0, used:never, actions: <del>
 skb_priority(0),in_port(7),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4/255.255.255.255,dst=10.0.0.3/255.255.255.255,proto=1/0xff,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8,code=0), packets:0, bytes:0, used:never, actions: <del>
-- 
1.7.9.5




More information about the dev mailing list