[ovs-dev] [megaflow bond rebase] ofproto/bond: Implement bond megaflow using recirculation

Andy Zhou azhou at nicira.com
Tue Apr 1 22:22:54 UTC 2014


Infrastructure to enable megaflow support for bond ports using
recirculation. This patch adds the following features:
* Generate RECIRC action when bond can benefit from recirculation.
* Populate post recirculation rules in a hidden table. Currently table 254.
* Use post recirculation rules for bond rebalancing.
* Add a recirculation implementation in dpif-netdev.

Signed-off-by: Andy Zhou <azhou at nicira.com>

---
v1->v2:  Rewritten

V2->V3:
        Address Ben's review feedback. Rebased to master.

        Remove force recirculation off. It is always on for the user
        space datapath. Off for the kernel datapath, via the built-in
        detection logic.

        Remove dpif_netdev's enable_recirc. It is always enabled.

        When in use, make sure dp_hash != 0. Netlink logic is now
        simpler: if recirc_id or dp_hash is zero, they will not
        be part of the serialization.

        Add a drop rule to catch any runaway recirc rule lookup.

        Allow flow_mod with OFPUTIL_FF_NO_READONLY to change rules
              in the READONLY tables.  (bypass readonly check).
              READONLY table remains read only to open flow controller.

        Allow OFPUTIL_FF_HIDDEN_FIELDS to set recirc_id and dp_hash.
              Currently only OVS internal logic sets this bit when
              installing internal rules. OpenFlow controllers cannot
              set them.

        ofproto_dpif_add_internal_flow() now returns a rule instead
                of a rule_dpif.

        Recirculation depth limit.

        Added unit test case that tests recirculation

        Bug and style fixes.

V3->V4:
        rebase.
        Tests 670 and 672 are known to be broken at the moment, due
        to bugs most likely not related to this patch.
---
 lib/dpif-netdev.c            |   60 ++++++---
 lib/flow.c                   |    2 +
 lib/match.c                  |   29 +++++
 lib/match.h                  |    3 +
 lib/odp-execute.c            |    7 +-
 lib/ofp-parse.c              |    3 +
 lib/ofp-print.c              |    6 +
 lib/ofp-util.h               |    7 ++
 ofproto/bond.c               |  279 ++++++++++++++++++++++++++++++++++++++++--
 ofproto/bond.h               |   33 ++++-
 ofproto/ofproto-dpif-xlate.c |   57 +++++++--
 ofproto/ofproto-dpif-xlate.h |   16 ++-
 ofproto/ofproto-dpif.c       |  276 ++++++++++++++++++++++++++++++++---------
 ofproto/ofproto-dpif.h       |   13 +-
 ofproto/ofproto-provider.h   |    3 +-
 ofproto/ofproto.c            |   69 +++++++----
 ofproto/ofproto.h            |    3 +-
 tests/classifier.at          |   16 +--
 tests/lacp.at                |   24 +++-
 tests/ofproto-dpif.at        |  144 +++++++++++++++++++++-
 20 files changed, 910 insertions(+), 140 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index ceee233..a630a63 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -68,6 +68,9 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev);
 #define NETDEV_RULE_PRIORITY 0x8000
 
 #define NR_THREADS 1
+/* Use per thread recirc_depth to prevent recirculation loop. */
+#define MAX_RECIRC_DEPTH 5
+DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
 
 /* Configuration parameters. */
 enum { MAX_FLOWS = 65536 };     /* Maximum number of flows in flow table. */
@@ -1991,8 +1994,9 @@ dp_netdev_count_packet(struct dp_netdev *dp, enum dp_stat_type type)
 }
 
 static void
-dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet,
-                     struct pkt_metadata *md)
+dp_netdev_input(struct dp_netdev *dp, struct ofpbuf *packet,
+                struct pkt_metadata *md)
+    OVS_REQ_RDLOCK(dp->port_rwlock)
 {
     struct dp_netdev_flow *netdev_flow;
     struct flow key;
@@ -2021,6 +2025,17 @@ dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet,
     }
 }
 
+static void
+dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet,
+                     struct pkt_metadata *md)
+    OVS_REQ_RDLOCK(dp->port_rwlock)
+{
+    uint32_t *recirc_depth = recirc_depth_get();
+
+    *recirc_depth = 0;
+    dp_netdev_input(dp, packet, md);
+}
+
 static int
 dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet,
                            int queue_no, int type, const struct flow *flow,
@@ -2089,6 +2104,7 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet,
     struct dp_netdev_execute_aux *aux = aux_;
     int type = nl_attr_type(a);
     struct dp_netdev_port *p;
+    uint32_t *depth = recirc_depth_get();
 
     switch ((enum ovs_action_attr)type) {
     case OVS_ACTION_ATTR_OUTPUT:
@@ -2115,23 +2131,38 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet,
         break;
     }
 
-    case OVS_ACTION_ATTR_RECIRC: {
-        const struct ovs_action_recirc *act;
+    case OVS_ACTION_ATTR_RECIRC:
+        if (*depth < MAX_RECIRC_DEPTH) {
+            uint32_t old_recirc_id = md->recirc_id;
+            uint32_t old_dp_hash = md->dp_hash;
+            const struct ovs_action_recirc *act;
 
-        act = nl_attr_get(a);
-        md->recirc_id = act->recirc_id;
-        md->dp_hash = 0;
+            act = nl_attr_get(a);
+            md->recirc_id = act->recirc_id;
 
-        if (act->hash_alg == OVS_RECIRC_HASH_ALG_L4) {
-            struct flow flow;
+            if (act->hash_alg == OVS_RECIRC_HASH_ALG_L4) {
+                struct flow flow;
 
-            flow_extract(packet, md, &flow);
-            md->dp_hash = flow_hash_symmetric_l4(&flow, act->hash_bias);
-        }
+                flow_extract(packet, md, &flow);
+                md->dp_hash = flow_hash_symmetric_l4(&flow, act->hash_bias);
+                if (!md->dp_hash) {
+                    md->dp_hash = 1;  /* 0 is not valid */
+                }
+            } else {
+                md->dp_hash = 0;
+            }
 
-        dp_netdev_port_input(aux->dp, packet, md);
+            (*depth)++;
+            dp_netdev_input(aux->dp, packet, md);
+            /* Undo the nested pass: put back the depth counter and both
+             * metadata fields that were saved on entry. */
+            (*depth)--;
+            md->recirc_id = old_recirc_id;
+            md->dp_hash = old_dp_hash;
+        } else {
+            VLOG_WARN("Packet dropped. Max recirculation depth exceeded.");
+        }
         break;
-    }
 
     case OVS_ACTION_ATTR_PUSH_VLAN:
     case OVS_ACTION_ATTR_POP_VLAN:
@@ -2143,7 +2174,6 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet,
     case __OVS_ACTION_ATTR_MAX:
         OVS_NOT_REACHED();
     }
-
 }
 
 static void
diff --git a/lib/flow.c b/lib/flow.c
index 314c1c7..38be4d9 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -369,6 +369,8 @@ flow_extract(struct ofpbuf *packet, const struct pkt_metadata *md,
         flow->in_port = md->in_port;
         flow->skb_priority = md->skb_priority;
         flow->pkt_mark = md->pkt_mark;
+        flow->recirc_id = md->recirc_id;
+        flow->dp_hash = md->dp_hash;
     }
 
     packet->l2   = b.data;
diff --git a/lib/match.c b/lib/match.c
index 2969972..ec97b8f 100644
--- a/lib/match.c
+++ b/lib/match.c
@@ -788,6 +788,35 @@ match_hash(const struct match *match, uint32_t basis)
     return flow_wildcards_hash(&match->wc, flow_hash(&match->flow, basis));
 }
 
+static bool
+match_has_default_recirc_id(const struct match *m)
+{
+    uint32_t masked = m->flow.recirc_id & m->wc.masks.recirc_id;
+    return ((m->flow.recirc_id | masked) == 0);
+}
+
+static bool
+match_has_default_dp_hash(const struct match *m)
+{
+    return ((m->flow.dp_hash | m->wc.masks.dp_hash) == 0);
+}
+
+/* Return true if the hidden fields of the match are set to the default values.
+ * The default values are those set up by match_init_hidden_fields(). */
+bool
+match_has_default_hidden_fields(const struct match *m)
+{
+    return match_has_default_recirc_id(m)
+           && match_has_default_dp_hash(m);
+}
+
+void
+match_init_hidden_fields(struct match *m)
+{
+    match_set_recirc_id(m, 0);
+    match_set_dp_hash_masked(m, 0, 0);
+}
+
 static void
 format_eth_masked(struct ds *s, const char *name, const uint8_t eth[6],
                   const uint8_t mask[6])
diff --git a/lib/match.h b/lib/match.h
index 95c8e67..2422fb1 100644
--- a/lib/match.h
+++ b/lib/match.h
@@ -134,6 +134,9 @@ void match_set_nd_target_masked(struct match *, const struct in6_addr *,
 bool match_equal(const struct match *, const struct match *);
 uint32_t match_hash(const struct match *, uint32_t basis);
 
+void match_init_hidden_fields(struct match *);
+bool match_has_default_hidden_fields(const struct match *);
+
 void match_format(const struct match *, struct ds *, unsigned int priority);
 char *match_to_string(const struct match *, unsigned int priority);
 void match_print(const struct match *);
diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index e5aa0ce..c48101d 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
+ * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
  * Copyright (c) 2013 Simon Horman
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -207,10 +207,11 @@ odp_execute_actions__(void *dp, struct ofpbuf *packet, bool steal,
         case OVS_ACTION_ATTR_USERSPACE:
         case OVS_ACTION_ATTR_RECIRC:
             if (dp_execute_action) {
-                bool may_steal;
                 /* Allow 'dp_execute_action' to steal the packet data if we do
                  * not need it any more. */
-                may_steal = steal && (!more_actions && left <= NLA_ALIGN(a->nla_len));
+                bool may_steal = steal && (!more_actions
+                                && left <= NLA_ALIGN(a->nla_len)
+                                && type != OVS_ACTION_ATTR_RECIRC);
                 dp_execute_action(dp, packet, md, a, may_steal);
             }
             break;
diff --git a/lib/ofp-parse.c b/lib/ofp-parse.c
index 5c5bb06..2125af0 100644
--- a/lib/ofp-parse.c
+++ b/lib/ofp-parse.c
@@ -1326,6 +1326,9 @@ parse_ofp_str__(struct ofputil_flow_mod *fm, int command, char *string,
         } else if (fields & F_FLAGS && !strcmp(name, "no_byte_counts")) {
             fm->flags |= OFPUTIL_FF_NO_BYT_COUNTS;
             *usable_protocols &= OFPUTIL_P_OF13_UP;
+        } else if (!strcmp(name, "no_readonly_table")
+                   || !strcmp(name, "allow_hidden_fields")) {
+             /* ignore these fields. */
         } else {
             char *value;
 
diff --git a/lib/ofp-print.c b/lib/ofp-print.c
index b88d1e7..d2d7f6f 100644
--- a/lib/ofp-print.c
+++ b/lib/ofp-print.c
@@ -742,6 +742,12 @@ ofp_print_flow_flags(struct ds *s, enum ofputil_flow_mod_flags flags)
     if (flags & OFPUTIL_FF_NO_BYT_COUNTS) {
         ds_put_cstr(s, "no_byte_counts ");
     }
+    if (flags & OFPUTIL_FF_HIDDEN_FIELDS) {
+        ds_put_cstr(s, "allow_hidden_fields ");
+    }
+    if (flags & OFPUTIL_FF_NO_READONLY) {
+        ds_put_cstr(s, "no_readonly_table ");
+    }
 }
 
 static void
diff --git a/lib/ofp-util.h b/lib/ofp-util.h
index 298d595..a944558 100644
--- a/lib/ofp-util.h
+++ b/lib/ofp-util.h
@@ -246,6 +246,13 @@ enum ofputil_flow_mod_flags {
     OFPUTIL_FF_CHECK_OVERLAP = 1 << 3, /* All versions. */
     OFPUTIL_FF_EMERG         = 1 << 4, /* OpenFlow 1.0 only. */
     OFPUTIL_FF_RESET_COUNTS  = 1 << 5, /* OpenFlow 1.2+. */
+
+    /* Flags that are only set by OVS for its internal use. Open flow
+     * controller will Never set them. */
+    OFPUTIL_FF_HIDDEN_FIELDS = 1 << 6, /* Allow hidden match fields to be
+                                          set or modified. */
+    OFPUTIL_FF_NO_READONLY   = 1 << 7, /* Allow rules within read only tables
+                                          to be modified */
 };
 
 /* Protocol-independent flow_mod.
diff --git a/ofproto/bond.c b/ofproto/bond.c
index 6812330..92a3109 100644
--- a/ofproto/bond.c
+++ b/ofproto/bond.c
@@ -23,6 +23,11 @@
 #include <stdlib.h>
 #include <math.h>
 
+#include "ofp-util.h"
+#include "ofp-actions.h"
+#include "ofpbuf.h"
+#include "ofproto/ofproto-provider.h"
+#include "ofproto/ofproto-dpif.h"
 #include "connectivity.h"
 #include "coverage.h"
 #include "dynamic-string.h"
@@ -36,6 +41,7 @@
 #include "packets.h"
 #include "poll-loop.h"
 #include "seq.h"
+#include "match.h"
 #include "shash.h"
 #include "timeval.h"
 #include "unixctl.h"
@@ -50,6 +56,7 @@ static struct hmap *const all_bonds OVS_GUARDED_BY(rwlock) = &all_bonds__;
 /* Bit-mask for hashing a flow down to a bucket.
  * There are (BOND_MASK + 1) buckets. */
 #define BOND_MASK 0xff
+#define RECIRC_RULE_PRIORITY 20   /* Priority level for internal rules */
 
 /* A hash bucket for mapping a flow to a slave.
  * "struct bond" has an array of (BOND_MASK + 1) of these. */
@@ -57,6 +64,12 @@ struct bond_entry {
     struct bond_slave *slave;   /* Assigned slave, NULL if unassigned. */
     uint64_t tx_bytes;          /* Count of bytes recently transmitted. */
     struct list list_node;      /* In bond_slave's 'entries' list. */
+
+    /* Recirculation. */
+    struct rule *pr_rule;       /* Post recirculation rule for this entry.*/
+    uint64_t pr_tx_bytes;       /* Record the rule tx_bytes to figure out
+                                   the delta to update the tx_bytes entry
+                                   above.*/
 };
 
 /* A bond slave, that is, one of the links comprising a bond. */
@@ -68,6 +81,7 @@ struct bond_slave {
 
     struct netdev *netdev;      /* Network device, owned by the client. */
     unsigned int change_seq;    /* Tracks changes in 'netdev'. */
+    ofp_port_t  ofp_port;       /* Open flow port number */
     char *name;                 /* Name (a copy of netdev_get_name(netdev)). */
 
     /* Link status. */
@@ -86,6 +100,7 @@ struct bond_slave {
 struct bond {
     struct hmap_node hmap_node; /* In 'all_bonds' hmap. */
     char *name;                 /* Name provided by client. */
+    struct ofproto_dpif *ofproto; /* The bridge this bond belongs to. */
 
     /* Slaves. */
     struct hmap slaves;
@@ -111,6 +126,8 @@ struct bond {
     int rebalance_interval;      /* Interval between rebalances, in ms. */
     long long int next_rebalance; /* Next rebalancing time. */
     bool send_learning_packets;
+    uint32_t recirc_id;          /* Non zero if recirculation can be used.*/
+    struct hmap pr_rule_ops;     /* Helps to maintain post recirculation rules.*/
 
     /* Legacy compatibility. */
     long long int next_fake_iface_update; /* LLONG_MAX if disabled. */
@@ -119,6 +136,21 @@ struct bond {
     struct ovs_refcount ref_cnt;
 };
 
+/* What to do with a bond_pr_rule_op. */
+enum bond_op {
+    ADD,        /* Add the rule to ofproto's flow table. */
+    DEL,        /* Delete the rule from the ofproto's flow table. */
+};
+
+/* A rule to add to or delete from ofproto's internal flow table. */
+struct bond_pr_rule_op {
+    struct hmap_node hmap_node;
+    struct match match;
+    ofp_port_t out_ofport;
+    enum bond_op op;
+    struct rule *pr_rule;
+};
+
 static void bond_entry_reset(struct bond *) OVS_REQ_WRLOCK(rwlock);
 static struct bond_slave *bond_slave_lookup(struct bond *, const void *slave_)
     OVS_REQ_RDLOCK(rwlock);
@@ -185,17 +217,21 @@ bond_mode_to_string(enum bond_mode balance) {
  * The caller should register each slave on the new bond by calling
  * bond_slave_register().  */
 struct bond *
-bond_create(const struct bond_settings *s)
+bond_create(const struct bond_settings *s, struct ofproto_dpif *ofproto)
 {
     struct bond *bond;
 
     bond = xzalloc(sizeof *bond);
+    bond->ofproto = ofproto;
     hmap_init(&bond->slaves);
     list_init(&bond->enabled_slaves);
     ovs_mutex_init(&bond->mutex);
     bond->next_fake_iface_update = LLONG_MAX;
     ovs_refcount_init(&bond->ref_cnt);
 
+    bond->recirc_id = 0;
+    hmap_init(&bond->pr_rule_ops);
+
     bond_reconfigure(bond, s);
     return bond;
 }
@@ -216,6 +252,7 @@ void
 bond_unref(struct bond *bond)
 {
     struct bond_slave *slave, *next_slave;
+    struct bond_pr_rule_op *pr_op, *next_op;
 
     if (!bond || ovs_refcount_unref(&bond->ref_cnt) != 1) {
         return;
@@ -236,9 +273,126 @@ bond_unref(struct bond *bond)
     ovs_mutex_destroy(&bond->mutex);
     free(bond->hash);
     free(bond->name);
+
+    HMAP_FOR_EACH_SAFE(pr_op, next_op, hmap_node, &bond->pr_rule_ops) {
+        hmap_remove(&bond->pr_rule_ops, &pr_op->hmap_node);
+        free(pr_op);
+    }
+    hmap_destroy(&bond->pr_rule_ops);
+
+    if (bond->recirc_id) {
+        ofproto_dpif_free_recirc_id(bond->ofproto, bond->recirc_id);
+    }
+
     free(bond);
 }
 
+static void
+add_pr_rule(struct bond *bond, const struct match *match,
+            ofp_port_t out_ofport, struct rule *rule)
+{
+    uint32_t hash = match_hash(match, 0);
+    struct bond_pr_rule_op *pr_op;
+
+    HMAP_FOR_EACH_WITH_HASH(pr_op, hmap_node, hash, &bond->pr_rule_ops) {
+        if (match_equal(&pr_op->match, match)) {
+            pr_op->op = ADD;
+            pr_op->out_ofport = out_ofport;
+            pr_op->pr_rule = rule;
+            return;
+        }
+    }
+
+    pr_op = xmalloc(sizeof *pr_op);
+    pr_op->match = *match;
+    pr_op->op = ADD;
+    pr_op->out_ofport = out_ofport;
+    pr_op->pr_rule = rule;
+    hmap_insert(&bond->pr_rule_ops, &pr_op->hmap_node, hash);
+}
+
+static void
+update_recirc_rules(struct bond *bond)
+{
+    struct match match;
+    struct bond_pr_rule_op *pr_op, *next_op;
+    uint64_t ofpacts_stub[128 / 8];
+    struct ofpbuf ofpacts;
+    int i;
+
+    ofpbuf_use_stub(&ofpacts, ofpacts_stub, sizeof ofpacts_stub);
+
+    HMAP_FOR_EACH(pr_op, hmap_node, &bond->pr_rule_ops) {
+        pr_op->op = DEL;
+    }
+
+    if ((bond->hash == NULL) || (!bond->recirc_id)) {
+        return;
+    }
+
+    for (i = 0; i < BOND_MASK + 1; i++) {
+        struct bond_slave *slave = bond->hash[i].slave;
+
+        if (slave) {
+            match_init_catchall(&match);
+            match_set_recirc_id(&match, bond->recirc_id);
+            /* recirc_id -> metadata to speed up look ups. */
+            match_set_metadata(&match, htonll(bond->recirc_id));
+            match_set_dp_hash_masked(&match, i, BOND_MASK);
+
+            add_pr_rule(bond, &match, slave->ofp_port,
+                            bond->hash[i].pr_rule);
+        }
+    }
+
+    HMAP_FOR_EACH_SAFE(pr_op, next_op, hmap_node, &bond->pr_rule_ops) {
+        int error;
+        struct rule *rule;
+        switch (pr_op->op) {
+        case ADD:
+            ofpbuf_clear(&ofpacts);
+            ofpact_put_OUTPUT(&ofpacts)->port = pr_op->out_ofport;
+            error = ofproto_dpif_add_internal_flow(bond->ofproto,
+                                                   &pr_op->match,
+                                                   RECIRC_RULE_PRIORITY,
+                                                   &ofpacts, &rule);
+            if (error) {
+                char *err_s = match_to_string(&pr_op->match,
+                                              RECIRC_RULE_PRIORITY);
+
+                VLOG_ERR("Bond: Failed to add post recirculation flow %s",
+                    err_s);
+                free(err_s);
+                pr_op->pr_rule = NULL;
+            } else {
+                pr_op->pr_rule = rule;
+            }
+            break;
+
+        case DEL:
+            error = ofproto_dpif_delete_internal_flow(bond->ofproto,
+                                                      &pr_op->match,
+                                                      RECIRC_RULE_PRIORITY);
+            if (error) {
+                char *err_s = match_to_string(&pr_op->match,
+                                              RECIRC_RULE_PRIORITY);
+
+                VLOG_ERR("Bond: Failed to remove post recirculation flow %s",
+                    err_s);
+                free(err_s);
+            }
+
+            hmap_remove(&bond->pr_rule_ops, &pr_op->hmap_node);
+            pr_op->pr_rule = NULL;
+            free(pr_op);
+            break;
+        }
+    }
+
+    ofpbuf_uninit(&ofpacts);
+}
+
+
 /* Updates 'bond''s overall configuration to 's'.
  *
  * The caller should register each slave on 'bond' by calling
@@ -299,6 +453,15 @@ bond_reconfigure(struct bond *bond, const struct bond_settings *s)
         bond->bond_revalidate = false;
     }
 
+    if (bond->balance != BM_AB) {
+        if (!bond->recirc_id) {
+            bond->recirc_id = ofproto_dpif_alloc_recirc_id(bond->ofproto);
+        }
+    } else if (bond->recirc_id) {
+        ofproto_dpif_free_recirc_id(bond->ofproto, bond->recirc_id);
+        bond->recirc_id = 0;
+    }
+
     if (bond->balance == BM_AB || !bond->hash || revalidate) {
         bond_entry_reset(bond);
     }
@@ -327,7 +490,8 @@ bond_slave_set_netdev__(struct bond_slave *slave, struct netdev *netdev)
  * 'slave_' or destroying 'bond'.
  */
 void
-bond_slave_register(struct bond *bond, void *slave_, struct netdev *netdev)
+bond_slave_register(struct bond *bond, void *slave_,
+                    ofp_port_t ofport, struct netdev *netdev)
 {
     struct bond_slave *slave;
 
@@ -339,6 +503,7 @@ bond_slave_register(struct bond *bond, void *slave_, struct netdev *netdev)
         hmap_insert(&bond->slaves, &slave->hmap_node, hash_pointer(slave_, 0));
         slave->bond = bond;
         slave->aux = slave_;
+        slave->ofp_port = ofport;
         slave->delay_expires = LLONG_MAX;
         slave->name = xstrdup(netdev_get_name(netdev));
         bond->bond_revalidate = true;
@@ -688,6 +853,87 @@ bond_choose_output_slave(struct bond *bond, const struct flow *flow,
     return aux;
 }
 
+/* Recirculation. */
+static void
+bond_entry_acount(struct bond_entry *entry, uint64_t rule_tx_bytes)
+    OVS_REQ_RDLOCK(rwlock)
+{
+    if (entry->slave) {
+        uint64_t delta;
+
+        delta = rule_tx_bytes - entry->pr_tx_bytes;
+        entry->tx_bytes += delta;
+        entry->pr_tx_bytes = rule_tx_bytes;
+    }
+}
+
+/* Maintain bond stats using post recirculation rule byte counters.*/
+void
+bond_recirculation_account(struct bond *bond)
+{
+    int i;
+
+    ovs_rwlock_rdlock(&rwlock);
+    for (i=0; i<=BOND_MASK; i++) {
+        struct bond_entry *entry = &bond->hash[i];
+        struct rule *rule;
+
+        rule = entry->pr_rule;
+        if (rule) {
+            uint64_t n_packets OVS_UNUSED;
+            long long int used OVS_UNUSED;
+            uint64_t n_bytes;
+
+            rule->ofproto->ofproto_class->rule_get_stats(
+                rule, &n_packets, &n_bytes, &used);
+            bond_entry_acount(entry, n_bytes);
+        }
+    }
+    ovs_rwlock_unlock(&rwlock);
+}
+
+bool
+bond_may_recirc(const struct bond *bond, uint32_t *recirc_id,
+                uint32_t *hash_bias)
+{
+    bool rv = false;
+
+    if (bond->balance == BM_TCP) {
+        if (recirc_id) {
+            *recirc_id = bond->recirc_id;
+        }
+        if (hash_bias) {
+            *hash_bias = bond->basis;
+        }
+        rv = true;
+    }
+
+    return rv;
+}
+
+void
+bond_update_post_recirc_rules(struct bond* bond, const bool force)
+{
+   struct bond_entry *e;
+   bool update_rules = force;  /* Always update rules if caller forces it.*/
+
+   /* Make sure all bond entries are populated */
+   for (e = bond->hash; e <= &bond->hash[BOND_MASK]; e++) {
+       if (!e->slave || !e->slave->enabled) {
+            update_rules = true;
+            e->slave = CONTAINER_OF(hmap_random_node(&bond->slaves),
+                                    struct bond_slave, hmap_node);
+            if (!e->slave->enabled) {
+                e->slave = bond->active_slave;
+            }
+        }
+   }
+
+   if (update_rules) {
+        update_recirc_rules(bond);
+   }
+}
+
 /* Rebalancing. */
 
 static bool
@@ -845,19 +1091,22 @@ reinsert_bal(struct list *bals, struct bond_slave *slave)
 
 /* If 'bond' needs rebalancing, does so.
  *
- * The caller should have called bond_account() for each active flow, to ensure
- * that flow data is consistently accounted at this point. */
-void
+ * The caller should have called bond_account() for each active flow, or, if
+ * recirculation is used, have called bond_recirculation_account(bond),
+ * to ensure that flow data is consistently accounted at this point.
+ *
+ * Return whether rebalancing took place. */
+bool
 bond_rebalance(struct bond *bond)
 {
     struct bond_slave *slave;
     struct bond_entry *e;
     struct list bals;
+    bool rebalanced = false;
 
     ovs_rwlock_wrlock(&rwlock);
     if (!bond_is_balanced(bond) || time_msec() < bond->next_rebalance) {
-        ovs_rwlock_unlock(&rwlock);
-        return;
+        goto done;
     }
     bond->next_rebalance = time_msec() + bond->rebalance_interval;
 
@@ -916,6 +1165,7 @@ bond_rebalance(struct bond *bond)
             /* Re-sort 'bals'. */
             reinsert_bal(&bals, from);
             reinsert_bal(&bals, to);
+	    rebalanced = true;
         } else {
             /* Can't usefully migrate anything away from 'from'.
              * Don't reconsider it. */
@@ -932,7 +1182,10 @@ bond_rebalance(struct bond *bond)
             e->slave = NULL;
         }
     }
+
+done:
     ovs_rwlock_unlock(&rwlock);
+    return rebalanced;
 }
 
 /* Bonding unixctl user interface functions. */
@@ -972,15 +1225,15 @@ bond_unixctl_list(struct unixctl_conn *conn,
     struct ds ds = DS_EMPTY_INITIALIZER;
     const struct bond *bond;
 
-    ds_put_cstr(&ds, "bond\ttype\tslaves\n");
+    ds_put_cstr(&ds, "bond\ttype\trecircID\tslaves\n");
 
     ovs_rwlock_rdlock(&rwlock);
     HMAP_FOR_EACH (bond, hmap_node, all_bonds) {
         const struct bond_slave *slave;
         size_t i;
 
-        ds_put_format(&ds, "%s\t%s\t",
-                      bond->name, bond_mode_to_string(bond->balance));
+        ds_put_format(&ds, "%s\t%s\t%d\t", bond->name,
+                      bond_mode_to_string(bond->balance), bond->recirc_id);
 
         i = 0;
         HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) {
@@ -1003,12 +1256,18 @@ bond_print_details(struct ds *ds, const struct bond *bond)
     struct shash slave_shash = SHASH_INITIALIZER(&slave_shash);
     const struct shash_node **sorted_slaves = NULL;
     const struct bond_slave *slave;
+    bool may_recirc;
+    uint32_t recirc_id;
     int i;
 
     ds_put_format(ds, "---- %s ----\n", bond->name);
     ds_put_format(ds, "bond_mode: %s\n",
                   bond_mode_to_string(bond->balance));
 
+    may_recirc = bond_may_recirc(bond, &recirc_id, NULL);
+    ds_put_format(ds, "bond may use recirculation: %s, Recirc-ID : %d\n",
+                  may_recirc ? "yes" : "no", may_recirc ? recirc_id: -1);
+
     ds_put_format(ds, "bond-hash-basis: %"PRIu32"\n", bond->basis);
 
     ds_put_format(ds, "updelay: %d ms\n", bond->updelay);
diff --git a/ofproto/bond.h b/ofproto/bond.h
index 5b3814e..e5ceb45 100644
--- a/ofproto/bond.h
+++ b/ofproto/bond.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, 2009, 2010, 2011 Nicira, Inc.
+ * Copyright (c) 2008, 2009, 2010, 2011, 2014 Nicira, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -19,12 +19,13 @@
 
 #include <stdbool.h>
 #include <stdint.h>
-
+#include "ofproto-provider.h"
 #include "packets.h"
 
 struct flow;
 struct netdev;
 struct ofpbuf;
+struct ofproto_dpif;
 enum lacp_status;
 
 /* How flows are balanced among bond slaves. */
@@ -60,12 +61,13 @@ struct bond_settings {
 void bond_init(void);
 
 /* Basics. */
-struct bond *bond_create(const struct bond_settings *);
+struct bond *bond_create(const struct bond_settings *,
+                         struct ofproto_dpif *ofproto);
 void bond_unref(struct bond *);
 struct bond *bond_ref(const struct bond *);
 
 bool bond_reconfigure(struct bond *, const struct bond_settings *);
-void bond_slave_register(struct bond *, void *slave_, struct netdev *);
+void bond_slave_register(struct bond *, void *slave_, ofp_port_t ofport, struct netdev *);
 void bond_slave_set_netdev(struct bond *, void *slave_, struct netdev *);
 void bond_slave_unregister(struct bond *, const void *slave);
 
@@ -94,6 +96,27 @@ void *bond_choose_output_slave(struct bond *, const struct flow *,
 /* Rebalancing. */
 void bond_account(struct bond *, const struct flow *, uint16_t vlan,
                   uint64_t n_bytes);
-void bond_rebalance(struct bond *);
+bool bond_rebalance(struct bond *);
 
+/* Recirculation
+ *
+ * Only balance_tcp mode uses recirculation.
+ *
+ * When recirculation is used, each bond port is assigned with a unique
+ * recirc_id. The output action to the bond port will be replaced by
+ * a RECIRC action.
+ *
+ *   ... actions= ... RECIRC(L4_HASH, recirc_id) ....
+ *
+ * On handling first output packet, 256 post recirculation flows are installed:
+ *
+ *  recirc_id=<bond_recirc_id>, dp_hash=<[0..255]>/0xff, actions: output<slave>
+ *
+ * Bond module pulls stats from those post recirculation rules. If rebalancing
+ * is needed, those rules are updated with new output actions.
+*/
+void bond_update_post_recirc_rules(struct bond *, const bool force);
+bool bond_may_recirc(const struct bond *, uint32_t *recirc_id,
+                     uint32_t *hash_bias);
+void bond_recirculation_account(struct bond *);
 #endif /* bond.h */
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index e26b7c9..7805f19 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -58,6 +58,8 @@ VLOG_DEFINE_THIS_MODULE(ofproto_dpif_xlate);
 /* Maximum depth of flow table recursion (due to resubmit actions) in a
  * flow translation. */
 #define MAX_RESUBMIT_RECURSION 64
+#define MAX_INTERNAL_RESUBMITS 1   /* Max resbmits allowed using rules in
+                                      internal table. */
 
 /* Maximum number of resubmit actions in a flow translation, whether they are
  * recursive or not. */
@@ -89,6 +91,9 @@ struct xbridge {
     bool has_in_band;             /* Bridge has in band control? */
     bool forward_bpdu;            /* Bridge forwards STP BPDUs? */
 
+    /* True if the datapath supports recirculation. */
+    bool enable_recirc;
+
     /* True if the datapath supports variable-length
      * OVS_USERSPACE_ATTR_USERDATA in OVS_ACTION_ATTR_USERSPACE actions.
      * False if the datapath supports only 8-byte (or shorter) userdata. */
@@ -226,8 +231,8 @@ static void do_xlate_actions(const struct ofpact *, size_t ofpacts_len,
                              struct xlate_ctx *);
 static void xlate_actions__(struct xlate_in *, struct xlate_out *)
     OVS_REQ_RDLOCK(xlate_rwlock);
-    static void xlate_normal(struct xlate_ctx *);
-    static void xlate_report(struct xlate_ctx *, const char *);
+static void xlate_normal(struct xlate_ctx *);
+static void xlate_report(struct xlate_ctx *, const char *);
 static void xlate_table_action(struct xlate_ctx *, ofp_port_t in_port,
                                uint8_t table_id, bool may_packet_in,
                                bool honor_table_miss);
@@ -257,6 +262,7 @@ xlate_ofproto_set(struct ofproto_dpif *ofproto, const char *name,
                   const struct dpif_ipfix *ipfix,
                   const struct netflow *netflow, enum ofp_config_flags frag,
                   bool forward_bpdu, bool has_in_band,
+                  bool enable_recirc,
                   bool variable_length_userdata,
                   size_t max_mpls_depth)
 {
@@ -310,6 +316,7 @@ xlate_ofproto_set(struct ofproto_dpif *ofproto, const char *name,
     xbridge->frag = frag;
     xbridge->miss_rule = miss_rule;
     xbridge->no_packet_in_rule = no_packet_in_rule;
+    xbridge->enable_recirc = enable_recirc;
     xbridge->variable_length_userdata = variable_length_userdata;
     xbridge->max_mpls_depth = max_mpls_depth;
 }
@@ -1131,10 +1138,23 @@ output_normal(struct xlate_ctx *ctx, const struct xbundle *out_xbundle,
         /* Partially configured bundle with no slaves.  Drop the packet. */
         return;
     } else if (!out_xbundle->bond) {
+        ctx->xout->use_recirc = false;
         xport = CONTAINER_OF(list_front(&out_xbundle->xports), struct xport,
                              bundle_node);
     } else {
         struct ofport_dpif *ofport;
+        struct xlate_recirc *xr = &ctx->xout->recirc;
+
+        if (ctx->xbridge->enable_recirc) {
+            ctx->xout->use_recirc = bond_may_recirc(
+                out_xbundle->bond, &xr->recirc_id, &xr->hash_bias);
+
+            if (ctx->xout->use_recirc) {
+                /* Only TCP mode uses recirculation. */
+                xr->hash_alg = OVS_RECIRC_HASH_ALG_L4;
+                bond_update_post_recirc_rules(out_xbundle->bond, false);
+            }
+        }
 
         ofport = bond_choose_output_slave(out_xbundle->bond, &ctx->xin->flow,
                                           &ctx->xout->wc, vid);
@@ -1817,8 +1837,20 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
         ctx->xout->slow |= commit_odp_actions(flow, &ctx->base_flow,
                                               &ctx->xout->odp_actions,
                                               &ctx->xout->wc);
-        nl_msg_put_odp_port(&ctx->xout->odp_actions, OVS_ACTION_ATTR_OUTPUT,
-                            out_port);
+
+        if (ctx->xout->use_recirc) {
+            struct ovs_action_recirc *act_recirc;
+            struct xlate_recirc *xr = &ctx->xout->recirc;
+
+            act_recirc = nl_msg_put_unspec_uninit(&ctx->xout->odp_actions,
+                               OVS_ACTION_ATTR_RECIRC, sizeof *act_recirc);
+            act_recirc->recirc_id = xr->recirc_id;
+            act_recirc->hash_alg = xr->hash_alg;
+            act_recirc->hash_bias = xr->hash_bias;
+        } else {
+            nl_msg_put_odp_port(&ctx->xout->odp_actions, OVS_ACTION_ATTR_OUTPUT,
+                                out_port);
+        }
 
         ctx->sflow_odp_port = odp_port;
         ctx->sflow_n_outputs++;
@@ -1862,10 +1894,10 @@ xlate_resubmit_resource_check(struct xlate_ctx *ctx)
 {
     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
 
-    if (ctx->recurse >= MAX_RESUBMIT_RECURSION) {
+    if (ctx->recurse >= MAX_RESUBMIT_RECURSION + MAX_INTERNAL_RESUBMITS) {
         VLOG_ERR_RL(&rl, "resubmit actions recursed over %d times",
                     MAX_RESUBMIT_RECURSION);
-    } else if (ctx->resubmits >= MAX_RESUBMITS) {
+    } else if (ctx->resubmits >= MAX_RESUBMITS + MAX_INTERNAL_RESUBMITS) {
         VLOG_ERR_RL(&rl, "over %d resubmit actions", MAX_RESUBMITS);
     } else if (ctx->xout->odp_actions.size > UINT16_MAX) {
         VLOG_ERR_RL(&rl, "resubmits yielded over 64 kB of actions");
@@ -2081,6 +2113,15 @@ xlate_ofpact_resubmit(struct xlate_ctx *ctx,
 {
     ofp_port_t in_port;
     uint8_t table_id;
+    bool may_packet_in = false;
+    bool honor_table_miss = false;
+
+    if (ctx->rule && rule_dpif_is_internal(ctx->rule)) {
+        /* Still allow missed packets to be sent to the controller
+         * if resubmitting from an internal table. */
+        may_packet_in = true;
+        honor_table_miss = true;
+    }
 
     in_port = resubmit->in_port;
     if (in_port == OFPP_IN_PORT) {
@@ -2092,7 +2133,8 @@ xlate_ofpact_resubmit(struct xlate_ctx *ctx,
         table_id = ctx->table_id;
     }
 
-    xlate_table_action(ctx, in_port, table_id, false, false);
+    xlate_table_action(ctx, in_port, table_id, may_packet_in,
+                       honor_table_miss);
 }
 
 static void
@@ -3063,6 +3105,7 @@ xlate_actions__(struct xlate_in *xin, struct xlate_out *xout)
         ctx.rule = rule;
     }
     xout->fail_open = ctx.rule && rule_dpif_is_fail_open(ctx.rule);
+    xout->use_recirc = false;
 
     if (xin->ofpacts) {
         ofpacts = xin->ofpacts;
diff --git a/ofproto/ofproto-dpif-xlate.h b/ofproto/ofproto-dpif-xlate.h
index 8b01d4e..966eec1 100644
--- a/ofproto/ofproto-dpif-xlate.h
+++ b/ofproto/ofproto-dpif-xlate.h
@@ -32,6 +32,12 @@ struct dpif_ipfix;
 struct dpif_sflow;
 struct mac_learning;
 
+struct xlate_recirc {
+    uint32_t recirc_id;  /* !0 Use recirculation instead of output.*/
+    uint8_t  hash_alg;   /* !0 Compute hash for recirc before.*/
+    uint32_t hash_bias;  /* Compute hash for recirc before.*/
+};
+
 struct xlate_out {
     /* Wildcards relevant in translation.  Any fields that were used to
      * calculate the action must be set for caching and kernel
@@ -50,6 +56,9 @@ struct xlate_out {
     ofp_port_t nf_output_iface; /* Output interface index for NetFlow. */
     mirror_mask_t mirrors;      /* Bitmap of associated mirrors. */
 
+    bool use_recirc;            /* Should generate recirc? */
+    struct xlate_recirc recirc; /* Information used for generating
+                                 * recirculation actions */
     uint64_t odp_actions_stub[256 / 8];
     struct ofpbuf odp_actions;
 };
@@ -129,7 +138,8 @@ void xlate_ofproto_set(struct ofproto_dpif *, const char *name,
                        const struct mbridge *, const struct dpif_sflow *,
                        const struct dpif_ipfix *, const struct netflow *,
                        enum ofp_config_flags, bool forward_bpdu,
-                       bool has_in_band, bool variable_length_userdata,
+                       bool has_in_band, bool enable_recirc,
+                       bool variable_length_userdata,
                        size_t mpls_label_stack_length)
     OVS_REQ_WRLOCK(xlate_rwlock);
 void xlate_remove_ofproto(struct ofproto_dpif *) OVS_REQ_WRLOCK(xlate_rwlock);
@@ -161,8 +171,8 @@ int xlate_receive(const struct dpif_backer *, struct ofpbuf *packet,
 void xlate_actions(struct xlate_in *, struct xlate_out *)
     OVS_EXCLUDED(xlate_rwlock);
 void xlate_in_init(struct xlate_in *, struct ofproto_dpif *,
-                   const struct flow *, struct rule_dpif *, uint16_t tcp_flags,
-                   const struct ofpbuf *packet);
+                   const struct flow *, struct rule_dpif *,
+                   uint16_t tcp_flags, const struct ofpbuf *packet);
 void xlate_out_uninit(struct xlate_out *);
 void xlate_actions_for_side_effects(struct xlate_in *);
 void xlate_out_copy(struct xlate_out *dst, const struct xlate_out *src);
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 7172cb2..daf7488 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -253,7 +253,9 @@ struct dpif_backer {
 
     bool recv_set_enable; /* Enables or disables receiving packets. */
 
+    /* Recirculation. */
     struct recirc_id_pool *rid_pool;       /* Recirculation ID pool. */
+    bool enable_recirc;   /* True if the datapath supports recirculation */
 
     /* True if the datapath supports variable-length
      * OVS_USERSPACE_ATTR_USERDATA in OVS_ACTION_ATTR_USERSPACE actions.
@@ -332,9 +334,15 @@ ofproto_dpif_get_max_mpls_depth(const struct ofproto_dpif *ofproto)
     return ofproto->backer->max_mpls_depth;
 }
 
+bool
+ofproto_dpif_get_enable_recirc(const struct ofproto_dpif *ofproto)
+{
+    return ofproto->backer->enable_recirc;
+}
+
 static struct ofport_dpif *get_ofp_port(const struct ofproto_dpif *ofproto,
                                         ofp_port_t ofp_port);
-static void ofproto_trace(struct ofproto_dpif *, const struct flow *,
+static void ofproto_trace(struct ofproto_dpif *, struct flow *,
                           const struct ofpbuf *packet,
                           const struct ofpact[], size_t ofpacts_len,
                           struct ds *);
@@ -571,6 +579,7 @@ type_run(const char *type)
                               ofproto->netflow, ofproto->up.frag_handling,
                               ofproto->up.forward_bpdu,
                               connmgr_has_in_band(ofproto->up.connmgr),
+                              ofproto->backer->enable_recirc,
                               ofproto->backer->variable_length_userdata,
                               ofproto->backer->max_mpls_depth);
 
@@ -796,6 +805,7 @@ struct odp_garbage {
 
 static bool check_variable_length_userdata(struct dpif_backer *backer);
 static size_t check_max_mpls_depth(struct dpif_backer *backer);
+static bool check_recirc(struct dpif_backer *backer);
 
 static int
 open_dpif_backer(const char *type, struct dpif_backer **backerp)
@@ -896,6 +906,7 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp)
         close_dpif_backer(backer);
         return error;
     }
+    backer->enable_recirc = check_recirc(backer);
     backer->variable_length_userdata = check_variable_length_userdata(backer);
     backer->max_mpls_depth = check_max_mpls_depth(backer);
     backer->rid_pool = recirc_id_pool_create();
@@ -907,6 +918,60 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp)
     return error;
 }
 
+/* Tests whether 'backer''s datapath supports recirculation, by installing a
+ * probe flow whose key carries a nonzero recirc_id and dp_hash.  We need
+ * to disable some features on older datapaths that don't support this
+ * feature.
+ *
+ * Returns false if the datapath rejects the probe flow, true if it accepts
+ * it (or reports that the flow already exists).  On success an attempt is
+ * made to delete the probe flow again. */
+static bool
+check_recirc(struct dpif_backer *backer)
+{
+    struct flow flow;
+    struct odputil_keybuf keybuf;
+    struct ofpbuf key;
+    int error;
+    bool enable_recirc = false;
+
+    memset(&flow, 0, sizeof flow);
+    flow.recirc_id = 1;
+    flow.dp_hash = 1;
+
+    ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
+    odp_flow_key_from_flow(&key, &flow, 0);
+
+    error = dpif_flow_put(backer->dpif, DPIF_FP_CREATE | DPIF_FP_MODIFY,
+                          key.data, key.size, NULL, 0, NULL, 0, NULL);
+    if (error && error != EEXIST) {
+        if (error != EINVAL) {
+            VLOG_WARN("%s: Reciculation flow probe failed (%s)",
+                      dpif_name(backer->dpif), ovs_strerror(error));
+        }
+        goto done;
+    }
+
+    error = dpif_flow_del(backer->dpif, key.data, key.size, NULL);
+    if (error) {
+        VLOG_WARN("%s: failed to delete recirculation feature probe flow",
+                  dpif_name(backer->dpif));
+    }
+
+    enable_recirc = true;
+
+done:
+    if (enable_recirc) {
+        VLOG_INFO("%s: Datapath supports recirculation",
+                  dpif_name(backer->dpif));
+    } else {
+        VLOG_INFO("%s: Datapath does not support recirculation",
+                  dpif_name(backer->dpif));
+    }
+
+    return enable_recirc;
+}
+
 /* Tests whether 'backer''s datapath supports variable-length
  * OVS_USERSPACE_ATTR_USERDATA in OVS_ACTION_ATTR_USERSPACE actions.  We need
  * to disable some features on older datapaths that don't support this
@@ -1090,51 +1155,27 @@ construct(struct ofproto *ofproto_)
 
     ofproto_init_tables(ofproto_, N_TABLES);
     error = add_internal_flows(ofproto);
+
     ofproto->up.tables[TBL_INTERNAL].flags = OFTABLE_HIDDEN | OFTABLE_READONLY;
 
     return error;
 }
 
 static int
-add_internal_flow(struct ofproto_dpif *ofproto, int id,
+add_internal_miss_flow(struct ofproto_dpif *ofproto, int id,
                   const struct ofpbuf *ofpacts, struct rule_dpif **rulep)
 {
-    struct ofputil_flow_mod fm;
-    struct classifier *cls;
+    struct match match;
     int error;
+    struct rule *rule;
 
-    match_init_catchall(&fm.match);
-    fm.priority = 0;
-    match_set_reg(&fm.match, 0, id);
-    fm.new_cookie = htonll(0);
-    fm.cookie = htonll(0);
-    fm.cookie_mask = htonll(0);
-    fm.modify_cookie = false;
-    fm.table_id = TBL_INTERNAL;
-    fm.command = OFPFC_ADD;
-    fm.idle_timeout = 0;
-    fm.hard_timeout = 0;
-    fm.buffer_id = 0;
-    fm.out_port = 0;
-    fm.flags = 0;
-    fm.ofpacts = ofpacts->data;
-    fm.ofpacts_len = ofpacts->size;
+    match_init_catchall(&match);
+    match_set_reg(&match, 0, id);
 
-    error = ofproto_flow_mod(&ofproto->up, &fm);
-    if (error) {
-        VLOG_ERR_RL(&rl, "failed to add internal flow %d (%s)",
-                    id, ofperr_to_string(error));
-        return error;
-    }
-
-    cls = &ofproto->up.tables[TBL_INTERNAL].cls;
-    fat_rwlock_rdlock(&cls->rwlock);
-    *rulep = rule_dpif_cast(rule_from_cls_rule(
-                                classifier_lookup(cls, &fm.match.flow, NULL)));
-    ovs_assert(*rulep != NULL);
-    fat_rwlock_unlock(&cls->rwlock);
+    error = ofproto_dpif_add_internal_flow(ofproto, &match, 0, ofpacts, &rule);
+    *rulep = error ? NULL : rule_dpif_cast(rule);
 
-    return 0;
+    return error;
 }
 
 static int
@@ -1143,6 +1184,9 @@ add_internal_flows(struct ofproto_dpif *ofproto)
     struct ofpact_controller *controller;
     uint64_t ofpacts_stub[128 / 8];
     struct ofpbuf ofpacts;
+    struct rule *unused_rulep OVS_UNUSED;
+    struct ofpact_resubmit *resubmit;
+    struct match match;
     int error;
     int id;
 
@@ -1155,20 +1199,52 @@ add_internal_flows(struct ofproto_dpif *ofproto)
     controller->reason = OFPR_NO_MATCH;
     ofpact_pad(&ofpacts);
 
-    error = add_internal_flow(ofproto, id++, &ofpacts, &ofproto->miss_rule);
+    error = add_internal_miss_flow(ofproto, id++, &ofpacts, &ofproto->miss_rule);
     if (error) {
         return error;
     }
 
     ofpbuf_clear(&ofpacts);
-    error = add_internal_flow(ofproto, id++, &ofpacts,
+    error = add_internal_miss_flow(ofproto, id++, &ofpacts,
                               &ofproto->no_packet_in_rule);
     if (error) {
         return error;
     }
 
-    error = add_internal_flow(ofproto, id++, &ofpacts,
+    error = add_internal_miss_flow(ofproto, id++, &ofpacts,
                               &ofproto->drop_frags_rule);
+    if (error) {
+        return error;
+    }
+
+    /* Continue non-recirculation rule lookups from table 0.
+     *
+     * (priority=2), recirc=0, actions=resubmit(, 0)
+     */
+    resubmit = ofpact_put_RESUBMIT(&ofpacts);
+    resubmit->ofpact.compat = 0;
+    resubmit->in_port = OFPP_IN_PORT;
+    resubmit->table_id = 0;
+
+    match_init_catchall(&match);
+    match_set_recirc_id(&match, 0);
+
+    error = ofproto_dpif_add_internal_flow(ofproto, &match, 2,  &ofpacts,
+                                           &unused_rulep);
+    if (error) {
+        return error;
+    }
+
+    /* Drop any runaway recirc rule lookups. Recirc_id has to be
+     * non-zero when reaching this rule.
+     *
+     * (priority=1), *, actions=drop
+     */
+    ofpbuf_clear(&ofpacts);
+    match_init_catchall(&match);
+    error = ofproto_dpif_add_internal_flow(ofproto, &match, 1,  &ofpacts,
+                                           &unused_rulep);
+
     return error;
 }
 
@@ -1236,6 +1312,7 @@ run(struct ofproto *ofproto_)
 {
     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
     uint64_t new_seq, new_dump_seq;
+    const bool enable_recirc = ofproto_dpif_get_enable_recirc(ofproto);
 
     if (mbridge_need_revalidate(ofproto->mbridge)) {
         ofproto->backer->need_revalidate = REV_RECONFIGURE;
@@ -1313,12 +1390,17 @@ run(struct ofproto *ofproto_)
 
         /* All outstanding data in existing flows has been accounted, so it's a
          * good time to do bond rebalancing. */
-        if (ofproto->has_bonded_bundles) {
+        if (enable_recirc && ofproto->has_bonded_bundles) {
             struct ofbundle *bundle;
 
             HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
-                if (bundle->bond) {
-                    bond_rebalance(bundle->bond);
+                struct bond *bond = bundle->bond;
+
+                if (bond && bond_may_recirc(bond, NULL, NULL)) {
+                    bond_recirculation_account(bond);
+                    if (bond_rebalance(bundle->bond)) {
+                        bond_update_post_recirc_rules(bond, true);
+                    }
                 }
             }
         }
@@ -2336,12 +2418,13 @@ bundle_set(struct ofproto *ofproto_, void *aux,
                 ofproto->backer->need_revalidate = REV_RECONFIGURE;
             }
         } else {
-            bundle->bond = bond_create(s->bond);
+            bundle->bond = bond_create(s->bond, ofproto);
             ofproto->backer->need_revalidate = REV_RECONFIGURE;
         }
 
         LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
-            bond_slave_register(bundle->bond, port, port->up.netdev);
+            bond_slave_register(bundle->bond, port,
+                                port->up.ofp_port, port->up.netdev);
         }
     } else {
         bond_unref(bundle->bond);
@@ -2991,6 +3074,7 @@ ofproto_dpif_execute_actions(struct ofproto_dpif *ofproto,
     ovs_assert((rule != NULL) != (ofpacts != NULL));
 
     dpif_flow_stats_extract(flow, packet, time_msec(), &stats);
+
     if (rule) {
         rule_dpif_credit_stats(rule, &stats);
     }
@@ -3073,20 +3157,13 @@ rule_dpif_get_actions(const struct rule_dpif *rule)
     return rule_get_actions(&rule->up);
 }
 
-/* Lookup 'flow' in table 0 of 'ofproto''s classifier.
- * If 'wc' is non-null, sets the fields that were relevant as part of
- * the lookup. Returns the table_id where a match or miss occurred.
- *
- * The return value will be zero unless there was a miss and
- * OFPTC_TABLE_MISS_CONTINUE is in effect for the sequence of tables
- * where misses occur. */
-uint8_t
-rule_dpif_lookup(struct ofproto_dpif *ofproto, const struct flow *flow,
-                 struct flow_wildcards *wc, struct rule_dpif **rule)
+static uint8_t
+rule_dpif_lookup__ (struct ofproto_dpif *ofproto, const struct flow *flow,
+                    struct flow_wildcards *wc, struct rule_dpif **rule)
 {
     enum rule_dpif_lookup_verdict verdict;
     enum ofputil_port_config config = 0;
-    uint8_t table_id = 0;
+    uint8_t table_id = TBL_INTERNAL;
 
     verdict = rule_dpif_lookup_from_table(ofproto, flow, wc, true,
                                           &table_id, rule);
@@ -3117,6 +3194,23 @@ rule_dpif_lookup(struct ofproto_dpif *ofproto, const struct flow *flow,
     return table_id;
 }
 
+/* Lookup 'flow' in table 0 of 'ofproto''s classifier.
+ * If 'wc' is non-null, sets the fields that were relevant as part of
+ * the lookup. Returns the table_id where a match or miss occurred.
+ *
+ * The return value will be zero unless there was a miss and
+ * OFPTC_TABLE_MISS_CONTINUE is in effect for the sequence of tables
+ * where misses occur. */
+uint8_t
+rule_dpif_lookup(struct ofproto_dpif *ofproto, struct flow *flow,
+                 struct flow_wildcards *wc, struct rule_dpif **rule)
+{
+    /* Set metadata to the value of recirc_id to speed up internal
+     * rule lookup. */
+    flow->metadata = htonll(flow->recirc_id);
+    return rule_dpif_lookup__(ofproto, flow, wc, rule);
+}
+
 static struct rule_dpif *
 rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto, uint8_t table_id,
                           const struct flow *flow, struct flow_wildcards *wc)
@@ -4031,7 +4125,7 @@ exit:
  * If 'ofpacts' is nonnull then its 'ofpacts_len' bytes specify the actions to
  * trace, otherwise the actions are determined by a flow table lookup. */
 static void
-ofproto_trace(struct ofproto_dpif *ofproto, const struct flow *flow,
+ofproto_trace(struct ofproto_dpif *ofproto, struct flow *flow,
               const struct ofpbuf *packet,
               const struct ofpact ofpacts[], size_t ofpacts_len,
               struct ds *ds)
@@ -4383,7 +4477,7 @@ set_realdev(struct ofport *ofport_, ofp_port_t realdev_ofp_port, int vid)
     if (realdev_ofp_port && ofport->bundle) {
         /* vlandevs are enslaved to their realdevs, so they are not allowed to
          * themselves be part of a bundle. */
-        bundle_set(ofport->up.ofproto, ofport->bundle, NULL);
+        bundle_set(ofport_->ofproto, ofport->bundle, NULL);
     }
 
     ofport->realdev_ofp_port = realdev_ofp_port;
@@ -4634,6 +4728,78 @@ ofproto_dpif_free_recirc_id(struct ofproto_dpif *ofproto, uint32_t recirc_id)
     recirc_id_free(backer->rid_pool, recirc_id);
 }
 
+int
+ofproto_dpif_add_internal_flow(struct ofproto_dpif *ofproto,
+                               struct match *match, int priority,
+                               const struct ofpbuf *ofpacts,
+                               struct rule **rulep)
+{
+    struct ofputil_flow_mod fm;
+    struct rule_dpif *rule;
+    int error;
+
+    fm.match = *match;
+    fm.priority = priority;
+    fm.new_cookie = htonll(0);
+    fm.cookie = htonll(0);
+    fm.cookie_mask = htonll(0);
+    fm.modify_cookie = false;
+    fm.table_id = TBL_INTERNAL;
+    fm.command = OFPFC_ADD;
+    fm.idle_timeout = 0;
+    fm.hard_timeout = 0;
+    fm.buffer_id = 0;
+    fm.out_port = 0;
+    fm.flags = OFPUTIL_FF_HIDDEN_FIELDS | OFPUTIL_FF_NO_READONLY;
+    fm.ofpacts = ofpacts->data;
+    fm.ofpacts_len = ofpacts->size;
+
+    error = ofproto_flow_mod(&ofproto->up, &fm);
+    if (error) {
+        VLOG_ERR_RL(&rl, "failed to add internal flow (%s)",
+                    ofperr_to_string(error));
+        *rulep = NULL;
+        return error;
+    }
+
+    rule = rule_dpif_lookup_in_table(ofproto, TBL_INTERNAL, &match->flow,
+                                     &match->wc);
+    if (rule) {
+        rule_dpif_unref(rule);
+        *rulep = &rule->up;
+    } else {
+        OVS_NOT_REACHED();
+    }
+    return 0;
+}
+
+int
+ofproto_dpif_delete_internal_flow(struct ofproto_dpif *ofproto,
+                               struct match *match, int priority)
+{
+    struct ofputil_flow_mod fm;
+    int error;
+
+    fm.match = *match;
+    fm.priority = priority;
+    fm.new_cookie = htonll(0);
+    fm.cookie = htonll(0);
+    fm.cookie_mask = htonll(0);
+    fm.modify_cookie = false;
+    fm.table_id = TBL_INTERNAL;
+    fm.flags = OFPUTIL_FF_HIDDEN_FIELDS | OFPUTIL_FF_NO_READONLY;
+    fm.command = OFPFC_DELETE_STRICT;
+
+    error = ofproto_flow_mod(&ofproto->up, &fm);
+    if (error) {
+        VLOG_ERR_RL(&rl, "failed to delete internal flow (%s)",
+                    ofperr_to_string(error));
+        return error;
+    }
+
+    return 0;
+}
+
 const struct ofproto_class ofproto_dpif_class = {
     init,
     enumerate_types,
diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h
index 088ff89..99b9da1 100644
--- a/ofproto/ofproto-dpif.h
+++ b/ofproto/ofproto-dpif.h
@@ -21,6 +21,7 @@
 #include "odp-util.h"
 #include "ofp-util.h"
 #include "ovs-thread.h"
+#include "ofproto-provider.h"
 #include "timer.h"
 #include "util.h"
 #include "ovs-thread.h"
@@ -76,9 +77,10 @@ extern struct ovs_rwlock xlate_rwlock;
  *   actions into datapath actions. */
 
 size_t ofproto_dpif_get_max_mpls_depth(const struct ofproto_dpif *);
+bool ofproto_dpif_get_enable_recirc(const struct ofproto_dpif *);
 
-uint8_t rule_dpif_lookup(struct ofproto_dpif *, const struct flow *,
-                         struct flow_wildcards *, struct rule_dpif **rule);
+uint8_t rule_dpif_lookup(struct ofproto_dpif *, struct flow *,
+                      struct flow_wildcards *, struct rule_dpif **rule);
 
 enum rule_dpif_lookup_verdict rule_dpif_lookup_from_table(struct ofproto_dpif *,
                                                           const struct flow *,
@@ -96,6 +98,7 @@ void rule_dpif_credit_stats(struct rule_dpif *rule ,
 bool rule_dpif_is_fail_open(const struct rule_dpif *);
 bool rule_dpif_is_table_miss(const struct rule_dpif *);
 bool rule_dpif_is_internal(const struct rule_dpif *);
+uint8_t rule_dpif_get_table(const struct rule_dpif *);
 
 struct rule_actions *rule_dpif_get_actions(const struct rule_dpif *);
 
@@ -199,4 +202,10 @@ struct ofport_dpif *odp_port_to_ofport(const struct dpif_backer *, odp_port_t);
 
 uint32_t ofproto_dpif_alloc_recirc_id(struct ofproto_dpif *ofproto);
 void ofproto_dpif_free_recirc_id(struct ofproto_dpif *ofproto, uint32_t recirc_id);
+int ofproto_dpif_add_internal_flow(struct ofproto_dpif *ofproto,
+                                   struct match *match, int proiroty,
+                                   const struct ofpbuf *ofpacts,
+                                   struct rule **rulep);
+int ofproto_dpif_delete_internal_flow(struct ofproto_dpif *ofproto, struct match *match,
+                      int proiroty);
 #endif /* ofproto-dpif.h */
diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index a7bf4df..e06faa2 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -205,7 +205,8 @@ void ofproto_port_set_state(struct ofport *, enum ofputil_port_state);
  */
 enum oftable_flags {
     OFTABLE_HIDDEN = 1 << 0,   /* Hide from most OpenFlow operations. */
-    OFTABLE_READONLY = 1 << 1  /* Don't allow OpenFlow to change this table. */
+    OFTABLE_READONLY = 1 << 1  /* Don't allow OpenFlow controller to change
+                                  this table. */
 };
 
 /* A flow table within a "struct ofproto".
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index b2d6526..19ef631 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -261,7 +261,8 @@ struct ofport_usage {
 /* rule. */
 static void ofproto_rule_destroy__(struct rule *);
 static void ofproto_rule_send_removed(struct rule *, uint8_t reason);
-static bool rule_is_modifiable(const struct rule *);
+static bool rule_is_modifiable(const struct rule *rule,
+                               enum ofputil_flow_mod_flags flag);
 
 /* OpenFlow. */
 static enum ofperr add_flow(struct ofproto *, struct ofconn *,
@@ -1143,6 +1144,24 @@ ofproto_get_n_tables(const struct ofproto *ofproto)
     return ofproto->n_tables;
 }
 
+/* Returns the number of Controller visible OpenFlow tables
+ * in 'ofproto'. This number will exclude Hidden tables.
+ * This function's return value is less than or equal to that of
+ * ofproto_get_n_tables(). */
+uint8_t
+ofproto_get_n_visible_tables(const struct ofproto *ofproto)
+{
+    uint8_t n = ofproto->n_tables;
+
+    /* Count only non-hidden tables in the number of tables.  (Hidden tables,
+     * if present, are always at the end.) */
+    while(n && (ofproto->tables[n - 1].flags & OFTABLE_HIDDEN)) {
+        n--;
+    }
+
+    return n;
+}
+
 /* Configures the OpenFlow table in 'ofproto' with id 'table_id' with the
  * settings from 's'.  'table_id' must be in the range 0 through the number of
  * OpenFlow tables in 'ofproto' minus 1, inclusive.
@@ -2741,19 +2760,27 @@ destroy_rule_executes(struct ofproto *ofproto)
 static bool
 ofproto_rule_is_hidden(const struct rule *rule)
 {
-    return rule->cr.priority > UINT16_MAX;
+    return (rule->cr.priority > UINT16_MAX);
 }
 
-static enum oftable_flags
-rule_get_flags(const struct rule *rule)
+static bool
+oftable_is_modifiable(const struct oftable *table,
+                      enum ofputil_flow_mod_flags flags)
 {
-    return rule->ofproto->tables[rule->table_id].flags;
+    if (flags & OFPUTIL_FF_NO_READONLY) {
+        return true;
+    }
+
+    return !(table->flags & OFTABLE_READONLY);
 }
 
 static bool
-rule_is_modifiable(const struct rule *rule)
+rule_is_modifiable(const struct rule *rule, enum ofputil_flow_mod_flags flags)
 {
-    return !(rule_get_flags(rule) & OFTABLE_READONLY);
+    const struct oftable *rule_table;
+
+    rule_table = &rule->ofproto->tables[rule->table_id];
+    return oftable_is_modifiable(rule_table, flags);
 }
 
 static enum ofperr
@@ -2771,26 +2798,14 @@ handle_features_request(struct ofconn *ofconn, const struct ofp_header *oh)
     struct ofport *port;
     bool arp_match_ip;
     struct ofpbuf *b;
-    int n_tables;
-    int i;
 
     ofproto->ofproto_class->get_features(ofproto, &arp_match_ip,
                                          &features.actions);
     ovs_assert(features.actions & OFPUTIL_A_OUTPUT); /* sanity check */
 
-    /* Count only non-hidden tables in the number of tables.  (Hidden tables,
-     * if present, are always at the end.) */
-    n_tables = ofproto->n_tables;
-    for (i = 0; i < ofproto->n_tables; i++) {
-        if (ofproto->tables[i].flags & OFTABLE_HIDDEN) {
-            n_tables = i;
-            break;
-        }
-    }
-
     features.datapath_id = ofproto->datapath_id;
     features.n_buffers = pktbuf_capacity();
-    features.n_tables = n_tables;
+    features.n_tables = ofproto_get_n_visible_tables(ofproto);
     features.capabilities = (OFPUTIL_C_FLOW_STATS | OFPUTIL_C_TABLE_STATS |
                              OFPUTIL_C_PORT_STATS | OFPUTIL_C_QUEUE_STATS);
     if (arp_match_ip) {
@@ -3968,10 +3983,18 @@ add_flow(struct ofproto *ofproto, struct ofconn *ofconn,
 
     table = &ofproto->tables[table_id];
 
-    if (table->flags & OFTABLE_READONLY) {
+    if (!oftable_is_modifiable(table, fm->flags)) {
         return OFPERR_OFPBRC_EPERM;
     }
 
+    if (!(fm->flags & OFPUTIL_FF_HIDDEN_FIELDS)) {
+        if (!match_has_default_hidden_fields(&fm->match)) {
+            VLOG_WARN_RL(&rl, "%s: (add_flow) only internal flows can set "
+                         "non-default values to hidden fields", ofproto->name);
+            return OFPERR_OFPBRC_EPERM;
+        }
+    }
+
     cls_rule_init(&cr, &fm->match, fm->priority);
 
     /* Transform "add" into "modify" if there's an existing identical flow. */
@@ -3980,7 +4003,7 @@ add_flow(struct ofproto *ofproto, struct ofconn *ofconn,
     fat_rwlock_unlock(&table->cls.rwlock);
     if (rule) {
         cls_rule_destroy(&cr);
-        if (!rule_is_modifiable(rule)) {
+        if (!rule_is_modifiable(rule, fm->flags)) {
             return OFPERR_OFPBRC_EPERM;
         } else if (rule->pending) {
             return OFPROTO_POSTPONE;
@@ -4108,7 +4131,7 @@ modify_flows__(struct ofproto *ofproto, struct ofconn *ofconn,
 
         /* FIXME: Implement OFPFUTIL_FF_RESET_COUNTS */
 
-        if (rule_is_modifiable(rule)) {
+        if (rule_is_modifiable(rule, fm->flags)) {
             /* At least one rule is modifiable, don't report EPERM error. */
             error = 0;
         } else {
diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h
index 309511c..0732e32 100644
--- a/ofproto/ofproto.h
+++ b/ofproto/ofproto.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
+ * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -382,6 +382,7 @@ struct ofproto_table_settings {
 };
 
 int ofproto_get_n_tables(const struct ofproto *);
+uint8_t ofproto_get_n_visible_tables(const struct ofproto *);
 void ofproto_configure_table(struct ofproto *, int table_id,
                              const struct ofproto_table_settings *);
 
diff --git a/tests/classifier.at b/tests/classifier.at
index a46c526..eeb3f6d 100644
--- a/tests/classifier.at
+++ b/tests/classifier.at
@@ -40,22 +40,22 @@ table=0 in_port=3 priority=0,ip,action=drop
 AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
 AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=2,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout])
 AT_CHECK([tail -2 stdout], [0],
-  [Megaflow: skb_priority=0,tcp,in_port=2,nw_dst=192.168.0.0/16,nw_frag=no
+  [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=2,nw_dst=192.168.0.0/16,nw_frag=no
 Datapath actions: 1
 ])
 AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout])
 AT_CHECK([tail -2 stdout], [0],
-  [Megaflow: skb_priority=0,tcp,in_port=1,nw_dst=192.168.0.2,nw_frag=no
+  [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=192.168.0.2,nw_frag=no
 Datapath actions: drop
 ])
 AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout])
 AT_CHECK([tail -2 stdout], [0],
-  [Megaflow: skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=80
+  [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=80
 Datapath actions: drop
 ])
 AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=79'], [0], [stdout])
 AT_CHECK([tail -2 stdout], [0],
-  [Megaflow: skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=79
+  [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=79
 Datapath actions: 2
 ])
 OVS_VSWITCHD_STOP
@@ -87,22 +87,22 @@ table=0 in_port=3 priority=0,ip,action=drop
 AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
 AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout])
 AT_CHECK([tail -2 stdout], [0],
-  [Megaflow: skb_priority=0,tcp,in_port=1,nw_dst=192.168.0.0/16,nw_frag=no
+  [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=192.168.0.0/16,nw_frag=no
 Datapath actions: drop
 ])
 AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=2,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout])
 AT_CHECK([tail -2 stdout], [0],
-  [Megaflow: skb_priority=0,tcp,in_port=2,nw_dst=192.168.0.0/16,nw_frag=no
+  [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=2,nw_dst=192.168.0.0/16,nw_frag=no
 Datapath actions: 1
 ])
 AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout])
 AT_CHECK([tail -2 stdout], [0],
-  [Megaflow: skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=80
+  [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=80
 Datapath actions: drop
 ])
 AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=79'], [0], [stdout])
 AT_CHECK([tail -2 stdout], [0],
-  [Megaflow: skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_src=8,tp_dst=79
+  [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_src=8,tp_dst=79
 Datapath actions: 3
 ])
 OVS_VSWITCHD_STOP(["/'prefixes' with incompatible field: ipv6_label/d"])
diff --git a/tests/lacp.at b/tests/lacp.at
index d44bee0..0db2077 100644
--- a/tests/lacp.at
+++ b/tests/lacp.at
@@ -1,5 +1,10 @@
 AT_BANNER([lacp])
 
+# Strips out Recirculation ID information since it may change over time.
+m4_define([STRIP_RECIRC_ID], [[sed '
+    s/Recirc-ID.*$/<del>/
+' ]])
+
 AT_SETUP([lacp - config])
 OVS_VSWITCHD_START([\
         add-port br0 p1 --\
@@ -113,6 +118,7 @@ slave: p2: expired attached
 AT_CHECK([ovs-appctl bond/show], [0], [dnl
 ---- bond ----
 bond_mode: active-backup
+bond may use recirculation: no, Recirc-ID : -1
 bond-hash-basis: 0
 updelay: 0 ms
 downdelay: 0 ms
@@ -182,8 +188,8 @@ done
 AT_CHECK(
   [ovs-appctl lacp/show bond0
 ovs-appctl lacp/show bond1
-ovs-appctl bond/show bond0
-ovs-appctl bond/show bond1], [0], [stdout])
+ovs-appctl bond/show bond0 | STRIP_RECIRC_ID
+ovs-appctl bond/show bond1 | STRIP_RECIRC_ID ], [0], [stdout])
 AT_CHECK([sed '/active slave/d' stdout], [0], [dnl
 ---- bond0 ----
 	status: active negotiated
@@ -275,6 +281,7 @@ slave: p3: current attached
 	partner state: activity timeout aggregation synchronized collecting distributing
 ---- bond0 ----
 bond_mode: balance-tcp
+bond may use recirculation: yes, <del>
 bond-hash-basis: 0
 updelay: 0 ms
 downdelay: 0 ms
@@ -288,6 +295,7 @@ slave p1: enabled
 
 ---- bond1 ----
 bond_mode: balance-tcp
+bond may use recirculation: yes, <del>
 bond-hash-basis: 0
 updelay: 0 ms
 downdelay: 0 ms
@@ -316,8 +324,8 @@ for i in `seq 0 40`; do ovs-appctl time/warp 100; done
 AT_CHECK(
   [ovs-appctl lacp/show bond0
 ovs-appctl lacp/show bond1
-ovs-appctl bond/show bond0
-ovs-appctl bond/show bond1], [0], [dnl
+ovs-appctl bond/show bond0 | STRIP_RECIRC_ID
+ovs-appctl bond/show bond1 | STRIP_RECIRC_ID ], [0], [dnl
 ---- bond0 ----
 	status: active negotiated
 	sys_id: aa:55:aa:55:00:00
@@ -408,6 +416,7 @@ slave: p3: current attached
 	partner state: activity timeout aggregation synchronized collecting distributing
 ---- bond0 ----
 bond_mode: balance-tcp
+bond may use recirculation: yes, <del>
 bond-hash-basis: 0
 updelay: 0 ms
 downdelay: 0 ms
@@ -422,6 +431,7 @@ slave p1: enabled
 
 ---- bond1 ----
 bond_mode: balance-tcp
+bond may use recirculation: yes, <del>
 bond-hash-basis: 0
 updelay: 0 ms
 downdelay: 0 ms
@@ -442,8 +452,8 @@ for i in `seq 0 40`; do ovs-appctl time/warp 100; done
 AT_CHECK(
   [ovs-appctl lacp/show bond0
 ovs-appctl lacp/show bond1
-ovs-appctl bond/show bond0
-ovs-appctl bond/show bond1], [0], [dnl
+ovs-appctl bond/show bond0 | STRIP_RECIRC_ID
+ovs-appctl bond/show bond1 | STRIP_RECIRC_ID ], [0], [dnl
 ---- bond0 ----
 	status: active negotiated
 	sys_id: aa:55:aa:55:00:00
@@ -534,6 +544,7 @@ slave: p3: current attached
 	partner state: activity timeout aggregation synchronized collecting distributing
 ---- bond0 ----
 bond_mode: balance-tcp
+bond may use recirculation: yes, <del>
 bond-hash-basis: 0
 updelay: 0 ms
 downdelay: 0 ms
@@ -548,6 +559,7 @@ slave p1: enabled
 
 ---- bond1 ----
 bond_mode: balance-tcp
+bond may use recirculation: yes, <del>
 bond-hash-basis: 0
 updelay: 0 ms
 downdelay: 0 ms
diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
index af5fd8f..740970a 100644
--- a/tests/ofproto-dpif.at
+++ b/tests/ofproto-dpif.at
@@ -47,6 +47,148 @@ skb_priority(0),in_port(8),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_
 OVS_VSWITCHD_STOP
 AT_CLEANUP
 
+AT_SETUP([ofproto-dpif, active-backup bonding])
+# Create br0 with interfaces p1, p2 and p7, creating bond0 with p1 and p2
+#    and br1 with interfaces p3, p4 and p8.
+# Toggle p1 and p2 of bond0 up and down to test bonding in active-backup mode.
+OVS_VSWITCHD_START(
+  [add-bond br0 bond0 p1 p2 bond_mode=active-backup --\
+   set interface p1 type=dummy options:pstream=punix:$OVS_RUNDIR/p1.sock ofport_request=1 -- \
+   set interface p2 type=dummy options:pstream=punix:$OVS_RUNDIR/p2.sock ofport_request=2 -- \
+   add-port br0 p7 -- set interface p7 ofport_request=7 type=dummy -- \
+   add-br br1 -- \
+   set bridge br1 other-config:hwaddr=aa:66:aa:66:00:00 -- \
+   set bridge br1 datapath-type=dummy other-config:datapath-id=1234 \
+                  fail-mode=secure -- \
+   add-port br1 p3 -- set interface p3 type=dummy options:stream=unix:$OVS_RUNDIR/p1.sock ofport_request=3 -- \
+   add-port br1 p4 -- set interface p4 type=dummy options:stream=unix:$OVS_RUNDIR/p2.sock ofport_request=4 -- \
+   add-port br1 p8 -- set interface p8 ofport_request=8 type=dummy --])
+
+AT_CHECK([ovs-ofctl add-flow br0 action=normal])
+AT_CHECK([ovs-ofctl add-flow br1 action=normal])
+ovs-appctl netdev-dummy/set-admin-state up
+ovs-appctl time/warp 100
+ovs-appctl netdev-dummy/set-admin-state p2 down
+ovs-appctl time/stop
+ovs-appctl time/warp 100
+AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(7),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
+AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(7),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.3,dst=10.0.0.4,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
+ovs-appctl time/warp 100
+ovs-appctl netdev-dummy/set-admin-state p2 up
+ovs-appctl netdev-dummy/set-admin-state p1 down
+ovs-appctl time/warp 100
+AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(7),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0d),eth_type(0x0800),ipv4(src=10.0.0.5,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
+AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(7),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0e),eth_type(0x0800),ipv4(src=10.0.0.6,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
+ovs-appctl time/warp 100
+ovs-appctl time/warp 100
+AT_CHECK([ovs-appctl dpif/dump-flows br1 | STRIP_XOUT], [0], [dnl
+skb_priority(0),in_port(3),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff), packets:0, bytes:0, used:never, actions: <del>
+skb_priority(0),in_port(3),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.3/0.0.0.0,dst=10.0.0.4/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff), packets:0, bytes:0, used:never, actions: <del>
+skb_priority(0),in_port(4),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0d),eth_type(0x0800),ipv4(src=10.0.0.5/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff), packets:0, bytes:0, used:never, actions: <del>
+skb_priority(0),in_port(4),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0e),eth_type(0x0800),ipv4(src=10.0.0.6/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff), packets:0, bytes:0, used:never, actions: <del>
+skb_priority(0),in_port(4),eth(src=50:54:00:00:00:09,dst=ff:ff:ff:ff:ff:ff),eth_type(0x8035), packets:0, bytes:0, used:never, actions: <del>
+skb_priority(0),in_port(4),eth(src=50:54:00:00:00:0b,dst=ff:ff:ff:ff:ff:ff),eth_type(0x8035), packets:0, bytes:0, used:never, actions: <del>
+])
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([ofproto-dpif, balance-slb bonding])
+# Create br0 with interfaces bond0(p1, p2, p3) and p7,
+#    and br1 with interfaces p4, p5, p6 and p8.
+#    p1 <-> p4, p2 <-> p5, p3 <-> p6
+# Send some traffic and make sure it is spread based on source MAC.
+OVS_VSWITCHD_START(
+  [add-bond br0 bond0 p1 p2 p3 bond_mode=balance-slb --\
+   set interface p1 type=dummy options:pstream=punix:$OVS_RUNDIR/p1.sock ofport_request=1 -- \
+   set interface p2 type=dummy options:pstream=punix:$OVS_RUNDIR/p2.sock ofport_request=2 -- \
+   set interface p3 type=dummy options:pstream=punix:$OVS_RUNDIR/p3.sock ofport_request=3 -- \
+   add-port br0 p7 -- set interface p7 ofport_request=7 type=dummy -- \
+   add-br br1 -- \
+   set bridge br1 other-config:hwaddr=aa:66:aa:66:00:00 -- \
+   set bridge br1 datapath-type=dummy other-config:datapath-id=1234 \
+                  fail-mode=secure -- \
+   add-port br1 p4 -- set interface p4 type=dummy options:stream=unix:$OVS_RUNDIR/p1.sock ofport_request=4 -- \
+   add-port br1 p5 -- set interface p5 type=dummy options:stream=unix:$OVS_RUNDIR/p2.sock ofport_request=5 -- \
+   add-port br1 p6 -- set interface p6 type=dummy options:stream=unix:$OVS_RUNDIR/p3.sock ofport_request=6 -- \
+   add-port br1 p8 -- set interface p8 ofport_request=8 type=dummy --])
+
+AT_CHECK([ovs-ofctl add-flow br0 action=normal])
+AT_CHECK([ovs-ofctl add-flow br1 action=normal])
+AT_CHECK([ovs-appctl netdev-dummy/set-admin-state up], 0, [OK
+])
+ovs-appctl netdev-dummy/set-admin-state up
+ovs-appctl time/stop
+ovs-appctl time/warp 100
+(
+for i in `seq 0 100 |xargs printf '%02x\n'`;
+    do
+    pkt="in_port(7),eth(src=50:54:00:00:00:$i,dst=50:54:00:00:01:00),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)"
+    AT_CHECK([ovs-appctl netdev-dummy/receive p7 $pkt])
+    done
+)
+ovs-appctl time/warp 100
+AT_CHECK([ovs-appctl dpif/dump-flows br1 > br1_flows.txt])
+# Make sure there is a reasonable distribution across all three ports.
+# We don't want to make this check precise, in case the hash function changes.
+AT_CHECK([test `egrep 'in_port\(4\)' br1_flows.txt |wc -l` -gt 3])
+AT_CHECK([test `egrep 'in_port\(5\)' br1_flows.txt |wc -l` -gt 3])
+AT_CHECK([test `egrep 'in_port\(6\)' br1_flows.txt |wc -l` -gt 3])
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([ofproto-dpif, balance-tcp bonding])
+# Create br0 with interfaces bond0(p1, p2, p3) and p7,
+#    and br1 with interfaces bond1(p4, p5, p6) and p8.
+#    bond0 <-> bond1
+# Send some traffic and make sure it is spread based on L4 headers.
+OVS_VSWITCHD_START(
+  [add-bond br0 bond0 p1 p2 p3 bond_mode=balance-tcp lacp=active \
+        other-config:lacp-time=fast other-config:bond-rebalance-interval=0 --\
+   set interface p1 type=dummy options:pstream=punix:$OVS_RUNDIR/p1.sock ofport_request=1 -- \
+   set interface p2 type=dummy options:pstream=punix:$OVS_RUNDIR/p2.sock ofport_request=2 -- \
+   set interface p3 type=dummy options:pstream=punix:$OVS_RUNDIR/p3.sock ofport_request=3 -- \
+   add-port br0 p7 -- set interface p7 ofport_request=7 type=dummy -- \
+   add-br br1 -- \
+   set bridge br1 other-config:hwaddr=aa:66:aa:66:00:00 -- \
+   set bridge br1 datapath-type=dummy other-config:datapath-id=1234 \
+                  fail-mode=secure -- \
+   add-bond br1 bond1 p4 p5 p6 bond_mode=balance-tcp lacp=active \
+        other-config:lacp-time=fast other-config:bond-rebalance-interval=0 --\
+   set interface p4 type=dummy options:stream=unix:$OVS_RUNDIR/p1.sock ofport_request=4 -- \
+   set interface p5 type=dummy options:stream=unix:$OVS_RUNDIR/p2.sock ofport_request=5 -- \
+   set interface p6 type=dummy options:stream=unix:$OVS_RUNDIR/p3.sock ofport_request=6 -- \
+   add-port br1 p8 -- set interface p8 ofport_request=8 type=dummy --])
+AT_CHECK([ovs-appctl netdev-dummy/set-admin-state up], 0, [OK
+])
+AT_CHECK([ovs-ofctl add-flow br0 action=normal])
+AT_CHECK([ovs-ofctl add-flow br1 action=normal])
+AT_CHECK([ovs-appctl upcall/disable-megaflows], [0], [megaflows disabled
+], [])
+sleep 1;
+ovs-appctl time/stop
+ovs-appctl time/warp 100
+ovs-appctl lacp/show > lacp.txt
+ovs-appctl bond/show > bond.txt
+(
+for i in `seq 10 100` ;
+    do
+    pkt="in_port(7),eth(src=50:54:00:00:00:05,dst=50:54:00:00:01:00),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=$i),tcp_flags(0x010)"
+    AT_CHECK([ovs-appctl netdev-dummy/receive p7 $pkt])
+    done
+)
+ovs-appctl time/warp 100
+ovs-appctl time/warp 100
+ovs-appctl time/warp 100
+AT_CHECK([ovs-appctl dpif/dump-flows br0 |grep tcp > br0_flows.txt])
+AT_CHECK([ovs-appctl dpif/dump-flows br1 |grep tcp > br1_flows.txt])
+# Make sure there is a reasonable distribution across all three ports.
+# We don't want to make this check precise, in case the hash function changes.
+AT_CHECK([test `grep in_port.4 br1_flows.txt |wc -l` -gt 7])
+AT_CHECK([test `grep in_port.5 br1_flows.txt |wc -l` -gt 7])
+AT_CHECK([test `grep in_port.6 br1_flows.txt |wc -l` -gt 7])
+OVS_VSWITCHD_STOP()
+AT_CLEANUP
+
 AT_SETUP([ofproto-dpif - resubmit])
 OVS_VSWITCHD_START
 ADD_OF_PORTS([br0], [1], [10], [11], [12], [13], [14], [15],
@@ -3826,7 +3968,7 @@ ovs-appctl time/stop
 ovs-appctl time/warp 5000
 AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
 AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
-sleep 1
+
 AT_CHECK([ovs-appctl dpif/dump-flows br0 | STRIP_XOUT], [0], [dnl
 skb_priority(0),in_port(7),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2/255.255.255.255,dst=10.0.0.1/255.255.255.255,proto=1/0xff,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8,code=0), packets:0, bytes:0, used:never, actions: <del>
 skb_priority(0),in_port(7),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4/255.255.255.255,dst=10.0.0.3/255.255.255.255,proto=1/0xff,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8,code=0), packets:0, bytes:0, used:never, actions: <del>
-- 
1.7.9.5




More information about the dev mailing list