[ovs-dev] [eviction 12/12] Add support for limiting the number of flows in an OpenFlow flow table.

Ben Pfaff blp at nicira.com
Sat Jan 14 00:43:53 UTC 2012


Signed-off-by: Ben Pfaff <blp at nicira.com>
---
 NEWS                       |    4 +
 ofproto/ofproto-provider.h |   34 +++-
 ofproto/ofproto.c          |  512 ++++++++++++++++++++++++++++++++++++++++++--
 ofproto/ofproto.h          |   22 ++-
 tests/ofproto-macros.at    |   16 ++
 tests/ofproto.at           |  220 +++++++++++++++++++
 tests/ovs-vsctl.at         |    2 +
 utilities/ovs-vsctl.8.in   |    3 +
 utilities/ovs-vsctl.c      |    6 +-
 vswitchd/bridge.c          |   63 ++++++
 vswitchd/vswitch.gv        |    2 +
 vswitchd/vswitch.ovsschema |   26 ++-
 vswitchd/vswitch.pic       |   99 +++++----
 vswitchd/vswitch.xml       |   98 +++++++++
 14 files changed, 1037 insertions(+), 70 deletions(-)

diff --git a/NEWS b/NEWS
index 1fe83bb..189cc4b 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,9 @@
 post-v1.5.0
 ------------------------
+    - New support for limiting the number of flows in an OpenFlow flow
+      table, with configurable policy for evicting flows upon
+      overflow.  See the Flow_Table table in ovs-vswitchd.conf.db(5)
+      for more information.
     - ofproto-provider interface:
         - "struct rule" has a new member "used" that ofproto implementations
           should maintain by updating with ofproto_rule_update_used().
diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index 9fb5eff..5cb2b0f 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009, 2010, 2011 Nicira Networks.
+ * Copyright (c) 2009, 2010, 2011, 2012 Nicira Networks.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -22,6 +22,7 @@
 #include "ofproto/ofproto.h"
 #include "cfm.h"
 #include "classifier.h"
+#include "heap.h"
 #include "list.h"
 #include "ofp-errors.h"
 #include "shash.h"
@@ -111,6 +112,33 @@ enum oftable_flags {
 struct oftable {
     enum oftable_flags flags;
     struct classifier cls;      /* Contains "struct rule"s. */
+    char *name;                 /* Table name exposed via OpenFlow, or NULL. */
+
+    /* Maximum number of flows or UINT_MAX if there is no limit besides any
+     * limit imposed by resource limitations. */
+    unsigned int max_flows;
+
+    /* These members determine the handling of an attempt to add a flow that
+     * would cause the table to have more than 'max_flows' flows.
+     *
+     * If 'eviction_fields' is NULL, overflows will be rejected with an error.
+     *
+     * If 'eviction_fields' is nonnull (regardless of whether n_eviction_fields
+     * is nonzero), an overflow will cause a flow to be removed.  The flow to
+     * be removed is chosen to give fairness among groups distinguished by
+     * different values for the subfields within 'eviction_fields'. */
+    struct mf_subfield *eviction_fields;
+    size_t n_eviction_fields;
+
+    /* Eviction groups.
+     *
+     * When a flow is added that would cause the table to have more than
+     * 'max_flows' flows, and 'eviction_fields' is nonnull, these groups are
+     * used to decide which rule to evict: the rule is chosen from the eviction
+     * group that contains the greatest number of rules. */
+    uint32_t eviction_group_id_basis;
+    struct hmap eviction_groups_by_id;
+    struct heap eviction_groups_by_size;
 };
 
 /* Assigns TABLE to each oftable, in turn, in OFPROTO.
@@ -142,6 +170,10 @@ struct rule {
     uint8_t table_id;            /* Index in ofproto's 'tables' array. */
     bool send_flow_removed;      /* Send a flow removed message? */
 
+    /* Eviction groups. */
+    struct heap_node evg_node;   /* In eviction_group's "rules" heap. */
+    struct eviction_group *eviction_group; /* NULL if not in any group. */
+
     union ofp_action *actions;   /* OpenFlow actions. */
     int n_actions;               /* Number of elements in actions[]. */
 };
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index 357c488..ccfc181 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -29,6 +29,7 @@
 #include "dynamic-string.h"
 #include "hash.h"
 #include "hmap.h"
+#include "meta-flow.h"
 #include "netdev.h"
 #include "nx-match.h"
 #include "ofp-errors.h"
@@ -42,6 +43,7 @@
 #include "pinsched.h"
 #include "pktbuf.h"
 #include "poll-loop.h"
+#include "random.h"
 #include "shash.h"
 #include "sset.h"
 #include "timeval.h"
@@ -63,6 +65,7 @@ COVERAGE_DEFINE(ofproto_update_port);
 
 enum ofproto_state {
     S_OPENFLOW,                 /* Processing OpenFlow commands. */
+    S_EVICT,                    /* Evicting flows from over-limit tables. */
     S_FLUSH,                    /* Deleting all flow table rules. */
 };
 
@@ -128,15 +131,42 @@ static void ofoperation_destroy(struct ofoperation *);
 static void oftable_init(struct oftable *);
 static void oftable_destroy(struct oftable *);
 
+static void oftable_set_name(struct oftable *, const char *name);
+
+static void oftable_disable_eviction(struct oftable *);
+static void oftable_enable_eviction(struct oftable *,
+                                    const struct mf_subfield *fields,
+                                    size_t n_fields);
+
 static void oftable_remove_rule(struct rule *);
 static struct rule *oftable_replace_rule(struct rule *);
 static void oftable_substitute_rule(struct rule *old, struct rule *new);
 
-/* rule. */
-static void ofproto_rule_destroy__(struct rule *);
-static void ofproto_rule_send_removed(struct rule *, uint8_t reason);
-static bool rule_is_modifiable(const struct rule *);
-static bool rule_is_hidden(const struct rule *);
+/* A set of rules within a single OpenFlow table (oftable) that have the same
+ * values for the oftable's eviction_fields.  A rule to be evicted, when one is
+ * needed, is taken from the eviction group that contains the greatest number
+ * of rules.
+ *
+ * An oftable owns any number of eviction groups, each of which contains any
+ * number of rules.
+ *
+ * Membership in an eviction group is imprecise, based on the hash of the
+ * oftable's eviction_fields (in the eviction_group's id_node.hash member).
+ * That is, if two rules have different eviction_fields, but those
+ * eviction_fields hash to the same value, then they will belong to the same
+ * eviction_group anyway.
+ *
+ * (When eviction is not enabled on an oftable, we don't track any eviction
+ * groups, to save time and space.) */
+struct eviction_group {
+    struct hmap_node id_node;   /* In oftable's "eviction_groups_by_id". */
+    struct heap_node size_node; /* In oftable's "eviction_groups_by_size". */
+    struct heap rules;          /* Contains "struct rule"s. */
+};
+
+static struct rule *choose_rule_to_evict(struct oftable *, struct rule *avoid);
+static void ofproto_evict(struct ofproto *);
+static uint32_t rule_eviction_priority(struct rule *);
 
 /* ofport. */
 static void ofport_destroy__(struct ofport *);
@@ -146,11 +176,17 @@ static void update_port(struct ofproto *, const char *devname);
 static int init_ports(struct ofproto *);
 static void reinit_ports(struct ofproto *);
 
+/* rule. */
+static void ofproto_rule_destroy__(struct rule *);
+static void ofproto_rule_send_removed(struct rule *, uint8_t reason);
+static bool rule_is_modifiable(const struct rule *);
+static bool rule_is_hidden(const struct rule *);
+
 /* OpenFlow. */
 static enum ofperr add_flow(struct ofproto *, struct ofconn *,
                             const struct ofputil_flow_mod *,
                             const struct ofp_header *);
-
+static void delete_flow__(struct rule *, struct ofopgroup *);
 static bool handle_openflow(struct ofconn *, struct ofpbuf *);
 static enum ofperr handle_flow_mod__(struct ofproto *, struct ofconn *,
                                      const struct ofputil_flow_mod *,
@@ -804,6 +840,57 @@ ofproto_is_mirror_output_bundle(const struct ofproto *ofproto, void *aux)
             : false);
 }
 
+/* Configuration of OpenFlow tables. */
+
+/* Returns the number of OpenFlow tables in 'ofproto'. */
+int
+ofproto_get_n_tables(const struct ofproto *ofproto)
+{
+    return ofproto->n_tables;
+}
+
+/* Configures the OpenFlow table in 'ofproto' with id 'table_id' with the
+ * settings from 's'.  'table_id' must be in the range 0 through the number of
+ * OpenFlow tables in 'ofproto' minus 1, inclusive.
+ *
+ * For read-only tables, only the name may be configured. */
+void
+ofproto_configure_table(struct ofproto *ofproto, int table_id,
+                        const struct ofproto_table_settings *s)
+{
+    struct oftable *table = &ofproto->tables[table_id];
+    assert(table_id >= 0 && table_id < ofproto->n_tables);
+
+    oftable_set_name(table, s->name);
+
+    if (table->flags & OFTABLE_READONLY) {
+        return;
+    }
+
+    if (s->groups) {
+        oftable_enable_eviction(table, s->groups, s->n_groups);
+    } else {
+        oftable_disable_eviction(table);
+    }
+
+    table->max_flows = s->max_flows;
+    if (classifier_count(&table->cls) > table->max_flows
+        && table->eviction_fields) {
+        /* 'table' contains more flows than allowed.  We might not be able to
+         * evict them right away because of the asynchronous nature of flow
+         * table changes.  Schedule eviction for later. */
+        switch (ofproto->state) {
+        case S_OPENFLOW:
+            ofproto->state = S_EVICT;
+            break;
+        case S_EVICT:
+        case S_FLUSH:
+            /* We're already deleting flows, nothing more to do. */
+            break;
+        }
+    }
+}
+
 bool
 ofproto_has_snoops(const struct ofproto *ofproto)
 {
@@ -950,12 +1037,19 @@ ofproto_run(struct ofproto *p)
         }
     }
 
-
     switch (p->state) {
     case S_OPENFLOW:
         connmgr_run(p->connmgr, handle_openflow);
         break;
 
+    case S_EVICT:
+        connmgr_run(p->connmgr, NULL);
+        ofproto_evict(p);
+        if (list_is_empty(&p->pending) && hmap_is_empty(&p->deletions)) {
+            p->state = S_OPENFLOW;
+        }
+        break;
+
     case S_FLUSH:
         connmgr_run(p->connmgr, NULL);
         ofproto_flush__(p);
@@ -1012,6 +1106,7 @@ ofproto_wait(struct ofproto *p)
         connmgr_wait(p->connmgr, true);
         break;
 
+    case S_EVICT:
     case S_FLUSH:
         connmgr_wait(p->connmgr, false);
         if (list_is_empty(&p->pending) && hmap_is_empty(&p->deletions)) {
@@ -1248,7 +1343,7 @@ ofproto_delete_flow(struct ofproto *ofproto, const struct cls_rule *target)
         struct ofopgroup *group = ofopgroup_create_unattached(ofproto);
         ofoperation_create(group, rule, OFOPERATION_DELETE);
         oftable_remove_rule(rule);
-        rule->ofproto->ofproto_class->rule_destruct(rule);
+        ofproto->ofproto_class->rule_destruct(rule);
         ofopgroup_submit(group);
         return true;
     }
@@ -1991,6 +2086,18 @@ handle_table_stats_request(struct ofconn *ofconn,
 
     p->ofproto_class->get_tables(p, ots);
 
+    for (i = 0; i < p->n_tables; i++) {
+        const struct oftable *table = &p->tables[i];
+
+        if (table->name) {
+            ovs_strzcpy(ots[i].name, table->name, sizeof ots[i].name);
+        }
+
+        if (table->max_flows < ntohl(ots[i].max_entries)) {
+            ots[i].max_entries = htonl(table->max_flows);
+        }
+    }
+
     ofconn_send_reply(ofconn, msg);
     return 0;
 }
@@ -2580,6 +2687,7 @@ add_flow(struct ofproto *ofproto, struct ofconn *ofconn,
     rule->send_flow_removed = (fm->flags & OFPFF_SEND_FLOW_REM) != 0;
     rule->actions = ofputil_actions_clone(fm->actions, fm->n_actions);
     rule->n_actions = fm->n_actions;
+    rule->eviction_group = NULL;
 
     /* Insert new rule. */
     victim = oftable_replace_rule(rule);
@@ -2588,6 +2696,21 @@ add_flow(struct ofproto *ofproto, struct ofconn *ofconn,
     } else if (victim && victim->pending) {
         error = OFPROTO_POSTPONE;
     } else {
+        struct rule *evict;
+
+        if (classifier_count(&table->cls) > table->max_flows) {
+            evict = choose_rule_to_evict(table, rule);
+            if (!evict) {
+                error = OFPERR_OFPFMFC_ALL_TABLES_FULL;
+                goto exit;
+            } else if (evict->pending) {
+                error = OFPROTO_POSTPONE;
+                goto exit;
+            }
+        } else {
+            evict = NULL;
+        }
+
         group = ofopgroup_create(ofproto, ofconn, request, fm->buffer_id);
         ofoperation_create(group, rule, OFOPERATION_ADD);
         rule->pending->victim = victim;
@@ -2595,10 +2718,13 @@ add_flow(struct ofproto *ofproto, struct ofconn *ofconn,
         error = ofproto->ofproto_class->rule_construct(rule);
         if (error) {
             ofoperation_destroy(rule->pending);
+        } else if (evict) {
+            delete_flow__(evict, group);
         }
         ofopgroup_submit(group);
     }
 
+exit:
     /* Back out if an error occurred. */
     if (error) {
         oftable_substitute_rule(rule, victim);
@@ -2642,7 +2768,7 @@ modify_flows__(struct ofproto *ofproto, struct ofconn *ofconn,
             rule->pending->n_actions = rule->n_actions;
             rule->actions = ofputil_actions_clone(fm->actions, fm->n_actions);
             rule->n_actions = fm->n_actions;
-            rule->ofproto->ofproto_class->rule_modify_actions(rule);
+            ofproto->ofproto_class->rule_modify_actions(rule);
         } else {
             rule->modified = time_msec();
         }
@@ -2699,6 +2825,18 @@ modify_flow_strict(struct ofproto *ofproto, struct ofconn *ofconn,
 
 /* OFPFC_DELETE implementation. */
 
+static void
+delete_flow__(struct rule *rule, struct ofopgroup *group)
+{
+    struct ofproto *ofproto = rule->ofproto;
+
+    ofproto_rule_send_removed(rule, OFPRR_DELETE);
+
+    ofoperation_create(group, rule, OFOPERATION_DELETE);
+    oftable_remove_rule(rule);
+    ofproto->ofproto_class->rule_destruct(rule);
+}
+
 /* Deletes the rules listed in 'rules'.
  *
  * Returns 0 on success, otherwise an OpenFlow error code. */
@@ -2711,11 +2849,7 @@ delete_flows__(struct ofproto *ofproto, struct ofconn *ofconn,
 
     group = ofopgroup_create(ofproto, ofconn, request, UINT32_MAX);
     LIST_FOR_EACH_SAFE (rule, next, ofproto_node, rules) {
-        ofproto_rule_send_removed(rule, OFPRR_DELETE);
-
-        ofoperation_create(group, rule, OFOPERATION_DELETE);
-        oftable_remove_rule(rule);
-        rule->ofproto->ofproto_class->rule_destruct(rule);
+        delete_flow__(rule, group);
     }
     ofopgroup_submit(group);
 
@@ -2782,7 +2916,13 @@ void
 ofproto_rule_update_used(struct rule *rule, long long int used)
 {
     if (used > rule->used) {
+        struct eviction_group *evg = rule->eviction_group;
+
         rule->used = used;
+        if (evg) {
+            heap_change(&evg->rules, &rule->evg_node,
+                        rule_eviction_priority(rule));
+        }
     }
 }
 
@@ -2805,7 +2945,7 @@ ofproto_rule_expire(struct rule *rule, uint8_t reason)
     group = ofopgroup_create_unattached(ofproto);
     ofoperation_create(group, rule, OFOPERATION_DELETE);
     oftable_remove_rule(rule);
-    rule->ofproto->ofproto_class->rule_destruct(rule);
+    ofproto->ofproto_class->rule_destruct(rule);
     ofopgroup_submit(group);
 }
 
@@ -3365,6 +3505,255 @@ pick_fallback_dpid(void)
     return eth_addr_to_uint64(ea);
 }
 
+/* Table overflow policy. */
+
+/* Chooses and returns a rule to evict from 'table'.  If 'avoid' is nonnull,
+ * then it will not be chosen for eviction.  Returns NULL if the table is not
+ * configured to evict rules or if 'avoid' (if nonnull) is the only rule within
+ * its table that is a candidate for eviction. */
+static struct rule *
+choose_rule_to_evict(struct oftable *table, struct rule *avoid)
+{
+    struct eviction_group *evg;
+
+    if (!table->eviction_fields) {
+        return NULL;
+    }
+
+    /* In the common case, the outer and inner loops here will each be entered
+     * exactly once:
+     *
+     *   - The inner loop normally "return"s in its first iteration.  If the
+     *     eviction group has more than one rule, then it always returns,
+     *     either in the first or second iteration.
+     *
+     *   - The outer loop only iterates a second time if the first eviction
+     *     group visited has only a single rule (which is unusual since the
+     *     first group is also the largest group).  It never iterates more
+     *     than twice.
+     *
+     *   - The outer loop can exit only if table's 'max_flows' is all filled up
+     *     by permanent rules and 'avoid'. */
+    HEAP_FOR_EACH (evg, size_node, &table->eviction_groups_by_size) {
+        struct rule *rule;
+
+        HEAP_FOR_EACH (rule, evg_node, &evg->rules) {
+            if (rule != avoid) {
+                return rule;
+            }
+        }
+    }
+
+    return NULL;
+}
+
+/* Searches 'ofproto' for tables that have more flows than their configured
+ * maximum and that have flow eviction enabled, and evicts as many flows as
+ * necessary and currently feasible from them.
+ *
+ * This triggers only when an OpenFlow table has N flows in it and then the
+ * client configures a maximum number of flows less than N. */
+static void
+ofproto_evict(struct ofproto *ofproto)
+{
+    struct ofopgroup *group;
+    struct oftable *table;
+
+    group = ofopgroup_create_unattached(ofproto);
+    OFPROTO_FOR_EACH_TABLE (table, ofproto) {
+        while (classifier_count(&table->cls) > table->max_flows
+               && table->eviction_fields) {
+            struct rule *rule;
+
+            rule = choose_rule_to_evict(table, NULL);
+            if (!rule || rule->pending) {
+                break;
+            }
+
+            ofoperation_create(group, rule, OFOPERATION_DELETE);
+            oftable_remove_rule(rule);
+            ofproto->ofproto_class->rule_destruct(rule);
+        }
+    }
+    ofopgroup_submit(group);
+}
+
+/* Eviction groups. */
+
+/* Returns the priority to use for an eviction_group that contains 'n_rules'
+ * rules.  The priority contains low-order random bits to ensure that eviction
+ * groups with the same number of rules are prioritized randomly. */
+static uint32_t
+eviction_group_priority(size_t n_rules)
+{
+    uint16_t size = MIN(UINT16_MAX, n_rules);
+    return (size << 16) | random_uint16();
+}
+
+/* Updates 'evg', an eviction_group within 'table', following a change that
+ * adds or removes rules in 'evg'. */
+static void
+eviction_group_resized(struct oftable *table, struct eviction_group *evg)
+{
+    heap_change(&table->eviction_groups_by_size, &evg->size_node,
+                eviction_group_priority(heap_count(&evg->rules)));
+}
+
+/* Destroys 'evg', an eviction_group within 'table':
+ *
+ *   - Removes all the rules, if any, from 'evg'.  (It doesn't destroy the
+ *     rules themselves, just removes them from the eviction group.)
+ *
+ *   - Removes 'evg' from 'table'.
+ *
+ *   - Frees 'evg'. */
+static void
+eviction_group_destroy(struct oftable *table, struct eviction_group *evg)
+{
+    while (!heap_is_empty(&evg->rules)) {
+        struct rule *rule;
+
+        rule = CONTAINER_OF(heap_pop(&evg->rules), struct rule, evg_node);
+        rule->eviction_group = NULL;
+    }
+    hmap_remove(&table->eviction_groups_by_id, &evg->id_node);
+    heap_remove(&table->eviction_groups_by_size, &evg->size_node);
+    heap_destroy(&evg->rules);
+    free(evg);
+}
+
+/* Removes 'rule' from its eviction group, if any. */
+static void
+eviction_group_remove_rule(struct rule *rule)
+{
+    if (rule->eviction_group) {
+        struct oftable *table = &rule->ofproto->tables[rule->table_id];
+        struct eviction_group *evg = rule->eviction_group;
+
+        rule->eviction_group = NULL;
+        heap_remove(&evg->rules, &rule->evg_node);
+        if (heap_is_empty(&evg->rules)) {
+            eviction_group_destroy(table, evg);
+        } else {
+            eviction_group_resized(table, evg);
+        }
+    }
+}
+
+/* Hashes the 'rule''s values for the eviction_fields of 'rule''s table, and
+ * returns the hash value. */
+static uint32_t
+eviction_group_hash_rule(struct rule *rule)
+{
+    struct oftable *table = &rule->ofproto->tables[rule->table_id];
+    const struct mf_subfield *sf;
+    uint32_t hash;
+
+    hash = table->eviction_group_id_basis;
+    for (sf = table->eviction_fields;
+         sf < &table->eviction_fields[table->n_eviction_fields];
+         sf++)
+    {
+        if (mf_are_prereqs_ok(sf->field, &rule->cr.flow)) {
+            union mf_value value;
+
+            mf_get_value(sf->field, &rule->cr.flow, &value);
+            if (sf->ofs) {
+                bitwise_zero(&value, sf->field->n_bytes, 0, sf->ofs);
+            }
+            if (sf->ofs + sf->n_bits < sf->field->n_bytes * 8) {
+                unsigned int start = sf->ofs + sf->n_bits;
+                bitwise_zero(&value, sf->field->n_bytes, start,
+                             sf->field->n_bytes * 8 - start);
+            }
+            hash = hash_bytes(&value, sf->field->n_bytes, hash);
+        } else {
+            hash = hash_int(hash, 0);
+        }
+    }
+
+    return hash;
+}
+
+/* Returns an eviction group within 'table' with the given 'id', creating one
+ * if necessary. */
+static struct eviction_group *
+eviction_group_find(struct oftable *table, uint32_t id)
+{
+    struct eviction_group *evg;
+
+    HMAP_FOR_EACH_WITH_HASH (evg, id_node, id, &table->eviction_groups_by_id) {
+        return evg;
+    }
+
+    evg = xmalloc(sizeof *evg);
+    hmap_insert(&table->eviction_groups_by_id, &evg->id_node, id);
+    heap_insert(&table->eviction_groups_by_size, &evg->size_node,
+                eviction_group_priority(0));
+    heap_init(&evg->rules);
+
+    return evg;
+}
+
+/* Returns an eviction priority for 'rule'.  The return value should be
+ * interpreted so that a higher priority makes a rule a more attractive
+ * candidate for eviction. */
+static uint32_t
+rule_eviction_priority(struct rule *rule)
+{
+    long long int hard_expiration;
+    long long int idle_expiration;
+    long long int expiration;
+    uint32_t expiration_offset;
+
+    /* Calculate time of expiration. */
+    hard_expiration = (rule->hard_timeout
+                       ? rule->modified + rule->hard_timeout * 1000
+                       : LLONG_MAX);
+    idle_expiration = (rule->idle_timeout
+                       ? rule->used + rule->idle_timeout * 1000
+                       : LLONG_MAX);
+    expiration = MIN(hard_expiration, idle_expiration);
+    if (expiration == LLONG_MAX) {
+        return 0;
+    }
+
+    /* Calculate the time of expiration as a number of (approximate) seconds
+     * after program startup.
+     *
+     * This should work OK for program runs that last UINT32_MAX seconds or
+     * less.  Therefore, please restart OVS at least once every 136 years. */
+    expiration_offset = (expiration >> 10) - (time_boot_msec() >> 10);
+
+    /* Invert the expiration offset because we're using a max-heap. */
+    return UINT32_MAX - expiration_offset;
+}
+
+/* Adds 'rule' to an appropriate eviction group for its oftable's
+ * configuration.  Does nothing if 'rule''s oftable doesn't have eviction
+ * enabled, or if 'rule' is a permanent rule (one that will never expire on its
+ * own).
+ *
+ * The caller must ensure that 'rule' is not already in an eviction group. */
+static void
+eviction_group_add_rule(struct rule *rule)
+{
+    struct ofproto *ofproto = rule->ofproto;
+    struct oftable *table = &ofproto->tables[rule->table_id];
+
+    if (table->eviction_fields
+        && (rule->hard_timeout || rule->idle_timeout)) {
+        struct eviction_group *evg;
+
+        evg = eviction_group_find(table, eviction_group_hash_rule(rule));
+
+        rule->eviction_group = evg;
+        heap_insert(&evg->rules, &rule->evg_node,
+                    rule_eviction_priority(rule));
+        eviction_group_resized(table, evg);
+    }
+}
+
 /* oftables. */
 
 /* Initializes 'table'. */
@@ -3375,14 +3764,96 @@ oftable_init(struct oftable *table)
     classifier_init(&table->cls);
 }
 
-/* Destroys 'table'.
+/* Destroys 'table', including its classifier and eviction groups.
  *
  * The caller is responsible for freeing 'table' itself. */
 static void
 oftable_destroy(struct oftable *table)
 {
     assert(classifier_is_empty(&table->cls));
+    oftable_disable_eviction(table);
     classifier_destroy(&table->cls);
+    free(table->name);
+}
+
+/* Changes the name of 'table' to 'name'.  If 'name' is NULL or the empty
+ * string, then 'table' will use its default name.
+ *
+ * This only affects the name reported for a table in the OpenFlow
+ * OFPST_TABLE reply (as printed by "ovs-ofctl dump-tables"). */
+static void
+oftable_set_name(struct oftable *table, const char *name)
+{
+    if (name && name[0]) {
+        int len = strnlen(name, OFP_MAX_TABLE_NAME_LEN);
+        if (!table->name || strncmp(name, table->name, len)) {
+            free(table->name);
+            table->name = xmemdup0(name, len);
+        }
+    } else {
+        free(table->name);
+        table->name = NULL;
+    }
+}
+
+/* oftables support a choice of two policies when adding a rule would cause the
+ * number of flows in the table to exceed the configured maximum number: either
+ * they can refuse to add the new flow or they can evict some existing flow.
+ * This function configures the former policy on 'table'. */
+static void
+oftable_disable_eviction(struct oftable *table)
+{
+    if (table->eviction_fields) {
+        struct eviction_group *evg, *next;
+
+        HMAP_FOR_EACH_SAFE (evg, next, id_node,
+                            &table->eviction_groups_by_id) {
+            eviction_group_destroy(table, evg);
+        }
+        hmap_destroy(&table->eviction_groups_by_id);
+        heap_destroy(&table->eviction_groups_by_size);
+
+        free(table->eviction_fields);
+        table->eviction_fields = NULL;
+        table->n_eviction_fields = 0;
+    }
+}
+
+/* oftables support a choice of two policies when adding a rule would cause the
+ * number of flows in the table to exceed the configured maximum number: either
+ * they can refuse to add the new flow or they can evict some existing flow.
+ * This function configures the latter policy on 'table', with fairness based
+ * on the values of the 'n_fields' fields specified in 'fields'.  (Specifying
+ * 'n_fields' as 0 disables fairness.) */
+static void
+oftable_enable_eviction(struct oftable *table,
+                        const struct mf_subfield *fields, size_t n_fields)
+{
+    struct cls_cursor cursor;
+    struct rule *rule;
+
+    if (table->eviction_fields
+        && n_fields == table->n_eviction_fields
+        && (!n_fields
+            || !memcmp(fields, table->eviction_fields,
+                       n_fields * sizeof *fields))) {
+        /* No change. */
+        return;
+    }
+
+    oftable_disable_eviction(table);
+
+    table->n_eviction_fields = n_fields;
+    table->eviction_fields = xmemdup(fields, n_fields * sizeof *fields);
+
+    table->eviction_group_id_basis = random_uint32();
+    hmap_init(&table->eviction_groups_by_id);
+    heap_init(&table->eviction_groups_by_size);
+
+    cls_cursor_init(&cursor, &table->cls, NULL);
+    CLS_CURSOR_FOR_EACH (rule, cr, &cursor) {
+        eviction_group_add_rule(rule);
+    }
 }
 
 /* Removes 'rule' from the oftable that contains it. */
@@ -3393,6 +3864,7 @@ oftable_remove_rule(struct rule *rule)
     struct oftable *table = &ofproto->tables[rule->table_id];
 
     classifier_remove(&table->cls, &rule->cr);
+    eviction_group_remove_rule(rule);
 }
 
 /* Inserts 'rule' into its oftable.  Removes any existing rule from 'rule''s
@@ -3403,8 +3875,14 @@ oftable_replace_rule(struct rule *rule)
 {
     struct ofproto *ofproto = rule->ofproto;
     struct oftable *table = &ofproto->tables[rule->table_id];
+    struct rule *victim;
 
-    return rule_from_cls_rule(classifier_replace(&table->cls, &rule->cr));
+    victim = rule_from_cls_rule(classifier_replace(&table->cls, &rule->cr));
+    if (victim) {
+        eviction_group_remove_rule(victim);
+    }
+    eviction_group_add_rule(rule);
+    return victim;
 }
 
 /* Removes 'old' from its oftable then, if 'new' is nonnull, inserts 'new'. */
diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h
index 2d47878..dd42ecf 100644
--- a/ofproto/ofproto.h
+++ b/ofproto/ofproto.h
@@ -217,7 +217,6 @@ int ofproto_set_stp(struct ofproto *, const struct ofproto_stp_settings *);
 int ofproto_get_stp_status(struct ofproto *, struct ofproto_stp_status *);
 
 /* Configuration of ports. */
-
 void ofproto_port_unregister(struct ofproto *, uint16_t ofp_port);
 
 void ofproto_port_clear_cfm(struct ofproto *, uint16_t ofp_port);
@@ -314,6 +313,27 @@ int ofproto_mirror_get_stats(struct ofproto *, void *aux,
 int ofproto_set_flood_vlans(struct ofproto *, unsigned long *flood_vlans);
 bool ofproto_is_mirror_output_bundle(const struct ofproto *, void *aux);
 
+/* Configuration of OpenFlow tables. */
+struct ofproto_table_settings {
+    char *name;                 /* Name exported via OpenFlow or NULL. */
+    unsigned int max_flows;     /* Maximum number of flows or UINT_MAX. */
+
+    /* These members determine the handling of an attempt to add a flow that
+     * would cause the table to have more than 'max_flows' flows.
+     *
+     * If 'groups' is NULL, overflows will be rejected with an error.
+     *
+     * If 'groups' is nonnull, an overflow will cause a flow to be removed.
+     * The flow to be removed is chosen to give fairness among groups
+     * distinguished by different values for the subfields within 'groups'. */
+    struct mf_subfield *groups;
+    size_t n_groups;
+};
+
+int ofproto_get_n_tables(const struct ofproto *);
+void ofproto_configure_table(struct ofproto *, int table_id,
+                             const struct ofproto_table_settings *);
+
 /* Configuration querying. */
 bool ofproto_has_snoops(const struct ofproto *);
 void ofproto_get_snoops(const struct ofproto *, struct sset *);
diff --git a/tests/ofproto-macros.at b/tests/ofproto-macros.at
index 13586c3..3656ecf 100644
--- a/tests/ofproto-macros.at
+++ b/tests/ofproto-macros.at
@@ -1,3 +1,19 @@
+m4_divert_push([PREPARE_TESTS])
+[
+# Strips out uninteresting parts of ovs-ofctl output, as well as parts
+# that vary from one run to another.
+ofctl_strip () {
+    sed '
+s/ (xid=0x[0-9a-fA-F]*)//
+s/ duration=[0-9.]*s,//
+s/ cookie=0x0,//
+s/ table=0,//
+s/ n_packets=0,//
+s/ n_bytes=0,//
+'
+}]
+m4_divert_pop([PREPARE_TESTS])
+
 m4_define([STRIP_XIDS], [[sed 's/ (xid=0x[0-9a-fA-F]*)//']])
 m4_define([STRIP_DURATION], [[sed 's/\bduration=[0-9.]*s/duration=?s/']])
 m4_define([TESTABLE_LOG], [-vPATTERN:ANY:'%c|%p|%m'])
diff --git a/tests/ofproto.at b/tests/ofproto.at
index b54d1dd..7c34e14 100644
--- a/tests/ofproto.at
+++ b/tests/ofproto.at
@@ -201,3 +201,223 @@ NXST_FLOW reply:
 ])
 OVS_VSWITCHD_STOP
 AT_CLEANUP
+
+AT_SETUP([ofproto - flow table configuration])
+OVS_VSWITCHD_START
+# Check the default configuration.
+(echo "OFPST_TABLE reply (xid=0x1): 255 tables
+  0: classifier: wild=0x3fffff, max=1000000, active=0
+               lookup=0, matched=0"
+ x=1
+ while test $x -lt 255; do
+   printf "  %d: %-8s: wild=0x3fffff, max=1000000, active=0
+               lookup=0, matched=0
+" $x table$x
+   x=`expr $x + 1`
+ done) > expout
+AT_CHECK([ovs-ofctl dump-tables br0], [0], [expout])
+# Change the configuration.
+AT_CHECK(
+  [ovs-vsctl \
+     -- --id=@t0 create Flow_Table name=main \
+     -- --id=@t1 create Flow_Table flow-limit=1024 \
+     -- set bridge br0 'flow_tables={1=@t1,0=@t0}' \
+   | perl $srcdir/uuidfilt.pl],
+  [0], [<0>
+<1>
+])
+# Check that the configuration was updated.
+mv expout orig-expout
+(echo "OFPST_TABLE reply (xid=0x1): 255 tables
+  0: main    : wild=0x3fffff, max=1000000, active=0
+               lookup=0, matched=0
+  1: table1  : wild=0x3fffff, max=  1024, active=0
+               lookup=0, matched=0"
+ tail -n +6 orig-expout) > expout
+AT_CHECK([ovs-ofctl dump-tables br0], [0], [expout])
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([ofproto - hard limits on flow table size])
+OVS_VSWITCHD_START
+# Configure a maximum of 4 flows.
+AT_CHECK(
+  [ovs-vsctl \
+     -- --id=@t0 create Flow_Table flow-limit=4 \
+     -- set bridge br0 flow_tables:0=@t0 \
+   | perl $srcdir/uuidfilt.pl],
+  [0], [<0>
+])
+# Add 4 flows.
+for in_port in 1 2 3 4; do
+    ovs-ofctl add-flow br0 in_port=$in_port,actions=drop
+done
+AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl
+ in_port=1 actions=drop
+ in_port=2 actions=drop
+ in_port=3 actions=drop
+ in_port=4 actions=drop
+NXST_FLOW reply:
+])
+# Adding another flow will be refused.
+AT_CHECK([ovs-ofctl add-flow br0 in_port=5,actions=drop], [1], [], [stderr])
+AT_CHECK([head -n 1 stderr], [0],
+  [OFPT_ERROR (xid=0x1): OFPFMFC_ALL_TABLES_FULL
+])
+# Also a mod-flow that would add a flow will be refused.
+AT_CHECK([ovs-ofctl mod-flows br0 in_port=5,actions=drop], [1], [], [stderr])
+AT_CHECK([head -n 1 stderr], [0],
+  [OFPT_ERROR (xid=0x1): OFPFMFC_ALL_TABLES_FULL
+])
+# Replacing or modifying an existing flow is allowed.
+AT_CHECK([ovs-ofctl add-flow br0 in_port=4,actions=normal])
+AT_CHECK([ovs-ofctl mod-flows br0 in_port=3,actions=output:1])
+AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl
+ in_port=1 actions=drop
+ in_port=2 actions=drop
+ in_port=3 actions=output:1
+ in_port=4 actions=NORMAL
+NXST_FLOW reply:
+])
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([ofproto - eviction upon table overflow])
+OVS_VSWITCHD_START
+# Configure a maximum of 4 flows.
+AT_CHECK(
+  [ovs-vsctl \
+     -- --id=@t0 create Flow_Table flow-limit=4 overflow-policy=evict \
+     -- set bridge br0 flow_tables:0=@t0 \
+   | perl $srcdir/uuidfilt.pl],
+  [0], [<0>
+])
+# Add 4 flows.
+for in_port in 4 3 2 1; do
+    ovs-ofctl add-flow br0 idle_timeout=${in_port}0,in_port=$in_port,actions=drop
+done
+AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl
+ idle_timeout=10,in_port=1 actions=drop
+ idle_timeout=20,in_port=2 actions=drop
+ idle_timeout=30,in_port=3 actions=drop
+ idle_timeout=40,in_port=4 actions=drop
+NXST_FLOW reply:
+])
+# Adding another flow will cause the one that expires soonest to be evicted.
+AT_CHECK([ovs-ofctl add-flow br0 in_port=5,actions=drop])
+AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl
+ idle_timeout=20,in_port=2 actions=drop
+ idle_timeout=30,in_port=3 actions=drop
+ idle_timeout=40,in_port=4 actions=drop
+ in_port=5 actions=drop
+NXST_FLOW reply:
+])
+# A mod-flow that adds a flow also causes eviction, but replacing or
+# modifying an existing flow doesn't.
+AT_CHECK([ovs-ofctl mod-flows br0 in_port=6,actions=drop])
+AT_CHECK([ovs-ofctl add-flow br0 in_port=4,actions=normal])
+AT_CHECK([ovs-ofctl mod-flows br0 in_port=3,actions=output:1])
+AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl
+ idle_timeout=30,in_port=3 actions=output:1
+ in_port=4 actions=NORMAL
+ in_port=5 actions=drop
+ in_port=6 actions=drop
+NXST_FLOW reply:
+])
+# Flows with no timeouts at all cannot be evicted.
+AT_CHECK([ovs-ofctl add-flow br0 in_port=7,actions=normal])
+AT_CHECK([ovs-ofctl add-flow br0 in_port=8,actions=drop], [1], [], [stderr])
+AT_CHECK([head -n 1 stderr], [0],
+  [OFPT_ERROR (xid=0x1): OFPFMFC_ALL_TABLES_FULL
+])
+AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl
+ in_port=4 actions=NORMAL
+ in_port=5 actions=drop
+ in_port=6 actions=drop
+ in_port=7 actions=NORMAL
+NXST_FLOW reply:
+])
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([ofproto - eviction upon table overflow, with fairness])
+OVS_VSWITCHD_START
+# Configure a maximum of 4 flows.
+AT_CHECK(
+  [ovs-vsctl \
+     -- --id=@t0 create Flow_Table name=evict flow-limit=4 \
+                                   overflow-policy=evict \
+                                   groups='"NXM_OF_IN_PORT[[]]"' \
+     -- set bridge br0 flow_tables:0=@t0 \
+   | perl $srcdir/uuidfilt.pl],
+  [0], [<0>
+])
+# Add 4 flows.
+ovs-ofctl add-flows br0 - <<EOF
+idle_timeout=10 in_port=2 dl_src=00:44:55:66:77:88 actions=drop
+idle_timeout=20 in_port=1 dl_src=00:11:22:33:44:55 actions=drop
+idle_timeout=30 in_port=1 dl_src=00:22:33:44:55:66 actions=drop
+idle_timeout=40 in_port=1 dl_src=00:33:44:55:66:77 actions=drop
+EOF
+AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl
+ idle_timeout=10,in_port=2,dl_src=00:44:55:66:77:88 actions=drop
+ idle_timeout=20,in_port=1,dl_src=00:11:22:33:44:55 actions=drop
+ idle_timeout=30,in_port=1,dl_src=00:22:33:44:55:66 actions=drop
+ idle_timeout=40,in_port=1,dl_src=00:33:44:55:66:77 actions=drop
+NXST_FLOW reply:
+])
+# Adding another flow will cause the one that expires soonest within
+# the largest group (those with in_port=1) to be evicted.  In this
+# case this is not the same as the one that expires soonest overall
+# (which is what makes the test interesting):
+AT_CHECK([ovs-ofctl add-flow br0 in_port=2,dl_src=00:55:66:77:88:99,actions=drop])
+AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl
+ idle_timeout=10,in_port=2,dl_src=00:44:55:66:77:88 actions=drop
+ idle_timeout=30,in_port=1,dl_src=00:22:33:44:55:66 actions=drop
+ idle_timeout=40,in_port=1,dl_src=00:33:44:55:66:77 actions=drop
+ in_port=2,dl_src=00:55:66:77:88:99 actions=drop
+NXST_FLOW reply:
+])
+# Enlarge the flow limit, change the eviction policy back to strictly
+# based on expiration, and add some flows.
+AT_CHECK([ovs-vsctl set Flow_Table evict groups='[[]]' flow-limit=7])
+ovs-ofctl add-flows br0 - <<EOF
+idle_timeout=50 in_port=2 dl_src=00:66:77:88:99:aa actions=drop
+idle_timeout=60 in_port=2 dl_src=00:77:88:99:aa:bb actions=drop
+idle_timeout=70 in_port=2 dl_src=00:88:99:aa:bb:cc actions=drop
+EOF
+AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl
+ idle_timeout=10,in_port=2,dl_src=00:44:55:66:77:88 actions=drop
+ idle_timeout=30,in_port=1,dl_src=00:22:33:44:55:66 actions=drop
+ idle_timeout=40,in_port=1,dl_src=00:33:44:55:66:77 actions=drop
+ idle_timeout=50,in_port=2,dl_src=00:66:77:88:99:aa actions=drop
+ idle_timeout=60,in_port=2,dl_src=00:77:88:99:aa:bb actions=drop
+ idle_timeout=70,in_port=2,dl_src=00:88:99:aa:bb:cc actions=drop
+ in_port=2,dl_src=00:55:66:77:88:99 actions=drop
+NXST_FLOW reply:
+])
+# Adding another flow will cause the one that expires soonest overall
+# to be evicted.
+AT_CHECK([ovs-ofctl add-flow br0 'idle_timeout=80 in_port=2 dl_src=00:99:aa:bb:cc:dd actions=drop'])
+AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl
+ idle_timeout=30,in_port=1,dl_src=00:22:33:44:55:66 actions=drop
+ idle_timeout=40,in_port=1,dl_src=00:33:44:55:66:77 actions=drop
+ idle_timeout=50,in_port=2,dl_src=00:66:77:88:99:aa actions=drop
+ idle_timeout=60,in_port=2,dl_src=00:77:88:99:aa:bb actions=drop
+ idle_timeout=70,in_port=2,dl_src=00:88:99:aa:bb:cc actions=drop
+ idle_timeout=80,in_port=2,dl_src=00:99:aa:bb:cc:dd actions=drop
+ in_port=2,dl_src=00:55:66:77:88:99 actions=drop
+NXST_FLOW reply:
+])
+# Reducing the flow limit also causes the flows that expire soonest
+# overall to be evicted.
+AT_CHECK([ovs-vsctl set Flow_Table evict flow-limit=4])
+AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl
+ idle_timeout=60,in_port=2,dl_src=00:77:88:99:aa:bb actions=drop
+ idle_timeout=70,in_port=2,dl_src=00:88:99:aa:bb:cc actions=drop
+ idle_timeout=80,in_port=2,dl_src=00:99:aa:bb:cc:dd actions=drop
+ in_port=2,dl_src=00:55:66:77:88:99 actions=drop
+NXST_FLOW reply:
+])
+OVS_VSWITCHD_STOP
+AT_CLEANUP
diff --git a/tests/ovs-vsctl.at b/tests/ovs-vsctl.at
index 8ade172..69f06b3 100644
--- a/tests/ovs-vsctl.at
+++ b/tests/ovs-vsctl.at
@@ -573,6 +573,7 @@ datapath_type       : ""
 external_ids        : {}
 fail_mode           : []
 flood_vlans         : []
+flow_tables         : {}
 mirrors             : []
 name                : "br0"
 netflow             : []
@@ -1019,6 +1020,7 @@ datapath_type       : ""
 external_ids        : {}
 fail_mode           : []
 flood_vlans         : []
+flow_tables         : {}
 mirrors             : []
 name                : "br0"
 netflow             : []
diff --git a/utilities/ovs-vsctl.8.in b/utilities/ovs-vsctl.8.in
index d24af14..55b3c4f 100644
--- a/utilities/ovs-vsctl.8.in
+++ b/utilities/ovs-vsctl.8.in
@@ -486,6 +486,9 @@ A bridge port.  Records may be identified by port name.
 .IP "\fBInterface\fR"
 A network device attached to a port.  Records may be identified by
 name.
+.IP "\fBFlow_Table\fR"
+Configuration for a particular OpenFlow flow table.  Records may be
+identified by name.
 .IP "\fBQoS\fR"
 Quality-of-service configuration for a \fBPort\fR.  Records may be
 identified by port name.
diff --git a/utilities/ovs-vsctl.c b/utilities/ovs-vsctl.c
index a2af2f6..48ae56b 100644
--- a/utilities/ovs-vsctl.c
+++ b/utilities/ovs-vsctl.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009, 2010, 2011 Nicira Networks.
+ * Copyright (c) 2009, 2010, 2011, 2012 Nicira Networks.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -2340,6 +2340,10 @@ static const struct vsctl_table_class tables[] = {
        &ovsrec_bridge_col_sflow},
       {NULL, NULL, NULL}}},
 
+    {&ovsrec_table_flow_table,
+     {{&ovsrec_table_flow_table, &ovsrec_flow_table_col_name, NULL},
+      {NULL, NULL, NULL}}},
+
     {NULL, {{NULL, NULL, NULL}, {NULL, NULL, NULL}}}
 };
 
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index b45b972..312d1aa 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -32,6 +32,7 @@
 #include "jsonrpc.h"
 #include "lacp.h"
 #include "list.h"
+#include "meta-flow.h"
 #include "netdev.h"
 #include "ofp-print.h"
 #include "ofpbuf.h"
@@ -158,6 +159,7 @@ static void bridge_configure_netflow(struct bridge *);
 static void bridge_configure_forward_bpdu(struct bridge *);
 static void bridge_configure_sflow(struct bridge *, int *sflow_bridge_number);
 static void bridge_configure_stp(struct bridge *);
+static void bridge_configure_tables(struct bridge *);
 static void bridge_configure_remotes(struct bridge *,
                                      const struct sockaddr_in *managers,
                                      size_t n_managers);
@@ -469,6 +471,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
         bridge_configure_netflow(br);
         bridge_configure_sflow(br, &sflow_bridge_number);
         bridge_configure_stp(br);
+        bridge_configure_tables(br);
     }
     free(managers);
 
@@ -2472,6 +2475,78 @@ bridge_configure_remotes(struct bridge *br,
         sset_destroy(&snoops);
     }
 }
+
+/* Pushes the Flow_Table configuration in 'br->cfg' down to 'br->ofproto'.
+ *
+ * Every table reported by ofproto_get_n_tables() is configured.  A table
+ * without a Flow_Table record gets a null name, a 'max_flows' of UINT_MAX,
+ * and null 'groups'.  Any Flow_Table records left unconsumed after the last
+ * table are reported with a warning and otherwise ignored. */
+static void
+bridge_configure_tables(struct bridge *br)
+{
+    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+    int n_tables;
+    int i, j;
+
+    n_tables = ofproto_get_n_tables(br->ofproto);
+    j = 0;
+    for (i = 0; i < n_tables; i++) {
+        struct ofproto_table_settings s;
+
+        s.name = NULL;
+        s.max_flows = UINT_MAX;
+        s.groups = NULL;
+        s.n_groups = 0;
+
+        /* NOTE(review): this single forward pass assumes 'key_flow_tables'
+         * is sorted in increasing order -- confirm the IDL guarantees it. */
+        if (j < br->cfg->n_flow_tables && i == br->cfg->key_flow_tables[j]) {
+            struct ovsrec_flow_table *cfg = br->cfg->value_flow_tables[j++];
+
+            s.name = cfg->name;
+            if (cfg->n_flow_limit && *cfg->flow_limit < UINT_MAX) {
+                s.max_flows = *cfg->flow_limit;
+            }
+            if (cfg->overflow_policy
+                && !strcmp(cfg->overflow_policy, "evict")) {
+                size_t k;
+
+                s.groups = xmalloc(cfg->n_groups * sizeof *s.groups);
+                for (k = 0; k < cfg->n_groups; k++) {
+                    const char *string = cfg->groups[k];
+                    char *msg;
+
+                    /* Parse into the next unused slot, not slot 'k', so that
+                     * the first 's.n_groups' elements of 's.groups' are
+                     * exactly the valid subfields.  A rejected element is
+                     * simply overwritten by the next one. */
+                    msg = mf_parse_subfield__(&s.groups[s.n_groups], &string);
+                    if (msg) {
+                        VLOG_WARN_RL(&rl, "bridge %s table %d: error parsing "
+                                     "'groups' (%s)", br->name, i, msg);
+                        free(msg);
+                    } else if (*string) {
+                        VLOG_WARN_RL(&rl, "bridge %s table %d: 'groups' "
+                                     "element '%s' contains trailing garbage",
+                                     br->name, i, cfg->groups[k]);
+                    } else {
+                        s.n_groups++;
+                    }
+                }
+            }
+        }
+
+        ofproto_configure_table(br->ofproto, i, &s);
+
+        free(s.groups);
+    }
+    for (; j < br->cfg->n_flow_tables; j++) {
+        VLOG_WARN_RL(&rl, "bridge %s: ignoring configuration for flow table "
+                     "%"PRId64" not supported by this datapath", br->name,
+                     br->cfg->key_flow_tables[j]);
+    }
+}
 
 /* Port functions. */
 
diff --git a/vswitchd/vswitch.gv b/vswitchd/vswitch.gv
index 65916d4..3a0980f 100644
--- a/vswitchd/vswitch.gv
+++ b/vswitchd/vswitch.gv
@@ -8,6 +8,7 @@ digraph Open_vSwitch {
 	Bridge -> Mirror [label="mirrors*"];
 	Bridge -> Port [label="ports*"];
 	Bridge -> Controller [label="controller*"];
+	Bridge -> Flow_Table [label="flow_tables value*"];
 	Bridge -> NetFlow [label="netflow?"];
 	QoS [style=bold];
 	QoS -> Queue [label="queues value*"];
@@ -18,6 +19,7 @@ digraph Open_vSwitch {
 	Open_vSwitch -> SSL [label="ssl?"];
 	Open_vSwitch -> Manager [label="manager_options*"];
 	Controller [];
+	Flow_Table [];
 	Queue [style=bold];
 	SSL [];
 	Manager [];
diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema
index 9d91b0f..6f2c458 100644
--- a/vswitchd/vswitch.ovsschema
+++ b/vswitchd/vswitch.ovsschema
@@ -1,6 +1,6 @@
 {"name": "Open_vSwitch",
- "version": "6.4.0",
- "cksum": "923041702 15687",
+ "version": "6.5.0",
+ "cksum": "2847700438 16419",
  "tables": {
    "Open_vSwitch": {
      "columns": {
@@ -99,7 +99,14 @@
          "type": {"key": {"type": "integer",
                           "minInteger": 0,
                           "maxInteger": 4095},
-                  "min": 0, "max": 4096}}},
+                  "min": 0, "max": 4096}},
+       "flow_tables": {
+         "type": {"key": {"type": "integer",
+                          "minInteger": 0,
+                          "maxInteger": 254},
+                  "value": {"type": "uuid",
+                            "refTable": "Flow_Table"},
+                  "min": 0, "max": "unlimited"}}},
      "indexes": [["name"]]},
    "Port": {
      "columns": {
@@ -236,6 +243,19 @@
          "type": {"key": "integer", "min": 0, "max": 1},
          "ephemeral": true}},
      "indexes": [["name"]]},
+   "Flow_Table": {
+     "columns": {
+       "name": {
+	 "type": {"key": "string", "min": 0, "max": 1}},
+       "flow_limit": {
+	 "type": {"key": {"type": "integer", "minInteger": 0},
+		  "min": 0, "max": 1}},
+       "overflow_policy": {
+	 "type": {"key": {"type": "string",
+			  "enum": ["set", ["refuse", "evict"]]},
+		  "min": 0, "max": 1}},
+       "groups": {
+	 "type": {"key": "string", "min": 0, "max": "unlimited"}}}},
    "QoS": {
      "columns": {
        "type": {
diff --git a/vswitchd/vswitch.pic b/vswitchd/vswitch.pic
index e17a696..97a5537 100644
--- a/vswitchd/vswitch.pic
+++ b/vswitchd/vswitch.pic
@@ -1,78 +1,83 @@
-.\" Generated from vswitch.gv with cksum "3079400319 1035"
+.\" Generated from vswitch.gv with cksum "3861934566 1103"
 .PS
 linethick = 1;
 linethick = 1;
-box at 2.691279811,2.68196 wid 0.5680525378 height 0.335245 "Bridge"
+box at 2.941088544,2.50604 wid 0.5307918022 height 0.313255 "Bridge"
 linethick = 1;
-box at 0.2607468561,1.8438475 wid 0.5214937122 height 0.335245 "sFlow"
+box at 0.2436434739,1.7229025 wid 0.4872869478 height 0.313255 "sFlow"
 linethick = 1;
-box at 0.959202994,1.8438475 wid 0.5494263256 height 0.335245 "Mirror"
+box at 0.896285206,1.7229025 wid 0.5133873544 height 0.313255 "Mirror"
 linethick = 1;
-box at 2.691279811,1.8438475 wid 0.5028675 height 0.335245 "Port"
+box at 2.514748489,1.7229025 wid 0.4698825 height 0.313255 "Port"
 linethick = 1;
-box at 3.510752689,1.8438475 wid 0.800833256 height 0.335245 "Controller"
+box at 3.280469011,1.7229025 wid 0.748303544 height 0.313255 "Controller"
 linethick = 1;
-box at 4.423356628,1.8438475 wid 0.689129622 height 0.335245 "NetFlow"
+box at 4.211525522,1.7229025 wid 0.800554478 height 0.313255 "Flow_Table"
+linethick = 1;
+box at 5.490670989,1.7229025 wid 0.643926978 height 0.313255 "NetFlow"
 linethick = 0.5;
-box at 2.160452878,1.005735 wid 0.5028675 height 0.335245 "QoS"
-box at 2.160452878,1.005735 wid 0.447311944444444 height 0.279689444444444
+box at 2.018740522,0.939765 wid 0.4698825 height 0.313255 "QoS"
+box at 2.018740522,0.939765 wid 0.414326944444444 height 0.257699444444444
 linethick = 0.5;
-box at 2.160452878,0.1676225 wid 0.5773656439 height 0.335245 "Queue"
-box at 2.160452878,0.1676225 wid 0.521810088344444 height 0.279689444444444
+box at 2.018740522,0.1566275 wid 0.5394940261 height 0.313255 "Queue"
+box at 2.018740522,0.1566275 wid 0.483938470544444 height 0.257699444444444
 linethick = 0.5;
-box at 4.218521933,3.5200725 wid 1.080226439 height 0.335245 "Open_vSwitch"
-box at 4.218521933,3.5200725 wid 1.02467088344444 height 0.279689444444444
+box at 4.368153022,3.2891775 wid 1.009370261 height 0.313255 "Open_vSwitch"
+box at 4.368153022,3.2891775 wid 0.953814705444444 height 0.257699444444444
+linethick = 1;
+box at 3.724226044,2.50604 wid 0.739657706 height 0.313255 "Capability"
 linethick = 1;
-box at 3.529392311,2.68196 wid 0.791580494 height 0.335245 "Capability"
+box at 5.020788489,2.50604 wid 0.4698825 height 0.313255 "SSL"
 linethick = 1;
-box at 4.916904317,2.68196 wid 0.5028675 height 0.335245 "SSL"
+box at 5.743029217,2.50604 wid 0.669989794 height 0.313255 "Manager"
 linethick = 1;
-box at 5.689845189,2.68196 wid 0.717022006 height 0.335245 "Manager"
+box at 2.749689739,0.939765 wid 0.678698283 height 0.313255 "Interface"
 linethick = 1;
-box at 2.942713561,1.005735 wid 0.726341817 height 0.335245 "Interface"
+spline -> from 2.677515787,2.47659403 to 2.677515787,2.47659403 to 2.342708843,2.434805813 to 1.749967732,2.345778742 to 1.261728489,2.192785 to 0.937948121,2.091353031 to 0.872853732,2.023376696 to 0.5656006978,1.87953 to 0.5408284924,1.867939565 to 0.5150601361,1.85572262 to 0.4893732261,1.843380373
+"sflow?" at 1.457512864,2.11447125
 linethick = 1;
-spline -> from 2.411216138,2.64038962 to 2.411216138,2.64038962 to 2.110501373,2.590639262 to 1.621781212,2.494960339 to 1.219889506,2.346715 to 0.974758362,2.256265899 to 0.712999066,2.115798244 to 0.5250137847,2.006240178
-"sflow?" at 1.429417631,2.26290375
+spline -> from 2.67645072,2.4120635 to 2.67645072,2.4120635 to 2.505664094,2.350853473 to 2.278992776,2.26859271 to 2.079637294,2.192785 to 1.75736055,2.070239644 to 1.388408811,1.922571237 to 1.149332595,1.825900744
+"mirrors*" at 2.331995522,2.11447125
 linethick = 1;
-spline -> from 2.409137619,2.545448236 to 2.409137619,2.545448236 to 2.08321243,2.387748988 to 1.550776321,2.130079681 to 1.229812758,1.974794197
-"mirrors*" at 2.253583939,2.26290375
+spline -> from 2.791164701,2.348347433 to 2.791164701,2.348347433 to 2.751318665,2.300920626 to 2.710846119,2.246978115 to 2.680084478,2.192785 to 2.624575692,2.09504944 to 2.582787475,1.97538603 to 2.555095733,1.881284228
+"ports*" at 2.867160364,2.11447125
 linethick = 1;
-spline -> from 2.691279811,2.51232603 to 2.691279811,2.51232603 to 2.691279811,2.366963798 to 2.691279811,2.158508457 to 2.691279811,2.013213274
-"ports*" at 2.891488125,2.26290375
+spline -> from 3.009816691,2.34753297 to 3.009816691,2.34753297 to 3.06864598,2.211705602 to 3.153036877,2.016923643 to 3.211928817,1.881158926
+"controller*" at 3.463221978,2.11447125
 linethick = 1;
-spline -> from 2.937148494,2.511990785 to 2.937148494,2.511990785 to 3.002387171,2.46203928 to 3.0708442,2.40504763 to 3.128975683,2.346715 to 3.230286722,2.244934618 to 3.327910066,2.115664146 to 3.399049055,2.013682617
-"controller*" at 3.613203561,2.26290375
+spline -> from 3.202531167,2.380675349 to 3.202531167,2.380675349 to 3.228656634,2.369586122 to 3.254970054,2.358935452 to 3.280469011,2.3494125 to 3.514345194,2.262077006 to 3.611015687,2.329489482 to 3.819956772,2.192785 to 3.941687665,2.113155579 to 4.045312419,1.98415717 to 4.115293586,1.881472181
+"flow_tables value*" at 4.555228908,2.11447125
 linethick = 1;
-spline -> from 2.974025444,2.543101521 to 2.974025444,2.543101521 to 3.000845044,2.532507779 to 3.02793284,2.522651576 to 3.054484244,2.5143375 to 3.459527253,2.387078498 to 3.62668041,2.557986399 to 3.994980567,2.346715 to 4.133302654,2.267396033 to 4.247688248,2.125989692 to 4.323386569,2.013213274
-"netflow?" at 4.474582064,2.26290375
+spline -> from 3.20459865,2.37384639 to 3.20459865,2.37384639 to 3.229847003,2.364386089 to 3.255408611,2.356053506 to 3.280469011,2.3494125 to 3.6838788,2.242968451 to 4.788102675,2.398029676 to 5.151290522,2.192785 to 5.274838294,2.122991786 to 5.365243687,1.987540324 to 5.421504285,1.879905906
+"netflow?" at 5.582016147,2.11447125
 linethick = 0.5;
-spline -> from 1.237456344,1.8438475 to 1.237456344,1.8438475 to 1.569281845,1.8438475 to 2.119083645,1.8438475 to 2.439041473,1.8438475
-"select_src_port*" at 1.834527689,1.92765875
+spline -> from 1.156286856,1.7229025 to 1.156286856,1.7229025 to 1.466346655,1.7229025 to 1.980084855,1.7229025 to 2.279055427,1.7229025
+"select_src_port*" at 1.714194011,1.80121625
 linethick = 0.5;
-spline -> from 1.215196076,1.675420412 to 1.215196076,1.675420412 to 1.2471114,1.660535534 to 1.280166557,1.647796224 to 1.313020567,1.638945756 to 1.76070674,1.518726899 to 1.909823716,1.513430028 to 2.356034811,1.638945756 to 2.387547841,1.647863273 to 2.419060871,1.660602583 to 2.449367019,1.675420412
-"output_port?" at 1.834527689,1.722757006
+spline -> from 1.135486724,1.565523188 to 1.135486724,1.565523188 to 1.1653086,1.551614666 to 1.196195543,1.539710976 to 1.226894533,1.531441044 to 1.64521526,1.419107801 to 1.784551084,1.414158372 to 2.201493489,1.531441044 to 2.230939459,1.539773627 to 2.260385429,1.551677317 to 2.288703681,1.565523188
+"output_port?" at 1.714194011,1.609754794
 linethick = 0.5;
-spline -> from 1.027123631,1.675018118 to 1.027123631,1.675018118 to 1.084316428,1.557883515 to 1.178520273,1.410845058 to 1.313020567,1.34098 to 1.724433231,1.127428935 to 1.947304107,1.122266162 to 2.356034811,1.34098 to 2.487383802,1.411314401 to 2.576156678,1.558285809 to 2.629192437,1.675353363
-"select_dst_port*" at 1.834527689,1.42479125
+spline -> from 0.959750669,1.565147282 to 0.959750669,1.565147282 to 1.013191972,1.455695985 to 1.101216627,1.318302342 to 1.226894533,1.25302 to 1.611321069,1.053476565 to 1.819572993,1.048652438 to 2.201493489,1.25302 to 2.324226798,1.318740899 to 2.407176722,1.456071891 to 2.456733663,1.565460537
+"select_dst_port*" at 1.714194011,1.33133375
 linethick = 1;
-spline -> from 2.582124039,1.673408942 to 2.582124039,1.673408942 to 2.548331343,1.620641379 to 2.511119148,1.5622417 to 2.477058256,1.5086025 to 2.40638861,1.397100013 to 2.327203741,1.271383138 to 2.265719808,1.173558647
-"qos?" at 2.621414753,1.42479125
+spline -> from 2.412752661,1.563643658 to 2.412752661,1.563643658 to 2.381176557,1.514337321 to 2.346405252,1.4597683 to 2.314578544,1.4096475 to 2.24854439,1.305458887 to 2.174553559,1.187988262 to 2.117102592,1.096580453
+"qos?" at 2.449466147,1.33133375
 linethick = 1;
-spline -> from 2.742170002,1.67421353 to 2.742170002,1.67421353 to 2.785751852,1.528851298 to 2.848308569,1.320395957 to 2.891890419,1.175100774
-"interfaces+" at 3.212786933,1.42479125
+spline -> from 2.562300598,1.56439547 to 2.562300598,1.56439547 to 2.603023748,1.428568102 to 2.661477131,1.233786143 to 2.702200281,1.098021426
+"interfaces+" at 3.002047967,1.33133375
 linethick = 1;
-spline -> from 2.160452878,0.83610103 to 2.160452878,0.83610103 to 2.160452878,0.690738798 to 2.160452878,0.4823102766 to 2.160452878,0.3369748642
-"queues value*" at 2.626108183,0.58667875
+spline -> from 2.018740522,0.78125797 to 2.018740522,0.78125797 to 2.018740522,0.645430602 to 2.018740522,0.4506737034 to 2.018740522,0.3148713958
+"queues value*" at 2.453851717,0.54819625
 linethick = 1;
-spline -> from 3.676631915,3.429757497 to 3.676631915,3.429757497 to 3.372899945,3.369815691 to 3.032089878,3.28406002 to 2.914754128,3.1848275 to 2.814113579,3.09967527 to 2.75705488,2.96021335 to 2.725810046,2.849716598
-"bridges*" at 3.1848275,3.10101625
+spline -> from 3.86180764,3.204786603 to 3.86180764,3.204786603 to 3.57799861,3.148776609 to 3.259543577,3.06864598 to 3.149966978,2.9759225 to 3.055927827,2.89635573 to 3.002549175,2.76604165 to 2.973353809,2.662792802
+"bridges*" at 3.402325206,2.89760875
 linethick = 1;
-spline -> from 3.710961003,3.350840824 to 3.710961003,3.350840824 to 3.642839219,3.307124876 to 3.581690531,3.252614039 to 3.538712122,3.1848275 to 3.47649065,3.086801862 to 3.476356552,2.95484943 to 3.490637989,2.850789382
-"capabilities value*" at 4.125390872,3.10101625
+spline -> from 3.893947603,3.131046376 to 3.893947603,3.131046376 to 3.830231536,3.090197924 to 3.773156475,3.039262661 to 3.732934533,2.9759225 to 3.674794405,2.884326738 to 3.674731754,2.76102957 to 3.688076417,2.663795218
+"capabilities value*" at 4.281130783,2.89760875
 linethick = 1;
-spline -> from 4.526209794,3.352114755 to 4.526209794,3.352114755 to 4.593862235,3.304711112 to 4.66057599,3.248457001 to 4.712069622,3.1848275 to 4.790919246,3.087405303 to 4.843150417,2.955452871 to 4.87513279,2.851191676
-"ssl?" at 4.930917558,3.10101625
+spline -> from 4.655721112,3.132236745 to 4.655721112,3.132236745 to 4.71887332,3.087942488 to 4.781211065,3.035378299 to 4.829327033,2.9759225 to 4.90306726,2.884890597 to 4.951872389,2.761593429 to 4.981694265,2.664171124
+"ssl?" at 5.033819897,2.89760875
 linethick = 1;
-spline -> from 4.735067429,3.351645412 to 4.735067429,3.351645412 to 4.853341865,3.304577014 to 4.976108584,3.24852405 to 5.084526817,3.1848275 to 5.243567045,3.091361194 to 5.403210714,2.956659753 to 5.518602043,2.850185941
-"manager_options*" at 5.908693125,3.10101625
+spline -> from 4.850878977,3.131798188 to 4.850878977,3.131798188 to 4.961395341,3.087817186 to 5.076109322,3.03544095 to 5.177415989,2.9759225 to 5.326024161,2.888587006 to 5.475133541,2.762721147 to 5.582955912,2.663231359
+"manager_options*" at 5.947522081,2.89760875
 .PE
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index e28b053..529c3b9 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -388,6 +388,11 @@
         </p>
       </column>
 
+      <column name="flow_tables">
+        Configuration for OpenFlow tables.  Each pair maps from an OpenFlow
+        table ID to configuration for that table.
+      </column>
+
       <column name="fail_mode">
         <p>When a controller is configured, it is, ordinarily, responsible
         for setting up all flows on the switch.  Thus, if the connection to
@@ -1867,6 +1872,99 @@
     </group>
   </table>
 
+  <table name="Flow_Table" title="OpenFlow table configuration">
+    <p>Configuration for a particular OpenFlow table.</p>
+
+    <column name="name">
+      The table's name.  Set this column to change the name that controllers
+      will receive when they request table statistics, e.g. <code>ovs-ofctl
+      dump-tables</code>.  The name does not affect switch behavior.
+    </column>
+
+    <column name="flow_limit">
+      If set, limits the number of flows that may be added to the table.  Open
+      vSwitch may limit the number of flows in a table for other reasons,
+      e.g. due to hardware limitations or for resource availability or
+      performance reasons.
+    </column>
+
+    <column name="overflow_policy">
+      <p>
+        Controls the switch's behavior when an OpenFlow flow table modification
+        request would add flows in excess of <ref column="flow_limit"/>.  The
+        supported values are:
+      </p>
+
+      <dl>
+        <dt><code>refuse</code></dt>
+        <dd>
+          Refuse to add the flow or flows.  This is also the default policy
+          when <ref column="overflow_policy"/> is unset.
+        </dd>
+
+        <dt><code>evict</code></dt>
+        <dd>
+          Delete the flow that will expire soonest.  See <ref column="groups"/>
+          for details.
+        </dd>
+      </dl>
+    </column>
+
+    <column name="groups">
+      <p>
+        When <ref column="overflow_policy"/> is <code>evict</code>, this
+        controls how flows are chosen for eviction when the flow table would
+        otherwise exceed <ref column="flow_limit"/> flows.  Its value is a set
+        of NXM fields or sub-fields, each of which takes one of the forms
+        <code><var>field</var>[]</code> or
+        <code><var>field</var>[<var>start</var>..<var>end</var>]</code>,
+        e.g. <code>NXM_OF_IN_PORT[]</code>.  Please see
+        <code>nicira-ext.h</code> for a complete list of NXM field names.
+      </p>
+
+      <p>
+        When a flow must be evicted due to overflow, the flow to evict is
+        chosen through an approximation of the following algorithm:
+      </p>
+
+      <ol>
+        <li>
+          Divide the flows in the table into groups based on the values of the
+          specified fields or subfields, so that all of the flows in a given
+          group have the same values for those fields.  If a flow does not
+          specify a given field, that field's value is treated as 0.
+        </li>
+
+        <li>
+          Consider the flows in the largest group, that is, the group that
+          contains the greatest number of flows.  If two or more groups all
+          have the same largest number of flows, consider the flows in all of
+          those groups.
+        </li>
+
+        <li>
+          Among the flows under consideration, choose the flow that expires
+          soonest for eviction.
+        </li>
+      </ol>
+
+      <p>
+        The eviction process only considers flows that have an idle timeout or
+        a hard timeout.  That is, eviction never deletes permanent flows.
+        (Permanent flows do count against <ref column="flow_limit"/>.)
+      </p>
+
+      <p>
+        Open vSwitch ignores any invalid or unknown field specifications.
+      </p>
+
+      <p>
+        When <ref column="overflow_policy"/> is not <code>evict</code>, this
+        column has no effect.
+      </p>
+    </column>
+  </table>
+
   <table name="QoS" title="Quality of Service configuration">
     <p>Quality of Service (QoS) configuration for each Port that
     references it.</p>
-- 
1.7.2.5




More information about the dev mailing list