[ovs-dev] [eviction 12/12] Add support for limiting the number of flows in an OpenFlow flow table.
Ethan Jackson
ethan at nicira.com
Wed Feb 1 02:00:33 UTC 2012
I'm surprised that the Flow_Table configuration went into the database
instead of being implemented as OpenFlow extensions. Naively, to me it
seems like a bit of a layering violation. Can you please briefly
explain your thinking on this issue?
In ofproto_configure_table() I would think the initial assertion
should come before we dereference ofproto->tables. Practically
speaking, it doesn't matter much, just looks a tad strange to me.
Otherwise looks good,
Ethan
On Fri, Jan 13, 2012 at 16:43, Ben Pfaff <blp at nicira.com> wrote:
> Signed-off-by: Ben Pfaff <blp at nicira.com>
> ---
> NEWS | 4 +
> ofproto/ofproto-provider.h | 34 +++-
> ofproto/ofproto.c | 512 ++++++++++++++++++++++++++++++++++++++++++--
> ofproto/ofproto.h | 22 ++-
> tests/ofproto-macros.at | 16 ++
> tests/ofproto.at | 220 +++++++++++++++++++
> tests/ovs-vsctl.at | 2 +
> utilities/ovs-vsctl.8.in | 3 +
> utilities/ovs-vsctl.c | 6 +-
> vswitchd/bridge.c | 63 ++++++
> vswitchd/vswitch.gv | 2 +
> vswitchd/vswitch.ovsschema | 26 ++-
> vswitchd/vswitch.pic | 99 +++++----
> vswitchd/vswitch.xml | 98 +++++++++
> 14 files changed, 1037 insertions(+), 70 deletions(-)
>
> diff --git a/NEWS b/NEWS
> index 1fe83bb..189cc4b 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -1,5 +1,9 @@
> post-v1.5.0
> ------------------------
> + - New support for limiting the number of flows in an OpenFlow flow
> + table, with configurable policy for evicting flows upon
> + overflow. See the Flow_Table table in ovs-vswitchd.conf.db(5)
> + for more information.
> - ofproto-provider interface:
> - "struct rule" has a new member "used" that ofproto implementations
> should maintain by updating with ofproto_rule_update_used().
> diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
> index 9fb5eff..5cb2b0f 100644
> --- a/ofproto/ofproto-provider.h
> +++ b/ofproto/ofproto-provider.h
> @@ -1,5 +1,5 @@
> /*
> - * Copyright (c) 2009, 2010, 2011 Nicira Networks.
> + * Copyright (c) 2009, 2010, 2011, 2012 Nicira Networks.
> *
> * Licensed under the Apache License, Version 2.0 (the "License");
> * you may not use this file except in compliance with the License.
> @@ -22,6 +22,7 @@
> #include "ofproto/ofproto.h"
> #include "cfm.h"
> #include "classifier.h"
> +#include "heap.h"
> #include "list.h"
> #include "ofp-errors.h"
> #include "shash.h"
> @@ -111,6 +112,33 @@ enum oftable_flags {
> struct oftable {
> enum oftable_flags flags;
> struct classifier cls; /* Contains "struct rule"s. */
> + char *name; /* Table name exposed via OpenFlow, or NULL. */
> +
> + /* Maximum number of flows or UINT_MAX if there is no limit besides any
> + * limit imposed by resource limitations. */
> + unsigned int max_flows;
> +
> + /* These members determine the handling of an attempt to add a flow that
> + * would cause the table to have more than 'max_flows' flows.
> + *
> + * If 'eviction_fields' is NULL, overflows will be rejected with an error.
> + *
> + * If 'eviction_fields' is nonnull (regardless of whether n_eviction_fields
> + * is nonzero), an overflow will cause a flow to be removed. The flow to
> + * be removed is chosen to give fairness among groups distinguished by
> + * different values for the subfields within 'eviction_fields'. */
> + struct mf_subfield *eviction_fields;
> + size_t n_eviction_fields;
> +
> + /* Eviction groups.
> + *
> + * When a flow is added that would cause the table to have more than
> + * 'max_flows' flows, and 'eviction_fields' is nonnull, these groups are
> + * used to decide which rule to evict: the rule is chosen from the eviction
> + * group that contains the greatest number of rules. */
> + uint32_t eviction_group_id_basis;
> + struct hmap eviction_groups_by_id;
> + struct heap eviction_groups_by_size;
> };
>
> /* Assigns TABLE to each oftable, in turn, in OFPROTO.
> @@ -142,6 +170,10 @@ struct rule {
> uint8_t table_id; /* Index in ofproto's 'tables' array. */
> bool send_flow_removed; /* Send a flow removed message? */
>
> + /* Eviction groups. */
> + struct heap_node evg_node; /* In eviction_group's "rules" heap. */
> + struct eviction_group *eviction_group; /* NULL if not in any group. */
> +
> union ofp_action *actions; /* OpenFlow actions. */
> int n_actions; /* Number of elements in actions[]. */
> };
> diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
> index 357c488..ccfc181 100644
> --- a/ofproto/ofproto.c
> +++ b/ofproto/ofproto.c
> @@ -29,6 +29,7 @@
> #include "dynamic-string.h"
> #include "hash.h"
> #include "hmap.h"
> +#include "meta-flow.h"
> #include "netdev.h"
> #include "nx-match.h"
> #include "ofp-errors.h"
> @@ -42,6 +43,7 @@
> #include "pinsched.h"
> #include "pktbuf.h"
> #include "poll-loop.h"
> +#include "random.h"
> #include "shash.h"
> #include "sset.h"
> #include "timeval.h"
> @@ -63,6 +65,7 @@ COVERAGE_DEFINE(ofproto_update_port);
>
> enum ofproto_state {
> S_OPENFLOW, /* Processing OpenFlow commands. */
> + S_EVICT, /* Evicting flows from over-limit tables. */
> S_FLUSH, /* Deleting all flow table rules. */
> };
>
> @@ -128,15 +131,42 @@ static void ofoperation_destroy(struct ofoperation *);
> static void oftable_init(struct oftable *);
> static void oftable_destroy(struct oftable *);
>
> +static void oftable_set_name(struct oftable *, const char *name);
> +
> +static void oftable_disable_eviction(struct oftable *);
> +static void oftable_enable_eviction(struct oftable *,
> + const struct mf_subfield *fields,
> + size_t n_fields);
> +
> static void oftable_remove_rule(struct rule *);
> static struct rule *oftable_replace_rule(struct rule *);
> static void oftable_substitute_rule(struct rule *old, struct rule *new);
>
> -/* rule. */
> -static void ofproto_rule_destroy__(struct rule *);
> -static void ofproto_rule_send_removed(struct rule *, uint8_t reason);
> -static bool rule_is_modifiable(const struct rule *);
> -static bool rule_is_hidden(const struct rule *);
> +/* A set of rules within a single OpenFlow table (oftable) that have the same
> + * values for the oftable's eviction_fields. A rule to be evicted, when one is
> + * needed, is taken from the eviction group that contains the greatest number
> + * of rules.
> + *
> + * An oftable owns any number of eviction groups, each of which contains any
> + * number of rules.
> + *
> + * Membership in an eviction group is imprecise, based on the hash of the
> + * oftable's eviction_fields (in the eviction_group's id_node.hash member).
> + * That is, if two rules have different eviction_fields, but those
> + * eviction_fields hash to the same value, then they will belong to the same
> + * eviction_group anyway.
> + *
> + * (When eviction is not enabled on an oftable, we don't track any eviction
> + * groups, to save time and space.) */
> +struct eviction_group {
> + struct hmap_node id_node; /* In oftable's "eviction_groups_by_id". */
> + struct heap_node size_node; /* In oftable's "eviction_groups_by_size". */
> + struct heap rules; /* Contains "struct rule"s. */
> +};
> +
> +static struct rule *choose_rule_to_evict(struct oftable *, struct rule *avoid);
> +static void ofproto_evict(struct ofproto *);
> +static uint32_t rule_eviction_priority(struct rule *);
>
> /* ofport. */
> static void ofport_destroy__(struct ofport *);
> @@ -146,11 +176,17 @@ static void update_port(struct ofproto *, const char *devname);
> static int init_ports(struct ofproto *);
> static void reinit_ports(struct ofproto *);
>
> +/* rule. */
> +static void ofproto_rule_destroy__(struct rule *);
> +static void ofproto_rule_send_removed(struct rule *, uint8_t reason);
> +static bool rule_is_modifiable(const struct rule *);
> +static bool rule_is_hidden(const struct rule *);
> +
> /* OpenFlow. */
> static enum ofperr add_flow(struct ofproto *, struct ofconn *,
> const struct ofputil_flow_mod *,
> const struct ofp_header *);
> -
> +static void delete_flow__(struct rule *, struct ofopgroup *);
> static bool handle_openflow(struct ofconn *, struct ofpbuf *);
> static enum ofperr handle_flow_mod__(struct ofproto *, struct ofconn *,
> const struct ofputil_flow_mod *,
> @@ -804,6 +840,57 @@ ofproto_is_mirror_output_bundle(const struct ofproto *ofproto, void *aux)
> : false);
> }
>
> +/* Configuration of OpenFlow tables. */
> +
> +/* Returns the number of OpenFlow tables in 'ofproto'. */
> +int
> +ofproto_get_n_tables(const struct ofproto *ofproto)
> +{
> + return ofproto->n_tables;
> +}
> +
> +/* Configures the OpenFlow table in 'ofproto' with id 'table_id' with the
> + * settings from 's'. 'table_id' must be in the range 0 through the number of
> + * OpenFlow tables in 'ofproto' minus 1, inclusive.
> + *
> + * For read-only tables, only the name may be configured. */
> +void
> +ofproto_configure_table(struct ofproto *ofproto, int table_id,
> + const struct ofproto_table_settings *s)
> +{
> + struct oftable *table = &ofproto->tables[table_id];
> + assert(table_id >= 0 && table_id < ofproto->n_tables);
> +
> + oftable_set_name(table, s->name);
> +
> + if (table->flags & OFTABLE_READONLY) {
> + return;
> + }
> +
> + if (s->groups) {
> + oftable_enable_eviction(table, s->groups, s->n_groups);
> + } else {
> + oftable_disable_eviction(table);
> + }
> +
> + table->max_flows = s->max_flows;
> + if (classifier_count(&table->cls) > table->max_flows
> + && table->eviction_fields) {
> + /* 'table' contains more flows than allowed. We might not be able to
> + * evict them right away because of the asynchronous nature of flow
> + * table changes. Schedule eviction for later. */
> + switch (ofproto->state) {
> + case S_OPENFLOW:
> + ofproto->state = S_EVICT;
> + break;
> + case S_EVICT:
> + case S_FLUSH:
> + /* We're already deleting flows, nothing more to do. */
> + break;
> + }
> + }
> +}
> +
> bool
> ofproto_has_snoops(const struct ofproto *ofproto)
> {
> @@ -950,12 +1037,19 @@ ofproto_run(struct ofproto *p)
> }
> }
>
> -
> switch (p->state) {
> case S_OPENFLOW:
> connmgr_run(p->connmgr, handle_openflow);
> break;
>
> + case S_EVICT:
> + connmgr_run(p->connmgr, NULL);
> + ofproto_evict(p);
> + if (list_is_empty(&p->pending) && hmap_is_empty(&p->deletions)) {
> + p->state = S_OPENFLOW;
> + }
> + break;
> +
> case S_FLUSH:
> connmgr_run(p->connmgr, NULL);
> ofproto_flush__(p);
> @@ -1012,6 +1106,7 @@ ofproto_wait(struct ofproto *p)
> connmgr_wait(p->connmgr, true);
> break;
>
> + case S_EVICT:
> case S_FLUSH:
> connmgr_wait(p->connmgr, false);
> if (list_is_empty(&p->pending) && hmap_is_empty(&p->deletions)) {
> @@ -1248,7 +1343,7 @@ ofproto_delete_flow(struct ofproto *ofproto, const struct cls_rule *target)
> struct ofopgroup *group = ofopgroup_create_unattached(ofproto);
> ofoperation_create(group, rule, OFOPERATION_DELETE);
> oftable_remove_rule(rule);
> - rule->ofproto->ofproto_class->rule_destruct(rule);
> + ofproto->ofproto_class->rule_destruct(rule);
> ofopgroup_submit(group);
> return true;
> }
> @@ -1991,6 +2086,18 @@ handle_table_stats_request(struct ofconn *ofconn,
>
> p->ofproto_class->get_tables(p, ots);
>
> + for (i = 0; i < p->n_tables; i++) {
> + const struct oftable *table = &p->tables[i];
> +
> + if (table->name) {
> + ovs_strzcpy(ots[i].name, table->name, sizeof ots[i].name);
> + }
> +
> + if (table->max_flows < ntohl(ots[i].max_entries)) {
> + ots[i].max_entries = htonl(table->max_flows);
> + }
> + }
> +
> ofconn_send_reply(ofconn, msg);
> return 0;
> }
> @@ -2580,6 +2687,7 @@ add_flow(struct ofproto *ofproto, struct ofconn *ofconn,
> rule->send_flow_removed = (fm->flags & OFPFF_SEND_FLOW_REM) != 0;
> rule->actions = ofputil_actions_clone(fm->actions, fm->n_actions);
> rule->n_actions = fm->n_actions;
> + rule->eviction_group = NULL;
>
> /* Insert new rule. */
> victim = oftable_replace_rule(rule);
> @@ -2588,6 +2696,21 @@ add_flow(struct ofproto *ofproto, struct ofconn *ofconn,
> } else if (victim && victim->pending) {
> error = OFPROTO_POSTPONE;
> } else {
> + struct rule *evict;
> +
> + if (classifier_count(&table->cls) > table->max_flows) {
> + evict = choose_rule_to_evict(table, rule);
> + if (!evict) {
> + error = OFPERR_OFPFMFC_ALL_TABLES_FULL;
> + goto exit;
> + } else if (evict->pending) {
> + error = OFPROTO_POSTPONE;
> + goto exit;
> + }
> + } else {
> + evict = NULL;
> + }
> +
> group = ofopgroup_create(ofproto, ofconn, request, fm->buffer_id);
> ofoperation_create(group, rule, OFOPERATION_ADD);
> rule->pending->victim = victim;
> @@ -2595,10 +2718,13 @@ add_flow(struct ofproto *ofproto, struct ofconn *ofconn,
> error = ofproto->ofproto_class->rule_construct(rule);
> if (error) {
> ofoperation_destroy(rule->pending);
> + } else if (evict) {
> + delete_flow__(evict, group);
> }
> ofopgroup_submit(group);
> }
>
> +exit:
> /* Back out if an error occurred. */
> if (error) {
> oftable_substitute_rule(rule, victim);
> @@ -2642,7 +2768,7 @@ modify_flows__(struct ofproto *ofproto, struct ofconn *ofconn,
> rule->pending->n_actions = rule->n_actions;
> rule->actions = ofputil_actions_clone(fm->actions, fm->n_actions);
> rule->n_actions = fm->n_actions;
> - rule->ofproto->ofproto_class->rule_modify_actions(rule);
> + ofproto->ofproto_class->rule_modify_actions(rule);
> } else {
> rule->modified = time_msec();
> }
> @@ -2699,6 +2825,18 @@ modify_flow_strict(struct ofproto *ofproto, struct ofconn *ofconn,
>
> /* OFPFC_DELETE implementation. */
>
> +static void
> +delete_flow__(struct rule *rule, struct ofopgroup *group)
> +{
> + struct ofproto *ofproto = rule->ofproto;
> +
> + ofproto_rule_send_removed(rule, OFPRR_DELETE);
> +
> + ofoperation_create(group, rule, OFOPERATION_DELETE);
> + oftable_remove_rule(rule);
> + ofproto->ofproto_class->rule_destruct(rule);
> +}
> +
> /* Deletes the rules listed in 'rules'.
> *
> * Returns 0 on success, otherwise an OpenFlow error code. */
> @@ -2711,11 +2849,7 @@ delete_flows__(struct ofproto *ofproto, struct ofconn *ofconn,
>
> group = ofopgroup_create(ofproto, ofconn, request, UINT32_MAX);
> LIST_FOR_EACH_SAFE (rule, next, ofproto_node, rules) {
> - ofproto_rule_send_removed(rule, OFPRR_DELETE);
> -
> - ofoperation_create(group, rule, OFOPERATION_DELETE);
> - oftable_remove_rule(rule);
> - rule->ofproto->ofproto_class->rule_destruct(rule);
> + delete_flow__(rule, group);
> }
> ofopgroup_submit(group);
>
> @@ -2782,7 +2916,13 @@ void
> ofproto_rule_update_used(struct rule *rule, long long int used)
> {
> if (used > rule->used) {
> + struct eviction_group *evg = rule->eviction_group;
> +
> rule->used = used;
> + if (evg) {
> + heap_change(&evg->rules, &rule->evg_node,
> + rule_eviction_priority(rule));
> + }
> }
> }
>
> @@ -2805,7 +2945,7 @@ ofproto_rule_expire(struct rule *rule, uint8_t reason)
> group = ofopgroup_create_unattached(ofproto);
> ofoperation_create(group, rule, OFOPERATION_DELETE);
> oftable_remove_rule(rule);
> - rule->ofproto->ofproto_class->rule_destruct(rule);
> + ofproto->ofproto_class->rule_destruct(rule);
> ofopgroup_submit(group);
> }
>
> @@ -3365,6 +3505,255 @@ pick_fallback_dpid(void)
> return eth_addr_to_uint64(ea);
> }
>
> +/* Table overflow policy. */
> +
> +/* Chooses and returns a rule to evict from 'table'. If 'avoid' is nonnull,
> + * then it will not be chosen for eviction. Returns NULL if the table is not
> + * configured to evict rules or if 'avoid' (if nonnull) is the only rule within
> + * its table that is a candidate for eviction. */
> +static struct rule *
> +choose_rule_to_evict(struct oftable *table, struct rule *avoid)
> +{
> + struct eviction_group *evg;
> +
> + if (!table->eviction_fields) {
> + return NULL;
> + }
> +
> + /* In the common case, the outer and inner loops here will each be entered
> + * exactly once:
> + *
> + * - The inner loop normally "return"s in its first iteration. If the
> + * eviction group has more than one rule, then it always returns,
> + * either in the first or second iteration.
> + *
> + * - The outer loop only iterates a second time if the first eviction
> + * group visited has only a single rule (which is unusual since the
> + * first group is also the largest group). It never iterates more
> + * than twice.
> + *
> + * - The outer loop can exit only if table's 'max_flows' is all filled up
> + * by permanent rules and 'avoid'. */
> + HEAP_FOR_EACH (evg, size_node, &table->eviction_groups_by_size) {
> + struct rule *rule;
> +
> + HEAP_FOR_EACH (rule, evg_node, &evg->rules) {
> + if (rule != avoid) {
> + return rule;
> + }
> + }
> + }
> +
> + return NULL;
> +}
> +
> +/* Searches 'ofproto' for tables that have more flows than their configured
> + * maximum and that have flow eviction enabled, and evicts as many flows as
> + * necessary and currently feasible from them.
> + *
> + * This triggers only when an OpenFlow table has N flows in it and then the
> + * client configures a maximum number of flows less than N. */
> +static void
> +ofproto_evict(struct ofproto *ofproto)
> +{
> + struct ofopgroup *group;
> + struct oftable *table;
> +
> + group = ofopgroup_create_unattached(ofproto);
> + OFPROTO_FOR_EACH_TABLE (table, ofproto) {
> + while (classifier_count(&table->cls) > table->max_flows
> + && table->eviction_fields) {
> + struct rule *rule;
> +
> + rule = choose_rule_to_evict(table, NULL);
> + if (!rule || rule->pending) {
> + break;
> + }
> +
> + ofoperation_create(group, rule, OFOPERATION_DELETE);
> + oftable_remove_rule(rule);
> + ofproto->ofproto_class->rule_destruct(rule);
> + }
> + }
> + ofopgroup_submit(group);
> +}
> +
> +/* Eviction groups. */
> +
> +/* Returns the priority to use for an eviction_group that contains 'n_rules'
> + * rules. The priority contains low-order random bits to ensure that eviction
> + * groups with the same number of rules are prioritized randomly. */
> +static uint32_t
> +eviction_group_priority(size_t n_rules)
> +{
> + uint16_t size = MIN(UINT16_MAX, n_rules);
> + return (size << 16) | random_uint16();
> +}
> +
> +/* Updates 'evg', an eviction_group within 'table', following a change that
> + * adds or removes rules in 'evg'. */
> +static void
> +eviction_group_resized(struct oftable *table, struct eviction_group *evg)
> +{
> + heap_change(&table->eviction_groups_by_size, &evg->size_node,
> + eviction_group_priority(heap_count(&evg->rules)));
> +}
> +
> +/* Destroys 'evg', an eviction_group within 'table':
> + *
> + * - Removes all the rules, if any, from 'evg'. (It doesn't destroy the
> + * rules themselves, just removes them from the eviction group.)
> + *
> + * - Removes 'evg' from 'table'.
> + *
> + * - Frees 'evg'. */
> +static void
> +eviction_group_destroy(struct oftable *table, struct eviction_group *evg)
> +{
> + while (!heap_is_empty(&evg->rules)) {
> + struct rule *rule;
> +
> + rule = CONTAINER_OF(heap_pop(&evg->rules), struct rule, evg_node);
> + rule->eviction_group = NULL;
> + }
> + hmap_remove(&table->eviction_groups_by_id, &evg->id_node);
> + heap_remove(&table->eviction_groups_by_size, &evg->size_node);
> + heap_destroy(&evg->rules);
> + free(evg);
> +}
> +
> +/* Removes 'rule' from its eviction group, if any. */
> +static void
> +eviction_group_remove_rule(struct rule *rule)
> +{
> + if (rule->eviction_group) {
> + struct oftable *table = &rule->ofproto->tables[rule->table_id];
> + struct eviction_group *evg = rule->eviction_group;
> +
> + rule->eviction_group = NULL;
> + heap_remove(&evg->rules, &rule->evg_node);
> + if (heap_is_empty(&evg->rules)) {
> + eviction_group_destroy(table, evg);
> + } else {
> + eviction_group_resized(table, evg);
> + }
> + }
> +}
> +
> +/* Hashes the 'rule''s values for the eviction_fields of 'rule''s table, and
> + * returns the hash value. */
> +static uint32_t
> +eviction_group_hash_rule(struct rule *rule)
> +{
> + struct oftable *table = &rule->ofproto->tables[rule->table_id];
> + const struct mf_subfield *sf;
> + uint32_t hash;
> +
> + hash = table->eviction_group_id_basis;
> + for (sf = table->eviction_fields;
> + sf < &table->eviction_fields[table->n_eviction_fields];
> + sf++)
> + {
> + if (mf_are_prereqs_ok(sf->field, &rule->cr.flow)) {
> + union mf_value value;
> +
> + mf_get_value(sf->field, &rule->cr.flow, &value);
> + if (sf->ofs) {
> + bitwise_zero(&value, sf->field->n_bytes, 0, sf->ofs);
> + }
> + if (sf->ofs + sf->n_bits < sf->field->n_bytes * 8) {
> + unsigned int start = sf->ofs + sf->n_bits;
> + bitwise_zero(&value, sf->field->n_bytes, start,
> + sf->field->n_bytes * 8 - start);
> + }
> + hash = hash_bytes(&value, sf->field->n_bytes, hash);
> + } else {
> + hash = hash_int(hash, 0);
> + }
> + }
> +
> + return hash;
> +}
> +
> +/* Returns an eviction group within 'table' with the given 'id', creating one
> + * if necessary. */
> +static struct eviction_group *
> +eviction_group_find(struct oftable *table, uint32_t id)
> +{
> + struct eviction_group *evg;
> +
> + HMAP_FOR_EACH_WITH_HASH (evg, id_node, id, &table->eviction_groups_by_id) {
> + return evg;
> + }
> +
> + evg = xmalloc(sizeof *evg);
> + hmap_insert(&table->eviction_groups_by_id, &evg->id_node, id);
> + heap_insert(&table->eviction_groups_by_size, &evg->size_node,
> + eviction_group_priority(0));
> + heap_init(&evg->rules);
> +
> + return evg;
> +}
> +
> +/* Returns an eviction priority for 'rule'. The return value should be
> + * interpreted so that a higher priority makes a rule a more attractive
> + * candidate for eviction. */
> +static uint32_t
> +rule_eviction_priority(struct rule *rule)
> +{
> + long long int hard_expiration;
> + long long int idle_expiration;
> + long long int expiration;
> + uint32_t expiration_offset;
> +
> + /* Calculate time of expiration. */
> + hard_expiration = (rule->hard_timeout
> + ? rule->modified + rule->hard_timeout * 1000
> + : LLONG_MAX);
> + idle_expiration = (rule->idle_timeout
> + ? rule->used + rule->idle_timeout * 1000
> + : LLONG_MAX);
> + expiration = MIN(hard_expiration, idle_expiration);
> + if (expiration == LLONG_MAX) {
> + return 0;
> + }
> +
> + /* Calculate the time of expiration as a number of (approximate) seconds
> + * after program startup.
> + *
> + * This should work OK for program runs that last UINT32_MAX seconds or
> + * less. Therefore, please restart OVS at least once every 136 years. */
> + expiration_offset = (expiration >> 10) - (time_boot_msec() >> 10);
> +
> + /* Invert the expiration offset because we're using a max-heap. */
> + return UINT32_MAX - expiration_offset;
> +}
> +
> +/* Adds 'rule' to an appropriate eviction group for its oftable's
> + * configuration. Does nothing if 'rule''s oftable doesn't have eviction
> + * enabled, or if 'rule' is a permanent rule (one that will never expire on its
> + * own).
> + *
> + * The caller must ensure that 'rule' is not already in an eviction group. */
> +static void
> +eviction_group_add_rule(struct rule *rule)
> +{
> + struct ofproto *ofproto = rule->ofproto;
> + struct oftable *table = &ofproto->tables[rule->table_id];
> +
> + if (table->eviction_fields
> + && (rule->hard_timeout || rule->idle_timeout)) {
> + struct eviction_group *evg;
> +
> + evg = eviction_group_find(table, eviction_group_hash_rule(rule));
> +
> + rule->eviction_group = evg;
> + heap_insert(&evg->rules, &rule->evg_node,
> + rule_eviction_priority(rule));
> + eviction_group_resized(table, evg);
> + }
> +}
> +
> /* oftables. */
>
> /* Initializes 'table'. */
> @@ -3375,14 +3764,96 @@ oftable_init(struct oftable *table)
> classifier_init(&table->cls);
> }
>
> -/* Destroys 'table'.
> +/* Destroys 'table', including its classifier and eviction groups.
> *
> * The caller is responsible for freeing 'table' itself. */
> static void
> oftable_destroy(struct oftable *table)
> {
> assert(classifier_is_empty(&table->cls));
> + oftable_disable_eviction(table);
> classifier_destroy(&table->cls);
> + free(table->name);
> +}
> +
> +/* Changes the name of 'table' to 'name'. If 'name' is NULL or the empty
> + * string, then 'table' will use its default name.
> + *
> + * This only affects the name exposed for a table through the OpenFlow
> + * OFPST_TABLE reply (as printed by "ovs-ofctl dump-tables"). */
> +static void
> +oftable_set_name(struct oftable *table, const char *name)
> +{
> + if (name && name[0]) {
> + int len = strnlen(name, OFP_MAX_TABLE_NAME_LEN);
> + if (!table->name || strncmp(name, table->name, len)) {
> + free(table->name);
> + table->name = xmemdup0(name, len);
> + }
> + } else {
> + free(table->name);
> + table->name = NULL;
> + }
> +}
> +
> +/* oftables support a choice of two policies when adding a rule would cause the
> + * number of flows in the table to exceed the configured maximum number: either
> + * they can refuse to add the new flow or they can evict some existing flow.
> + * This function configures the former policy on 'table'. */
> +static void
> +oftable_disable_eviction(struct oftable *table)
> +{
> + if (table->eviction_fields) {
> + struct eviction_group *evg, *next;
> +
> + HMAP_FOR_EACH_SAFE (evg, next, id_node,
> + &table->eviction_groups_by_id) {
> + eviction_group_destroy(table, evg);
> + }
> + hmap_destroy(&table->eviction_groups_by_id);
> + heap_destroy(&table->eviction_groups_by_size);
> +
> + free(table->eviction_fields);
> + table->eviction_fields = NULL;
> + table->n_eviction_fields = 0;
> + }
> +}
> +
> +/* oftables support a choice of two policies when adding a rule would cause the
> + * number of flows in the table to exceed the configured maximum number: either
> + * they can refuse to add the new flow or they can evict some existing flow.
> + * This function configures the latter policy on 'table', with fairness based
> + * on the values of the 'n_fields' fields specified in 'fields'. (Specifying
> + * 'n_fields' as 0 disables fairness.) */
> +static void
> +oftable_enable_eviction(struct oftable *table,
> + const struct mf_subfield *fields, size_t n_fields)
> +{
> + struct cls_cursor cursor;
> + struct rule *rule;
> +
> + if (table->eviction_fields
> + && n_fields == table->n_eviction_fields
> + && (!n_fields
> + || !memcmp(fields, table->eviction_fields,
> + n_fields * sizeof *fields))) {
> + /* No change. */
> + return;
> + }
> +
> + oftable_disable_eviction(table);
> +
> + table->n_eviction_fields = n_fields;
> + table->eviction_fields = xmemdup(fields, n_fields * sizeof *fields);
> +
> + table->eviction_group_id_basis = random_uint32();
> + hmap_init(&table->eviction_groups_by_id);
> + heap_init(&table->eviction_groups_by_size);
> +
> + cls_cursor_init(&cursor, &table->cls, NULL);
> + CLS_CURSOR_FOR_EACH (rule, cr, &cursor) {
> + eviction_group_add_rule(rule);
> + }
> }
>
> /* Removes 'rule' from the oftable that contains it. */
> @@ -3393,6 +3864,7 @@ oftable_remove_rule(struct rule *rule)
> struct oftable *table = &ofproto->tables[rule->table_id];
>
> classifier_remove(&table->cls, &rule->cr);
> + eviction_group_remove_rule(rule);
> }
>
> /* Inserts 'rule' into its oftable. Removes any existing rule from 'rule''s
> @@ -3403,8 +3875,14 @@ oftable_replace_rule(struct rule *rule)
> {
> struct ofproto *ofproto = rule->ofproto;
> struct oftable *table = &ofproto->tables[rule->table_id];
> + struct rule *victim;
>
> - return rule_from_cls_rule(classifier_replace(&table->cls, &rule->cr));
> + victim = rule_from_cls_rule(classifier_replace(&table->cls, &rule->cr));
> + if (victim) {
> + eviction_group_remove_rule(victim);
> + }
> + eviction_group_add_rule(rule);
> + return victim;
> }
>
> /* Removes 'old' from its oftable then, if 'new' is nonnull, inserts 'new'. */
> diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h
> index 2d47878..dd42ecf 100644
> --- a/ofproto/ofproto.h
> +++ b/ofproto/ofproto.h
> @@ -217,7 +217,6 @@ int ofproto_set_stp(struct ofproto *, const struct ofproto_stp_settings *);
> int ofproto_get_stp_status(struct ofproto *, struct ofproto_stp_status *);
>
> /* Configuration of ports. */
> -
> void ofproto_port_unregister(struct ofproto *, uint16_t ofp_port);
>
> void ofproto_port_clear_cfm(struct ofproto *, uint16_t ofp_port);
> @@ -314,6 +313,27 @@ int ofproto_mirror_get_stats(struct ofproto *, void *aux,
> int ofproto_set_flood_vlans(struct ofproto *, unsigned long *flood_vlans);
> bool ofproto_is_mirror_output_bundle(const struct ofproto *, void *aux);
>
> +/* Configuration of OpenFlow tables. */
> +struct ofproto_table_settings {
> + char *name; /* Name exported via OpenFlow or NULL. */
> + unsigned int max_flows; /* Maximum number of flows or UINT_MAX. */
> +
> + /* These members determine the handling of an attempt to add a flow that
> + * would cause the table to have more than 'max_flows' flows.
> + *
> + * If 'groups' is NULL, overflows will be rejected with an error.
> + *
> + * If 'groups' is nonnull, an overflow will cause a flow to be removed.
> + * The flow to be removed is chosen to give fairness among groups
> + * distinguished by different values for the subfields within 'groups'. */
> + struct mf_subfield *groups;
> + size_t n_groups;
> +};
> +
> +int ofproto_get_n_tables(const struct ofproto *);
> +void ofproto_configure_table(struct ofproto *, int table_id,
> + const struct ofproto_table_settings *);
> +
> /* Configuration querying. */
> bool ofproto_has_snoops(const struct ofproto *);
> void ofproto_get_snoops(const struct ofproto *, struct sset *);
> diff --git a/tests/ofproto-macros.at b/tests/ofproto-macros.at
> index 13586c3..3656ecf 100644
> --- a/tests/ofproto-macros.at
> +++ b/tests/ofproto-macros.at
> @@ -1,3 +1,19 @@
> +m4_divert_push([PREPARE_TESTS])
> +[
> +# Strips out uninteresting parts of ovs-ofctl output, as well as parts
> +# that vary from one run to another.
> +ofctl_strip () {
> + sed '
> +s/ (xid=0x[0-9a-fA-F]*)//
> +s/ duration=[0-9.]*s,//
> +s/ cookie=0x0,//
> +s/ table=0,//
> +s/ n_packets=0,//
> +s/ n_bytes=0,//
> +'
> +}]
> +m4_divert_pop([PREPARE_TESTS])
> +
> m4_define([STRIP_XIDS], [[sed 's/ (xid=0x[0-9a-fA-F]*)//']])
> m4_define([STRIP_DURATION], [[sed 's/\bduration=[0-9.]*s/duration=?s/']])
> m4_define([TESTABLE_LOG], [-vPATTERN:ANY:'%c|%p|%m'])
> diff --git a/tests/ofproto.at b/tests/ofproto.at
> index b54d1dd..7c34e14 100644
> --- a/tests/ofproto.at
> +++ b/tests/ofproto.at
> @@ -201,3 +201,223 @@ NXST_FLOW reply:
> ])
> OVS_VSWITCHD_STOP
> AT_CLEANUP
> +
> +AT_SETUP([ofproto - flow table configuration])
> +OVS_VSWITCHD_START
> +# Check the default configuration.
> +(echo "OFPST_TABLE reply (xid=0x1): 255 tables
> + 0: classifier: wild=0x3fffff, max=1000000, active=0
> + lookup=0, matched=0"
> + x=1
> + while test $x -lt 255; do
> + printf " %d: %-8s: wild=0x3fffff, max=1000000, active=0
> + lookup=0, matched=0
> +" $x table$x
> + x=`expr $x + 1`
> + done) > expout
> +AT_CHECK([ovs-ofctl dump-tables br0], [0], [expout])
> +# Change the configuration.
> +AT_CHECK(
> + [ovs-vsctl \
> + -- --id=@t0 create Flow_Table name=main \
> + -- --id=@t1 create Flow_Table flow-limit=1024 \
> + -- set bridge br0 'flow_tables={1=@t1,0=@t0}' \
> + | perl $srcdir/uuidfilt.pl],
> + [0], [<0>
> +<1>
> +])
> +# Check that the configuration was updated.
> +mv expout orig-expout
> +(echo "OFPST_TABLE reply (xid=0x1): 255 tables
> + 0: main : wild=0x3fffff, max=1000000, active=0
> + lookup=0, matched=0
> + 1: table1 : wild=0x3fffff, max= 1024, active=0
> + lookup=0, matched=0"
> + tail -n +6 orig-expout) > expout
> +AT_CHECK([ovs-ofctl dump-tables br0], [0], [expout])
> +OVS_VSWITCHD_STOP
> +AT_CLEANUP
> +
> +AT_SETUP([ofproto - hard limits on flow table size])
> +OVS_VSWITCHD_START
> +# Configure a maximum of 4 flows.
> +AT_CHECK(
> + [ovs-vsctl \
> + -- --id=@t0 create Flow_Table flow-limit=4 \
> + -- set bridge br0 flow_tables:0=@t0 \
> + | perl $srcdir/uuidfilt.pl],
> + [0], [<0>
> +])
> +# Add 4 flows.
> +for in_port in 1 2 3 4; do
> + ovs-ofctl add-flow br0 in_port=$in_port,actions=drop
> +done
> +AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl
> + in_port=1 actions=drop
> + in_port=2 actions=drop
> + in_port=3 actions=drop
> + in_port=4 actions=drop
> +NXST_FLOW reply:
> +])
> +# Adding another flow will be refused.
> +AT_CHECK([ovs-ofctl add-flow br0 in_port=5,actions=drop], [1], [], [stderr])
> +AT_CHECK([head -n 1 stderr], [0],
> + [OFPT_ERROR (xid=0x1): OFPFMFC_ALL_TABLES_FULL
> +])
> +# Also a mod-flow that would add a flow will be refused.
> +AT_CHECK([ovs-ofctl mod-flows br0 in_port=5,actions=drop], [1], [], [stderr])
> +AT_CHECK([head -n 1 stderr], [0],
> + [OFPT_ERROR (xid=0x1): OFPFMFC_ALL_TABLES_FULL
> +])
> +# Replacing or modifying an existing flow is allowed.
> +AT_CHECK([ovs-ofctl add-flow br0 in_port=4,actions=normal])
> +AT_CHECK([ovs-ofctl mod-flows br0 in_port=3,actions=output:1])
> +AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl
> + in_port=1 actions=drop
> + in_port=2 actions=drop
> + in_port=3 actions=output:1
> + in_port=4 actions=NORMAL
> +NXST_FLOW reply:
> +])
> +OVS_VSWITCHD_STOP
> +AT_CLEANUP
> +
> +AT_SETUP([ofproto - eviction upon table overflow])
> +OVS_VSWITCHD_START
> +# Configure a maximum of 4 flows.
> +AT_CHECK(
> + [ovs-vsctl \
> + -- --id=@t0 create Flow_Table flow-limit=4 overflow-policy=evict \
> + -- set bridge br0 flow_tables:0=@t0 \
> + | perl $srcdir/uuidfilt.pl],
> + [0], [<0>
> +])
> +# Add 4 flows.
> +for in_port in 4 3 2 1; do
> + ovs-ofctl add-flow br0 idle_timeout=${in_port}0,in_port=$in_port,actions=drop
> +done
> +AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl
> + idle_timeout=10,in_port=1 actions=drop
> + idle_timeout=20,in_port=2 actions=drop
> + idle_timeout=30,in_port=3 actions=drop
> + idle_timeout=40,in_port=4 actions=drop
> +NXST_FLOW reply:
> +])
> +# Adding another flow will cause the one that expires soonest to be evicted.
> +AT_CHECK([ovs-ofctl add-flow br0 in_port=5,actions=drop])
> +AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl
> + idle_timeout=20,in_port=2 actions=drop
> + idle_timeout=30,in_port=3 actions=drop
> + idle_timeout=40,in_port=4 actions=drop
> + in_port=5 actions=drop
> +NXST_FLOW reply:
> +])
> +# A mod-flow that adds a flow also causes eviction, but replacing or
> +# modifying an existing flow doesn't.
> +AT_CHECK([ovs-ofctl mod-flows br0 in_port=6,actions=drop])
> +AT_CHECK([ovs-ofctl add-flow br0 in_port=4,actions=normal])
> +AT_CHECK([ovs-ofctl mod-flows br0 in_port=3,actions=output:1])
> +AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl
> + idle_timeout=30,in_port=3 actions=output:1
> + in_port=4 actions=NORMAL
> + in_port=5 actions=drop
> + in_port=6 actions=drop
> +NXST_FLOW reply:
> +])
> +# Flows with no timeouts at all cannot be evicted.
> +AT_CHECK([ovs-ofctl add-flow br0 in_port=7,actions=normal])
> +AT_CHECK([ovs-ofctl add-flow br0 in_port=8,actions=drop], [1], [], [stderr])
> +AT_CHECK([head -n 1 stderr], [0],
> + [OFPT_ERROR (xid=0x1): OFPFMFC_ALL_TABLES_FULL
> +])
> +AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl
> + in_port=4 actions=NORMAL
> + in_port=5 actions=drop
> + in_port=6 actions=drop
> + in_port=7 actions=NORMAL
> +NXST_FLOW reply:
> +])
> +OVS_VSWITCHD_STOP
> +AT_CLEANUP
> +
> +AT_SETUP([ofproto - eviction upon table overflow, with fairness])
> +OVS_VSWITCHD_START
> +# Configure a maximum of 4 flows.
> +AT_CHECK(
> + [ovs-vsctl \
> + -- --id=@t0 create Flow_Table name=evict flow-limit=4 \
> + overflow-policy=evict \
> + groups='"NXM_OF_IN_PORT[[]]"' \
> + -- set bridge br0 flow_tables:0=@t0 \
> + | perl $srcdir/uuidfilt.pl],
> + [0], [<0>
> +])
> +# Add 4 flows.
> +ovs-ofctl add-flows br0 - <<EOF
> +idle_timeout=10 in_port=2 dl_src=00:44:55:66:77:88 actions=drop
> +idle_timeout=20 in_port=1 dl_src=00:11:22:33:44:55 actions=drop
> +idle_timeout=30 in_port=1 dl_src=00:22:33:44:55:66 actions=drop
> +idle_timeout=40 in_port=1 dl_src=00:33:44:55:66:77 actions=drop
> +EOF
> +AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl
> + idle_timeout=10,in_port=2,dl_src=00:44:55:66:77:88 actions=drop
> + idle_timeout=20,in_port=1,dl_src=00:11:22:33:44:55 actions=drop
> + idle_timeout=30,in_port=1,dl_src=00:22:33:44:55:66 actions=drop
> + idle_timeout=40,in_port=1,dl_src=00:33:44:55:66:77 actions=drop
> +NXST_FLOW reply:
> +])
> +# Adding another flow will cause the one that expires soonest within
> +# the largest group (those with in_port=1) to be evicted. In this
> +# case this is not the same as the one that expires soonest overall
> +# (which is what makes the test interesting):
> +AT_CHECK([ovs-ofctl add-flow br0 in_port=2,dl_src=00:55:66:77:88:99,actions=drop])
> +AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl
> + idle_timeout=10,in_port=2,dl_src=00:44:55:66:77:88 actions=drop
> + idle_timeout=30,in_port=1,dl_src=00:22:33:44:55:66 actions=drop
> + idle_timeout=40,in_port=1,dl_src=00:33:44:55:66:77 actions=drop
> + in_port=2,dl_src=00:55:66:77:88:99 actions=drop
> +NXST_FLOW reply:
> +])
> +# Enlarge the flow limit, change the eviction policy back to strictly
> +# based on expiration, and add some flows.
> +AT_CHECK([ovs-vsctl set Flow_Table evict groups='[[]]' flow-limit=7])
> +ovs-ofctl add-flows br0 - <<EOF
> +idle_timeout=50 in_port=2 dl_src=00:66:77:88:99:aa actions=drop
> +idle_timeout=60 in_port=2 dl_src=00:77:88:99:aa:bb actions=drop
> +idle_timeout=70 in_port=2 dl_src=00:88:99:aa:bb:cc actions=drop
> +EOF
> +AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl
> + idle_timeout=10,in_port=2,dl_src=00:44:55:66:77:88 actions=drop
> + idle_timeout=30,in_port=1,dl_src=00:22:33:44:55:66 actions=drop
> + idle_timeout=40,in_port=1,dl_src=00:33:44:55:66:77 actions=drop
> + idle_timeout=50,in_port=2,dl_src=00:66:77:88:99:aa actions=drop
> + idle_timeout=60,in_port=2,dl_src=00:77:88:99:aa:bb actions=drop
> + idle_timeout=70,in_port=2,dl_src=00:88:99:aa:bb:cc actions=drop
> + in_port=2,dl_src=00:55:66:77:88:99 actions=drop
> +NXST_FLOW reply:
> +])
> +# Adding another flow will cause the one that expires soonest overall
> +# to be evicted.
> +AT_CHECK([ovs-ofctl add-flow br0 'idle_timeout=80 in_port=2 dl_src=00:99:aa:bb:cc:dd actions=drop'])
> +AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl
> + idle_timeout=30,in_port=1,dl_src=00:22:33:44:55:66 actions=drop
> + idle_timeout=40,in_port=1,dl_src=00:33:44:55:66:77 actions=drop
> + idle_timeout=50,in_port=2,dl_src=00:66:77:88:99:aa actions=drop
> + idle_timeout=60,in_port=2,dl_src=00:77:88:99:aa:bb actions=drop
> + idle_timeout=70,in_port=2,dl_src=00:88:99:aa:bb:cc actions=drop
> + idle_timeout=80,in_port=2,dl_src=00:99:aa:bb:cc:dd actions=drop
> + in_port=2,dl_src=00:55:66:77:88:99 actions=drop
> +NXST_FLOW reply:
> +])
> +# Reducing the flow limit also causes the flows that expire soonest
> +# overall to be evicted.
> +AT_CHECK([ovs-vsctl set Flow_Table evict flow-limit=4])
> +AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl
> + idle_timeout=60,in_port=2,dl_src=00:77:88:99:aa:bb actions=drop
> + idle_timeout=70,in_port=2,dl_src=00:88:99:aa:bb:cc actions=drop
> + idle_timeout=80,in_port=2,dl_src=00:99:aa:bb:cc:dd actions=drop
> + in_port=2,dl_src=00:55:66:77:88:99 actions=drop
> +NXST_FLOW reply:
> +])
> +OVS_VSWITCHD_STOP
> +AT_CLEANUP
> diff --git a/tests/ovs-vsctl.at b/tests/ovs-vsctl.at
> index 8ade172..69f06b3 100644
> --- a/tests/ovs-vsctl.at
> +++ b/tests/ovs-vsctl.at
> @@ -573,6 +573,7 @@ datapath_type : ""
> external_ids : {}
> fail_mode : []
> flood_vlans : []
> +flow_tables : {}
> mirrors : []
> name : "br0"
> netflow : []
> @@ -1019,6 +1020,7 @@ datapath_type : ""
> external_ids : {}
> fail_mode : []
> flood_vlans : []
> +flow_tables : {}
> mirrors : []
> name : "br0"
> netflow : []
> diff --git a/utilities/ovs-vsctl.8.in b/utilities/ovs-vsctl.8.in
> index d24af14..55b3c4f 100644
> --- a/utilities/ovs-vsctl.8.in
> +++ b/utilities/ovs-vsctl.8.in
> @@ -486,6 +486,9 @@ A bridge port. Records may be identified by port name.
> .IP "\fBInterface\fR"
> A network device attached to a port. Records may be identified by
> name.
> +.IP "\fBFlow_Table\fR"
> +Configuration for a particular OpenFlow flow table. Records may be
> +identified by name.
> .IP "\fBQoS\fR"
> Quality-of-service configuration for a \fBPort\fR. Records may be
> identified by port name.
> diff --git a/utilities/ovs-vsctl.c b/utilities/ovs-vsctl.c
> index a2af2f6..48ae56b 100644
> --- a/utilities/ovs-vsctl.c
> +++ b/utilities/ovs-vsctl.c
> @@ -1,5 +1,5 @@
> /*
> - * Copyright (c) 2009, 2010, 2011 Nicira Networks.
> + * Copyright (c) 2009, 2010, 2011, 2012 Nicira Networks.
> *
> * Licensed under the Apache License, Version 2.0 (the "License");
> * you may not use this file except in compliance with the License.
> @@ -2340,6 +2340,10 @@ static const struct vsctl_table_class tables[] = {
> &ovsrec_bridge_col_sflow},
> {NULL, NULL, NULL}}},
>
> + {&ovsrec_table_flow_table,
> + {{&ovsrec_table_flow_table, &ovsrec_flow_table_col_name, NULL},
> + {NULL, NULL, NULL}}},
> +
> {NULL, {{NULL, NULL, NULL}, {NULL, NULL, NULL}}}
> };
>
> diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
> index b45b972..312d1aa 100644
> --- a/vswitchd/bridge.c
> +++ b/vswitchd/bridge.c
> @@ -32,6 +32,7 @@
> #include "jsonrpc.h"
> #include "lacp.h"
> #include "list.h"
> +#include "meta-flow.h"
> #include "netdev.h"
> #include "ofp-print.h"
> #include "ofpbuf.h"
> @@ -158,6 +159,7 @@ static void bridge_configure_netflow(struct bridge *);
> static void bridge_configure_forward_bpdu(struct bridge *);
> static void bridge_configure_sflow(struct bridge *, int *sflow_bridge_number);
> static void bridge_configure_stp(struct bridge *);
> +static void bridge_configure_tables(struct bridge *);
> static void bridge_configure_remotes(struct bridge *,
> const struct sockaddr_in *managers,
> size_t n_managers);
> @@ -469,6 +471,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
> bridge_configure_netflow(br);
> bridge_configure_sflow(br, &sflow_bridge_number);
> bridge_configure_stp(br);
> + bridge_configure_tables(br);
> }
> free(managers);
>
> @@ -2472,6 +2475,66 @@ bridge_configure_remotes(struct bridge *br,
> sset_destroy(&snoops);
> }
> }
> +
> +static void
> +bridge_configure_tables(struct bridge *br)
> +{
> + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
> + int n_tables;
> + int i, j;
> +
> + n_tables = ofproto_get_n_tables(br->ofproto);
> + j = 0;
> + for (i = 0; i < n_tables; i++) {
> + struct ofproto_table_settings s;
> +
> + s.name = NULL;
> + s.max_flows = UINT_MAX;
> + s.groups = NULL;
> + s.n_groups = 0;
> +
> + if (j < br->cfg->n_flow_tables && i == br->cfg->key_flow_tables[j]) {
> + struct ovsrec_flow_table *cfg = br->cfg->value_flow_tables[j++];
> +
> + s.name = cfg->name;
> + if (cfg->n_flow_limit && *cfg->flow_limit < UINT_MAX) {
> + s.max_flows = *cfg->flow_limit;
> + }
> + if (cfg->overflow_policy
> + && !strcmp(cfg->overflow_policy, "evict")) {
> + size_t k;
> +
> + s.groups = xmalloc(cfg->n_groups * sizeof *s.groups);
> + for (k = 0; k < cfg->n_groups; k++) {
> + const char *string = cfg->groups[k];
> + char *msg;
> +
> + msg = mf_parse_subfield__(&s.groups[k], &string);
> + if (msg) {
> + VLOG_WARN_RL(&rl, "bridge %s table %d: error parsing "
> + "'groups' (%s)", br->name, i, msg);
> + free(msg);
> + } else if (*string) {
> + VLOG_WARN_RL(&rl, "bridge %s table %d: 'groups' "
> + "element '%s' contains trailing garbage",
> + br->name, i, cfg->groups[k]);
> + } else {
> + s.n_groups++;
> + }
> + }
> + }
> + }
> +
> + ofproto_configure_table(br->ofproto, i, &s);
> +
> + free(s.groups);
> + }
> + for (; j < br->cfg->n_flow_tables; j++) {
> + VLOG_WARN_RL(&rl, "bridge %s: ignoring configuration for flow table "
> + "%"PRId64" not supported by this datapath", br->name,
> + br->cfg->key_flow_tables[j]);
> + }
> +}
>
> /* Port functions. */
>
> diff --git a/vswitchd/vswitch.gv b/vswitchd/vswitch.gv
> index 65916d4..3a0980f 100644
> --- a/vswitchd/vswitch.gv
> +++ b/vswitchd/vswitch.gv
> @@ -8,6 +8,7 @@ digraph Open_vSwitch {
> Bridge -> Mirror [label="mirrors*"];
> Bridge -> Port [label="ports*"];
> Bridge -> Controller [label="controller*"];
> + Bridge -> Flow_Table [label="flow_tables value*"];
> Bridge -> NetFlow [label="netflow?"];
> QoS [style=bold];
> QoS -> Queue [label="queues value*"];
> @@ -18,6 +19,7 @@ digraph Open_vSwitch {
> Open_vSwitch -> SSL [label="ssl?"];
> Open_vSwitch -> Manager [label="manager_options*"];
> Controller [];
> + Flow_Table [];
> Queue [style=bold];
> SSL [];
> Manager [];
> diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema
> index 9d91b0f..6f2c458 100644
> --- a/vswitchd/vswitch.ovsschema
> +++ b/vswitchd/vswitch.ovsschema
> @@ -1,6 +1,6 @@
> {"name": "Open_vSwitch",
> - "version": "6.4.0",
> - "cksum": "923041702 15687",
> + "version": "6.5.0",
> + "cksum": "2847700438 16419",
> "tables": {
> "Open_vSwitch": {
> "columns": {
> @@ -99,7 +99,14 @@
> "type": {"key": {"type": "integer",
> "minInteger": 0,
> "maxInteger": 4095},
> - "min": 0, "max": 4096}}},
> + "min": 0, "max": 4096}},
> + "flow_tables": {
> + "type": {"key": {"type": "integer",
> + "minInteger": 0,
> + "maxInteger": 254},
> + "value": {"type": "uuid",
> + "refTable": "Flow_Table"},
> + "min": 0, "max": "unlimited"}}},
> "indexes": [["name"]]},
> "Port": {
> "columns": {
> @@ -236,6 +243,19 @@
> "type": {"key": "integer", "min": 0, "max": 1},
> "ephemeral": true}},
> "indexes": [["name"]]},
> + "Flow_Table": {
> + "columns": {
> + "name": {
> + "type": {"key": "string", "min": 0, "max": 1}},
> + "flow_limit": {
> + "type": {"key": {"type": "integer", "minInteger": 0},
> + "min": 0, "max": 1}},
> + "overflow_policy": {
> + "type": {"key": {"type": "string",
> + "enum": ["set", ["refuse", "evict"]]},
> + "min": 0, "max": 1}},
> + "groups": {
> + "type": {"key": "string", "min": 0, "max": "unlimited"}}}},
> "QoS": {
> "columns": {
> "type": {
> diff --git a/vswitchd/vswitch.pic b/vswitchd/vswitch.pic
> index e17a696..97a5537 100644
> --- a/vswitchd/vswitch.pic
> +++ b/vswitchd/vswitch.pic
> @@ -1,78 +1,83 @@
> -.\" Generated from vswitch.gv with cksum "3079400319 1035"
> +.\" Generated from vswitch.gv with cksum "3861934566 1103"
> .PS
> linethick = 1;
> linethick = 1;
> -box at 2.691279811,2.68196 wid 0.5680525378 height 0.335245 "Bridge"
> +box at 2.941088544,2.50604 wid 0.5307918022 height 0.313255 "Bridge"
> linethick = 1;
> -box at 0.2607468561,1.8438475 wid 0.5214937122 height 0.335245 "sFlow"
> +box at 0.2436434739,1.7229025 wid 0.4872869478 height 0.313255 "sFlow"
> linethick = 1;
> -box at 0.959202994,1.8438475 wid 0.5494263256 height 0.335245 "Mirror"
> +box at 0.896285206,1.7229025 wid 0.5133873544 height 0.313255 "Mirror"
> linethick = 1;
> -box at 2.691279811,1.8438475 wid 0.5028675 height 0.335245 "Port"
> +box at 2.514748489,1.7229025 wid 0.4698825 height 0.313255 "Port"
> linethick = 1;
> -box at 3.510752689,1.8438475 wid 0.800833256 height 0.335245 "Controller"
> +box at 3.280469011,1.7229025 wid 0.748303544 height 0.313255 "Controller"
> linethick = 1;
> -box at 4.423356628,1.8438475 wid 0.689129622 height 0.335245 "NetFlow"
> +box at 4.211525522,1.7229025 wid 0.800554478 height 0.313255 "Flow_Table"
> +linethick = 1;
> +box at 5.490670989,1.7229025 wid 0.643926978 height 0.313255 "NetFlow"
> linethick = 0.5;
> -box at 2.160452878,1.005735 wid 0.5028675 height 0.335245 "QoS"
> -box at 2.160452878,1.005735 wid 0.447311944444444 height 0.279689444444444
> +box at 2.018740522,0.939765 wid 0.4698825 height 0.313255 "QoS"
> +box at 2.018740522,0.939765 wid 0.414326944444444 height 0.257699444444444
> linethick = 0.5;
> -box at 2.160452878,0.1676225 wid 0.5773656439 height 0.335245 "Queue"
> -box at 2.160452878,0.1676225 wid 0.521810088344444 height 0.279689444444444
> +box at 2.018740522,0.1566275 wid 0.5394940261 height 0.313255 "Queue"
> +box at 2.018740522,0.1566275 wid 0.483938470544444 height 0.257699444444444
> linethick = 0.5;
> -box at 4.218521933,3.5200725 wid 1.080226439 height 0.335245 "Open_vSwitch"
> -box at 4.218521933,3.5200725 wid 1.02467088344444 height 0.279689444444444
> +box at 4.368153022,3.2891775 wid 1.009370261 height 0.313255 "Open_vSwitch"
> +box at 4.368153022,3.2891775 wid 0.953814705444444 height 0.257699444444444
> +linethick = 1;
> +box at 3.724226044,2.50604 wid 0.739657706 height 0.313255 "Capability"
> linethick = 1;
> -box at 3.529392311,2.68196 wid 0.791580494 height 0.335245 "Capability"
> +box at 5.020788489,2.50604 wid 0.4698825 height 0.313255 "SSL"
> linethick = 1;
> -box at 4.916904317,2.68196 wid 0.5028675 height 0.335245 "SSL"
> +box at 5.743029217,2.50604 wid 0.669989794 height 0.313255 "Manager"
> linethick = 1;
> -box at 5.689845189,2.68196 wid 0.717022006 height 0.335245 "Manager"
> +box at 2.749689739,0.939765 wid 0.678698283 height 0.313255 "Interface"
> linethick = 1;
> -box at 2.942713561,1.005735 wid 0.726341817 height 0.335245 "Interface"
> +spline -> from 2.677515787,2.47659403 to 2.677515787,2.47659403 to 2.342708843,2.434805813 to 1.749967732,2.345778742 to 1.261728489,2.192785 to 0.937948121,2.091353031 to 0.872853732,2.023376696 to 0.5656006978,1.87953 to 0.5408284924,1.867939565 to 0.5150601361,1.85572262 to 0.4893732261,1.843380373
> +"sflow?" at 1.457512864,2.11447125
> linethick = 1;
> -spline -> from 2.411216138,2.64038962 to 2.411216138,2.64038962 to 2.110501373,2.590639262 to 1.621781212,2.494960339 to 1.219889506,2.346715 to 0.974758362,2.256265899 to 0.712999066,2.115798244 to 0.5250137847,2.006240178
> -"sflow?" at 1.429417631,2.26290375
> +spline -> from 2.67645072,2.4120635 to 2.67645072,2.4120635 to 2.505664094,2.350853473 to 2.278992776,2.26859271 to 2.079637294,2.192785 to 1.75736055,2.070239644 to 1.388408811,1.922571237 to 1.149332595,1.825900744
> +"mirrors*" at 2.331995522,2.11447125
> linethick = 1;
> -spline -> from 2.409137619,2.545448236 to 2.409137619,2.545448236 to 2.08321243,2.387748988 to 1.550776321,2.130079681 to 1.229812758,1.974794197
> -"mirrors*" at 2.253583939,2.26290375
> +spline -> from 2.791164701,2.348347433 to 2.791164701,2.348347433 to 2.751318665,2.300920626 to 2.710846119,2.246978115 to 2.680084478,2.192785 to 2.624575692,2.09504944 to 2.582787475,1.97538603 to 2.555095733,1.881284228
> +"ports*" at 2.867160364,2.11447125
> linethick = 1;
> -spline -> from 2.691279811,2.51232603 to 2.691279811,2.51232603 to 2.691279811,2.366963798 to 2.691279811,2.158508457 to 2.691279811,2.013213274
> -"ports*" at 2.891488125,2.26290375
> +spline -> from 3.009816691,2.34753297 to 3.009816691,2.34753297 to 3.06864598,2.211705602 to 3.153036877,2.016923643 to 3.211928817,1.881158926
> +"controller*" at 3.463221978,2.11447125
> linethick = 1;
> -spline -> from 2.937148494,2.511990785 to 2.937148494,2.511990785 to 3.002387171,2.46203928 to 3.0708442,2.40504763 to 3.128975683,2.346715 to 3.230286722,2.244934618 to 3.327910066,2.115664146 to 3.399049055,2.013682617
> -"controller*" at 3.613203561,2.26290375
> +spline -> from 3.202531167,2.380675349 to 3.202531167,2.380675349 to 3.228656634,2.369586122 to 3.254970054,2.358935452 to 3.280469011,2.3494125 to 3.514345194,2.262077006 to 3.611015687,2.329489482 to 3.819956772,2.192785 to 3.941687665,2.113155579 to 4.045312419,1.98415717 to 4.115293586,1.881472181
> +"flow_tables value*" at 4.555228908,2.11447125
> linethick = 1;
> -spline -> from 2.974025444,2.543101521 to 2.974025444,2.543101521 to 3.000845044,2.532507779 to 3.02793284,2.522651576 to 3.054484244,2.5143375 to 3.459527253,2.387078498 to 3.62668041,2.557986399 to 3.994980567,2.346715 to 4.133302654,2.267396033 to 4.247688248,2.125989692 to 4.323386569,2.013213274
> -"netflow?" at 4.474582064,2.26290375
> +spline -> from 3.20459865,2.37384639 to 3.20459865,2.37384639 to 3.229847003,2.364386089 to 3.255408611,2.356053506 to 3.280469011,2.3494125 to 3.6838788,2.242968451 to 4.788102675,2.398029676 to 5.151290522,2.192785 to 5.274838294,2.122991786 to 5.365243687,1.987540324 to 5.421504285,1.879905906
> +"netflow?" at 5.582016147,2.11447125
> linethick = 0.5;
> -spline -> from 1.237456344,1.8438475 to 1.237456344,1.8438475 to 1.569281845,1.8438475 to 2.119083645,1.8438475 to 2.439041473,1.8438475
> -"select_src_port*" at 1.834527689,1.92765875
> +spline -> from 1.156286856,1.7229025 to 1.156286856,1.7229025 to 1.466346655,1.7229025 to 1.980084855,1.7229025 to 2.279055427,1.7229025
> +"select_src_port*" at 1.714194011,1.80121625
> linethick = 0.5;
> -spline -> from 1.215196076,1.675420412 to 1.215196076,1.675420412 to 1.2471114,1.660535534 to 1.280166557,1.647796224 to 1.313020567,1.638945756 to 1.76070674,1.518726899 to 1.909823716,1.513430028 to 2.356034811,1.638945756 to 2.387547841,1.647863273 to 2.419060871,1.660602583 to 2.449367019,1.675420412
> -"output_port?" at 1.834527689,1.722757006
> +spline -> from 1.135486724,1.565523188 to 1.135486724,1.565523188 to 1.1653086,1.551614666 to 1.196195543,1.539710976 to 1.226894533,1.531441044 to 1.64521526,1.419107801 to 1.784551084,1.414158372 to 2.201493489,1.531441044 to 2.230939459,1.539773627 to 2.260385429,1.551677317 to 2.288703681,1.565523188
> +"output_port?" at 1.714194011,1.609754794
> linethick = 0.5;
> -spline -> from 1.027123631,1.675018118 to 1.027123631,1.675018118 to 1.084316428,1.557883515 to 1.178520273,1.410845058 to 1.313020567,1.34098 to 1.724433231,1.127428935 to 1.947304107,1.122266162 to 2.356034811,1.34098 to 2.487383802,1.411314401 to 2.576156678,1.558285809 to 2.629192437,1.675353363
> -"select_dst_port*" at 1.834527689,1.42479125
> +spline -> from 0.959750669,1.565147282 to 0.959750669,1.565147282 to 1.013191972,1.455695985 to 1.101216627,1.318302342 to 1.226894533,1.25302 to 1.611321069,1.053476565 to 1.819572993,1.048652438 to 2.201493489,1.25302 to 2.324226798,1.318740899 to 2.407176722,1.456071891 to 2.456733663,1.565460537
> +"select_dst_port*" at 1.714194011,1.33133375
> linethick = 1;
> -spline -> from 2.582124039,1.673408942 to 2.582124039,1.673408942 to 2.548331343,1.620641379 to 2.511119148,1.5622417 to 2.477058256,1.5086025 to 2.40638861,1.397100013 to 2.327203741,1.271383138 to 2.265719808,1.173558647
> -"qos?" at 2.621414753,1.42479125
> +spline -> from 2.412752661,1.563643658 to 2.412752661,1.563643658 to 2.381176557,1.514337321 to 2.346405252,1.4597683 to 2.314578544,1.4096475 to 2.24854439,1.305458887 to 2.174553559,1.187988262 to 2.117102592,1.096580453
> +"qos?" at 2.449466147,1.33133375
> linethick = 1;
> -spline -> from 2.742170002,1.67421353 to 2.742170002,1.67421353 to 2.785751852,1.528851298 to 2.848308569,1.320395957 to 2.891890419,1.175100774
> -"interfaces+" at 3.212786933,1.42479125
> +spline -> from 2.562300598,1.56439547 to 2.562300598,1.56439547 to 2.603023748,1.428568102 to 2.661477131,1.233786143 to 2.702200281,1.098021426
> +"interfaces+" at 3.002047967,1.33133375
> linethick = 1;
> -spline -> from 2.160452878,0.83610103 to 2.160452878,0.83610103 to 2.160452878,0.690738798 to 2.160452878,0.4823102766 to 2.160452878,0.3369748642
> -"queues value*" at 2.626108183,0.58667875
> +spline -> from 2.018740522,0.78125797 to 2.018740522,0.78125797 to 2.018740522,0.645430602 to 2.018740522,0.4506737034 to 2.018740522,0.3148713958
> +"queues value*" at 2.453851717,0.54819625
> linethick = 1;
> -spline -> from 3.676631915,3.429757497 to 3.676631915,3.429757497 to 3.372899945,3.369815691 to 3.032089878,3.28406002 to 2.914754128,3.1848275 to 2.814113579,3.09967527 to 2.75705488,2.96021335 to 2.725810046,2.849716598
> -"bridges*" at 3.1848275,3.10101625
> +spline -> from 3.86180764,3.204786603 to 3.86180764,3.204786603 to 3.57799861,3.148776609 to 3.259543577,3.06864598 to 3.149966978,2.9759225 to 3.055927827,2.89635573 to 3.002549175,2.76604165 to 2.973353809,2.662792802
> +"bridges*" at 3.402325206,2.89760875
> linethick = 1;
> -spline -> from 3.710961003,3.350840824 to 3.710961003,3.350840824 to 3.642839219,3.307124876 to 3.581690531,3.252614039 to 3.538712122,3.1848275 to 3.47649065,3.086801862 to 3.476356552,2.95484943 to 3.490637989,2.850789382
> -"capabilities value*" at 4.125390872,3.10101625
> +spline -> from 3.893947603,3.131046376 to 3.893947603,3.131046376 to 3.830231536,3.090197924 to 3.773156475,3.039262661 to 3.732934533,2.9759225 to 3.674794405,2.884326738 to 3.674731754,2.76102957 to 3.688076417,2.663795218
> +"capabilities value*" at 4.281130783,2.89760875
> linethick = 1;
> -spline -> from 4.526209794,3.352114755 to 4.526209794,3.352114755 to 4.593862235,3.304711112 to 4.66057599,3.248457001 to 4.712069622,3.1848275 to 4.790919246,3.087405303 to 4.843150417,2.955452871 to 4.87513279,2.851191676
> -"ssl?" at 4.930917558,3.10101625
> +spline -> from 4.655721112,3.132236745 to 4.655721112,3.132236745 to 4.71887332,3.087942488 to 4.781211065,3.035378299 to 4.829327033,2.9759225 to 4.90306726,2.884890597 to 4.951872389,2.761593429 to 4.981694265,2.664171124
> +"ssl?" at 5.033819897,2.89760875
> linethick = 1;
> -spline -> from 4.735067429,3.351645412 to 4.735067429,3.351645412 to 4.853341865,3.304577014 to 4.976108584,3.24852405 to 5.084526817,3.1848275 to 5.243567045,3.091361194 to 5.403210714,2.956659753 to 5.518602043,2.850185941
> -"manager_options*" at 5.908693125,3.10101625
> +spline -> from 4.850878977,3.131798188 to 4.850878977,3.131798188 to 4.961395341,3.087817186 to 5.076109322,3.03544095 to 5.177415989,2.9759225 to 5.326024161,2.888587006 to 5.475133541,2.762721147 to 5.582955912,2.663231359
> +"manager_options*" at 5.947522081,2.89760875
> .PE
> diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
> index e28b053..529c3b9 100644
> --- a/vswitchd/vswitch.xml
> +++ b/vswitchd/vswitch.xml
> @@ -388,6 +388,11 @@
> </p>
> </column>
>
> + <column name="flow_tables">
> + Configuration for OpenFlow tables. Each pair maps from an OpenFlow
> + table ID to configuration for that table.
> + </column>
> +
> <column name="fail_mode">
> <p>When a controller is configured, it is, ordinarily, responsible
> for setting up all flows on the switch. Thus, if the connection to
> @@ -1867,6 +1872,99 @@
> </group>
> </table>
>
> + <table name="Flow_Table" title="OpenFlow table configuration">
> + <p>Configuration for a particular OpenFlow table.</p>
> +
> + <column name="name">
> + The table's name. Set this column to change the name that controllers
> + will receive when they request table statistics, e.g. <code>ovs-ofctl
> + dump-tables</code>. The name does not affect switch behavior.
> + </column>
> +
> + <column name="flow_limit">
> + If set, limits the number of flows that may be added to the table. Open
> + vSwitch may limit the number of flows in a table for other reasons,
> + e.g. due to hardware limitations or for resource availability or
> + performance reasons.
> + </column>
> +
> + <column name="overflow_policy">
> + <p>
> + Controls the switch's behavior when an OpenFlow flow table modification
> + request would add flows in excess of <ref column="flow_limit"/>. The
> + supported values are:
> + </p>
> +
> + <dl>
> + <dt><code>refuse</code></dt>
> + <dd>
> + Refuse to add the flow or flows. This is also the default policy
> + when <ref column="overflow_policy"/> is unset.
> + </dd>
> +
> + <dt><code>evict</code></dt>
> + <dd>
> + Delete the flow that will expire soonest. See <ref column="groups"/>
> + for details.
> + </dd>
> + </dl>
> + </column>
> +
> + <column name="groups">
> + <p>
> + When <ref column="overflow_policy"/> is <code>evict</code>, this
> + controls how flows are chosen for eviction when the flow table would
> + otherwise exceed <ref column="flow_limit"/> flows. Its value is a set
> + of NXM fields or sub-fields, each of which takes one of the forms
> + <code><var>field</var>[]</code> or
> + <code><var>field</var>[<var>start</var>..<var>end</var>]</code>,
> + e.g. <code>NXM_OF_IN_PORT[]</code>. Please see
> + <code>nicira-ext.h</code> for a complete list of NXM field names.
> + </p>
> +
> + <p>
> + When a flow must be evicted due to overflow, the flow to evict is
> + chosen through an approximation of the following algorithm:
> + </p>
> +
> + <ol>
> + <li>
> + Divide the flows in the table into groups based on the values of the
> + specified fields or subfields, so that all of the flows in a given
> + group have the same values for those fields. If a flow does not
> + specify a given field, that field's value is treated as 0.
> + </li>
> +
> + <li>
> + Consider the flows in the largest group, that is, the group that
> + contains the greatest number of flows. If two or more groups all
> + have the same largest number of flows, consider the flows in all of
> + those groups.
> + </li>
> +
> + <li>
> + Among the flows under consideration, choose the flow that expires
> + soonest for eviction.
> + </li>
> + </ol>
> +
> + <p>
> + The eviction process only considers flows that have an idle timeout or
> + a hard timeout. That is, eviction never deletes permanent flows.
> + (Permanent flows do count against <ref column="flow_limit"/>.)
> + </p>
> +
> + <p>
> + Open vSwitch ignores any invalid or unknown field specifications.
> + </p>
> +
> + <p>
> + When <ref column="overflow_policy"/> is not <code>evict</code>, this
> + column has no effect.
> + </p>
> + </column>
> + </table>
> +
> <table name="QoS" title="Quality of Service configuration">
> <p>Quality of Service (QoS) configuration for each Port that
> references it.</p>
> --
> 1.7.2.5
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
More information about the dev
mailing list