[ovs-dev] [PATCH 3/3] classifier: Speed up lookup when metadata partitions the flow table.
Rajahalme, Jarno (NSN - FI/Espoo)
jarno.rajahalme at nsn.com
Wed May 8 08:13:18 UTC 2013
I did not see this pushed yet, so here is a review:
On Mar 27, 2013, at 6:32 , ext Ben Pfaff wrote:
> We have a controller that puts many rules with different metadata values
> into the flow table, where metadata is used (by "resubmit"s) to distinguish
> stages in a pipeline. Thus, any given flow only needs to be hashed into
> classifier "cls_table"s that contain a match for the flow's metadata value.
> This commit optimizes the classifier lookup by (probabilistically) skipping
> the "cls_table"s that can't possibly match.
>
> (The "metadata" referred to here is the OpenFlow 1.1+ "metadata" field,
> which is a 64-bit field similar in purpose to the "registers" defined by
> Open vSwitch.)
>
> This speeds up flow setup performance with the controller in question by
> about 19%.
>
> Bug #14282.
> Signed-off-by: Ben Pfaff <blp at nicira.com>
> ---
> lib/classifier.c | 116 +++++++++++++++++++++++++++++++++++++++++++++++++-----
> lib/classifier.h | 74 +++++++++++++++++++++++++++++++---
> lib/flow.h | 27 +++++++++++++
> lib/tag.h | 8 +++-
> 4 files changed, 209 insertions(+), 16 deletions(-)
>
> diff --git a/lib/classifier.c b/lib/classifier.c
> index b720378..a01ef5c 100644
> --- a/lib/classifier.c
> +++ b/lib/classifier.c
> @@ -41,7 +41,7 @@ static void update_tables_after_removal(struct classifier *,
> unsigned int del_priority);
>
> static struct cls_rule *find_match(const struct cls_table *,
> - const struct flow *);
> + const struct flow *, tag_type tags);
> static struct cls_rule *find_equal(struct cls_table *,
> const struct miniflow *, uint32_t hash);
> static struct cls_rule *insert_rule(struct classifier *,
> @@ -143,6 +143,7 @@ classifier_init(struct classifier *cls)
> cls->n_rules = 0;
> hmap_init(&cls->tables);
> list_init(&cls->tables_priority);
> + hmap_init(&cls->partitions);
> }
>
> /* Destroys 'cls'. Rules within 'cls', if any, are not freed; this is the
> @@ -151,12 +152,20 @@ void
> classifier_destroy(struct classifier *cls)
> {
> if (cls) {
> + struct cls_table *partition, *next_partition;
> struct cls_table *table, *next_table;
>
> HMAP_FOR_EACH_SAFE (table, next_table, hmap_node, &cls->tables) {
> destroy_table(cls, table);
> }
> hmap_destroy(&cls->tables);
> +
> + HMAP_FOR_EACH_SAFE (partition, next_partition, hmap_node,
> + &cls->partitions) {
> + hmap_remove(&cls->partitions, &partition->hmap_node);
> + free(partition);
> + }
> + hmap_destroy(&cls->partitions);
> }
> }
>
> @@ -174,6 +183,46 @@ classifier_count(const struct classifier *cls)
> return cls->n_rules;
> }
>
> +static uint32_t
> +hash_metadata(ovs_be64 metadata_)
> +{
> + uint64_t metadata = (OVS_FORCE uint64_t) metadata_;
> + return hash_2words(metadata, metadata >> 32);
> +}
> +
> +static struct cls_partition *
> +find_partition(const struct classifier *cls, ovs_be64 metadata, uint32_t hash)
> +{
> + struct cls_partition *partition;
> +
> + HMAP_FOR_EACH_IN_BUCKET (partition, hmap_node, hash, &cls->partitions) {
> + if (partition->metadata == metadata) {
> + return partition;
> + }
> + }
> +
> + return NULL;
> +}
> +
> +static struct cls_partition *
> +create_partition(struct classifier *cls, struct cls_table *table,
> + ovs_be64 metadata)
> +{
> + uint32_t hash = hash_metadata(metadata);
> + struct cls_partition *partition = find_partition(cls, metadata, hash);
> + if (partition) {
> + partition->n_refs++;
> + partition->tags |= table->tag;
In principle, partition->tags could fill up over time as the bits are left there as rules are removed from the classifier. However, is seems that the rule/table dynamics could be such that this is no problem in practice?
> + } else {
> + partition = xmalloc(sizeof *partition);
> + partition->n_refs = 1;
> + partition->tags = table->tag;
> + partition->metadata = metadata;
> + hmap_insert(&cls->partitions, &partition->hmap_node, hash);
> + }
> + return partition;
> +}
> +
> /* Inserts 'rule' into 'cls'. Until 'rule' is removed from 'cls', the caller
> * must not modify or free it.
> *
> @@ -200,8 +249,17 @@ classifier_replace(struct classifier *cls, struct cls_rule *rule)
>
> old_rule = insert_rule(cls, table, rule);
> if (!old_rule) {
> + if (minimask_get_metadata_mask(&rule->match.mask) == OVS_BE64_MAX) {
> + ovs_be64 metadata = miniflow_get_metadata(&rule->match.flow);
> + rule->partition = create_partition(cls, table, metadata);
I guess that Andy's comment on masked metadata support would be a generalization to this, where the metadata mask would be made part of the cls_partition, and the hmap would be keyed by the combination of the (masked) metadata and the mask. It would be pretty straightforward, but would be a bit more work. With this generalization this part would look like something like this:
ovs_be64 metadata_mask = minimask_get_metadata_mask(&rule->match.mask);
if (metadata_mask) {
ovs_be64 metadata = miniflow_get_metadata(&rule->match.flow);
rule->partition = create_partition(cls, table, metadata, metadata_mask);
Corresponding changes would be needed elsewhere.
I have no idea of the performance impact, though.
> + } else {
> + rule->partition = NULL;
> + }
> +
> table->n_table_rules++;
> cls->n_rules++;
> + } else {
> + rule->partition = old_rule->partition;
> }
> return old_rule;
> }
> @@ -225,6 +283,7 @@ classifier_insert(struct classifier *cls, struct cls_rule *rule)
> void
> classifier_remove(struct classifier *cls, struct cls_rule *rule)
> {
> + struct cls_partition *partition;
> struct cls_rule *head;
> struct cls_table *table;
>
> @@ -242,6 +301,12 @@ classifier_remove(struct classifier *cls, struct cls_rule *rule)
> hmap_replace(&table->rules, &rule->hmap_node, &next->hmap_node);
> }
>
> + partition = rule->partition;
> + if (partition && --partition->n_refs == 0) {
> + hmap_remove(&cls->partitions, &partition->hmap_node);
> + free(partition);
> + }
> +
> if (--table->n_table_rules == 0) {
> destroy_table(cls, table);
> } else {
> @@ -256,12 +321,38 @@ classifier_remove(struct classifier *cls, struct cls_rule *rule)
> struct cls_rule *
> classifier_lookup(const struct classifier *cls, const struct flow *flow)
> {
> + const struct cls_partition *partition;
> struct cls_table *table;
> struct cls_rule *best;
> + tag_type tags;
> +
> + /* Determine 'tags' such that, if 'table->tag' doesn't intersect them, then
> + * 'flow' cannot possibly match in 'table':
> + *
> + * - If flow->metadata maps to a given 'partition', then we can use
> + * 'tags' for 'partition->tags'.
> + *
> + * - If flow->metadata has no partition, then no rule in 'cls' has an
> + * exact-match for flow->metadata. That means that we don't need to
> + * search any table that includes flow->metadata in its mask.
> + *
> + * In either case, we always need to search any cls_tables that do not
> + * include flow->metadata in its mask. One way to do that would be to
> + * check the "cls_table"s explicitly for that, but that would require an
> + * extra branch per table. Instead, we mark such a cls_table's 'tags' as
> + * TAG_ALL and make sure that 'tags' is never empty. This means that
> + * 'tags' always intersects such a cls_table's 'tags', so we don't need a
> + * special case.
> + */
> + partition = (hmap_is_empty(&cls->partitions)
> + ? NULL
> + : find_partition(cls, flow->metadata,
> + hash_metadata(flow->metadata)));
> + tags = partition ? partition->tags : TAG_ARBITRARY;
>
> best = NULL;
> LIST_FOR_EACH (table, list_node, &cls->tables_priority) {
> - struct cls_rule *rule = find_match(table, flow);
> + struct cls_rule *rule = find_match(table, flow, tags);
How about keeping the tags check here instead? Like:
LIST_FOR_EACH (table, list_node, &cls->tables_priority) {
if (!tag_intersects(tags, table->tag))
continue;
struct cls_rule *rule = find_match(table, flow);
> if (rule) {
> best = rule;
> LIST_FOR_EACH_CONTINUE (table, list_node, &cls->tables_priority) {
> @@ -270,7 +361,7 @@ classifier_lookup(const struct classifier *cls, const struct flow *flow)
> * can not find anything better. */
> return best;
> }
> - rule = find_match(table, flow);
> + rule = find_match(table, flow, tags);
And here:
if (!tag_intersects(tags, table->tag))
continue;
rule = find_match(table, flow);
Maybe it could be a little bit faster, in addition to keeping the find_match() intact.
> if (rule && rule->priority > best->priority) {
> best = rule;
> }
> @@ -523,6 +614,7 @@ find_table(const struct classifier *cls, const struct minimask *mask)
> static struct cls_table *
> insert_table(struct classifier *cls, const struct minimask *mask)
> {
> + uint32_t hash = minimask_hash(mask, 0);
> struct cls_table *table;
>
> table = xzalloc(sizeof *table);
> @@ -530,6 +622,9 @@ insert_table(struct classifier *cls, const struct minimask *mask)
> minimask_clone(&table->mask, mask);
> hmap_insert(&cls->tables, &table->hmap_node, minimask_hash(mask, 0));
> list_push_back(&cls->tables_priority, &table->list_node);
> + table->tag = (minimask_get_metadata_mask(mask) == OVS_BE64_MAX
> + ? tag_create_deterministic(hash)
> + : TAG_ALL);
>
> return table;
> }
> @@ -635,14 +730,17 @@ update_tables_after_removal(struct classifier *cls, struct cls_table *table,
> }
>
> static struct cls_rule *
> -find_match(const struct cls_table *table, const struct flow *flow)
> +find_match(const struct cls_table *table, const struct flow *flow,
> + tag_type tags)
> {
> - uint32_t hash = flow_hash_in_minimask(flow, &table->mask, 0);
> - struct cls_rule *rule;
> + if (tag_intersects(tags, table->tag)) {
> + uint32_t hash = flow_hash_in_minimask(flow, &table->mask, 0);
> + struct cls_rule *rule;
>
> - HMAP_FOR_EACH_WITH_HASH (rule, hmap_node, hash, &table->rules) {
> - if (minimatch_matches_flow(&rule->match, flow)) {
> - return rule;
> + HMAP_FOR_EACH_WITH_HASH (rule, hmap_node, hash, &table->rules) {
> + if (minimatch_matches_flow(&rule->match, flow)) {
> + return rule;
> + }
> }
> }
>
> diff --git a/lib/classifier.h b/lib/classifier.h
> index d318864..d0bf1dd 100644
> --- a/lib/classifier.h
> +++ b/lib/classifier.h
> @@ -1,5 +1,5 @@
> /*
> - * Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc.
> + * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
> *
> * Licensed under the Apache License, Version 2.0 (the "License");
> * you may not use this file except in compliance with the License.
> @@ -19,17 +19,66 @@
>
> /* Flow classifier.
> *
> - * A classifier is a "struct classifier",
> - * a hash map from a set of wildcards to a "struct cls_table",
> - * a hash map from fixed field values to "struct cls_rule",
> - * which can contain a list of otherwise identical rules
> - * with lower priorities.
> + * Basic Design
> + * ============
> + *
Maybe start with a note that each OpenFlow flow table is represented by a
classifier instance so the difference between OF tables and classifier tables
would be more apparent.
> + * Suppose that all the rules in a classifier had the same form. For example,
> + * suppose that they all matched on the source and destination Ethernet address
> + * and wildcarded all the other fields. Then the obvious way to implement a
> + * classifier would be a hash table on the source and destination Ethernet
> + * addresses. If new classification rules came along with a different form,
> + * you could add a second hash table that hashed on the fields matched in those
> + * rules. With two hash tables, you look up a given flow in each hash table.
> + * If there are no matches, the classifier didn't contain a match; if you find
> + * a match in one of them, that's the result; if you find a match in both of
> + * them, then the result is the rule with the higher priority.
> + *
> + * This is how the classifier works. In a "struct classifier", each form of
> + * "struct cls_rule" present (based on its ->match.mask) goes into a separate
> + * "struct cls_table". A lookup does a hash lookup in every "struct cls_table"
> + * in the classifier and returns the highest-priority match that it finds.
Could add:
"The tables are kept in a descending priority order according to the highest
priority rule in each table. This way the lookups following a found match can
be limited to tables that possibly have higher priority rules than already found."
> + *
> + * One detail: a classifier can contain multiple rules that are identical other
> + * than their priority. When this happens, only the highest priority rule out
> + * of a group of otherwise identical rules is stored directly in the "struct
> + * cls_table", with the other almost-identical rules chained off a linked list
> + * inside that highest-priority rule.
> + *
> + *
> + * Partitioning
> + * ============
> + *
> + * Suppose that a given classifier is being used to handle multiple stages in a
> + * pipeline using "resubmit", with metadata (that is, the OpenFlow 1.1+ field
> + * named "metadata") distinguishing between the different stages. For example,
> + * metadata value 1 might identify ingress rules, metadata value 2 might
> + * identify ACLs, and metadata value 3 might identify egress rules. Such a
> + * classifier is essentially partitioned into multiple sub-classifiers on the
> + * basis of the metadata value.
> + *
> + * The classifier has a special optimization to speed up matching in this
> + * scenario:
> + *
> + * - Each cls_table that matches on metadata gets a random tag (see tag.h).
To me the word "random" caused some confusion at first, so maybe put it like
this instead:
"Each cls_table that matches on metadata gets a tag derived from the table's
mask, so that it is highly likely that each table has a unique tag."
And maybe add that the possible non-unique table tags lead to some
harmless checking of more tables than needed.
> + *
> + * - For each metadata value matched by any cls_rule, the classifier
> + * constructs a "struct cls_partition" indexed by the metadata value.
> + * The cls_partition has a 'tags' member whose value is the bitwise-OR of
> + * the tags of each cls_table that contains any rule that matches on the
> + * cls_partition's metadata value.
So in other words 'struct cls_partition' associates metadata values with tables that
need to be checked with flows with that specific metadata value.
> + *
> + * Thus, a flow lookup can start by looking up the partition associated with
> + * the flow's metadata, and then skip over any cls_table whose 'tag' does not
> + * intersect the partition's 'tags'. (The flow must also be looked up in any
> + * cls_table that doesn't match on metadata. We handle that by giving any such
> + * cls_table TAG_ALL as its 'tags' so that it matches any tag.)
> */
>
> #include "flow.h"
> #include "hmap.h"
> #include "list.h"
> #include "match.h"
> +#include "tag.h"
> #include "openflow/nicira-ext.h"
> #include "openflow/openflow.h"
>
> @@ -42,6 +91,7 @@ struct classifier {
> int n_rules; /* Total number of rules. */
> struct hmap tables; /* Contains "struct cls_table"s. */
> struct list tables_priority; /* Tables in descending priority order */
> + struct hmap partitions; /* Contains "struct cls_partition"s. */
> };
>
> /* A set of rules that all have the same fields wildcarded. */
> @@ -53,6 +103,7 @@ struct cls_table {
> int n_table_rules; /* Number of rules, including duplicates. */
> unsigned int max_priority; /* Max priority of any rule in the table. */
> unsigned int max_count; /* Count of max_priority rules. */
> + tag_type tag; /* Tag generated from mask for partitioning. */
> };
>
> /* Returns true if 'table' is a "catch-all" table that will match every
> @@ -69,6 +120,17 @@ struct cls_rule {
> struct list list; /* List of identical, lower-priority rules. */
> struct minimatch match; /* Matching rule. */
> unsigned int priority; /* Larger numbers are higher priorities. */
> + struct cls_partition *partition;
> +};
> +
> +/* Associates a metadata value (that is, a value of the OpenFlow 1.1+ metadata
> + * field) with tags for the "cls_table"s that contain rules that match that
> + * metadata value. */
> +struct cls_partition {
> + struct hmap_node hmap_node; /* In struct classifier's 'partitions' hmap. */
> + ovs_be64 metadata; /* metadata value for this partition. */
> + unsigned int n_refs; /* # of flows that refer to this partition. */
> + tag_type tags; /* OR of each included flow's cls_table tag. */
> };
>
> void cls_rule_init(struct cls_rule *, const struct match *,
> diff --git a/lib/flow.h b/lib/flow.h
> index 09a472d..9d8803a 100644
> --- a/lib/flow.h
> +++ b/lib/flow.h
> @@ -21,6 +21,7 @@
> #include <stdbool.h>
> #include <stdint.h>
> #include <string.h>
> +#include "byte-order.h"
> #include "openflow/nicira-ext.h"
> #include "openflow/openflow.h"
> #include "hash.h"
> @@ -259,6 +260,7 @@ void miniflow_expand(const struct miniflow *, struct flow *);
>
> uint32_t miniflow_get(const struct miniflow *, unsigned int u32_ofs);
> uint16_t miniflow_get_vid(const struct miniflow *);
> +static inline ovs_be64 miniflow_get_metadata(const struct miniflow *);
>
> bool miniflow_equal(const struct miniflow *a, const struct miniflow *b);
> bool miniflow_equal_in_minimask(const struct miniflow *a,
> @@ -291,11 +293,36 @@ void minimask_expand(const struct minimask *, struct flow_wildcards *);
>
> uint32_t minimask_get(const struct minimask *, unsigned int u32_ofs);
> uint16_t minimask_get_vid_mask(const struct minimask *);
> +static inline ovs_be64 minimask_get_metadata_mask(const struct minimask *);
>
> bool minimask_equal(const struct minimask *a, const struct minimask *b);
> uint32_t minimask_hash(const struct minimask *, uint32_t basis);
>
> bool minimask_has_extra(const struct minimask *, const struct minimask *);
> bool minimask_is_catchall(const struct minimask *);
> +
> +/* Returns the value of the OpenFlow 1.1+ "metadata" field in 'flow'. */
> +static inline ovs_be64
> +miniflow_get_metadata(const struct miniflow *flow)
> +{
> + enum { MD_OFS = offsetof(struct flow, metadata) };
> + BUILD_ASSERT_DECL(MD_OFS % sizeof(uint32_t) == 0);
> + ovs_be32 hi = (OVS_FORCE ovs_be32) miniflow_get(flow, MD_OFS / 4);
> + ovs_be32 lo = (OVS_FORCE ovs_be32) miniflow_get(flow, MD_OFS / 4 + 1);
> +
> + return htonll(((uint64_t) ntohl(hi) << 32) | ntohl(lo));
> +}
> +
> +/* Returns the mask for the OpenFlow 1.1+ "metadata" field in 'mask'.
> + *
> + * The return value is all-1-bits if 'mask' matches on the whole value of the
> + * metadata field, all-0-bits if 'mask' entirely wildcards the metadata field,
> + * or some other value if the metadata field is partially matched, partially
> + * wildcarded. */
> +static inline ovs_be64
> +minimask_get_metadata_mask(const struct minimask *mask)
> +{
> + return miniflow_get_metadata(&mask->masks);
> +}
>
> #endif /* flow.h */
> diff --git a/lib/tag.h b/lib/tag.h
> index 9d6b4aa..6fec24b 100644
> --- a/lib/tag.h
> +++ b/lib/tag.h
> @@ -1,5 +1,5 @@
> /*
> - * Copyright (c) 2008, 2011, 2012 Nicira, Inc.
> + * Copyright (c) 2008, 2011, 2012, 2013 Nicira, Inc.
> *
> * Licensed under the Apache License, Version 2.0 (the "License");
> * you may not use this file except in compliance with the License.
> @@ -68,6 +68,12 @@
> /* Represents a tag, or the combination of 0 or more tags. */
> typedef uint32_t tag_type;
>
> +/* A 'tag_type' value that intersects every tag. */
> +#define TAG_ALL UINT32_MAX
> +
> +/* An arbitrary tag. */
> +#define TAG_ARBITRARY UINT32_C(3)
> +
> tag_type tag_create_random(void);
> tag_type tag_create_deterministic(uint32_t seed);
> static inline bool tag_intersects(tag_type, tag_type);
> --
> 1.7.10.4
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
More information about the dev
mailing list