[ovs-dev] [PATCH v3 2/2] Classifier: Track address prefixes.

Jarno Rajahalme jrajahalme at nicira.com
Thu Nov 21 22:25:31 UTC 2013


Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a flow being classified.
This enables less unwildcarding for datapath flows in parts of the
address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "fieldspec" is
added to the database table "Flow_Table".  "fieldspec" is a string map
with only the "prefix" key defined so far (other keys can be added in
future):

"prefix"   A comma separated list of field names for which prefix
           lookup should be used.

The fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- metadata
- reg0 - reg7
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 fieldspec:prefix=tun_id
ovs-vsctl set Flow_Table table1 fieldspec:prefix=ip_dst,ip_src

ovs-vsctl remove Flow_Table table1 fieldspec prefix

Signed-off-by: Jarno Rajahalme <jrajahalme at nicira.com>
---
 lib/classifier.c           |  694 +++++++++++++++++++++++++++++++++++++++++++-
 lib/classifier.h           |   70 ++++-
 lib/flow.c                 |   12 +-
 lib/meta-flow.c            |   56 ++++
 lib/meta-flow.h            |    3 +
 lib/ofp-util.h             |    2 +-
 ofproto/ofproto.c          |    6 +-
 ofproto/ofproto.h          |    8 +
 tests/classifier.at        |    3 +-
 tests/ofproto-dpif.at      |    6 +-
 tests/test-classifier.c    |   17 +-
 vswitchd/bridge.c          |   60 ++++
 vswitchd/vswitch.ovsschema |    9 +-
 13 files changed, 917 insertions(+), 29 deletions(-)

diff --git a/lib/classifier.c b/lib/classifier.c
index 33ade96..2f0970b 100644
--- a/lib/classifier.c
+++ b/lib/classifier.c
@@ -27,6 +27,7 @@
 #include "packets.h"
 #include "ovs-thread.h"
 
+struct trie_ctx;
 static struct cls_subtable *find_subtable(const struct classifier *,
                                           const struct minimask *);
 static struct cls_subtable *insert_subtable(struct classifier *,
@@ -42,7 +43,8 @@ static void update_subtables_after_removal(struct classifier *,
                                            unsigned int del_priority);
 
 static struct cls_rule *find_match_wc(const struct cls_subtable *,
-                                      const struct flow *,
+                                      const struct flow *, struct trie_ctx *,
+                                      unsigned int n_tries,
                                       struct flow_wildcards *);
 static struct cls_rule *find_equal(struct cls_subtable *,
                                    const struct miniflow *, uint32_t hash);
@@ -59,6 +61,22 @@ static struct cls_rule *insert_rule(struct classifier *,
 
 static struct cls_rule *next_rule_in_list__(struct cls_rule *);
 static struct cls_rule *next_rule_in_list(struct cls_rule *);
+
+static uint8_t minimask_get_prefix_len(const struct minimask *,
+                                       const struct mf_field *,
+                                       unsigned int *);
+static void trie_init(struct classifier *, int trie_idx,
+                      const struct mf_field *);
+static uint8_t trie_lookup(const struct cls_trie *, const struct flow *,
+                           uint8_t *checkbits);
+
+static void trie_destroy(struct trie_node *);
+static void trie_insert(struct cls_trie *, const struct cls_rule *);
+static void trie_remove(struct cls_trie *, const struct cls_rule *);
+static void mask_set_prefix_bits(struct flow_wildcards *, uint8_t u32ofs,
+                                 uint8_t nbits);
+static bool mask_prefix_bits_set(const struct flow_wildcards *,
+                                 uint8_t u32ofs, uint8_t nbits);
 
 /* cls_rule. */
 
@@ -164,6 +182,7 @@ classifier_init(struct classifier *cls, const uint8_t *flow_segments)
             cls->flow_segments[cls->n_flow_segments++] = *flow_segments++;
         }
     }
+    cls->n_tries = 0;
 }
 
 /* Destroys 'cls'.  Rules within 'cls', if any, are not freed; this is the
@@ -174,6 +193,13 @@ classifier_destroy(struct classifier *cls)
     if (cls) {
         struct cls_subtable *partition, *next_partition;
         struct cls_subtable *subtable, *next_subtable;
+        int i;
+
+        for (i = 0; i < cls->n_tries; i++) {
+            if (cls->tries[i].root) {
+                trie_destroy(cls->tries[i].root);
+            }
+        }
 
         HMAP_FOR_EACH_SAFE (subtable, next_subtable, hmap_node,
                             &cls->subtables) {
@@ -191,6 +217,79 @@ classifier_destroy(struct classifier *cls)
     }
 }
 
+void
+classifier_set_prefix_fields(struct classifier *cls,
+                             const enum mf_field_id *trie_fields,
+                             unsigned int n_fields)
+{
+    int i, trie;
+
+    for (i = 0, trie = 0; i < n_fields && trie < CLS_MAX_TRIES; i++) {
+        const struct mf_field *field = mf_from_id(trie_fields[i]);
+        if (!field || field->flow_u32ofs < 0) {
+            /* Non-existing or incompatible field. */
+            continue;
+        }
+        if (trie >= cls->n_tries || field != cls->tries[trie].field) {
+            if (trie < cls->n_tries && cls->tries[trie].root) {
+                trie_destroy(cls->tries[trie].root);
+            }
+            trie_init(cls, trie, field);
+        }
+        trie++;
+    }
+
+    /* Destroy the rest. */
+    for (i = trie; i < cls->n_tries; i++) {
+        struct cls_subtable *subtable;
+
+        if (cls->tries[i].root) {
+            trie_destroy(cls->tries[i].root);
+            cls->tries[i].root = NULL;
+        }
+        cls->tries[i].field = NULL;
+
+        /* Remove the field plen from subtables. */
+        LIST_FOR_EACH (subtable, list_node, &cls->subtables_priority) {
+            subtable->trie_plen[i] = 0;
+        }
+    }
+    cls->n_tries = trie;
+}
+
+static void
+trie_init(struct classifier *cls, int trie_idx,
+          const struct mf_field *field)
+{
+    struct cls_trie *trie = &cls->tries[trie_idx];
+    struct cls_subtable *subtable;
+
+    trie->root = NULL;
+    trie->field = field;
+
+    /* Add existing rules to the trie. */
+    LIST_FOR_EACH (subtable, list_node, &cls->subtables_priority) {
+        struct cls_rule *head;
+
+        /* Initialize subtable's prefix length on this field. */
+        subtable->trie_plen[trie_idx]
+            = minimask_get_prefix_len(&subtable->mask, field, NULL);
+
+        if (!subtable->trie_plen[trie_idx]) {
+            /* Field not in this subtable. */
+            continue;
+        }
+
+        HMAP_FOR_EACH (head, hmap_node, &subtable->rules) {
+            struct cls_rule *rule;
+            FOR_EACH_RULE_IN_LIST (rule, head) {
+                trie_insert(trie, rule);
+            }
+        }
+    }
+}
+
+
 /* Returns true if 'cls' contains no classification rules, false otherwise. */
 bool
 classifier_is_empty(const struct classifier *cls)
@@ -269,6 +368,8 @@ classifier_replace(struct classifier *cls, struct cls_rule *rule)
 
     old_rule = insert_rule(cls, subtable, rule);
     if (!old_rule) {
+        int i;
+
         if (minimask_get_metadata_mask(&rule->match.mask) == OVS_BE64_MAX) {
             ovs_be64 metadata = miniflow_get_metadata(&rule->match.flow);
             rule->partition = create_partition(cls, subtable, metadata);
@@ -278,6 +379,10 @@ classifier_replace(struct classifier *cls, struct cls_rule *rule)
 
         subtable->n_rules++;
         cls->n_rules++;
+
+        for (i = 0; i < cls->n_tries; i++) {
+            trie_insert(&cls->tries[i], rule);
+        }
     } else {
         rule->partition = old_rule->partition;
     }
@@ -343,9 +448,34 @@ classifier_remove(struct classifier *cls, struct cls_rule *rule)
     } else {
         update_subtables_after_removal(cls, subtable, rule->priority);
     }
+
+    for (i = 0; i < cls->n_tries; i++) {
+        trie_remove(&cls->tries[i], rule);
+    }
+
     cls->n_rules--;
 }
 
+/* Prefix tree context.  Valid when 'lookup_done' is true.  Can skip all
+ * subtables which have more than 'match_plen' bits in their corresponding
+ * field at offset 'u32ofs'.  If skipped, 'maskbits' prefix bits should be
+ * unwildcarded to quarantee datapath flow matches only packets it should. */
+struct trie_ctx {
+    const struct cls_trie *trie;
+    bool lookup_done;     /* Status of the lookup. */
+    uint8_t u32ofs;       /* U32 offset of the field in question. */
+    uint8_t match_plen;   /* Longest prefix than could possibly match. */
+    uint8_t maskbits;     /* Prefix length needed to avoid false matches. */
+};
+
+static void
+trie_ctx_init(struct trie_ctx *ctx, const struct cls_trie *trie)
+{
+    ctx->trie = trie;
+    ctx->u32ofs = trie->field->flow_u32ofs;
+    ctx->lookup_done = false;
+}
+
 /* Finds and returns the highest-priority rule in 'cls' that matches 'flow'.
  * Returns a null pointer if no rules in 'cls' match 'flow'.  If multiple rules
  * of equal priority match 'flow', returns one arbitrarily.
@@ -362,6 +492,8 @@ classifier_lookup(const struct classifier *cls, const struct flow *flow,
     struct cls_subtable *subtable;
     struct cls_rule *best;
     tag_type tags;
+    struct trie_ctx trie_ctx[CLS_MAX_TRIES];
+    int i;
 
     /* Determine 'tags' such that, if 'subtable->tag' doesn't intersect them,
      * then 'flow' cannot possibly match in 'subtable':
@@ -387,6 +519,10 @@ classifier_lookup(const struct classifier *cls, const struct flow *flow,
                                   hash_metadata(flow->metadata)));
     tags = partition ? partition->tags : TAG_ARBITRARY;
 
+    /* Initialize trie contexts for match_find_wc(). */
+    for (i = 0; i < cls->n_tries; i++) {
+        trie_ctx_init(&trie_ctx[i], &cls->tries[i]);
+    }
     best = NULL;
     LIST_FOR_EACH (subtable, list_node, &cls->subtables_priority) {
         struct cls_rule *rule;
@@ -395,7 +531,7 @@ classifier_lookup(const struct classifier *cls, const struct flow *flow,
             continue;
         }
 
-        rule = find_match_wc(subtable, flow, wc);
+        rule = find_match_wc(subtable, flow, trie_ctx, cls->n_tries, wc);
         if (rule) {
             best = rule;
             LIST_FOR_EACH_CONTINUE (subtable, list_node,
@@ -409,7 +545,8 @@ classifier_lookup(const struct classifier *cls, const struct flow *flow,
                     continue;
                 }
 
-                rule = find_match_wc(subtable, flow, wc);
+                rule = find_match_wc(subtable, flow, trie_ctx, cls->n_tries,
+                                     wc);
                 if (rule && rule->priority > best->priority) {
                     best = rule;
                 }
@@ -417,6 +554,7 @@ classifier_lookup(const struct classifier *cls, const struct flow *flow,
             break;
         }
     }
+
     return best;
 }
 
@@ -708,6 +846,12 @@ insert_subtable(struct classifier *cls, const struct minimask *mask)
                      ? tag_create_deterministic(hash)
                      : TAG_ALL);
 
+    for (i = 0; i < cls->n_tries; i++) {
+        subtable->trie_plen[i] = minimask_get_prefix_len(mask,
+                                                         cls->tries[i].field,
+                                                         NULL);
+    }
+
     return subtable;
 }
 
@@ -820,6 +964,65 @@ update_subtables_after_removal(struct classifier *cls,
     }
 }
 
+struct range {
+    uint8_t start;
+    uint8_t end;
+};
+
+/* Return 'true' if can skip rest of the subtable based on the prefix trie
+ * lookup results. */
+static inline bool
+check_tries(const struct flow *flow, struct trie_ctx trie_ctx[CLS_MAX_TRIES],
+            const struct range *tries, const uint8_t field_plen[CLS_MAX_TRIES],
+            const struct range *ofs, struct flow_wildcards *wc)
+{
+    int j;
+    /* Check if we could avoid fully unwildcarding the next level of
+     * fields using the prefix tries.  The trie checks are done only as
+     * needed to avoid folding in additional bits to the wildcards mask. */
+    for (j = tries->start; j < tries->end; j++) {
+        struct trie_ctx *ctx = &trie_ctx[j];
+
+        /* Is the trie field relevant for this subtable and range of fields? */
+        if (field_plen[j] &&
+            ctx->u32ofs >= ofs->start && ctx->u32ofs < ofs->end) {
+            /* On-demand trie lookup. */
+            if (!ctx->lookup_done) {
+                ctx->match_plen = trie_lookup(ctx->trie, flow, &ctx->maskbits);
+                ctx->lookup_done = true;
+            }
+            /* Possible to skip the rest of the subtable if subtable's prefix
+             * on the field is longer than what is known to match based on the
+             * trie lookup. */
+            if (field_plen[j] > ctx->match_plen) {
+                /* RFC: We want the trie lookup to never result in
+                 * unwildcarding any bits that would not be unwildcarded
+                 * otherwise.  Since the trie is shared by the whole
+                 * classifier, it is possible that the 'maskbits' contain bits
+                 * that are irrelevant for the partition of the classifier
+                 * relevant for the current flow. */
+
+                /* Can skip if the field is already unwildcarded. */
+                if (mask_prefix_bits_set(wc, ctx->u32ofs, ctx->maskbits)) {
+                    return true;
+                }
+                /* Check that the trie result will not unwildcard more bits
+                 * than this stage will. */
+                if (ctx->maskbits <= field_plen[j]) {
+                    /* Unwildcard the bits and skip the rest. */
+                    mask_set_prefix_bits(wc, ctx->u32ofs, ctx->maskbits);
+                    /* Note: Prerequisite already unwildcarded, as the only
+                     * prerequisite of the supported trie lookup fields is
+                     * the ethertype, which is currently always unwildcarded.
+                     */
+                    return true;
+                }
+            }
+        }
+    }
+    return false;
+}
+
 static inline struct cls_rule *
 find_match(const struct cls_subtable *subtable, const struct flow *flow,
            uint32_t hash)
@@ -837,32 +1040,50 @@ find_match(const struct cls_subtable *subtable, const struct flow *flow,
 
 static struct cls_rule *
 find_match_wc(const struct cls_subtable *subtable, const struct flow *flow,
-              struct flow_wildcards * wc)
+              struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries,
+              struct flow_wildcards *wc)
 {
     uint32_t basis = 0, hash;
     struct cls_rule *rule = NULL;
-    uint8_t prev_u32ofs = 0;
     int i;
+    struct range tries, ofs;
 
     if (!wc) {
         return find_match(subtable, flow,
                           flow_hash_in_minimask(flow, &subtable->mask, 0));
     }
 
+    /* Skip to the first relevant trie. */
+    for (tries.start = 0; tries.start < n_tries; tries.start++) {
+        if (subtable->trie_plen[tries.start]) {
+            break;
+        }
+    }
+    /* Trim irrelevant tries from the end. */
+    for (tries.end = n_tries; tries.end > tries.start; tries.end--) {
+        if (subtable->trie_plen[tries.end - 1]) {
+            break;
+        }
+    }
+
+    ofs.start = 0;
     /* Try to finish early by checking fields in segments. */
     for (i = 0; i < subtable->n_indices; i++) {
         struct hindex_node *inode;
+        ofs.end = subtable->index_ofs[i];
 
-        hash = flow_hash_in_minimask_range(flow, &subtable->mask, prev_u32ofs,
-                                           subtable->index_ofs[i], &basis);
-        prev_u32ofs = subtable->index_ofs[i];
+        if (check_tries(flow, trie_ctx, &tries, subtable->trie_plen, &ofs,
+                        wc)) {
+            goto range_out;
+        }
+        hash = flow_hash_in_minimask_range(flow, &subtable->mask, ofs.start,
+                                           ofs.end, &basis);
+        ofs.start = ofs.end;
         inode = hindex_node_with_hash(&subtable->indices[i], hash);
         if (!inode) {
             /* No match, can stop immediately, but must fold in the mask
              * covered so far. */
-            flow_wildcards_fold_minimask_range(wc, &subtable->mask, 0,
-                                               prev_u32ofs);
-            return NULL;
+            goto range_out;
         }
 
         /* If we have narrowed down to a single rule already, check whether
@@ -884,11 +1105,15 @@ find_match_wc(const struct cls_subtable *subtable, const struct flow *flow,
             }
         }
     }
-
+    ofs.end = FLOW_U32S;
+    /* Trie check for the final range. */
+    if (check_tries(flow, trie_ctx, &tries, subtable->trie_plen, &ofs, wc)) {
+        goto range_out;
+    }
     if (!rule) {
         /* Multiple potential matches exist, look for one. */
-        hash = flow_hash_in_minimask_range(flow, &subtable->mask, prev_u32ofs,
-                                           FLOW_U32S, &basis);
+        hash = flow_hash_in_minimask_range(flow, &subtable->mask, ofs.start,
+                                           ofs.end, &basis);
         rule = find_match(subtable, flow, hash);
     } else {
         /* We already narrowed the matching candidates down to just 'rule',
@@ -896,8 +1121,16 @@ find_match_wc(const struct cls_subtable *subtable, const struct flow *flow,
         rule = NULL;
     }
  out:
+    /* Must unwildcard all the fields, as they were looked at. */
     flow_wildcards_fold_minimask(wc, &subtable->mask);
     return rule;
+
+ range_out:
+    /* Must unwildcard the fields looked up so far, if any. */
+    if (ofs.start) {
+        flow_wildcards_fold_minimask_range(wc, &subtable->mask, 0, ofs.start);
+    }
+    return NULL;
 }
 
 static struct cls_rule *
@@ -992,3 +1225,436 @@ next_rule_in_list(struct cls_rule *rule)
     struct cls_rule *next = next_rule_in_list__(rule);
     return next->priority < rule->priority ? next : NULL;
 }
+
+/* A longest-prefix match tree. */
+/* Max bits per node. Also tested with 16, 8, and 5. */
+#define TRIE_PREFIX_BITS 32
+
+struct trie_node {
+    uint32_t prefix;           /* Prefix bits for this node, MSB first. */
+    uint8_t  nbits;            /* Never zero, except for the root node. */
+    unsigned int n_rules;      /* Number of rules that have this prefix. */
+    struct trie_node *edges[2]; /* both NULL if leaf. */
+};
+
+/* Return min(TRIE_PREFIX_BITS,plen) bits of the 'prefix', starting at bit
+ *  offset 'ofs'. */
+static uint32_t
+trie_get_prefix(ovs_be32 *pr, const unsigned int ofs, const unsigned int plen)
+{
+    uint32_t prefix;
+    const unsigned int keep = MIN(plen, TRIE_PREFIX_BITS);
+
+    if (!plen) {
+        return 0;
+    }
+    pr += ofs / 32; /* Where to start. */
+    prefix = ntohl(*pr) << ofs % 32;
+    if (ofs % 32 > 32 - TRIE_PREFIX_BITS && plen > 32 - ofs % 32) {
+        /* Have space and need more than we have already. */
+        prefix |= ntohl(*++pr) >> (32 - ofs % 32);
+    }
+    if (keep < 32) { /* Clear extra bits */
+        prefix &= ~0u << (32 - keep);
+    }
+    return prefix;
+}
+
+/* Return the bit at (0-based) offset 'ofs' as an int. */
+static unsigned int
+be_get_bit_at(const ovs_be32 value[], unsigned int ofs)
+{
+    return (((const uint8_t *)value)[ofs / 8] >> (7 - ofs % 8)) & 1;
+}
+
+/* Return the bit at (0-based) offset 'ofs' as an int. */
+static unsigned int
+get_bit_at(const uint32_t prefix, unsigned int ofs)
+{
+    return (prefix >> (31 - ofs)) & 1;
+}
+
+/* Create new branch. */
+static struct trie_node *
+trie_branch_create(ovs_be32 *prefix, unsigned int ofs, unsigned int plen,
+                   unsigned int n_rules)
+{
+    struct trie_node *node = xmalloc(sizeof *node);
+
+    node->prefix = trie_get_prefix(prefix, ofs, plen);
+
+    if (plen <= TRIE_PREFIX_BITS) {
+        node->nbits = plen;
+        node->edges[0] = NULL;
+        node->edges[1] = NULL;
+        node->n_rules = n_rules;
+    } else { /* Need intermediate nodes. */
+        struct trie_node *subnode = trie_branch_create(prefix,
+                                                       ofs + TRIE_PREFIX_BITS,
+                                                       plen - TRIE_PREFIX_BITS,
+                                                       n_rules);
+        int bit = get_bit_at(subnode->prefix, 0);
+        node->nbits = TRIE_PREFIX_BITS;
+        node->edges[bit] = subnode;
+        node->edges[!bit] = NULL;
+        node->n_rules = 0;
+    }
+    return node;
+}
+
+static void
+trie_node_destroy(struct trie_node *node)
+{
+    ovs_assert(!node->n_rules);
+    free(node);
+}
+
+static void
+trie_destroy(struct trie_node *node)
+{
+    if (node->edges[0]) {
+        trie_destroy(node->edges[0]);
+    }
+    if (node->edges[1]) {
+        trie_destroy(node->edges[1]);
+    }
+    free(node);
+}
+
+static bool
+trie_is_leaf(const struct trie_node *trie) {
+    return !trie->edges[0] && !trie->edges[1]; /* No children. */
+};
+
+/* Return the number of equal bits in 'node' prefix and a 'value' starting
+ * at bit 'ofs'. */
+static unsigned int
+trie_equal_bits(const struct trie_node *node, const ovs_be32 value[],
+                const unsigned int ofs)
+{
+    uint32_t diff = node->prefix ^ ntohl(value[ofs / 32]) << ofs % 32;
+
+    ovs_assert(node->nbits <= TRIE_PREFIX_BITS);
+
+    if (node->nbits < 32) {
+        return raw_clz(diff | ~0u >> node->nbits) - 32;
+    }
+    return clz(diff);
+}
+
+/* Return the number of equal bits in 'node' prefix and a 'prefix' starting
+ * at bit 'ofs', of length 'plen'. */
+static unsigned int
+trie_prefix_equal_bits(const struct trie_node *node, const ovs_be32 prefix[],
+                       const unsigned int ofs, const unsigned int plen)
+{
+    uint32_t diff = node->prefix ^ ntohl(prefix[ofs / 32]) << ofs % 32;
+    int bits = MIN(node->nbits, plen - ofs);
+
+    ovs_assert(node->nbits <= TRIE_PREFIX_BITS);
+
+    if (bits < 32) {
+        return raw_clz(diff | ~0u >> bits) - 32;
+    }
+    return clz(diff);
+}
+
+static void
+mask_set_prefix_bits(struct flow_wildcards *wc, uint8_t u32ofs, uint8_t nbits)
+{
+    ovs_be32 *mask = &((ovs_be32 *)&wc->masks)[u32ofs];
+    unsigned int i;
+
+    for (i = 0; i < nbits / 32; i++) {
+        mask[i] = htonl(~0);
+    }
+    if (nbits % 32) {
+        mask[i] |= htonl(~0u << (32 - nbits % 32));
+    }
+}
+
+static bool
+mask_prefix_bits_set(const struct flow_wildcards *wc, uint8_t u32ofs,
+                     uint8_t nbits)
+{
+    ovs_be32 *mask = &((ovs_be32 *)&wc->masks)[u32ofs];
+    unsigned int i;
+    ovs_be32 zeroes = 0;
+
+    for (i = 0; i < nbits / 32; i++) {
+        zeroes |= ~mask[i];
+    }
+    if (nbits % 32) {
+        zeroes |= ~mask[i] & htonl(~0u << (32 - nbits % 32));
+    }
+
+    return !zeroes; /* All 'nbits' bits set. */
+}
+
+/* Return the prefix mask length necessary to find the longest-prefix match for
+ * the '*value' in the prefix tree 'node'.
+ * '*checkbits' is set to the number of bits in the prefix mask necessary to
+ * determine a mismatch, in case there are longer prefixes in the tree below
+ * the one that matched. */
+static uint8_t
+trie_lookup_value(const struct trie_node *node, const ovs_be32 *value,
+                  uint8_t *checkbits)
+{
+    unsigned int plen = 0, match_len = 0;
+
+    for (;;) {
+        unsigned int eqbits;
+        /* Check if this edge can be followed. */
+        eqbits = trie_equal_bits(node, value, plen);
+        plen += eqbits;
+        if (eqbits < node->nbits) { /* Mismatch, nothing more to be found. */
+            /* Bit at offset plen differed. */
+            *checkbits = plen + 1; /* Includes the one mismatching bit. */
+            break;
+        }
+        /* Full match, check if rules exist at this prefix length. */
+        if (node->n_rules > 0) {
+            match_len = plen;
+        }
+        if (trie_is_leaf(node)) {
+            *checkbits = plen;
+            break;
+        }
+        node = node->edges[be_get_bit_at(value, plen)];
+        if (!node) {
+            /* Dead end, but the other branch exists (since !leaf) */
+            *checkbits = plen + 1;
+            break;
+        }
+    }
+    return match_len;
+}
+
+static uint8_t
+trie_lookup(const struct cls_trie *trie, const struct flow *flow,
+            uint8_t *checkbits)
+{
+    const struct mf_field *mf = trie->field;
+
+    /* Check that can skip based on this field, and that the current flow
+     * matches the prerequisites for the field.  Some match
+     * fields are used for multiple purposes, so we should try to
+     * use the trie only when prerequisities are met. */
+    return (!trie->root || !mf || !mf_are_prereqs_ok(mf, flow)) ? UINT8_MAX
+        : trie_lookup_value(trie->root, &((ovs_be32 *)flow)[mf->flow_u32ofs],
+                            checkbits);
+}
+
+/* Returns the length of a longest-prefix match mask for the field 'mf' in
+ * 'match'. When the returned length is non-zero, returns the u32 offset to
+ * the miniflow data in '*miniflow_index', if 'miniflow_index' is not NULL.
+ */
+static uint8_t
+minimask_get_prefix_len(const struct minimask *minimask,
+                        const struct mf_field *mf,
+                        unsigned int *miniflow_index)
+{
+    const struct miniflow *mflow = &minimask->masks;
+    unsigned int u32_ofs, u32_end;
+    unsigned int ofs32;
+    uint8_t nbits = 0, mask_tz = 0; /* Non-zero when end of mask seen. */
+
+    if (!mf || mf->n_bytes % 4) {
+        return 0;
+    }
+
+    u32_ofs = mf->flow_u32ofs;
+
+    /* Scan to the starting offset of the mask data. */
+    ofs32 = count_1bits(mflow->map & ((UINT64_C(1) << u32_ofs) - 1));
+
+    if (miniflow_index) {
+        *miniflow_index = ofs32; /* Starting miniflow data offset. */
+    }
+
+    u32_end = u32_ofs + mf->n_bytes / 4;
+
+    for (; u32_ofs < u32_end; ++u32_ofs) {
+        uint32_t mask = mflow->map & (UINT64_C(1) << u32_ofs)
+            ? ntohl((OVS_FORCE ovs_be32)mflow->values[ofs32++]) : 0;
+
+        /* Validate mask, count the mask length. */
+        if (mask_tz) {
+            if (mask) {
+                return 0; /* No bits allowed after mask ended. */
+            }
+        } else {
+            if (mask) {
+                mask_tz = raw_ctz(mask);
+                if (~mask & (~0u << mask_tz)) {
+                    return 0; /* Mask not contiguous. */
+                }
+                nbits += 32 - mask_tz;
+            } else {
+                mask_tz = 32; /* End of mask seen. */
+            }
+        }
+    }
+    return nbits;
+}
+
+/* Insert rule in to the prefix tree. */
+static void
+trie_insert(struct cls_trie *trie, const struct cls_rule *rule)
+{
+    struct trie_node *node;
+    const struct mf_field *mf = trie->field;
+    int ofs, mlen;
+    ovs_be32 *prefix;
+    struct trie_node **edge;
+    unsigned int miniflow_index;
+
+    if (!mf) {
+        return;
+    }
+
+    ovs_assert(mf->flow_u32ofs >= 0 && mf->n_bits % 32 == 0);
+
+    /* Get mask length and location in the miniflow.
+     * Note: flow and mask have data at same offsets!
+     */
+    mlen = minimask_get_prefix_len(&rule->match.mask, mf, &miniflow_index);
+    if (!mlen) {
+        return; /* Rule's mask not compatible or zero length. */
+    }
+    prefix = (OVS_FORCE ovs_be32 *)&rule->match.flow.values[miniflow_index];
+
+    if (!trie->root) {
+        /* Insert as the lone rule in the tree. */
+        trie->root = trie_branch_create(prefix, 0, mlen, 1);
+        return;
+    }
+    /* Walk the tree. */
+    edge = &trie->root; /* Edge most recently followed. */
+    node = *edge;
+    ofs = 0;
+    while (node) {
+        unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen);
+
+        /* Only the root may have no prefix */
+        ovs_assert(edge == &trie->root || node->nbits);
+
+        ofs += eqbits;
+
+        if (eqbits < node->nbits) {
+            /* Mismatch, new node needs to be inserted above. */
+            int old_branch = get_bit_at(node->prefix, eqbits);
+
+            /* New parent node. */
+            *edge = trie_branch_create(prefix, ofs - eqbits, eqbits,
+                                       ofs == mlen ? 1 : 0);
+
+            /* Adjust old node for it's new position in the tree. */
+            node->prefix <<= eqbits;
+            node->nbits -= eqbits;
+            (*edge)->edges[old_branch] = node;
+
+            /* Check if need a new branch for the new rule. */
+            if (ofs < mlen) {
+                (*edge)->edges[!old_branch]
+                    = trie_branch_create(prefix, ofs, mlen - ofs, 1);
+            }
+            return;
+        }
+
+        /* Full match so far. */
+
+        if (ofs == mlen) {
+            /* Full match at the current node, rule needs to be added here. */
+            node->n_rules++;
+            return;
+        }
+        /* Go deeper. */
+        edge = &node->edges[be_get_bit_at(prefix, ofs)];
+        if (!*edge) {
+            /* Must insert a new tree branch for the new rule. */
+            *edge = trie_branch_create(prefix, ofs, mlen - ofs, 1);
+            return;
+        }
+        node = *edge;
+
+        ovs_assert(ofs < mlen);
+    }
+}
+
+static void
+trie_remove(struct cls_trie *trie, const struct cls_rule *rule)
+{
+    struct trie_node *node;
+    const struct mf_field *mf = trie->field;
+    int ofs, mlen;
+    ovs_be32 *prefix;
+    struct trie_node **edge;
+    unsigned int miniflow_index;
+
+    if (!mf || !trie->root) {
+        return; /* Nothing to be done. */
+    }
+
+    ovs_assert(mf->flow_u32ofs >= 0 && mf->n_bits % 32 == 0);
+
+    /* Get mask length and location in the miniflow. */
+    mlen = minimask_get_prefix_len(&rule->match.mask, mf, &miniflow_index);
+    if (!mlen) {
+        return; /* Rule's mask not compatible or zero length. */
+    }
+    prefix = (OVS_FORCE ovs_be32 *)&rule->match.flow.values[miniflow_index];
+
+    /* Walk the tree. */
+    edge = &trie->root; /* Edge most recently followed. */
+    node = *edge;
+    ofs = 0;
+    while (node) {
+        unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen);
+
+        if (eqbits < node->nbits) {
+            /* Mismatch, nothing to be removed.  This should never happen, as
+             * only rules in the classifier are ever removed. */
+            ovs_assert(eqbits == node->nbits);
+            return;
+        }
+
+        /* Full match so far. */
+        ofs += eqbits;
+
+        if (ofs == mlen) {
+            /* Full match at the current node, rule needs to be added here. */
+            if (node->n_rules) {
+                node->n_rules--;
+            }
+            if (!node->n_rules && /* Remove this node from the tree? */
+                !(node->edges[0] && node->edges[1])) {
+                /* No rules and at most one child node, remove this node. */
+                struct trie_node *next;
+                next = node->edges[0] ? node->edges[0] : node->edges[1];
+
+                if (next) {
+                    if (node->nbits + next->nbits > TRIE_PREFIX_BITS) {
+                        return; /* Cannot combine. */
+                    }
+                    /* Combine node with next. */
+                    next->prefix = node->prefix | next->prefix >> node->nbits;
+                    next->nbits += node->nbits;
+                }
+                *edge = next;
+                trie_node_destroy(node);
+            }
+            return;
+        }
+        /* Go deeper. */
+        edge = &node->edges[be_get_bit_at(prefix, ofs)];
+        if (!*edge) {
+            /* Cannot go deeper. This should never happen, since only rules
+             * that actually exist in the classifier are ever removed. */
+            ovs_assert(*edge);
+            return;
+        }
+        node = *edge;
+        ovs_assert(ofs < mlen);
+    }
+}
diff --git a/lib/classifier.h b/lib/classifier.h
index 3c3e7d1..5ab0fa4 100644
--- a/lib/classifier.h
+++ b/lib/classifier.h
@@ -71,6 +71,54 @@
  * logic is that as many fields as possible can remain wildcarded.
  *
  *
+ * Prefix Tracking
+ * ===============
+ *
+ * Classifier uses prefix trees ("tries") for tracking the used
+ * address space, enabling skipping classifier tables containing
+ * longer masks than necessary for the given address.  This enables
+ * less unwildcarding for datapath flows in parts of the address space
+ * without host routes.
+ *
+ * Trie lookup is interwoven with staged lookup, so that a trie is
+ * searched only when the configured trie field becomes relevant for
+ * the lookup.  The trie lookup results are retained so that each trie
+ * is checked at most once for each classifier lookup.
+ *
+ * This implementation tracks the number of rules at each address
+ * prefix for the whole classifier.  More aggressive table skipping
+ * would be possible by maintaining lists of tables that have prefixes
+ * at the lengths encountered on tree traversal, or by maintaining
+ * separate tries for subsets of rules separated by metadata fields.
+ *
+ * Prefix tracking is configured via OVSDB "Flow_Table" table,
+ * "fieldspec" column.  "fieldspec" is a string map where a "prefix"
+ * key tells which fields should be used for prefix tracking.  The
+ * value of the "prefix" key is a comma separated list of field names.
+ *
+ * The fields for which prefix lookup can be enabled are:
+ * - tun_id, tun_src, tun_dst
+ * - metadata
+ * - reg0 - reg7
+ * - nw_src, nw_dst (or aliases ip_src and ip_dst)
+ * - ipv6_src, ipv6_dst
+ *
+ * There is a maximum number of fields that can be enabled for any one
+ * flow table.  Currently this limit is 3.
+ *
+ * Examples:
+ *
+ * ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
+ *  --id=@N1 create Flow_Table name=table0
+ * ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
+ *  --id=@N1 create Flow_Table name=table1
+ *
+ * ovs-vsctl set Flow_Table table0 fieldspec:prefix=tun_id
+ * ovs-vsctl set Flow_Table table1 fieldspec:prefix=ip_dst,ip_src
+ *
+ * ovs-vsctl remove Flow_Table table1 fieldspec prefix
+ *
+ *
  * Partitioning
  * ============
  *
@@ -116,6 +164,7 @@
 #include "hmap.h"
 #include "list.h"
 #include "match.h"
+#include "meta-flow.h"
 #include "tag.h"
 #include "openflow/nicira-ext.h"
 #include "openflow/openflow.h"
@@ -128,9 +177,18 @@ extern "C" {
 
 /* Needed only for the lock annotation in struct classifier. */
 extern struct ovs_mutex ofproto_mutex;
+struct trie_node;
+
+/* Prefix trie for a 'field' */
+struct cls_trie {
+    const struct mf_field *field; /* Trie field, or NULL. */
+    struct trie_node *root;       /* NULL if none. */
+};
 
-/* Maximum number of staged lookup indices for each subtable. */
-enum { CLS_MAX_INDICES = 3 };
+enum {
+    CLS_MAX_INDICES = 3, /* Maximum number of lookup indices per subtable. */
+    CLS_MAX_TRIES = 3    /* Maximum number of prefix trees per classifier. */
+};
 
 /* A flow classifier. */
 struct classifier {
@@ -143,6 +201,8 @@ struct classifier {
                                      */
     struct hmap partitions;     /* Contains "struct cls_partition"s. */
     struct ovs_rwlock rwlock OVS_ACQ_AFTER(ofproto_mutex);
+    struct cls_trie tries[CLS_MAX_TRIES]; /* Prefix tries. */
+    unsigned int n_tries;
 };
 
 /* A set of rules that all have the same fields wildcarded. */
@@ -160,6 +220,7 @@ struct cls_subtable {
     uint8_t n_indices;           /* How many indices to use. */
     uint8_t index_ofs[CLS_MAX_INDICES]; /* u32 flow segment boundaries. */
     struct hindex indices[CLS_MAX_INDICES]; /* Staged lookup indices. */
+    uint8_t trie_plen[CLS_MAX_TRIES]; /* Trie prefix length in 'mask'. */
 };
 
 /* Returns true if 'table' is a "catch-all" subtable that will match every
@@ -211,6 +272,11 @@ bool cls_rule_is_loose_match(const struct cls_rule *rule,
 
 void classifier_init(struct classifier *cls, const uint8_t *flow_segments);
 void classifier_destroy(struct classifier *);
+void classifier_set_prefix_fields(struct classifier *cls,
+                                  const enum mf_field_id *trie_fields,
+                                  unsigned int n_trie_fields)
+    OVS_REQ_WRLOCK(cls->rwlock);
+
 bool classifier_is_empty(const struct classifier *cls)
     OVS_REQ_RDLOCK(cls->rwlock);
 int classifier_count(const struct classifier *cls)
diff --git a/lib/flow.c b/lib/flow.c
index c6683a5..c27650e 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -1162,12 +1162,16 @@ miniflow_alloc_values(struct miniflow *flow, int n)
 
 /* Completes an initialization of 'dst' as a miniflow copy of 'src' begun by
  * the caller.  The caller must have already initialized 'dst->map' properly
- * to indicate the nonzero uint32_t elements of 'src'.  'n' must be the number
- * of 1-bits in 'dst->map'.
+ * to indicate the significant uint32_t elements of 'src'.  'n' must be the
+ * number of 1-bits in 'dst->map'.
+ *
+ * Normally the significant elements are the ones that are non-zero.  However,
+ * when a miniflow is initialized from a (mini)mask, the values can be zeroes,
+ * so that the flow and mask always have the same maps.
  *
  * This function initializes 'dst->values' (either inline if possible or with
- * malloc() otherwise) and copies the nonzero uint32_t elements of 'src' into
- * it. */
+ * malloc() otherwise) and copies the uint32_t elements of 'src' indicated by
+ * 'dst->map' into it. */
 static void
 miniflow_init__(struct miniflow *dst, const struct flow *src, int n)
 {
diff --git a/lib/meta-flow.c b/lib/meta-flow.c
index b37f781..0ba28b2 100644
--- a/lib/meta-flow.c
+++ b/lib/meta-flow.c
@@ -37,6 +37,9 @@
 
 VLOG_DEFINE_THIS_MODULE(meta_flow);
 
+#define FLOW_U32OFS(FIELD)                                              \
+    offsetof(struct flow, FIELD) % 4 ? -1 : offsetof(struct flow, FIELD) / 4
+
 #define MF_FIELD_SIZES(MEMBER)                  \
     sizeof ((union mf_value *)0)->MEMBER,       \
     8 * sizeof ((union mf_value *)0)->MEMBER
@@ -59,6 +62,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_TUNNEL_ID, "OXM_OF_TUNNEL_ID",
         OFPUTIL_P_NXM_OXM_ANY,
         OFPUTIL_P_NXM_OXM_ANY,
+        FLOW_U32OFS(tunnel.tun_id),
     }, {
         MFF_TUN_SRC, "tun_src", NULL,
         MF_FIELD_SIZES(be32),
@@ -70,6 +74,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         NXM_NX_TUN_IPV4_SRC, "NXM_NX_TUN_IPV4_SRC",
         OFPUTIL_P_NXM_OXM_ANY,
         OFPUTIL_P_NXM_OXM_ANY,
+        FLOW_U32OFS(tunnel.ip_src),
     }, {
         MFF_TUN_DST, "tun_dst", NULL,
         MF_FIELD_SIZES(be32),
@@ -81,6 +86,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         NXM_NX_TUN_IPV4_DST, "NXM_NX_TUN_IPV4_DST",
         OFPUTIL_P_NXM_OXM_ANY,
         OFPUTIL_P_NXM_OXM_ANY,
+        FLOW_U32OFS(tunnel.ip_dst),
     }, {
         MFF_TUN_FLAGS, "tun_flags", NULL,
         MF_FIELD_SIZES(be16),
@@ -92,6 +98,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         0, NULL,
         OFPUTIL_P_NONE,
         OFPUTIL_P_NONE,
+        -1,
     }, {
         MFF_TUN_TTL, "tun_ttl", NULL,
         MF_FIELD_SIZES(u8),
@@ -103,6 +110,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         0, NULL,
         OFPUTIL_P_NONE,
         OFPUTIL_P_NONE,
+        -1,
     }, {
         MFF_TUN_TOS, "tun_tos", NULL,
         MF_FIELD_SIZES(u8),
@@ -114,6 +122,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         0, NULL,
         OFPUTIL_P_NONE,
         OFPUTIL_P_NONE,
+        -1,
     }, {
         MFF_METADATA, "metadata", NULL,
         MF_FIELD_SIZES(be64),
@@ -125,6 +134,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_METADATA, "OXM_OF_METADATA",
         OFPUTIL_P_NXM_OF11_UP,
         OFPUTIL_P_NXM_OF11_UP,
+        FLOW_U32OFS(metadata),
     }, {
         MFF_IN_PORT, "in_port", NULL,
         MF_FIELD_SIZES(be16),
@@ -136,6 +146,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         NXM_OF_IN_PORT, "NXM_OF_IN_PORT",
         OFPUTIL_P_ANY,   /* OF11+ via mapping to 32 bits. */
         OFPUTIL_P_NONE,
+        -1,
     }, {
         MFF_IN_PORT_OXM, "in_port_oxm", NULL,
         MF_FIELD_SIZES(be32),
@@ -147,6 +158,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_IN_PORT, "OXM_OF_IN_PORT",
         OFPUTIL_P_OF11_UP,
         OFPUTIL_P_NONE,
+        -1,
     }, {
         MFF_SKB_PRIORITY, "skb_priority", NULL,
         MF_FIELD_SIZES(be32),
@@ -158,6 +170,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         0, NULL,
         OFPUTIL_P_NONE,
         OFPUTIL_P_NONE,
+        -1,
     }, {
         MFF_PKT_MARK, "pkt_mark", NULL,
         MF_FIELD_SIZES(be32),
@@ -169,6 +182,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         NXM_NX_PKT_MARK, "NXM_NX_PKT_MARK",
         OFPUTIL_P_NXM_OXM_ANY,
         OFPUTIL_P_NXM_OXM_ANY,
+        -1,
     },
 
 #define REGISTER(IDX)                           \
@@ -183,6 +197,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         NXM_NX_REG(IDX), "NXM_NX_REG" #IDX,     \
         OFPUTIL_P_NXM_OXM_ANY,                  \
         OFPUTIL_P_NXM_OXM_ANY,                  \
+        FLOW_U32OFS(regs[IDX]),                 \
     }
 #if FLOW_N_REGS > 0
     REGISTER(0),
@@ -227,6 +242,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_ETH_SRC, "OXM_OF_ETH_SRC",
         OFPUTIL_P_ANY,
         OFPUTIL_P_NXM_OF11_UP,   /* Bitwise masking only with NXM and OF11+! */
+        -1,
     }, {
         MFF_ETH_DST, "eth_dst", "dl_dst",
         MF_FIELD_SIZES(mac),
@@ -238,6 +254,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_ETH_DST, "OXM_OF_ETH_DST",
         OFPUTIL_P_ANY,
         OFPUTIL_P_NXM_OF11_UP,   /* Bitwise masking only with NXM and OF11+! */
+        -1,
     }, {
         MFF_ETH_TYPE, "eth_type", "dl_type",
         MF_FIELD_SIZES(be16),
@@ -249,6 +266,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_ETH_TYPE, "OXM_OF_ETH_TYPE",
         OFPUTIL_P_ANY,
         OFPUTIL_P_NONE,
+        -1,
     },
 
     {
@@ -262,6 +280,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         NXM_OF_VLAN_TCI, "NXM_OF_VLAN_TCI",
         OFPUTIL_P_ANY,
         OFPUTIL_P_NXM_OXM_ANY,
+        -1,
     }, {
         MFF_DL_VLAN, "dl_vlan", NULL,
         sizeof(ovs_be16), 12,
@@ -273,6 +292,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         0, NULL,
         OFPUTIL_P_ANY,
         OFPUTIL_P_NXM_OXM_ANY,
+        -1,
     }, {
         MFF_VLAN_VID, "vlan_vid", NULL,
         sizeof(ovs_be16), 12,
@@ -284,6 +304,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_VLAN_VID, "OXM_OF_VLAN_VID",
         OFPUTIL_P_ANY,
         OFPUTIL_P_NXM_OXM_ANY,
+        -1,
     }, {
         MFF_DL_VLAN_PCP, "dl_vlan_pcp", NULL,
         1, 3,
@@ -295,6 +316,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         0, NULL,
         OFPUTIL_P_ANY,   /* Will be mapped to NXM and OXM. */
         OFPUTIL_P_NONE,
+        -1,
     }, {
         MFF_VLAN_PCP, "vlan_pcp", NULL,
         1, 3,
@@ -306,6 +328,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_VLAN_PCP, "OXM_OF_VLAN_PCP",
         OFPUTIL_P_ANY,   /* Will be mapped to OF10 and NXM. */
         OFPUTIL_P_NONE,
+        -1,
     },
 
     /* ## ---- ## */
@@ -322,6 +345,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_MPLS_LABEL, "OXM_OF_MPLS_LABEL",
         OFPUTIL_P_NXM_OF11_UP,
         OFPUTIL_P_NONE,
+        -1,
     }, {
         MFF_MPLS_TC, "mpls_tc", NULL,
         1, 3,
@@ -333,6 +357,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_MPLS_TC, "OXM_OF_MPLS_TC",
         OFPUTIL_P_NXM_OF11_UP,
         OFPUTIL_P_NONE,
+        -1,
     }, {
         MFF_MPLS_BOS, "mpls_bos", NULL,
         1, 1,
@@ -344,6 +369,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_MPLS_BOS, "OXM_OF_MPLS_BOS",
         OFPUTIL_P_NXM_OXM_ANY,
         OFPUTIL_P_NONE,
+        -1,
     },
 
     /* ## -- ## */
@@ -361,6 +387,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_IPV4_SRC, "OXM_OF_IPV4_SRC",
         OFPUTIL_P_ANY,
         OFPUTIL_P_NXM_OF11_UP,
+        FLOW_U32OFS(nw_src),
     }, {
         MFF_IPV4_DST, "ip_dst", "nw_dst",
         MF_FIELD_SIZES(be32),
@@ -372,6 +399,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_IPV4_DST, "OXM_OF_IPV4_DST",
         OFPUTIL_P_ANY,
         OFPUTIL_P_NXM_OF11_UP,
+        FLOW_U32OFS(nw_dst),
     },
 
     {
@@ -385,6 +413,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_IPV6_SRC, "OXM_OF_IPV6_SRC",
         OFPUTIL_P_NXM_OXM_ANY,
         OFPUTIL_P_NXM_OXM_ANY,
+        FLOW_U32OFS(ipv6_src),
     }, {
         MFF_IPV6_DST, "ipv6_dst", NULL,
         MF_FIELD_SIZES(ipv6),
@@ -396,6 +425,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_IPV6_DST, "OXM_OF_IPV6_DST",
         OFPUTIL_P_NXM_OXM_ANY,
         OFPUTIL_P_NXM_OXM_ANY,
+        FLOW_U32OFS(ipv6_dst),
     },
     {
         MFF_IPV6_LABEL, "ipv6_label", NULL,
@@ -408,6 +438,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_IPV6_FLABEL, "OXM_OF_IPV6_FLABEL",
         OFPUTIL_P_NXM_OXM_ANY,
         OFPUTIL_P_NXM_OXM_ANY,
+        -1,
     },
 
     {
@@ -421,6 +452,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_IP_PROTO, "OXM_OF_IP_PROTO",
         OFPUTIL_P_ANY,
         OFPUTIL_P_NONE,
+        -1,
     }, {
         MFF_IP_DSCP, "nw_tos", NULL,
         MF_FIELD_SIZES(u8),
@@ -432,6 +464,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         NXM_OF_IP_TOS, "NXM_OF_IP_TOS",
         OFPUTIL_P_ANY,   /* Will be shifted for OXM. */
         OFPUTIL_P_NONE,
+        -1,
     }, {
         MFF_IP_DSCP_SHIFTED, "ip_dscp", NULL,
         1, 6,
@@ -443,6 +476,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_IP_DSCP, "OXM_OF_IP_DSCP",
         OFPUTIL_P_ANY,   /* Will be shifted for non-OXM. */
         OFPUTIL_P_NONE,
+        -1,
     }, {
         MFF_IP_ECN, "nw_ecn", "ip_ecn",
         1, 2,
@@ -454,6 +488,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_IP_ECN, "OXM_OF_IP_ECN",
         OFPUTIL_P_NXM_OXM_ANY,
         OFPUTIL_P_NONE,
+        -1,
     }, {
         MFF_IP_TTL, "nw_ttl", NULL,
         MF_FIELD_SIZES(u8),
@@ -465,6 +500,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         NXM_NX_IP_TTL, "NXM_NX_IP_TTL",
         OFPUTIL_P_NXM_OXM_ANY,
         OFPUTIL_P_NONE,
+        -1,
     }, {
         MFF_IP_FRAG, "ip_frag", NULL,
         1, 2,
@@ -476,6 +512,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         NXM_NX_IP_FRAG, "NXM_NX_IP_FRAG",
         OFPUTIL_P_NXM_OXM_ANY,
         OFPUTIL_P_NXM_OXM_ANY,
+        -1,
     },
 
     {
@@ -489,6 +526,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_ARP_OP, "OXM_OF_ARP_OP",
         OFPUTIL_P_ANY,
         OFPUTIL_P_NONE,
+        -1,
     }, {
         MFF_ARP_SPA, "arp_spa", NULL,
         MF_FIELD_SIZES(be32),
@@ -500,6 +538,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_ARP_SPA, "OXM_OF_ARP_SPA",
         OFPUTIL_P_ANY,
         OFPUTIL_P_NXM_OF11_UP,
+        -1,
     }, {
         MFF_ARP_TPA, "arp_tpa", NULL,
         MF_FIELD_SIZES(be32),
@@ -511,6 +550,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_ARP_TPA, "OXM_OF_ARP_TPA",
         OFPUTIL_P_ANY,
         OFPUTIL_P_NXM_OF11_UP,
+        -1,
     }, {
         MFF_ARP_SHA, "arp_sha", NULL,
         MF_FIELD_SIZES(mac),
@@ -522,6 +562,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_ARP_SHA, "OXM_OF_ARP_SHA",
         OFPUTIL_P_NXM_OXM_ANY,
         OFPUTIL_P_NXM_OXM_ANY,
+        -1,
     }, {
         MFF_ARP_THA, "arp_tha", NULL,
         MF_FIELD_SIZES(mac),
@@ -533,6 +574,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_ARP_THA, "OXM_OF_ARP_THA",
         OFPUTIL_P_NXM_OXM_ANY,
         OFPUTIL_P_NXM_OXM_ANY,
+        -1,
     },
 
     /* ## -- ## */
@@ -550,6 +592,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_TCP_SRC, "OXM_OF_TCP_SRC",
         OFPUTIL_P_ANY,
         OFPUTIL_P_NXM_OXM_ANY,
+        -1,
     }, {
         MFF_TCP_DST, "tcp_dst", "tp_dst",
         MF_FIELD_SIZES(be16),
@@ -561,6 +604,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_TCP_DST, "OXM_OF_TCP_DST",
         OFPUTIL_P_ANY,
         OFPUTIL_P_NXM_OXM_ANY,
+        -1,
     }, {
         MFF_TCP_FLAGS, "tcp_flags", NULL,
         2, 12,
@@ -572,6 +616,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         NXM_NX_TCP_FLAGS, "NXM_NX_TCP_FLAGS",
         OFPUTIL_P_NXM_OXM_ANY,
         OFPUTIL_P_NXM_OXM_ANY,
+        -1,
     },
 
     {
@@ -585,6 +630,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_UDP_SRC, "OXM_OF_UDP_SRC",
         OFPUTIL_P_ANY,
         OFPUTIL_P_NXM_OXM_ANY,
+        -1,
     }, {
         MFF_UDP_DST, "udp_dst", NULL,
         MF_FIELD_SIZES(be16),
@@ -596,6 +642,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_UDP_DST, "OXM_OF_UDP_DST",
         OFPUTIL_P_ANY,
         OFPUTIL_P_NXM_OXM_ANY,
+        -1,
     },
 
     {
@@ -609,6 +656,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_SCTP_SRC, "OXM_OF_SCTP_SRC",
         OFPUTIL_P_NXM_OF11_UP,
         OFPUTIL_P_NXM_OXM_ANY,
+        -1,
     }, {
         MFF_SCTP_DST, "sctp_dst", NULL,
         MF_FIELD_SIZES(be16),
@@ -620,6 +668,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_SCTP_DST, "OXM_OF_SCTP_DST",
         OFPUTIL_P_NXM_OF11_UP,
         OFPUTIL_P_NXM_OXM_ANY,
+        -1,
     },
 
     {
@@ -633,6 +682,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_ICMPV4_TYPE, "OXM_OF_ICMPV4_TYPE",
         OFPUTIL_P_ANY,
         OFPUTIL_P_NONE,
+        -1,
     }, {
         MFF_ICMPV4_CODE, "icmp_code", NULL,
         MF_FIELD_SIZES(u8),
@@ -644,6 +694,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_ICMPV4_CODE, "OXM_OF_ICMPV4_CODE",
         OFPUTIL_P_ANY,
         OFPUTIL_P_NONE,
+        -1,
     },
 
     {
@@ -657,6 +708,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_ICMPV6_TYPE, "OXM_OF_ICMPV6_TYPE",
         OFPUTIL_P_NXM_OXM_ANY,
         OFPUTIL_P_NONE,
+        -1,
     }, {
         MFF_ICMPV6_CODE, "icmpv6_code", NULL,
         MF_FIELD_SIZES(u8),
@@ -668,6 +720,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_ICMPV6_CODE, "OXM_OF_ICMPV6_CODE",
         OFPUTIL_P_NXM_OXM_ANY,
         OFPUTIL_P_NONE,
+        -1,
     },
 
     /* ## ---- ## */
@@ -685,6 +738,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_IPV6_ND_TARGET, "OXM_OF_IPV6_ND_TARGET",
         OFPUTIL_P_NXM_OXM_ANY,
         OFPUTIL_P_NXM_OXM_ANY,
+        -1,
     }, {
         MFF_ND_SLL, "nd_sll", NULL,
         MF_FIELD_SIZES(mac),
@@ -696,6 +750,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_IPV6_ND_SLL, "OXM_OF_IPV6_ND_SLL",
         OFPUTIL_P_NXM_OXM_ANY,
         OFPUTIL_P_NXM_OXM_ANY,
+        -1,
     }, {
         MFF_ND_TLL, "nd_tll", NULL,
         MF_FIELD_SIZES(mac),
@@ -707,6 +762,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = {
         OXM_OF_IPV6_ND_TLL, "OXM_OF_IPV6_ND_TLL",
         OFPUTIL_P_NXM_OXM_ANY,
         OFPUTIL_P_NXM_OXM_ANY,
+        -1,
     }
 };
 
diff --git a/lib/meta-flow.h b/lib/meta-flow.h
index c2b0bbc..d17eb85 100644
--- a/lib/meta-flow.h
+++ b/lib/meta-flow.h
@@ -296,6 +296,9 @@ struct mf_field {
     enum ofputil_protocol usable_protocols; /* If fully/cidr masked. */
     /* If partially/non-cidr masked. */
     enum ofputil_protocol usable_protocols_bitwise;
+
+    int flow_u32ofs;  /* Field's U32 offset in "struct flow", if prefix tree
+                       * lookup is supported for the field, or -1. */
 };
 
 /* The representation of a field's value. */
diff --git a/lib/ofp-util.h b/lib/ofp-util.h
index fef85e0..36d8801 100644
--- a/lib/ofp-util.h
+++ b/lib/ofp-util.h
@@ -20,9 +20,9 @@
 #include <stdbool.h>
 #include <stddef.h>
 #include <stdint.h>
-#include "classifier.h"
 #include "compiler.h"
 #include "flow.h"
+#include "list.h"
 #include "match.h"
 #include "netdev.h"
 #include "openflow/nicira-ext.h"
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index 5cd6b1e..d5ee99c 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -1138,7 +1138,7 @@ ofproto_configure_table(struct ofproto *ofproto, int table_id,
     }
 
     table->max_flows = s->max_flows;
-    ovs_rwlock_rdlock(&table->cls.rwlock);
+    ovs_rwlock_wrlock(&table->cls.rwlock);
     if (classifier_count(&table->cls) > table->max_flows
         && table->eviction_fields) {
         /* 'table' contains more flows than allowed.  We might not be able to
@@ -1154,6 +1154,10 @@ ofproto_configure_table(struct ofproto *ofproto, int table_id,
             break;
         }
     }
+
+    classifier_set_prefix_fields(&table->cls,
+                                 s->prefix_fields, s->n_prefix_fields);
+
     ovs_rwlock_unlock(&table->cls.rwlock);
 }
 
diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h
index 903d1f4..2a680c3 100644
--- a/ofproto/ofproto.h
+++ b/ofproto/ofproto.h
@@ -23,7 +23,9 @@
 #include <stddef.h>
 #include <stdint.h>
 #include "cfm.h"
+#include "classifier.h"
 #include "flow.h"
+#include "meta-flow.h"
 #include "netflow.h"
 #include "sset.h"
 #include "stp.h"
@@ -381,6 +383,12 @@ struct ofproto_table_settings {
      * distinguished by different values for the subfields within 'groups'. */
     struct mf_subfield *groups;
     size_t n_groups;
+
+    /*
+     * Fields for which prefix trie lookup is maintained.
+     */
+    unsigned int n_prefix_fields;
+    enum mf_field_id prefix_fields[CLS_MAX_TRIES];
 };
 
 int ofproto_get_n_tables(const struct ofproto *);
diff --git a/tests/classifier.at b/tests/classifier.at
index 546c8f7..0aa723f 100644
--- a/tests/classifier.at
+++ b/tests/classifier.at
@@ -27,6 +27,7 @@ AT_BANNER([flow classifier lookup segmentation])
 AT_SETUP([flow classifier - lookup segmentation])
 OVS_VSWITCHD_START
 ADD_OF_PORTS([br0], [1], [2], [3])
+AT_CHECK([ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- --id=@N1 create Flow_Table name=t0 fieldspec:prefix=nw_dst,nw_src], [0], [ignore], [])
 AT_DATA([flows.txt], [dnl
 table=0 in_port=1 priority=16,tcp,nw_dst=10.1.0.0/255.255.0.0,action=output(3)
 table=0 in_port=1 priority=32,tcp,nw_dst=10.1.2.15,action=output(2)
@@ -45,7 +46,7 @@ Datapath actions: 1
 ])
 AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout])
 AT_CHECK([tail -2 stdout], [0],
-  [Relevant fields: skb_priority=0,tcp,in_port=1,nw_dst=192.168.0.2,nw_frag=no
+  [Relevant fields: skb_priority=0,tcp,in_port=1,nw_dst=192.168.0.0/16,nw_frag=no
 Datapath actions: drop
 ])
 AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout])
diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
index b78e156..a23e2ec 100644
--- a/tests/ofproto-dpif.at
+++ b/tests/ofproto-dpif.at
@@ -2573,6 +2573,7 @@ AT_CLEANUP
 AT_SETUP([ofproto-dpif megaflow - L3 classification])
 OVS_VSWITCHD_START
 ADD_OF_PORTS([br0], [1], [2])
+AT_CHECK([ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- --id=@N1 create Flow_Table name=t0 fieldspec:prefix=nw_dst,nw_src], [0], [ignore], [])
 AT_DATA([flows.txt], [dnl
 table=0 in_port=1,icmp,nw_src=10.0.0.4 actions=output(2)
 ])
@@ -2580,7 +2581,7 @@ AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
 AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
 AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
 AT_CHECK([ovs-appctl dpif/dump-flows br0 | STRIP_XOUT], [0], [dnl
-skb_priority(0),in_port(1),eth_type(0x0800),ipv4(src=10.0.0.2/255.255.255.255,dst=10.0.0.1/0.0.0.0,proto=1/0xff,tos=0/0,ttl=64/0,frag=no/0xff), packets:0, bytes:0, used:0.0s, actions: <del>
+skb_priority(0),in_port(1),eth_type(0x0800),ipv4(src=10.0.0.2/255.255.255.252,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff), packets:0, bytes:0, used:0.0s, actions: <del>
 skb_priority(0),in_port(1),eth_type(0x0800),ipv4(src=10.0.0.4/255.255.255.255,dst=10.0.0.3/0.0.0.0,proto=1/0xff,tos=0/0,ttl=64/0,frag=no/0xff), packets:0, bytes:0, used:0.0s, actions: <del>
 ])
 OVS_VSWITCHD_STOP
@@ -2914,6 +2915,7 @@ AT_CLEANUP
 AT_SETUP([ofproto-dpif megaflow - dec_ttl])
 OVS_VSWITCHD_START
 ADD_OF_PORTS([br0], [1], [2])
+AT_CHECK([ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- --id=@N1 create Flow_Table name=t0 fieldspec:prefix=nw_dst,nw_src], [0], [ignore], [])
 AT_DATA([flows.txt], [dnl
 table=0 in_port=1,icmp,nw_src=10.0.0.4 actions=dec_ttl,output(2)
 ])
@@ -2921,7 +2923,7 @@ AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
 AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
 AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
 AT_CHECK([ovs-appctl dpif/dump-flows br0 | STRIP_XOUT], [0], [dnl
-skb_priority(0),in_port(1),eth_type(0x0800),ipv4(src=10.0.0.2/255.255.255.255,dst=10.0.0.1/0.0.0.0,proto=1/0xff,tos=0/0,ttl=64/0,frag=no/0xff), packets:0, bytes:0, used:0.0s, actions: <del>
+skb_priority(0),in_port(1),eth_type(0x0800),ipv4(src=10.0.0.2/255.255.255.252,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff), packets:0, bytes:0, used:0.0s, actions: <del>
 skb_priority(0),in_port(1),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no), packets:0, bytes:0, used:0.0s, actions: <del>
 ])
 OVS_VSWITCHD_STOP
diff --git a/tests/test-classifier.c b/tests/test-classifier.c
index ee7e76c..7555feb 100644
--- a/tests/test-classifier.c
+++ b/tests/test-classifier.c
@@ -609,6 +609,10 @@ shuffle_u32s(uint32_t *p, size_t n)
 
 /* Classifier tests. */
 
+static enum mf_field_id trie_fields[2] = {
+    MFF_IPV4_DST, MFF_IPV4_SRC
+};
+
 /* Tests an empty classifier. */
 static void
 test_empty(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
@@ -617,7 +621,8 @@ test_empty(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
     struct tcls tcls;
 
     classifier_init(&cls, flow_segment_u32s);
-    ovs_rwlock_rdlock(&cls.rwlock);
+    ovs_rwlock_wrlock(&cls.rwlock);
+    classifier_set_prefix_fields(&cls, trie_fields, ARRAY_SIZE(trie_fields));
     tcls_init(&tcls);
     assert(classifier_is_empty(&cls));
     assert(tcls_is_empty(&tcls));
@@ -650,6 +655,8 @@ test_single_rule(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
 
         classifier_init(&cls, flow_segment_u32s);
         ovs_rwlock_wrlock(&cls.rwlock);
+        classifier_set_prefix_fields(&cls, trie_fields,
+                                     ARRAY_SIZE(trie_fields));
         tcls_init(&tcls);
 
         tcls_rule = tcls_insert(&tcls, rule);
@@ -689,6 +696,8 @@ test_rule_replacement(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
 
         classifier_init(&cls, flow_segment_u32s);
         ovs_rwlock_wrlock(&cls.rwlock);
+        classifier_set_prefix_fields(&cls, trie_fields,
+                                     ARRAY_SIZE(trie_fields));
         tcls_init(&tcls);
         tcls_insert(&tcls, rule1);
         classifier_insert(&cls, &rule1->cls_rule);
@@ -801,6 +810,8 @@ test_many_rules_in_one_list (int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
 
             classifier_init(&cls, flow_segment_u32s);
             ovs_rwlock_wrlock(&cls.rwlock);
+            classifier_set_prefix_fields(&cls, trie_fields,
+                                         ARRAY_SIZE(trie_fields));
             tcls_init(&tcls);
 
             for (i = 0; i < ARRAY_SIZE(ops); i++) {
@@ -903,6 +914,8 @@ test_many_rules_in_one_table(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
 
         classifier_init(&cls, flow_segment_u32s);
         ovs_rwlock_wrlock(&cls.rwlock);
+        classifier_set_prefix_fields(&cls, trie_fields,
+                                     ARRAY_SIZE(trie_fields));
         tcls_init(&tcls);
 
         for (i = 0; i < N_RULES; i++) {
@@ -965,6 +978,8 @@ test_many_rules_in_n_tables(int n_tables)
 
         classifier_init(&cls, flow_segment_u32s);
         ovs_rwlock_wrlock(&cls.rwlock);
+        classifier_set_prefix_fields(&cls, trie_fields,
+                                     ARRAY_SIZE(trie_fields));
         tcls_init(&tcls);
 
         for (i = 0; i < MAX_RULES; i++) {
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index 6ce7d2b..ed8f997 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -3135,6 +3135,44 @@ bridge_configure_remotes(struct bridge *br,
     }
 }
 
+static unsigned int
+parse_field_list(const char *str, enum mf_field_id fields[], unsigned int size,
+                 const char *brname)
+{
+    char *string, *name, *save_ptr;
+    unsigned int n_fields = 0;
+
+    if (!str) {
+        return 0;
+    }
+
+    string = strdup(str);
+
+    for (name = strtok_r(string, ",", &save_ptr); name;
+         name = strtok_r(NULL, ",", &save_ptr)) {
+
+        const struct mf_field *mf = mf_from_name(name);
+        if (!mf) {
+            VLOG_WARN("bridge %s: fieldspec:prefix Unknown field: %s",
+                      brname, name);
+            continue;
+        }
+        if (mf->flow_u32ofs < 0) {
+            VLOG_WARN("bridge %s: Field %s not compatible with "
+                      "prefix lookup.", brname, name);
+            continue;
+        }
+        if (n_fields >= size) {
+            VLOG_WARN("bridge %s: Too many prefix fields, field not used: %s.",
+                      brname, name);
+            continue;
+        }
+        fields[n_fields++] = mf->id;
+    }
+    free(string);
+    return n_fields;
+}
+
 static void
 bridge_configure_tables(struct bridge *br)
 {
@@ -3151,6 +3189,8 @@ bridge_configure_tables(struct bridge *br)
         s.max_flows = UINT_MAX;
         s.groups = NULL;
         s.n_groups = 0;
+        s.n_prefix_fields = 0;
+        memset(s.prefix_fields, ~0, sizeof(s.prefix_fields));
 
         if (j < br->cfg->n_flow_tables && i == br->cfg->key_flow_tables[j]) {
             struct ovsrec_flow_table *cfg = br->cfg->value_flow_tables[j++];
@@ -3182,6 +3222,26 @@ bridge_configure_tables(struct bridge *br)
                     }
                 }
             }
+            /* Parse fieldspec. */
+            s.n_prefix_fields = parse_field_list(smap_get(&cfg->fieldspec,
+                                                          "prefix"),
+                                                 s.prefix_fields,
+                                                 ARRAY_SIZE(s.prefix_fields),
+                                                 br->name);
+            if (s.n_prefix_fields > 0) {
+                int k;
+                struct ds ds = DS_EMPTY_INITIALIZER;
+                for (k = 0; k < s.n_prefix_fields; k++) {
+                    if (k) {
+                        ds_put_char(&ds, ',');
+                    }
+                    ds_put_cstr(&ds, mf_from_id(s.prefix_fields[k])->name);
+                }
+
+                VLOG_INFO("bridge %s table %d: Prefix lookup with: %s.",
+                          br->name, i, ds_cstr(&ds));
+                ds_destroy(&ds);
+            }
         }
 
         ofproto_configure_table(br->ofproto, i, &s);
diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema
index 9392457..f7179a1 100644
--- a/vswitchd/vswitch.ovsschema
+++ b/vswitchd/vswitch.ovsschema
@@ -1,6 +1,6 @@
 {"name": "Open_vSwitch",
- "version": "7.3.0",
- "cksum": "2495231516 20311",
+ "version": "7.4.0",
+ "cksum": "87789930 20416",
  "tables": {
    "Open_vSwitch": {
      "columns": {
@@ -300,7 +300,10 @@
 			  "enum": ["set", ["refuse", "evict"]]},
 		  "min": 0, "max": 1}},
        "groups": {
-	 "type": {"key": "string", "min": 0, "max": "unlimited"}}}},
+	 "type": {"key": "string", "min": 0, "max": "unlimited"}},
+       "fieldspec": {
+	 "type": {"key": "string", "value": "string",
+		  "min": 0, "max": "unlimited"}}}},
    "QoS": {
      "columns": {
        "type": {
-- 
1.7.10.4




More information about the dev mailing list