[ovs-dev] [PATCH 1/3] Classifier: Staged sub-table matching.

Jarno Rajahalme <jrajahalme@nicira.com>
Fri Nov 1 22:20:10 UTC 2013


v2: Properly finish hashes, more flexible configuration.
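
This patch makes the classifier optionally perform each subtable lookup
in stages.  struct flow is reordered into L1/L2/L3/L4 groups, and each
subtable keeps a hash index (struct hindex) over the prefixes of its
mask cut at the flow segment boundaries given to classifier_init()
(prefixes that add no mask bits are skipped).  find_match() hashes the
packet one segment at a time, carrying the unfinished hash forward as
the basis for the next segment, and stops as soon as an index has no
node for the finished per-segment hash, un-wildcarding only the fields
covered so far; it also short-circuits when an index narrows the
candidates down to a single rule.

classifier_init() now takes the u32 offsets of the segment boundaries
(NULL keeps the old single-stage lookup), so the updated call sites
reduce to:

    /* Staged lookup with the default boundaries (OpenFlow tables): */
    classifier_init(&table->cls, flow_segment_u32s);

    /* Staged lookup disabled (facet table, ovs-ofctl): */
    classifier_init(&cls, NULL);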

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
---
 lib/classifier.c             |  174 +++++++++++++++++++++++++++++++++++-------
 lib/classifier.h             |   12 ++-
 lib/flow.c                   |  163 ++++++++++++++++++++++++++++++++++++++-
 lib/flow.h                   |   82 +++++++++++++++-----
 lib/match.c                  |    2 +-
 lib/nx-match.c               |    2 +-
 lib/ofp-util.c               |    2 +-
 ofproto/ofproto-dpif-xlate.c |    2 +-
 ofproto/ofproto-dpif.c       |    2 +-
 ofproto/ofproto.c            |    2 +-
 tests/classifier.at          |   38 +++++++++
 tests/test-classifier.c      |   12 +--
 utilities/ovs-ofctl.c        |    4 +-
 13 files changed, 433 insertions(+), 64 deletions(-)
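
The hashing pattern used by the new flow_hash_in_minimask_range() and
miniflow_hash_in_minimask_range() in lib/flow.c below may be easier to
see in isolation: each stage returns a properly finished hash for its
index lookup, while the unfinished running value is left in '*basis' so
that the next stage can extend it.  The stand-alone toy program below
is only an illustration of that pattern, not OVS code: toy_hash_add(),
toy_hash_finish() and the stage boundaries are made up, standing in for
mhash_add()/mhash_finish() and flow_segment_u32s[].

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t
    toy_hash_add(uint32_t hash, uint32_t data)
    {
        return (hash ^ data) * 0x01000193u;   /* FNV-1a style mixing. */
    }

    static uint32_t
    toy_hash_finish(uint32_t hash, uint32_t n_bytes)
    {
        hash = toy_hash_add(hash, n_bytes);   /* Mix in the length. */
        return hash ^ hash >> 16;             /* Final avalanche. */
    }

    /* Hashes 32-bit words [start, end), extending and updating '*basis',
     * and returns a finished hash for this stage. */
    static uint32_t
    toy_hash_range(const uint32_t *words, int start, int end,
                   uint32_t *basis)
    {
        uint32_t hash = *basis;
        int i;

        for (i = start; i < end; i++) {
            hash = toy_hash_add(hash, words[i]);
        }
        *basis = hash;          /* Unfinished, for the next stage. */
        return toy_hash_finish(hash, end * 4);  /* Cumulative length. */
    }

    int
    main(void)
    {
        const uint32_t flow[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
        const int ends[] = { 3, 5, 8 };     /* Made-up stage boundaries. */
        uint32_t basis = 0;
        int i, prev = 0;

        for (i = 0; i < 3; i++) {
            uint32_t hash = toy_hash_range(flow, prev, ends[i], &basis);

            printf("stage %d ends at word %d: hash 0x%08x\n",
                   i + 1, ends[i], (unsigned int) hash);
            prev = ends[i];
        }
        return 0;
    }

The real functions additionally walk the minimask's map so that only the
u32 words with a non-zero mask are hashed.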

diff --git a/lib/classifier.c b/lib/classifier.c
index 5587141..790bf8d 100644
--- a/lib/classifier.c
+++ b/lib/classifier.c
@@ -42,7 +42,8 @@ static void update_subtables_after_removal(struct classifier *,
                                            unsigned int del_priority);
 
 static struct cls_rule *find_match(const struct cls_subtable *,
-                                   const struct flow *);
+                                   const struct flow *,
+                                   struct flow_wildcards *);
 static struct cls_rule *find_equal(struct cls_subtable *,
                                    const struct miniflow *, uint32_t hash);
 static struct cls_rule *insert_rule(struct classifier *,
@@ -148,14 +149,25 @@ cls_rule_is_catchall(const struct cls_rule *rule)

 /* Initializes 'cls' as a classifier that initially contains no classification
  * rules. */
 void
-classifier_init(struct classifier *cls)
+classifier_init(struct classifier *cls, const uint8_t *flow_segments)
 {
     cls->n_rules = 0;
     hmap_init(&cls->subtables);
     list_init(&cls->subtables_priority);
     hmap_init(&cls->partitions);
     ovs_rwlock_init(&cls->rwlock);
+    cls->n_flow_segments = 0;
+    if (flow_segments) {
+        while (cls->n_flow_segments < CLS_MAX_INDICES
+               && *flow_segments < FLOW_U32S) {
+            cls->flow_segments[cls->n_flow_segments++] = *flow_segments++;
+        }
+        ovs_assert(cls->n_flow_segments == 3 &&
+                   cls->flow_segments[0] == FLOW_SEGMENT_1_ENDS_AT / 4 &&
+                   cls->flow_segments[1] == FLOW_SEGMENT_2_ENDS_AT / 4 &&
+                   cls->flow_segments[2] == FLOW_SEGMENT_3_ENDS_AT / 4);
+    }
 }
 
 /* Destroys 'cls'.  Rules within 'cls', if any, are not freed; this is the
@@ -224,6 +237,7 @@ create_partition(struct classifier *cls, struct cls_subtable *subtable,
 {
     uint32_t hash = hash_metadata(metadata);
     struct cls_partition *partition = find_partition(cls, metadata, hash);
+
     if (!partition) {
         partition = xmalloc(sizeof *partition);
         partition->metadata = metadata;
@@ -273,6 +287,7 @@ classifier_replace(struct classifier *cls, struct cls_rule *rule)
     } else {
         rule->partition = old_rule->partition;
     }
+
     return old_rule;
 }
 
@@ -298,8 +313,15 @@ classifier_remove(struct classifier *cls, struct cls_rule *rule)
     struct cls_partition *partition;
     struct cls_rule *head;
     struct cls_subtable *subtable;
+    int i;
 
     subtable = find_subtable(cls, &rule->match.mask);
+
+    /* Remove rule node from indices. */
+    for (i = 0; i < subtable->n_indices; i++) {
+        hindex_remove(&subtable->indices[i], &rule->index_nodes[i]);
+    }
+
     head = find_equal(subtable, &rule->match.flow, rule->hmap_node.hash);
     if (head != rule) {
         list_remove(&rule->list);
@@ -380,10 +402,7 @@ classifier_lookup(const struct classifier *cls, const struct flow *flow,
             continue;
         }
 
-        rule = find_match(subtable, flow);
-        if (wc) {
-            flow_wildcards_fold_minimask(wc, &subtable->mask);
-        }
+        rule = find_match(subtable, flow, wc);
         if (rule) {
             best = rule;
             LIST_FOR_EACH_CONTINUE (subtable, list_node,
@@ -397,10 +416,7 @@ classifier_lookup(const struct classifier *cls, const struct flow *flow,
                     continue;
                 }
 
-                rule = find_match(subtable, flow);
-                if (wc) {
-                    flow_wildcards_fold_minimask(wc, &subtable->mask);
-                }
+                rule = find_match(subtable, flow, wc);
                 if (rule && rule->priority > best->priority) {
                     best = rule;
                 }
@@ -657,15 +673,47 @@ insert_subtable(struct classifier *cls, const struct minimask *mask)
 {
     uint32_t hash = minimask_hash(mask, 0);
     struct cls_subtable *subtable;
+    int i, index = 0;
+    struct flow_wildcards old, new;
+    uint8_t prev;
 
     subtable = xzalloc(sizeof *subtable);
     hmap_init(&subtable->rules);
     minimask_clone(&subtable->mask, mask);
-    hmap_insert(&cls->subtables, &subtable->hmap_node, minimask_hash(mask, 0));
+
+    /* Init indices for segmented lookup, if any. */
+    flow_wildcards_init_catchall(&new);
+    old = new;
+    prev = 0;
+    for (i = 0; i < cls->n_flow_segments; i++) {
+        flow_wildcards_fold_minimask_range(&new, mask, prev,
+                                           cls->flow_segments[i]);
+        /* Add an index if it adds mask bits. */
+        if (!flow_wildcards_equal(&new, &old)) {
+            hindex_init(&subtable->indices[index]);
+            subtable->index_ofs[index] = cls->flow_segments[i];
+            index++;
+            old = new;
+        }
+        prev = cls->flow_segments[i];
+    }
+    /* Check if the rest of the subtable's mask adds any bits,
+     * and remove the last index if it doesn't. */
+    if (index > 0) {
+        flow_wildcards_fold_minimask_range(&new, mask, prev, FLOW_U32S);
+        if (flow_wildcards_equal(&new, &old)) {
+            --index;
+            subtable->index_ofs[index] = 0;
+            hindex_destroy(&subtable->indices[index]);
+        }
+    }
+    subtable->n_indices = index;
+
+    hmap_insert(&cls->subtables, &subtable->hmap_node, hash);
     list_push_back(&cls->subtables_priority, &subtable->list_node);
     subtable->tag = (minimask_get_metadata_mask(mask) == OVS_BE64_MAX
                      ? tag_create_deterministic(hash)
                      : TAG_ALL);
 
     return subtable;
 }
@@ -673,6 +721,11 @@ insert_subtable(struct classifier *cls, const struct minimask *mask)
 static void
 destroy_subtable(struct classifier *cls, struct cls_subtable *subtable)
 {
+    int i;
+
+    for (i = 0; i < subtable->n_indices; i++) {
+        hindex_destroy(&subtable->indices[i]);
+    }
     minimask_destroy(&subtable->mask);
     hmap_remove(&cls->subtables, &subtable->hmap_node);
     hmap_destroy(&subtable->rules);
@@ -775,18 +828,64 @@ update_subtables_after_removal(struct classifier *cls,
 }
 
 static struct cls_rule *
-find_match(const struct cls_subtable *subtable, const struct flow *flow)
-{
-    uint32_t hash = flow_hash_in_minimask(flow, &subtable->mask, 0);
-    struct cls_rule *rule;
-
-    HMAP_FOR_EACH_WITH_HASH (rule, hmap_node, hash, &subtable->rules) {
-        if (minimatch_matches_flow(&rule->match, flow)) {
-            return rule;
+find_match(const struct cls_subtable *subtable, const struct flow *flow,
+           struct flow_wildcards *wc)
+{
+    uint32_t basis = 0, hash;
+    struct cls_rule *rule = NULL;
+    int i;
+    uint8_t prev_u32ofs = 0;
+
+    /* Try to finish early by checking fields in segments. */
+    for (i = 0; i < subtable->n_indices; i++) {
+        struct hindex_node *inode;
+
+        hash = flow_hash_in_minimask_range(flow, &subtable->mask, prev_u32ofs,
+                                           subtable->index_ofs[i],
+                                           &basis);
+        prev_u32ofs = subtable->index_ofs[i];
+        inode = hindex_node_with_hash(&subtable->indices[i], hash);
+        if (!inode) {
+            /* No match, can stop immediately, but must fold in the mask
+             * covered so far. */
+            if (wc) {
+                flow_wildcards_fold_minimask_range(wc, &subtable->mask, 0,
+                                                   prev_u32ofs);
+            }
+            return NULL;
+        }
+        /* Check if we have narrowed down to a single rule already.  This
+         * check shows a measurable benefit with non-trivial flow tables. */
+        if (!inode->s) {
+            struct cls_rule *rl;
+            /* Found single candidate. */
+            ASSIGN_CONTAINER(rl, inode - i, index_nodes);
+            /* Do not check same rule again. */
+            if (rl != rule) {
+                rule = rl; /* Update last rule we looked at. */
+                if (minimatch_matches_flow(&rule->match, flow)) {
+                    /* Found match, no need to look further. */
+                    goto out;
+                }
+            }
         }
     }
-
-    return NULL;
+    /* Have 'rule' already if there is a single non-matching rule. */
+    if (!rule) {
+        hash = flow_hash_in_minimask_range(flow, &subtable->mask, prev_u32ofs,
+                                           FLOW_U32S, &basis);
+        HMAP_FOR_EACH_WITH_HASH (rule, hmap_node, hash, &subtable->rules) {
+            if (minimatch_matches_flow(&rule->match, flow)) {
+                goto out;
+            }
+        }
+    }
+    rule = NULL;
+ out:
+    if (wc) {
+        flow_wildcards_fold_minimask(wc, &subtable->mask);
+    }
+    return rule;
 }
 
 static struct cls_rule *
@@ -809,19 +908,35 @@ insert_rule(struct classifier *cls, struct cls_subtable *subtable,
 {
     struct cls_rule *head;
     struct cls_rule *old = NULL;
+    int i;
+    uint32_t basis = 0, hash;
+    uint8_t prev_u32ofs = 0;
+
+    /* Add new node to segment indices. */
+    for (i = 0; i < subtable->n_indices; i++) {
+        hash = miniflow_hash_in_minimask_range(&new->match.flow,
+                                               &new->match.mask, prev_u32ofs,
+                                               subtable->index_ofs[i],
+                                               &basis);
+        hindex_insert(&subtable->indices[i], &new->index_nodes[i], hash);
+        prev_u32ofs = subtable->index_ofs[i];
+    }
 
-    new->hmap_node.hash = miniflow_hash_in_minimask(&new->match.flow,
-                                                    &new->match.mask, 0);
+    hash = miniflow_hash_in_minimask_range(&new->match.flow, &new->match.mask,
+                                           prev_u32ofs, FLOW_U32S, &basis);
 
-    head = find_equal(subtable, &new->match.flow, new->hmap_node.hash);
+    head = find_equal(subtable, &new->match.flow, hash);
     if (!head) {
-        hmap_insert(&subtable->rules, &new->hmap_node, new->hmap_node.hash);
+        hmap_insert(&subtable->rules, &new->hmap_node, hash);
         list_init(&new->list);
         goto out;
     } else {
         /* Scan the list for the insertion point that will keep the list in
          * order of decreasing priority. */
         struct cls_rule *rule;
+
+        new->hmap_node.hash = hash; /* Otherwise done by hmap_insert. */
+
         FOR_EACH_RULE_IN_LIST (rule, head) {
             if (new->priority >= rule->priority) {
                 if (rule == head) {
@@ -848,6 +963,11 @@ insert_rule(struct classifier *cls, struct cls_subtable *subtable,
  out:
     if (!old) {
         update_subtables_after_insertion(cls, subtable, new->priority);
+    } else {
+        /* Remove old node from indices. */
+        for (i = 0; i < subtable->n_indices; i++) {
+            hindex_remove(&subtable->indices[i], &old->index_nodes[i]);
+        }
     }
     return old;
 }
diff --git a/lib/classifier.h b/lib/classifier.h
index 671bc80..f2a2074 100644
--- a/lib/classifier.h
+++ b/lib/classifier.h
@@ -104,6 +104,7 @@
  * - Only the main thread is allowed to iterate over rules. */
 
 #include "flow.h"
+#include "hindex.h"
 #include "hmap.h"
 #include "list.h"
 #include "match.h"
@@ -120,9 +121,14 @@ extern "C" {
 /* Needed only for the lock annotation in struct classifier. */
 extern struct ovs_mutex ofproto_mutex;
 
+enum { CLS_MAX_INDICES = 3 };
+
 /* A flow classifier. */
 struct classifier {
     int n_rules;                /* Total number of rules. */
+    uint8_t n_flow_segments;
+    uint8_t flow_segments[CLS_MAX_INDICES]; /* Flow segment boundaries to use
+                                             * for segmented lookup. */
     struct hmap subtables;      /* Contains "struct cls_subtable"s.  */
     struct list subtables_priority; /* Subtables in descending priority order.
                                      */
@@ -142,6 +148,9 @@ struct cls_subtable {
     unsigned int max_priority;  /* Max priority of any rule in the subtable. */
     unsigned int max_count;     /* Count of max_priority rules. */
     tag_type tag;               /* Tag generated from mask for partitioning. */
+    uint8_t n_indices;           /* How many indices to use. */
+    uint8_t index_ofs[CLS_MAX_INDICES]; /* u32 flow segment boundaries. */
+    struct hindex indices[CLS_MAX_INDICES]; /* Search indices. */
 };
 
 /* Returns true if 'table' is a "catch-all" subtable that will match every
@@ -159,6 +168,7 @@ struct cls_rule {
     struct minimatch match;     /* Matching rule. */
     unsigned int priority;      /* Larger numbers are higher priorities. */
     struct cls_partition *partition;
+    struct hindex_node index_nodes[CLS_MAX_INDICES];
 };
 
 /* Associates a metadata value (that is, a value of the OpenFlow 1.1+ metadata
@@ -189,7 +199,7 @@ bool cls_rule_is_catchall(const struct cls_rule *);
 bool cls_rule_is_loose_match(const struct cls_rule *rule,
                              const struct minimatch *criteria);
 
-void classifier_init(struct classifier *cls);
+void classifier_init(struct classifier *cls, const uint8_t *flow_segments);
 void classifier_destroy(struct classifier *);
 bool classifier_is_empty(const struct classifier *cls)
     OVS_REQ_RDLOCK(cls->rwlock);
diff --git a/lib/flow.c b/lib/flow.c
index 31fd07c..cef2dfc 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -41,6 +41,14 @@
 COVERAGE_DEFINE(flow_extract);
 COVERAGE_DEFINE(miniflow_malloc);
 
+/* U32 indices for segmented flow classification. */
+const uint8_t flow_segment_u32s[4] = {
+    FLOW_SEGMENT_1_ENDS_AT / 4,
+    FLOW_SEGMENT_2_ENDS_AT / 4,
+    FLOW_SEGMENT_3_ENDS_AT / 4,
+    FLOW_U32S
+};
+
 static struct arp_eth_header *
 pull_arp(struct ofpbuf *packet)
 {
@@ -515,7 +523,7 @@ flow_zero_wildcards(struct flow *flow, const struct flow_wildcards *wildcards)
 void
 flow_get_metadata(const struct flow *flow, struct flow_metadata *fmd)
 {
-    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 22);
+    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 23);
 
     fmd->tun_id = flow->tunnel.tun_id;
     fmd->tun_src = flow->tunnel.ip_src;
@@ -683,6 +691,58 @@ flow_wildcards_fold_minimask(struct flow_wildcards *wc,
     flow_union_with_miniflow(&wc->masks, &mask->masks);
 }
 
+/* Perform a bitwise OR of miniflow 'src' flow data in range [start, end)
+ * with the equivalent fields in 'dst', storing the result in 'dst'. */
+static void
+flow_union_with_miniflow_range(struct flow *dst, const struct miniflow *src,
+                               uint8_t start, uint8_t end)
+{
+    uint32_t *dst_u32 = (uint32_t *) dst;
+    const uint32_t *p = src->values;
+    int i = 0;
+    int end_ofs = end;
+    uint32_t msk;
+
+    while (start >= 32) {
+        p += popcount(src->map[i]);
+        i++;
+        start -= 32;
+        end_ofs -= 32;
+        dst_u32 += 32;
+    }
+    msk = (1u << start) - 1; /* 'start' LSBs set */
+
+    for (; i < MINI_N_MAPS; i++) {
+        uint32_t map = src->map[i];
+
+        if (start > 0) {
+            p += popcount(map & msk);  /* Skip to start. */
+            map &= ~msk;
+            start = 0;
+        }
+
+        for (; map; map = zero_rightmost_1bit(map)) {
+            int ofs = raw_ctz(map);
+            if (ofs >= end_ofs) {
+                return;
+            }
+            dst_u32[ofs] |= *p++;
+        }
+        dst_u32 += 32;
+        end_ofs -= 32;
+    }
+}
+
+/* Fold minimask 'mask''s wildcard mask into 'wc''s wildcard mask
+ * in range [start, end). */
+void
+flow_wildcards_fold_minimask_range(struct flow_wildcards *wc,
+                                   const struct minimask *mask,
+                                   uint8_t start, uint8_t end)
+{
+    flow_union_with_miniflow_range(&wc->masks, &mask->masks, start, end);
+}
+
 /* Returns a hash of the wildcards in 'wc'. */
 uint32_t
 flow_wildcards_hash(const struct flow_wildcards *wc, uint32_t basis)
@@ -1415,6 +1475,53 @@ miniflow_hash_in_minimask(const struct miniflow *flow,
     return mhash_finish(hash, (p - mask->masks.values) * 4);
 }
 
+/* Returns a hash value for the bits of range [start, end) in 'flow',
+ * where there are 1-bits in 'mask', given 'basis'.
+ *
+ * The hash values returned by this function are the same as those returned by
+ * flow_hash_in_minimask_range(), only the form of the arguments differ. */
+uint32_t
+miniflow_hash_in_minimask_range(const struct miniflow *flow,
+                                const struct minimask *mask,
+                                uint8_t start, uint8_t end, uint32_t *basis)
+{
+    const uint32_t *p = mask->masks.values;
+    int i = 0;
+    uint32_t msk;
+    uint32_t hash = *basis;
+
+    while (start >= 32) {
+        p += popcount(mask->masks.map[i]);
+        i++;
+        start -= 32;
+    }
+    msk = (1u << start) - 1; /* 'start' LSBs set */
+
+    for (; i < MINI_N_MAPS; i++) {
+        uint32_t map = mask->masks.map[i];
+
+        if (start > 0) {
+            p += popcount(map & msk);  /* Skip to start. */
+            map &= ~msk;
+            start = 0;
+        }
+
+        for (; map; map = zero_rightmost_1bit(map)) {
+            int ofs = raw_ctz(map) + i * 32;
+            if (ofs >= end) {
+                goto out;
+            }
+            if (*p) {
+                hash = mhash_add(hash, miniflow_get(flow, ofs) & *p);
+            }
+            p++;
+        }
+    }
+ out:
+    *basis = hash; /* Allow continuation from the unfinished value. */
+    return mhash_finish(hash, (p - mask->masks.values) * 4);
+}
+
 /* Returns a hash value for the bits of 'flow' where there are 1-bits in
  * 'mask', given 'basis'.
  *
@@ -1445,6 +1552,60 @@ flow_hash_in_minimask(const struct flow *flow, const struct minimask *mask,
 
     return mhash_finish(hash, (p - mask->masks.values) * 4);
 }
+
+/* Returns a hash value for the bits of range [start, end) in 'flow',
+ * where there are 1-bits in 'mask', given 'basis'.
+ *
+ * The hash values returned by this function are the same as those returned by
+ * miniflow_hash_in_minimask_range(), only the form of the arguments differ. */
+uint32_t
+flow_hash_in_minimask_range(const struct flow *flow,
+                            const struct minimask *mask,
+                            uint8_t start, uint8_t end, uint32_t *basis)
+{
+    const uint32_t *flow_u32 = (const uint32_t *) flow;
+    const uint32_t *p = mask->masks.values;
+    int i = 0;
+    int end_ofs = end;
+    uint32_t msk;
+    uint32_t hash = *basis;
+
+    while (start >= 32) {
+        p += popcount(mask->masks.map[i]);
+        i++;
+        start -= 32;
+        end_ofs -= 32;
+        flow_u32 += 32;
+    }
+    msk = (1u << start) - 1; /* 'start' LSBs set */
+
+    for (; i < MINI_N_MAPS; i++) {
+        uint32_t map = mask->masks.map[i];
+
+        if (start > 0) {
+            p += popcount(map & msk);  /* Skip to start. */
+            map &= ~msk;
+            start = 0;
+        }
+
+        for (; map; map = zero_rightmost_1bit(map)) {
+            int ofs = raw_ctz(map);
+            if (ofs >= end_ofs) {
+                goto out;
+            }
+            if (*p) {
+                hash = mhash_add(hash, flow_u32[ofs] & *p);
+            }
+            p++;
+        }
+        flow_u32 += 32;
+        end_ofs -= 32;
+    }
+ out:
+    *basis = hash; /* Allow continuation from the unfinished value. */
+    return mhash_finish(hash, (p - mask->masks.values) * 4);
+}
+
 
 /* Initializes 'dst' as a copy of 'src'.  The caller must eventually free 'dst'
  * with minimask_destroy(). */
diff --git a/lib/flow.h b/lib/flow.h
index 093d509..5251038 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -37,7 +37,7 @@ struct ofpbuf;
 /* This sequence number should be incremented whenever anything involving flows
  * or the wildcarding of flows changes.  This will cause build assertion
  * failures in places which likely need to be updated. */
-#define FLOW_WC_SEQ 22
+#define FLOW_WC_SEQ 23
 
 #define FLOW_N_REGS 8
 BUILD_ASSERT_DECL(FLOW_N_REGS <= NXM_NX_MAX_REGS);
@@ -90,42 +90,70 @@ union flow_in_port {
  * a 32-bit datapath port number.
  */
 struct flow {
+    /* L1 */
     struct flow_tnl tunnel;     /* Encapsulating tunnel parameters. */
     ovs_be64 metadata;          /* OpenFlow Metadata. */
+    uint32_t regs[FLOW_N_REGS]; /* Registers. */
+    uint32_t skb_priority;      /* Packet priority for QoS. */
+    uint32_t pkt_mark;          /* Packet mark. */
+    union flow_in_port in_port; /* Input port.*/
+
+    /* L2 */
+    uint8_t dl_src[6];          /* Ethernet source address. */
+    uint8_t dl_dst[6];          /* Ethernet destination address. */
+    ovs_be16 dl_type;           /* Ethernet frame type. */
+    ovs_be16 vlan_tci;          /* If 802.1Q, TCI | VLAN_CFI; otherwise 0. */
+
+    /* L3 */
+    ovs_be32 mpls_lse;          /* MPLS label stack entry. */
     struct in6_addr ipv6_src;   /* IPv6 source address. */
     struct in6_addr ipv6_dst;   /* IPv6 destination address. */
     struct in6_addr nd_target;  /* IPv6 neighbor discovery (ND) target. */
-    uint32_t skb_priority;      /* Packet priority for QoS. */
-    uint32_t regs[FLOW_N_REGS]; /* Registers. */
+    ovs_be32 ipv6_label;        /* IPv6 flow label. */
     ovs_be32 nw_src;            /* IPv4 source address. */
     ovs_be32 nw_dst;            /* IPv4 destination address. */
-    ovs_be32 ipv6_label;        /* IPv6 flow label. */
-    union flow_in_port in_port; /* Input port.*/
-    uint32_t pkt_mark;          /* Packet mark. */
-    ovs_be32 mpls_lse;          /* MPLS label stack entry. */
-    ovs_be16 vlan_tci;          /* If 802.1Q, TCI | VLAN_CFI; otherwise 0. */
-    ovs_be16 dl_type;           /* Ethernet frame type. */
-    ovs_be16 tp_src;            /* TCP/UDP/SCTP source port. */
-    ovs_be16 tp_dst;            /* TCP/UDP/SCTP destination port. */
-    ovs_be16 tcp_flags;         /* TCP flags. */
-    uint8_t dl_src[6];          /* Ethernet source address. */
-    uint8_t dl_dst[6];          /* Ethernet destination address. */
-    uint8_t nw_proto;           /* IP protocol or low 8 bits of ARP opcode. */
+    uint8_t nw_frag;            /* FLOW_FRAG_* flags. */
     uint8_t nw_tos;             /* IP ToS (including DSCP and ECN). */
+    uint8_t nw_ttl;             /* IP TTL/Hop Limit. */
+    uint8_t nw_proto;           /* IP protocol or low 8 bits of ARP opcode. */
     uint8_t arp_sha[6];         /* ARP/ND source hardware address. */
     uint8_t arp_tha[6];         /* ARP/ND target hardware address. */
-    uint8_t nw_ttl;             /* IP TTL/Hop Limit. */
-    uint8_t nw_frag;            /* FLOW_FRAG_* flags. Keep last for the
-                                   BUILD_ASSERT_DECL below */
+    ovs_be16 tcp_flags;         /* TCP flags. */
+    ovs_be16 pad;               /* Padding. */
+    /* L4 */
+    ovs_be16 tp_src;            /* TCP/UDP/SCTP source port. */
+    ovs_be16 tp_dst;            /* TCP/UDP/SCTP destination port.
+                                 * Keep last for the BUILD_ASSERT_DECL below */
 };
 BUILD_ASSERT_DECL(sizeof(struct flow) % 4 == 0);
 
 #define FLOW_U32S (sizeof(struct flow) / 4)
 
 /* Remember to update FLOW_WC_SEQ when changing 'struct flow'. */
-BUILD_ASSERT_DECL(offsetof(struct flow, nw_frag) + 1
-                  == sizeof(struct flow_tnl) + 154
-                  && FLOW_WC_SEQ == 22);
+BUILD_ASSERT_DECL(offsetof(struct flow, tp_dst) + 2
+                  == sizeof(struct flow_tnl) + 156
+                  && FLOW_WC_SEQ == 23);
+
+/* Incremental points at which flow classification may be performed in
+ * segments.
+ * These are located here because they depend on the layout of the
+ * struct flow defined above:
+ * Each offset must be on a distinct, successive U32 boundary strictly
+ * within the struct flow. */
+enum {
+    FLOW_SEGMENT_1_ENDS_AT = offsetof(struct flow, dl_src),
+    FLOW_SEGMENT_2_ENDS_AT = offsetof(struct flow, mpls_lse),
+    FLOW_SEGMENT_3_ENDS_AT = offsetof(struct flow, tp_src),
+};
+BUILD_ASSERT_DECL(FLOW_SEGMENT_1_ENDS_AT % 4 == 0);
+BUILD_ASSERT_DECL(FLOW_SEGMENT_2_ENDS_AT % 4 == 0);
+BUILD_ASSERT_DECL(FLOW_SEGMENT_3_ENDS_AT % 4 == 0);
+BUILD_ASSERT_DECL(                     0 < FLOW_SEGMENT_1_ENDS_AT);
+BUILD_ASSERT_DECL(FLOW_SEGMENT_1_ENDS_AT < FLOW_SEGMENT_2_ENDS_AT);
+BUILD_ASSERT_DECL(FLOW_SEGMENT_2_ENDS_AT < FLOW_SEGMENT_3_ENDS_AT);
+BUILD_ASSERT_DECL(FLOW_SEGMENT_3_ENDS_AT < sizeof(struct flow));
+
+extern const uint8_t flow_segment_u32s[];
 
 /* Represents the metadata fields of struct flow. */
 struct flow_metadata {
@@ -263,6 +291,18 @@ bool flow_wildcards_has_extra(const struct flow_wildcards *,
 void flow_wildcards_fold_minimask(struct flow_wildcards *,
                                   const struct minimask *);
 
+void flow_wildcards_fold_minimask_range(struct flow_wildcards *,
+                                        const struct minimask *,
+                                        uint8_t start, uint8_t end);
+uint32_t flow_hash_in_minimask_range(const struct flow *,
+                                     const struct minimask *,
+                                     uint8_t start, uint8_t end,
+                                     uint32_t *basis);
+uint32_t miniflow_hash_in_minimask_range(const struct miniflow *,
+                                         const struct minimask *,
+                                         uint8_t start, uint8_t end,
+                                         uint32_t *basis);
+
 uint32_t flow_wildcards_hash(const struct flow_wildcards *, uint32_t basis);
 bool flow_wildcards_equal(const struct flow_wildcards *,
                           const struct flow_wildcards *);
diff --git a/lib/match.c b/lib/match.c
index 0f674b0..909fa51 100644
--- a/lib/match.c
+++ b/lib/match.c
@@ -842,7 +842,7 @@ match_format(const struct match *match, struct ds *s, unsigned int priority)
 
     int i;
 
-    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 22);
+    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 23);
 
     if (priority != OFP_DEFAULT_PRIORITY) {
         ds_put_format(s, "priority=%u,", priority);
diff --git a/lib/nx-match.c b/lib/nx-match.c
index 1a6fcae..0737522 100644
--- a/lib/nx-match.c
+++ b/lib/nx-match.c
@@ -572,7 +572,7 @@ nx_put_raw(struct ofpbuf *b, bool oxm, const struct match *match,
     int match_len;
     int i;
 
-    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 22);
+    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 23);
 
     /* Metadata. */
     if (match->wc.masks.in_port.ofp_port) {
diff --git a/lib/ofp-util.c b/lib/ofp-util.c
index d435e99..46fd977 100644
--- a/lib/ofp-util.c
+++ b/lib/ofp-util.c
@@ -84,7 +84,7 @@ ofputil_netmask_to_wcbits(ovs_be32 netmask)
 void
 ofputil_wildcard_from_ofpfw10(uint32_t ofpfw, struct flow_wildcards *wc)
 {
-    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 22);
+    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 23);
 
     /* Initialize most of wc. */
     flow_wildcards_init_catchall(wc);
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 48d078d..43a92db 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -1559,7 +1559,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
 
     /* If 'struct flow' gets additional metadata, we'll need to zero it out
      * before traversing a patch port. */
-    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 22);
+    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 23);
 
     if (!xport) {
         xlate_report(ctx, "Nonexistent output port");
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index ff1c74d..18f0c80 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -1233,7 +1233,7 @@ construct(struct ofproto *ofproto_)
     ovs_mutex_init(&ofproto->stats_mutex);
     ovs_mutex_init(&ofproto->vsp_mutex);
 
-    classifier_init(&ofproto->facets);
+    classifier_init(&ofproto->facets, NULL);
     ofproto->consistency_rl = LLONG_MIN;
 
     guarded_list_init(&ofproto->pins);
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index 6ea7510..08eaa6b 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -6604,7 +6604,7 @@ static void
 oftable_init(struct oftable *table)
 {
     memset(table, 0, sizeof *table);
-    classifier_init(&table->cls);
+    classifier_init(&table->cls, flow_segment_u32s);
     table->max_flows = UINT_MAX;
 }
 
diff --git a/tests/classifier.at b/tests/classifier.at
index cf0cc44..546c8f7 100644
--- a/tests/classifier.at
+++ b/tests/classifier.at
@@ -22,3 +22,41 @@ m4_foreach(
   [AT_SETUP([miniflow - m4_bpatsubst(testname, [-], [ ])])
    AT_CHECK([test-classifier testname], [0], [], [])
    AT_CLEANUP])])
+
+AT_BANNER([flow classifier lookup segmentation])
+AT_SETUP([flow classifier - lookup segmentation])
+OVS_VSWITCHD_START
+ADD_OF_PORTS([br0], [1], [2], [3])
+AT_DATA([flows.txt], [dnl
+table=0 in_port=1 priority=16,tcp,nw_dst=10.1.0.0/255.255.0.0,action=output(3)
+table=0 in_port=1 priority=32,tcp,nw_dst=10.1.2.15,action=output(2)
+table=0 in_port=1 priority=33,tcp,nw_dst=10.1.2.15,tp_dst=80,action=drop
+table=0 in_port=1 priority=0,ip,action=drop
+table=0 in_port=2 priority=16,tcp,nw_dst=192.168.0.0/255.255.0.0,action=output(1)
+table=0 in_port=2 priority=0,ip,action=drop
+table=0 in_port=3 priority=16,tcp,nw_src=10.1.0.0/255.255.0.0,action=output(1)
+table=0 in_port=3 priority=0,ip,action=drop
+])
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=2,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout])
+AT_CHECK([tail -2 stdout], [0],
+  [Relevant fields: skb_priority=0,tcp,in_port=2,nw_dst=192.168.0.0/16,nw_frag=no
+Datapath actions: 1
+])
+AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout])
+AT_CHECK([tail -2 stdout], [0],
+  [Relevant fields: skb_priority=0,tcp,in_port=1,nw_dst=192.168.0.2,nw_frag=no
+Datapath actions: drop
+])
+AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout])
+AT_CHECK([tail -2 stdout], [0],
+  [Relevant fields: skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=80
+Datapath actions: drop
+])
+AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=79'], [0], [stdout])
+AT_CHECK([tail -2 stdout], [0],
+  [Relevant fields: skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=79
+Datapath actions: 2
+])
+OVS_VSWITCHD_STOP
+AT_CLEANUP
diff --git a/tests/test-classifier.c b/tests/test-classifier.c
index 3f39f8f..cb9b3db 100644
--- a/tests/test-classifier.c
+++ b/tests/test-classifier.c
@@ -612,7 +612,7 @@ test_empty(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
     struct classifier cls;
     struct tcls tcls;
 
-    classifier_init(&cls);
+    classifier_init(&cls, flow_segment_u32s);
     ovs_rwlock_rdlock(&cls.rwlock);
     tcls_init(&tcls);
     assert(classifier_is_empty(&cls));
@@ -644,7 +644,7 @@ test_single_rule(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
         rule = make_rule(wc_fields,
                          hash_bytes(&wc_fields, sizeof wc_fields, 0), 0);
 
-        classifier_init(&cls);
+        classifier_init(&cls, flow_segment_u32s);
         ovs_rwlock_wrlock(&cls.rwlock);
         tcls_init(&tcls);
 
@@ -683,7 +683,7 @@ test_rule_replacement(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
         rule2->aux += 5;
         rule2->aux += 5;
 
-        classifier_init(&cls);
+        classifier_init(&cls, flow_segment_u32s);
         ovs_rwlock_wrlock(&cls.rwlock);
         tcls_init(&tcls);
         tcls_insert(&tcls, rule1);
@@ -795,7 +795,7 @@ test_many_rules_in_one_list (int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
                 pri_rules[i] = -1;
             }
 
-            classifier_init(&cls);
+            classifier_init(&cls, flow_segment_u32s);
             ovs_rwlock_wrlock(&cls.rwlock);
             tcls_init(&tcls);
 
@@ -897,7 +897,7 @@ test_many_rules_in_one_table(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
             value_mask = ~wcf & ((1u << CLS_N_FIELDS) - 1);
         } while ((1 << count_ones(value_mask)) < N_RULES);
 
-        classifier_init(&cls);
+        classifier_init(&cls, flow_segment_u32s);
         ovs_rwlock_wrlock(&cls.rwlock);
         tcls_init(&tcls);
 
@@ -959,7 +959,7 @@ test_many_rules_in_n_tables(int n_tables)
         }
         shuffle(priorities, ARRAY_SIZE(priorities));
 
-        classifier_init(&cls);
+        classifier_init(&cls, flow_segment_u32s);
         ovs_rwlock_wrlock(&cls.rwlock);
         tcls_init(&tcls);
 
diff --git a/utilities/ovs-ofctl.c b/utilities/ovs-ofctl.c
index da0a54b..4d36a7d 100644
--- a/utilities/ovs-ofctl.c
+++ b/utilities/ovs-ofctl.c
@@ -2398,7 +2398,7 @@ ofctl_replace_flows(int argc OVS_UNUSED, char *argv[])
     struct vconn *vconn;
     struct fte *fte;
 
-    classifier_init(&cls);
+    classifier_init(&cls, NULL);
     usable_protocols = read_flows_from_file(argv[2], &cls, FILE_IDX);
 
     protocol = open_vconn(argv[1], &vconn);
@@ -2468,7 +2468,7 @@ ofctl_diff_flows(int argc OVS_UNUSED, char *argv[])
     struct ds a_s, b_s;
     struct fte *fte;
 
-    classifier_init(&cls);
+    classifier_init(&cls, NULL);
     read_flows_from_source(argv[1], &cls, 0);
     read_flows_from_source(argv[2], &cls, 1);
 
-- 
1.7.10.4



