[ovs-dev] [PATCH v3 3/3] flow: Split miniflow's map.

Jarno Rajahalme jrajahalme at nicira.com
Thu Jul 16 22:15:54 UTC 2015


Use two maps in struct miniflow to allow struct flow to grow past
512 bytes.  There is now one map for the tunnel-related fields and
another for the rest of the packet metadata and the actual packet
header fields.  This split has the benefit that the overhead for
non-tunneled packets should be minimal.

Some miniflow utilities now exist in two variants: new ones that
operate over all the data, and the old ones that operate on a single
64-bit map at a time.  The latter require some code duplication but
should execute faster, so they are used in the datapath and in the
classifier's lookup path.

Signed-off-by: Jarno Rajahalme <jrajahalme at nicira.com>
---
 lib/classifier-private.h |  110 ++++++++++++-----
 lib/classifier.c         |   72 +++++++----
 lib/dpif-netdev.c        |   92 +++++++++-----
 lib/flow.c               |  299 ++++++++++++++++++++++++++++------------------
 lib/flow.h               |  188 +++++++++++++++++++----------
 lib/match.c              |    4 +-
 6 files changed, 498 insertions(+), 267 deletions(-)

diff --git a/lib/classifier-private.h b/lib/classifier-private.h
index 2dfaf93..c4c6ce9 100644
--- a/lib/classifier-private.h
+++ b/lib/classifier-private.h
@@ -226,23 +226,48 @@ struct trie_node {
  * These are only used by the classifier, so place them here to allow
  * for better optimization. */
 
-static inline uint64_t
+/* TODO: Ensure that 'start' and 'end' are compile-time constants. */
+static inline unsigned int /* offset */
 miniflow_get_map_in_range(const struct miniflow *miniflow,
-                          uint8_t start, uint8_t end, unsigned int *offset)
+                          uint8_t start, uint8_t end, struct miniflow *map)
 {
-    uint64_t map = miniflow->map;
-    *offset = 0;
+    unsigned int offset = 0;
 
-    if (start > 0) {
-        uint64_t msk = (UINT64_C(1) << start) - 1; /* 'start' LSBs set */
-        *offset = count_1bits(map & msk);
-        map &= ~msk;
+    map->tnl_map = miniflow->tnl_map;
+    map->pkt_map = miniflow->pkt_map;
+
+    if (start >= FLOW_TNL_U64S) {
+        offset += count_1bits(map->tnl_map);
+        map->tnl_map = 0;
+        if (start > FLOW_TNL_U64S) {
+            /* Clear 'start - FLOW_TNL_U64S' LSBs from pkt_map. */
+            start -= FLOW_TNL_U64S;
+            uint64_t msk = (UINT64_C(1) << start) - 1;
+
+            offset += count_1bits(map->pkt_map & msk);
+            map->pkt_map &= ~msk;
+        }
+    } else if (start > 0) {
+        /* Clear 'start' LSBs from tnl_map. */
+        uint64_t msk = (UINT64_C(1) << start) - 1;
+
+        offset += count_1bits(map->tnl_map & msk);
+        map->tnl_map &= ~msk;
     }
-    if (end < FLOW_U64S) {
-        uint64_t msk = (UINT64_C(1) << end) - 1; /* 'end' LSBs set */
-        map &= msk;
+
+    if (end <= FLOW_TNL_U64S) {
+        map->pkt_map = 0;
+        if (end < FLOW_TNL_U64S) {
+            /* Keep 'end' LSBs in tnl_map. */
+            map->tnl_map &= (UINT64_C(1) << end) - 1;
+        }
+    } else {
+        if (end < FLOW_U64S) {
+            /* Keep 'end - FLOW_TNL_U64S' LSBs in pkt_map. */
+            map->pkt_map &= (UINT64_C(1) << (end - FLOW_TNL_U64S)) - 1;
+        }
     }
-    return map;
+    return offset;
 }
 
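For illustration, a typical caller (as in flow_hash_in_minimask_range()
below) narrows a mask to a u64 range and skips to the first value for that
range:

    struct miniflow map;
    unsigned int offset;

    /* Keep only the map bits for u64 units [start, end); 'offset' is the
     * number of 1-bits cleared below 'start', i.e., how many values to
     * skip at the start of the mask's value array. */
    offset = miniflow_get_map_in_range(&mask->masks, start, end, &map);
    p = miniflow_get_values(&mask->masks) + offset;
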
 /* Returns a hash value for the bits of 'flow' where there are 1-bits in
@@ -258,10 +283,14 @@ flow_hash_in_minimask(const struct flow *flow, const struct minimask *mask,
     const uint64_t *flow_u64 = (const uint64_t *)flow;
     const uint64_t *p = mask_values;
     uint32_t hash;
-    int idx;
+    size_t idx;
 
     hash = basis;
-    MAP_FOR_EACH_INDEX(idx, mask->masks.map) {
+    MAP_FOR_EACH_INDEX(idx, mask->masks.tnl_map) {
+        hash = hash_add64(hash, flow_u64[idx] & *p++);
+    }
+    flow_u64 += FLOW_TNL_U64S;
+    MAP_FOR_EACH_INDEX(idx, mask->masks.pkt_map) {
         hash = hash_add64(hash, flow_u64[idx] & *p++);
     }
 
@@ -282,7 +311,10 @@ miniflow_hash_in_minimask(const struct miniflow *flow,
     uint32_t hash = basis;
     uint64_t flow_u64;
 
-    MINIFLOW_FOR_EACH_IN_MAP(flow_u64, flow, mask->masks.map) {
+    MINIFLOW_FOR_EACH_IN_TNL_MAP(flow_u64, flow, mask->masks) {
+        hash = hash_add64(hash, flow_u64 & *p++);
+    }
+    MINIFLOW_FOR_EACH_IN_PKT_MAP(flow_u64, flow, mask->masks) {
         hash = hash_add64(hash, flow_u64 & *p++);
     }
 
@@ -302,14 +334,18 @@ flow_hash_in_minimask_range(const struct flow *flow,
     const uint64_t *mask_values = miniflow_get_values(&mask->masks);
     const uint64_t *flow_u64 = (const uint64_t *)flow;
     unsigned int offset;
-    uint64_t map;
+    struct miniflow map;
     const uint64_t *p;
     uint32_t hash = *basis;
-    int idx;
+    size_t idx;
 
-    map = miniflow_get_map_in_range(&mask->masks, start, end, &offset);
+    offset = miniflow_get_map_in_range(&mask->masks, start, end, &map);
     p = mask_values + offset;
-    MAP_FOR_EACH_INDEX(idx, map) {
+    MAP_FOR_EACH_INDEX(idx, map.tnl_map) {
+        hash = hash_add64(hash, flow_u64[idx] & *p++);
+    }
+    flow_u64 += FLOW_TNL_U64S;
+    MAP_FOR_EACH_INDEX(idx, map.pkt_map) {
         hash = hash_add64(hash, flow_u64[idx] & *p++);
     }
 
@@ -334,13 +370,15 @@ flow_wildcards_fold_minimask_range(struct flow_wildcards *wc,
 {
     const uint64_t *p = miniflow_get_values(&mask->masks);
     uint64_t *dst_u64 = (uint64_t *)&wc->masks;
-    unsigned int offset;
-    uint64_t map;
-    int idx;
+    struct miniflow map;
+    size_t idx;
 
-    map = miniflow_get_map_in_range(&mask->masks, start, end, &offset);
-    p += offset;
-    MAP_FOR_EACH_INDEX(idx, map) {
+    p += miniflow_get_map_in_range(&mask->masks, start, end, &map);
+    MAP_FOR_EACH_INDEX(idx, map.tnl_map) {
+        dst_u64[idx] |= *p++;
+    }
+    dst_u64 += FLOW_TNL_U64S;
+    MAP_FOR_EACH_INDEX(idx, map.pkt_map) {
         dst_u64[idx] |= *p++;
     }
 }
@@ -352,17 +390,25 @@ miniflow_hash(const struct miniflow *flow, uint32_t basis)
     const uint64_t *values = miniflow_get_values(flow);
     const uint64_t *p = values;
     uint32_t hash = basis;
-    uint64_t hash_map = 0;
+    uint64_t hash_tnl_map = 0, hash_pkt_map = 0;
     uint64_t map;
 
-    for (map = flow->map; map; map = zero_rightmost_1bit(map)) {
+    for (map = flow->tnl_map; map; map = zero_rightmost_1bit(map)) {
         if (*p) {
             hash = hash_add64(hash, *p);
-            hash_map |= rightmost_1bit(map);
+            hash_tnl_map |= rightmost_1bit(map);
         }
         p++;
     }
-    hash = hash_add64(hash, hash_map);
+    for (map = flow->pkt_map; map; map = zero_rightmost_1bit(map)) {
+        if (*p) {
+            hash = hash_add64(hash, *p);
+            hash_pkt_map |= rightmost_1bit(map);
+        }
+        p++;
+    }
+    hash = hash_add64(hash, hash_tnl_map);
+    hash = hash_add64(hash, hash_pkt_map);
 
     return hash_finish(hash, p - values);
 }
@@ -393,11 +439,13 @@ minimatch_hash_range(const struct minimatch *match, uint8_t start, uint8_t end,
     const uint64_t *p = miniflow_get_values(match->flow);
     const uint64_t *q = miniflow_get_values(&match->mask->masks);
     unsigned int offset;
+    struct miniflow map;
     uint32_t hash = *basis;
     int n, i;
 
-    n = count_1bits(miniflow_get_map_in_range(&match->mask->masks, start, end,
-                                              &offset));
+    offset = miniflow_get_map_in_range(&match->mask->masks, start, end, &map);
+    n = miniflow_n_values(&map);
+
     q += offset;
     p += offset;
 
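The pattern above recurs throughout this patch: iterate the 'tnl_map'
indices against the start of the struct, then advance the struct pointer by
FLOW_TNL_U64S before iterating 'pkt_map', because pkt_map indices are
relative.  A minimal sketch:

    uint64_t *dst_u64 = (uint64_t *)&wc->masks;
    size_t idx;

    MAP_FOR_EACH_INDEX(idx, map.tnl_map) {
        dst_u64[idx] |= *p++;       /* tnl_map indices are absolute. */
    }
    dst_u64 += FLOW_TNL_U64S;       /* pkt_map indices are relative. */
    MAP_FOR_EACH_INDEX(idx, map.pkt_map) {
        dst_u64[idx] |= *p++;
    }
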
diff --git a/lib/classifier.c b/lib/classifier.c
index c75e9b4..fc6a1f5 100644
--- a/lib/classifier.c
+++ b/lib/classifier.c
@@ -90,7 +90,7 @@ static struct cls_match *
 cls_match_alloc(const struct cls_rule *rule, cls_version_t version,
                 const struct cls_conjunction conj[], size_t n)
 {
-    int count = count_1bits(rule->match.flow->map);
+    size_t count = miniflow_n_values(rule->match.flow);
 
     struct cls_match *cls_match
         = xmalloc(sizeof *cls_match + MINIFLOW_VALUES_SIZE(count));
@@ -1537,7 +1537,7 @@ insert_subtable(struct classifier *cls, const struct minimask *mask)
     int i, index = 0;
     struct flow_wildcards old, new;
     uint8_t prev;
-    int count = count_1bits(mask->masks.map);
+    size_t count = miniflow_n_values(&mask->masks);
 
     subtable = xzalloc(sizeof *subtable + MINIFLOW_VALUES_SIZE(count));
     cmap_init(&subtable->rules);
@@ -1700,12 +1700,17 @@ miniflow_and_mask_matches_flow(const struct miniflow *flow,
 {
     const uint64_t *flowp = miniflow_get_values(flow);
     const uint64_t *maskp = miniflow_get_values(&mask->masks);
-    int idx;
+    const uint64_t *target_u64 = (const uint64_t *)target;
+    size_t idx;
 
-    MAP_FOR_EACH_INDEX(idx, mask->masks.map) {
-        uint64_t diff = (*flowp++ ^ flow_u64_value(target, idx)) & *maskp++;
-
-        if (diff) {
+    MAP_FOR_EACH_INDEX(idx, mask->masks.tnl_map) {
+        if ((*flowp++ ^ target_u64[idx]) & *maskp++) {
+            return false;
+        }
+    }
+    target_u64 += FLOW_TNL_U64S;
+    MAP_FOR_EACH_INDEX(idx, mask->masks.pkt_map) {
+        if ((*flowp++ ^ target_u64[idx]) & *maskp++) {
             return false;
         }
     }
@@ -1749,27 +1754,47 @@ miniflow_and_mask_matches_flow_wc(const struct miniflow *flow,
 {
     const uint64_t *flowp = miniflow_get_values(flow);
     const uint64_t *maskp = miniflow_get_values(&mask->masks);
-    int idx;
+    const uint64_t *target_u64 = (const uint64_t *)target;
+    uint64_t *wc_u64 = (uint64_t *)&wc->masks;
+    uint64_t diff;
+    size_t idx;
 
-    MAP_FOR_EACH_INDEX(idx, mask->masks.map) {
-        uint64_t mask = *maskp++;
-        uint64_t diff = (*flowp++ ^ flow_u64_value(target, idx)) & mask;
+    MAP_FOR_EACH_INDEX(idx, mask->masks.tnl_map) {
+        uint64_t msk = *maskp++;
 
+        diff = (*flowp++ ^ target_u64[idx]) & msk;
         if (diff) {
-            /* Only unwildcard if none of the differing bits is already
-             * exact-matched. */
-            if (!(flow_u64_value(&wc->masks, idx) & diff)) {
-                /* Keep one bit of the difference.  The selected bit may be
-                 * different in big-endian v.s. little-endian systems. */
-                *flow_u64_lvalue(&wc->masks, idx) |= rightmost_1bit(diff);
-            }
-            return false;
+            goto out;
+        }
+
+        /* Fill in the bits that were looked at. */
+        wc_u64[idx] |= msk;
+    }
+    target_u64 += FLOW_TNL_U64S;
+    wc_u64 += FLOW_TNL_U64S;
+    MAP_FOR_EACH_INDEX(idx, mask->masks.pkt_map) {
+        uint64_t msk = *maskp++;
+
+        diff = (*flowp++ ^ target_u64[idx]) & msk;
+        if (diff) {
+            goto out;
         }
+
         /* Fill in the bits that were looked at. */
-        *flow_u64_lvalue(&wc->masks, idx) |= mask;
+        wc_u64[idx] |= msk;
     }
 
     return true;
+
+out:
+    /* Only unwildcard if none of the differing bits is already
+     * exact-matched. */
+    if (!(wc_u64[idx] & diff)) {
+        /* Keep one bit of the difference.  The selected bit may be
+         * different in big-endian vs. little-endian systems. */
+        wc_u64[idx] |= rightmost_1bit(diff);
+    }
+    return false;
 }
 
 /* Unwildcard the fields looked up so far, if any. */
@@ -2190,10 +2215,9 @@ minimask_get_prefix_len(const struct minimask *minimask,
 static const ovs_be32 *
 minimatch_get_prefix(const struct minimatch *match, const struct mf_field *mf)
 {
-    return (OVS_FORCE const ovs_be32 *)
-        (miniflow_get_values(match->flow)
-         + count_1bits(match->flow->map &
-                       ((UINT64_C(1) << mf->flow_be32ofs / 2) - 1)))
+    size_t u64_ofs = mf->flow_be32ofs / 2;
+
+    return (OVS_FORCE const ovs_be32 *)miniflow_get__(match->flow, u64_ofs)
         + (mf->flow_be32ofs & 1);
 }
 
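minimatch_get_prefix() works because miniflow_get__() (see lib/flow.h
below) now returns a pointer into the value array rather than the value
itself; a full u64 read then becomes a guarded dereference, as in
miniflow_get():

    /* Sketch of the pointer-returning accessor in use: */
    uint64_t value = MINIFLOW_IN_MAP(mf, u64_ofs)
        ? *miniflow_get__(mf, u64_ofs) : 0;
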
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index a2bd1b9..79c4612 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -488,14 +488,15 @@ emc_cache_init(struct emc_cache *flow_cache)
 {
     int i;
 
-    BUILD_ASSERT(sizeof(struct miniflow) == sizeof(uint64_t));
+    BUILD_ASSERT(sizeof(struct miniflow) == 2 * sizeof(uint64_t));
 
     flow_cache->sweep_idx = 0;
     for (i = 0; i < ARRAY_SIZE(flow_cache->entries); i++) {
         flow_cache->entries[i].flow = NULL;
         flow_cache->entries[i].key.hash = 0;
         flow_cache->entries[i].key.len = sizeof(struct miniflow);
-        flow_cache->entries[i].key.mf.map = 0;
+        flow_cache->entries[i].key.mf.tnl_map = 0;
+        flow_cache->entries[i].key.mf.pkt_map = 0;
     }
 }
 
@@ -1390,6 +1391,8 @@ dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd,
     struct cmap_node *node = CONST_CAST(struct cmap_node *, &flow->node);
 
     dpcls_remove(&pmd->cls, &flow->cr);
+    flow->cr.mask = NULL;   /* Accessing rule's mask after this is not safe. */
+
     cmap_remove(&pmd->flow_table, node, dp_netdev_flow_hash(&flow->ufid));
     flow->dead = true;
 
@@ -1518,20 +1521,19 @@ static bool dp_netdev_flow_ref(struct dp_netdev_flow *flow)
  *   miniflow_extract(), if the map is different the miniflow is different.
  *   Therefore we can be faster by comparing the map and the miniflow in a
  *   single memcmp().
- * _ netdev_flow_key's miniflow has always inline values.
  * - These functions can be inlined by the compiler.
  *
  * The following assertions make sure that what we're doing with miniflow is
- * safe
+ * safe.
  */
-BUILD_ASSERT_DECL(sizeof(struct miniflow) == sizeof(uint64_t));
+BUILD_ASSERT_DECL(sizeof(struct miniflow) == 2 * sizeof(uint64_t));
 
-/* Given the number of bits set in the miniflow map, returns the size of the
+/* Given the number of bits set in miniflow's maps, returns the size of the
  * 'netdev_flow_key.mf' */
-static inline uint32_t
-netdev_flow_key_size(uint32_t flow_u32s)
+static inline size_t
+netdev_flow_key_size(size_t flow_u64s)
 {
-    return sizeof(struct miniflow) + MINIFLOW_VALUES_SIZE(flow_u32s);
+    return sizeof(struct miniflow) + MINIFLOW_VALUES_SIZE(flow_u64s);
 }
 
 static inline bool
@@ -1574,7 +1576,7 @@ netdev_flow_key_from_flow(struct netdev_flow_key *dst,
     miniflow_extract(&packet, &dst->mf);
     dp_packet_uninit(&packet);
 
-    dst->len = netdev_flow_key_size(count_1bits(dst->mf.map));
+    dst->len = netdev_flow_key_size(miniflow_n_values(&dst->mf));
     dst->hash = 0; /* Not computed yet. */
 }
 
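The single-memcmp() comparison that the comment above alludes to is
possible because the maps immediately precede the packed values; a minimal
sketch of such a helper (the real one is outside this hunk and may differ):

    static inline bool
    netdev_flow_key_equal_mf(const struct netdev_flow_key *key,
                             const struct miniflow *mf)
    {
        /* 'key->len' covers both maps and the packed values, so one
         * memcmp() compares maps and data at once. */
        return !memcmp(&key->mf, mf, key->len);
    }
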
@@ -1585,28 +1587,43 @@ netdev_flow_mask_init(struct netdev_flow_key *mask,
 {
     const uint64_t *mask_u64 = (const uint64_t *) &match->wc.masks;
     uint64_t *dst = miniflow_values(&mask->mf);
-    uint64_t map, mask_map = 0;
+    struct miniflow maps;
+    uint64_t map;
     uint32_t hash = 0;
     int n;
 
     /* Only check masks that make sense for the flow. */
-    map = flow_wc_map(&match->flow);
+    flow_wc_map(&match->flow, &maps);
+    memset(&mask->mf, 0, sizeof mask->mf);   /* Clear maps. */
 
+    map = maps.tnl_map;
     while (map) {
         uint64_t rm1bit = rightmost_1bit(map);
         int i = raw_ctz(map);
 
         if (mask_u64[i]) {
-            mask_map |= rm1bit;
+            mask->mf.tnl_map |= rm1bit;
             *dst++ = mask_u64[i];
             hash = hash_add64(hash, mask_u64[i]);
         }
         map -= rm1bit;
     }
+    mask_u64 += FLOW_TNL_U64S;
+    map = maps.pkt_map;
+    while (map) {
+        uint64_t rm1bit = rightmost_1bit(map);
+        int i = raw_ctz(map);
 
-    mask->mf.map = mask_map;
+        if (mask_u64[i]) {
+            mask->mf.pkt_map |= rm1bit;
+            *dst++ = mask_u64[i];
+            hash = hash_add64(hash, mask_u64[i]);
+        }
+        map -= rm1bit;
+    }
 
-    hash = hash_add64(hash, mask_map);
+    hash = hash_add64(hash, mask->mf.tnl_map);
+    hash = hash_add64(hash, mask->mf.pkt_map);
 
     n = dst - miniflow_get_values(&mask->mf);
 
@@ -1614,7 +1631,7 @@ netdev_flow_mask_init(struct netdev_flow_key *mask,
     mask->len = netdev_flow_key_size(n);
 }
 
-/* Initializes 'dst' as a copy of 'src' masked with 'mask'. */
+/* Initializes 'dst' as a copy of 'flow' masked with 'mask'. */
 static inline void
 netdev_flow_key_init_masked(struct netdev_flow_key *dst,
                             const struct flow *flow,
@@ -1626,9 +1643,9 @@ netdev_flow_key_init_masked(struct netdev_flow_key *dst,
     uint64_t value;
 
     dst->len = mask->len;
-    dst->mf.map = mask->mf.map;
+    dst->mf = mask->mf;   /* Copy maps. */
 
-    FLOW_FOR_EACH_IN_MAP(value, flow, mask->mf.map) {
+    FLOW_FOR_EACH_IN_MAPS(value, flow, mask->mf) {
         *dst_u64 = value & *mask_u64++;
         hash = hash_add64(hash, *dst_u64++);
     }
@@ -1636,12 +1653,13 @@ netdev_flow_key_init_masked(struct netdev_flow_key *dst,
                             (dst_u64 - miniflow_get_values(&dst->mf)) * 8);
 }
 
-/* Iterate through all netdev_flow_key u64 values specified by 'MAP' */
-#define NETDEV_FLOW_KEY_FOR_EACH_IN_MAP(VALUE, KEY, MAP)                \
-    for (struct mf_for_each_in_map_aux aux__                            \
-             = { miniflow_get_values(&(KEY)->mf), (KEY)->mf.map, MAP }; \
-         mf_get_next_in_map(&aux__, &(VALUE));                          \
-        )
+/* Iterate through netdev_flow_key TNL u64 values specified by 'MAPS'. */
+#define NETDEV_FLOW_KEY_FOR_EACH_IN_TNL_MAP(VALUE, KEY, MAPS)   \
+    MINIFLOW_FOR_EACH_IN_TNL_MAP(VALUE, &(KEY)->mf, MAPS)
+
+/* Iterate through netdev_flow_key PKT u64 values specified by 'MAPS'. */
+#define NETDEV_FLOW_KEY_FOR_EACH_IN_PKT_MAP(VALUE, KEY, MAPS)   \
+    MINIFLOW_FOR_EACH_IN_PKT_MAP(VALUE, &(KEY)->mf, MAPS)
 
 /* Returns a hash value for the bits of 'key' where there are 1-bits in
  * 'mask'. */
@@ -1653,7 +1671,10 @@ netdev_flow_key_hash_in_mask(const struct netdev_flow_key *key,
     uint32_t hash = 0;
     uint64_t key_u64;
 
-    NETDEV_FLOW_KEY_FOR_EACH_IN_MAP(key_u64, key, mask->mf.map) {
+    NETDEV_FLOW_KEY_FOR_EACH_IN_TNL_MAP(key_u64, key, mask->mf) {
+        hash = hash_add64(hash, key_u64 & *p++);
+    }
+    NETDEV_FLOW_KEY_FOR_EACH_IN_PKT_MAP(key_u64, key, mask->mf) {
         hash = hash_add64(hash, key_u64 & *p++);
     }
 
@@ -1894,6 +1915,7 @@ dpif_netdev_mask_from_nlattrs(const struct nlattr *key, uint32_t key_len,
         for (id = 0; id < MFF_N_IDS; ++id) {
             /* Skip registers and metadata. */
             if (!(id >= MFF_REG0 && id < MFF_REG0 + FLOW_N_REGS)
+                && !(id >= MFF_XREG0 && id < MFF_XREG0 + FLOW_N_XREGS)
                 && id != MFF_METADATA) {
                 const struct mf_field *mf = mf_from_id(id);
                 if (mf_are_prereqs_ok(mf, flow)) {
@@ -1910,7 +1932,6 @@ dpif_netdev_mask_from_nlattrs(const struct nlattr *key, uint32_t key_len,
      * mask->in_port.ofp_port, which only covers half of the 32-bit datapath
      * port number mask->in_port.odp_port. */
     mask->in_port.odp_port = u32_to_odp(UINT32_MAX);
-
     return 0;
 }
 
@@ -1987,7 +2008,8 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
 
     netdev_flow_mask_init(&mask, match);
     /* Make sure wc does not have metadata. */
-    ovs_assert(!(mask.mf.map & (MINIFLOW_MAP(metadata) | MINIFLOW_MAP(regs))));
+    ovs_assert(!(mask.mf.pkt_map
+                 & (MINIFLOW_PKT_MAP(metadata) | MINIFLOW_PKT_MAP(regs))));
 
     /* Do not allocate extra space. */
     flow = xmalloc(sizeof *flow - sizeof flow->cr.flow.mf + mask.len);
@@ -3217,7 +3239,7 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
 
     for (i = 0; i < cnt; i++) {
         /* Key length is needed in all the cases, hash computed on demand. */
-        keys[i].len = netdev_flow_key_size(count_1bits(keys[i].mf.map));
+        keys[i].len = netdev_flow_key_size(miniflow_n_values(&keys[i].mf));
     }
     any_miss = !dpcls_lookup(&pmd->cls, keys, rules, cnt);
     if (OVS_UNLIKELY(any_miss) && !fat_rwlock_tryrdlock(&dp->upcall_rwlock)) {
@@ -3781,6 +3803,7 @@ dpcls_destroy(struct dpcls *cls)
         struct dpcls_subtable *subtable;
 
         CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) {
+            ovs_assert(cmap_count(&subtable->rules) == 0);
             dpcls_destroy_subtable(cls, subtable);
         }
         cmap_destroy(&cls->subtables_map);
@@ -3847,10 +3870,8 @@ dpcls_remove(struct dpcls *cls, struct dpcls_rule *rule)
     }
 }
 
-/* Returns true if 'target' satisifies 'key' in 'mask', that is, if each 1-bit
- * in 'mask' the values in 'key' and 'target' are the same.
- *
- * Note: 'key' and 'mask' have the same mask, and 'key' is already masked. */
+/* Returns true if 'target' satisfies 'key' in 'mask', that is, if for each
+ * 1-bit in 'mask' the values in 'key' and 'target' are the same. */
 static inline bool
 dpcls_rule_matches_key(const struct dpcls_rule *rule,
                        const struct netdev_flow_key *target)
@@ -3859,7 +3880,12 @@ dpcls_rule_matches_key(const struct dpcls_rule *rule,
     const uint64_t *maskp = miniflow_get_values(&rule->mask->mf);
     uint64_t target_u64;
 
-    NETDEV_FLOW_KEY_FOR_EACH_IN_MAP(target_u64, target, rule->flow.mf.map) {
+    NETDEV_FLOW_KEY_FOR_EACH_IN_TNL_MAP(target_u64, target, rule->flow.mf) {
+        if (OVS_UNLIKELY((target_u64 & *maskp++) != *keyp++)) {
+            return false;
+        }
+    }
+    NETDEV_FLOW_KEY_FOR_EACH_IN_PKT_MAP(target_u64, target, rule->flow.mf) {
         if (OVS_UNLIKELY((target_u64 & *maskp++) != *keyp++)) {
             return false;
         }
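Taken together, a subtable lookup hashes the masked key and then verifies
candidates with dpcls_rule_matches_key(); roughly (dpcls_lookup() itself is
outside this hunk):

    uint32_t hash = netdev_flow_key_hash_in_mask(key, &subtable->mask);
    struct dpcls_rule *rule;

    CMAP_FOR_EACH_WITH_HASH (rule, cmap_node, hash, &subtable->rules) {
        if (dpcls_rule_matches_key(rule, key)) {
            /* Found a matching rule. */
        }
    }
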
diff --git a/lib/flow.c b/lib/flow.c
index 1fcb671..fabdbfc 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -112,7 +112,7 @@ data_try_pull(const void **datap, size_t *sizep, size_t size)
 
 /* Context for pushing data to a miniflow. */
 struct mf_ctx {
-    uint64_t map;
+    struct miniflow maps;
     uint64_t *data;
     uint64_t * const end;
 };
@@ -132,28 +132,46 @@ BUILD_MESSAGE("FLOW_WC_SEQ changed: miniflow_extract() will have runtime "
 #define MINIFLOW_ASSERT(X)
 #endif
 
-#define miniflow_push_uint64_(MF, OFS, VALUE)                   \
-{                                                               \
-    MINIFLOW_ASSERT(MF.data < MF.end && (OFS) % 8 == 0          \
-                    && !(MF.map & (UINT64_MAX << (OFS) / 8)));  \
-    *MF.data++ = VALUE;                                         \
-    MF.map |= UINT64_C(1) << (OFS) / 8;                         \
+#define miniflow_set_map(MF, OFS)                                       \
+    if ((OFS) < FLOW_TNL_U64S) {                                        \
+        MINIFLOW_ASSERT(!(MF.maps.tnl_map & (UINT64_MAX << (OFS)))      \
+                        && !MF.maps.pkt_map);                           \
+        MF.maps.tnl_map |= UINT64_C(1) << (OFS);                        \
+    } else {                                                            \
+        MINIFLOW_ASSERT(!(MF.maps.pkt_map                               \
+                          & UINT64_MAX << ((OFS) - FLOW_TNL_U64S)));    \
+        MF.maps.pkt_map |= UINT64_C(1) << ((OFS) - FLOW_TNL_U64S);      \
+    }
+
+#define miniflow_assert_in_map(MF, OFS)                                 \
+    if ((OFS) < FLOW_TNL_U64S) {                                        \
+        MINIFLOW_ASSERT(MF.maps.tnl_map & UINT64_C(1) << (OFS)          \
+                        && !(MF.maps.tnl_map & UINT64_MAX << ((OFS) + 1)) \
+                        && !MF.maps.pkt_map);                           \
+    } else {                                                            \
+        MINIFLOW_ASSERT(MF.maps.pkt_map & UINT64_C(1) << ((OFS) - FLOW_TNL_U64S) \
+                        && !(MF.maps.pkt_map & UINT64_MAX << ((OFS) - FLOW_TNL_U64S + 1))); \
+    }
+
+#define miniflow_push_uint64_(MF, OFS, VALUE)                           \
+{                                                                       \
+    MINIFLOW_ASSERT(MF.data < MF.end && (OFS) % 8 == 0);                \
+    *MF.data++ = VALUE;                                                 \
+    miniflow_set_map(MF, OFS / 8);                                      \
 }
 
 #define miniflow_push_be64_(MF, OFS, VALUE) \
     miniflow_push_uint64_(MF, OFS, (OVS_FORCE uint64_t)(VALUE))
 
-#define miniflow_push_uint32_(MF, OFS, VALUE)                   \
-{                                                               \
-    MINIFLOW_ASSERT(MF.data < MF.end &&                                 \
-                    (((OFS) % 8 == 0 && !(MF.map & (UINT64_MAX << (OFS) / 8))) \
-                     || ((OFS) % 8 == 4 && MF.map & (UINT64_C(1) << (OFS) / 8) \
-                         && !(MF.map & (UINT64_MAX << ((OFS) / 8 + 1)))))); \
+#define miniflow_push_uint32_(MF, OFS, VALUE)                           \
+    {                                                                   \
+    MINIFLOW_ASSERT(MF.data < MF.end);                                  \
                                                                         \
     if ((OFS) % 8 == 0) {                                               \
+        miniflow_set_map(MF, OFS / 8);                                  \
         *(uint32_t *)MF.data = VALUE;                                   \
-        MF.map |= UINT64_C(1) << (OFS) / 8;                             \
     } else if ((OFS) % 8 == 4) {                                        \
+        miniflow_assert_in_map(MF, OFS / 8);                            \
         *((uint32_t *)MF.data + 1) = VALUE;                             \
         MF.data++;                                                      \
     }                                                                   \
@@ -164,29 +182,28 @@ BUILD_MESSAGE("FLOW_WC_SEQ changed: miniflow_extract() will have runtime "
 
 #define miniflow_push_uint16_(MF, OFS, VALUE)                           \
 {                                                                       \
-    MINIFLOW_ASSERT(MF.data < MF.end &&                                 \
-                    (((OFS) % 8 == 0 && !(MF.map & (UINT64_MAX << (OFS) / 8))) \
-                     || ((OFS) % 2 == 0 && MF.map & (UINT64_C(1) << (OFS) / 8) \
-                         && !(MF.map & (UINT64_MAX << ((OFS) / 8 + 1)))))); \
+    MINIFLOW_ASSERT(MF.data < MF.end);                                  \
                                                                         \
     if ((OFS) % 8 == 0) {                                               \
+        miniflow_set_map(MF, OFS / 8);                                  \
         *(uint16_t *)MF.data = VALUE;                                   \
-        MF.map |= UINT64_C(1) << (OFS) / 8;                             \
     } else if ((OFS) % 8 == 2) {                                        \
+        miniflow_assert_in_map(MF, OFS / 8);                            \
         *((uint16_t *)MF.data + 1) = VALUE;                             \
     } else if ((OFS) % 8 == 4) {                                        \
+        miniflow_assert_in_map(MF, OFS / 8);                            \
         *((uint16_t *)MF.data + 2) = VALUE;                             \
     } else if ((OFS) % 8 == 6) {                                        \
+        miniflow_assert_in_map(MF, OFS / 8);                            \
         *((uint16_t *)MF.data + 3) = VALUE;                             \
         MF.data++;                                                      \
     }                                                                   \
 }
 
 #define miniflow_pad_to_64_(MF, OFS)                                    \
-{                                                                   \
+{                                                                       \
     MINIFLOW_ASSERT((OFS) % 8 != 0);                                    \
-    MINIFLOW_ASSERT(MF.map & (UINT64_C(1) << (OFS) / 8));               \
-    MINIFLOW_ASSERT(!(MF.map & (UINT64_MAX << ((OFS) / 8 + 1))));       \
+    miniflow_assert_in_map(MF, OFS / 8);                                \
                                                                         \
     memset((uint8_t *)MF.data + (OFS) % 8, 0, 8 - (OFS) % 8);           \
     MF.data++;                                                          \
@@ -195,31 +212,42 @@ BUILD_MESSAGE("FLOW_WC_SEQ changed: miniflow_extract() will have runtime "
 #define miniflow_push_be16_(MF, OFS, VALUE)                     \
     miniflow_push_uint16_(MF, OFS, (OVS_FORCE uint16_t)VALUE);
 
+#define miniflow_set_maps(MF, OFS, N_WORDS)                             \
+{                                                                       \
+    size_t ofs = (OFS);                                                 \
+    size_t n_words = (N_WORDS);                                         \
+    uint64_t n_words_mask = UINT64_MAX >> (64 - n_words);               \
+                                                                        \
+    MINIFLOW_ASSERT(n_words && MF.data + n_words <= MF.end);            \
+    if (ofs < FLOW_TNL_U64S) {                                          \
+        MINIFLOW_ASSERT(!(MF.maps.tnl_map & UINT64_MAX << ofs)          \
+                        && !MF.maps.pkt_map);                           \
+        MF.maps.tnl_map |= n_words_mask << ofs;                         \
+        if (n_words > FLOW_TNL_U64S - ofs) {                            \
+            MF.maps.pkt_map |= n_words_mask >> (FLOW_TNL_U64S - ofs);   \
+        }                                                               \
+    } else {                                                            \
+        ofs -= FLOW_TNL_U64S;                                           \
+        MINIFLOW_ASSERT(!(MF.maps.pkt_map & (UINT64_MAX << ofs)));      \
+        MF.maps.pkt_map |= n_words_mask << ofs;                         \
+    }                                                                   \
+}
+
 /* Data at 'valuep' may be unaligned. */
 #define miniflow_push_words_(MF, OFS, VALUEP, N_WORDS)          \
 {                                                               \
-    int ofs64 = (OFS) / 8;                                      \
-                                                                        \
-    MINIFLOW_ASSERT(MF.data + (N_WORDS) <= MF.end && (OFS) % 8 == 0     \
-                    && !(MF.map & (UINT64_MAX << ofs64)));              \
-                                                                        \
-    memcpy(MF.data, (VALUEP), (N_WORDS) * sizeof *MF.data);             \
-    MF.data += (N_WORDS);                                               \
-    MF.map |= ((UINT64_MAX >> (64 - (N_WORDS))) << ofs64);              \
+    MINIFLOW_ASSERT((OFS) % 8 == 0);                            \
+    miniflow_set_maps(MF, (OFS) / 8, (N_WORDS));                \
+    memcpy(MF.data, (VALUEP), (N_WORDS) * sizeof *MF.data);     \
+    MF.data += (N_WORDS);                                       \
 }
 
 /* Push 32-bit words padded to 64-bits. */
 #define miniflow_push_words_32_(MF, OFS, VALUEP, N_WORDS)               \
 {                                                                       \
-    int ofs64 = (OFS) / 8;                                              \
-                                                                        \
-    MINIFLOW_ASSERT(MF.data + DIV_ROUND_UP(N_WORDS, 2) <= MF.end        \
-                    && (OFS) % 8 == 0                                   \
-                    && !(MF.map & (UINT64_MAX << ofs64)));              \
-                                                                        \
+    miniflow_set_maps(MF, (OFS) / 8, DIV_ROUND_UP(N_WORDS, 2));         \
     memcpy(MF.data, (VALUEP), (N_WORDS) * sizeof(uint32_t));            \
     MF.data += DIV_ROUND_UP(N_WORDS, 2);                                \
-    MF.map |= ((UINT64_MAX >> (64 - DIV_ROUND_UP(N_WORDS, 2))) << ofs64); \
     if ((N_WORDS) & 1) {                                                \
         *((uint32_t *)MF.data - 1) = 0;                                 \
     }                                                                   \
@@ -229,14 +257,9 @@ BUILD_MESSAGE("FLOW_WC_SEQ changed: miniflow_extract() will have runtime "
 /* MACs start 64-aligned, and must be followed by other data or padding. */
 #define miniflow_push_macs_(MF, OFS, VALUEP)                    \
 {                                                               \
-    int ofs64 = (OFS) / 8;                                      \
-                                                                \
-    MINIFLOW_ASSERT(MF.data + 2 <= MF.end && (OFS) % 8 == 0     \
-                    && !(MF.map & (UINT64_MAX << ofs64)));      \
-                                                                \
+    miniflow_set_maps(MF, (OFS) / 8, 2);                        \
     memcpy(MF.data, (VALUEP), 2 * ETH_ADDR_LEN);                \
     MF.data += 1;                   /* First word only. */      \
-    MF.map |= UINT64_C(3) << ofs64; /* Both words. */           \
 }
 
 #define miniflow_push_uint32(MF, FIELD, VALUE)                      \
@@ -429,7 +452,7 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
     const void *data = dp_packet_data(packet);
     size_t size = dp_packet_size(packet);
     uint64_t *values = miniflow_values(dst);
-    struct mf_ctx mf = { 0, values, values + FLOW_U64S };
+    struct mf_ctx mf = { { 0, 0 }, values, values + FLOW_U64S };
     const char *l2;
     ovs_be16 dl_type;
     uint8_t nw_frag, nw_tos, nw_ttl, nw_proto;
@@ -735,7 +758,7 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
         }
     }
  out:
-    dst->map = mf.map;
+    *dst = mf.maps;
 }
 
 /* For every bit of a field that is wildcarded in 'wildcards', sets the
@@ -1222,54 +1245,58 @@ void flow_wildcards_init_for_packet(struct flow_wildcards *wc,
  * optimal.
  *
  * This is a less precise version of flow_wildcards_init_for_packet() above. */
-uint64_t
-flow_wc_map(const struct flow *flow)
+void
+flow_wc_map(const struct flow *flow, struct miniflow *map)
 {
     /* Update this function whenever struct flow changes. */
     BUILD_ASSERT_DECL(FLOW_WC_SEQ == 33);
 
-    uint64_t map = (flow->tunnel.ip_dst) ? MINIFLOW_MAP(tunnel) : 0;
+    map->tnl_map = 0;
+    if (flow->tunnel.ip_dst) {
+        map->tnl_map = MINIFLOW_TNL_MAP(tunnel);
+        if (!flow->tunnel.metadata.opt_map) {
+            map->tnl_map &= ~MINIFLOW_TNL_MAP(tunnel.metadata);
+        }
+    }
 
     /* Metadata fields that can appear on packet input. */
-    map |= MINIFLOW_MAP(skb_priority) | MINIFLOW_MAP(pkt_mark)
-        | MINIFLOW_MAP(recirc_id) | MINIFLOW_MAP(dp_hash)
-        | MINIFLOW_MAP(in_port)
-        | MINIFLOW_MAP(dl_dst) | MINIFLOW_MAP(dl_src)
-        | MINIFLOW_MAP(dl_type) | MINIFLOW_MAP(vlan_tci);
+    map->pkt_map = MINIFLOW_PKT_MAP(skb_priority) | MINIFLOW_PKT_MAP(pkt_mark)
+        | MINIFLOW_PKT_MAP(recirc_id) | MINIFLOW_PKT_MAP(dp_hash)
+        | MINIFLOW_PKT_MAP(in_port)
+        | MINIFLOW_PKT_MAP(dl_dst) | MINIFLOW_PKT_MAP(dl_src)
+        | MINIFLOW_PKT_MAP(dl_type) | MINIFLOW_PKT_MAP(vlan_tci);
 
     /* Ethertype-dependent fields. */
     if (OVS_LIKELY(flow->dl_type == htons(ETH_TYPE_IP))) {
-        map |= MINIFLOW_MAP(nw_src) | MINIFLOW_MAP(nw_dst)
-            | MINIFLOW_MAP(nw_proto) | MINIFLOW_MAP(nw_frag)
-            | MINIFLOW_MAP(nw_tos) | MINIFLOW_MAP(nw_ttl);
+        map->pkt_map |= MINIFLOW_PKT_MAP(nw_src) | MINIFLOW_PKT_MAP(nw_dst)
+            | MINIFLOW_PKT_MAP(nw_proto) | MINIFLOW_PKT_MAP(nw_frag)
+            | MINIFLOW_PKT_MAP(nw_tos) | MINIFLOW_PKT_MAP(nw_ttl);
         if (OVS_UNLIKELY(flow->nw_proto == IPPROTO_IGMP)) {
-            map |= MINIFLOW_MAP(igmp_group_ip4);
+            map->pkt_map |= MINIFLOW_PKT_MAP(igmp_group_ip4);
         } else {
-            map |= MINIFLOW_MAP(tcp_flags)
-                | MINIFLOW_MAP(tp_src) | MINIFLOW_MAP(tp_dst);
+            map->pkt_map |= MINIFLOW_PKT_MAP(tcp_flags)
+                | MINIFLOW_PKT_MAP(tp_src) | MINIFLOW_PKT_MAP(tp_dst);
         }
     } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
-        map |= MINIFLOW_MAP(ipv6_src) | MINIFLOW_MAP(ipv6_dst)
-            | MINIFLOW_MAP(ipv6_label)
-            | MINIFLOW_MAP(nw_proto) | MINIFLOW_MAP(nw_frag)
-            | MINIFLOW_MAP(nw_tos) | MINIFLOW_MAP(nw_ttl);
+        map->pkt_map |= MINIFLOW_PKT_MAP(ipv6_src) | MINIFLOW_PKT_MAP(ipv6_dst)
+            | MINIFLOW_PKT_MAP(ipv6_label)
+            | MINIFLOW_PKT_MAP(nw_proto) | MINIFLOW_PKT_MAP(nw_frag)
+            | MINIFLOW_PKT_MAP(nw_tos) | MINIFLOW_PKT_MAP(nw_ttl);
         if (OVS_UNLIKELY(flow->nw_proto == IPPROTO_ICMPV6)) {
-            map |= MINIFLOW_MAP(nd_target)
-                | MINIFLOW_MAP(arp_sha) | MINIFLOW_MAP(arp_tha);
+            map->pkt_map |= MINIFLOW_PKT_MAP(nd_target)
+                | MINIFLOW_PKT_MAP(arp_sha) | MINIFLOW_PKT_MAP(arp_tha);
         } else {
-            map |= MINIFLOW_MAP(tcp_flags)
-                | MINIFLOW_MAP(tp_src) | MINIFLOW_MAP(tp_dst);
+            map->pkt_map |= MINIFLOW_PKT_MAP(tcp_flags)
+                | MINIFLOW_PKT_MAP(tp_src) | MINIFLOW_PKT_MAP(tp_dst);
         }
     } else if (eth_type_mpls(flow->dl_type)) {
-        map |= MINIFLOW_MAP(mpls_lse);
+        map->pkt_map |= MINIFLOW_PKT_MAP(mpls_lse);
     } else if (flow->dl_type == htons(ETH_TYPE_ARP) ||
                flow->dl_type == htons(ETH_TYPE_RARP)) {
-        map |= MINIFLOW_MAP(nw_src) | MINIFLOW_MAP(nw_dst)
-            | MINIFLOW_MAP(nw_proto)
-            | MINIFLOW_MAP(arp_sha) | MINIFLOW_MAP(arp_tha);
+        map->pkt_map |= MINIFLOW_PKT_MAP(nw_src) | MINIFLOW_PKT_MAP(nw_dst)
+            | MINIFLOW_PKT_MAP(nw_proto)
+            | MINIFLOW_PKT_MAP(arp_sha) | MINIFLOW_PKT_MAP(arp_tha);
     }
-
-    return map;
 }
 
 /* Clear the metadata and register wildcard masks. They are not packet
@@ -1422,10 +1449,11 @@ miniflow_hash_5tuple(const struct miniflow *flow, uint32_t basis)
 
         /* Separate loops for better optimization. */
         if (dl_type == htons(ETH_TYPE_IPV6)) {
-            uint64_t map = MINIFLOW_MAP(ipv6_src) | MINIFLOW_MAP(ipv6_dst);
+            struct miniflow maps = { 0, MINIFLOW_PKT_MAP(ipv6_src)
+                                     | MINIFLOW_PKT_MAP(ipv6_dst) };
             uint64_t value;
 
-            MINIFLOW_FOR_EACH_IN_MAP(value, flow, map) {
+            MINIFLOW_FOR_EACH_IN_PKT_MAP(value, flow, maps) {
                 hash = hash_add64(hash, value);
             }
         } else {
@@ -2174,15 +2202,10 @@ flow_compose(struct dp_packet *p, const struct flow *flow)
 
 /* Compressed flow. */
 
-static int
-miniflow_n_values(const struct miniflow *flow)
-{
-    return count_1bits(flow->map);
-}
-
 /* Completes an initialization of 'dst' as a miniflow copy of 'src' begun by
- * the caller.  The caller must have already computed 'dst->map' properly
- * to indicate the significant uint64_t elements of 'src'.
+ * the caller.  The caller must have already computed 'dst->tnl_map' and
+ * 'dst->pkt_map' properly to indicate the significant uint64_t elements of
+ * 'src'.
  *
  * Normally the significant elements are the ones that are non-zero.  However,
  * when a miniflow is initialized from a (mini)mask, the values can be zeroes,
@@ -2192,14 +2215,14 @@ miniflow_init(struct miniflow *dst, const struct flow *src)
 {
     const uint64_t *src_u64 = (const uint64_t *) src;
     uint64_t *dst_u64 = miniflow_values(dst);
-    int idx;
+    size_t idx;
 
-    MAP_FOR_EACH_INDEX(idx, dst->map) {
+    MAPS_FOR_EACH_INDEX(idx, *dst) {
         *dst_u64++ = src_u64[idx];
     }
 }
 
-/* Initialize the map of 'flow' from 'src'. */
+/* Initialize the maps of 'flow' from 'src'. */
 void
 miniflow_map_init(struct miniflow *flow, const struct flow *src)
 {
@@ -2207,10 +2230,17 @@ miniflow_map_init(struct miniflow *flow, const struct flow *src)
     int i;
 
     /* Initialize map, counting the number of nonzero elements. */
-    flow->map = 0;
-    for (i = 0; i < FLOW_U64S; i++) {
+    flow->tnl_map = 0;
+    for (i = 0; i < FLOW_TNL_U64S; i++) {
+        if (src_u64[i]) {
+            flow->tnl_map |= UINT64_C(1) << i;
+        }
+    }
+    src_u64 += FLOW_TNL_U64S;
+    flow->pkt_map = 0;
+    for (i = 0; i < FLOW_U64S - FLOW_TNL_U64S; i++) {
         if (src_u64[i]) {
-            flow->map |= UINT64_C(1) << i;
+            flow->pkt_map |= UINT64_C(1) << i;
         }
     }
 }
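
These helpers compose into the usual three-step construction of a miniflow
from a flow (caller sketch, assuming a 'flow' in scope):

    struct miniflow tmp;
    struct miniflow *mfs[1];

    miniflow_map_init(&tmp, &flow);   /* 1. Compute tnl_map and pkt_map. */
    miniflow_alloc(mfs, 1, &tmp);     /* 2. Allocate maps plus values.   */
    miniflow_init(mfs[0], &flow);     /* 3. Fill in the values.          */
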
@@ -2221,17 +2251,18 @@ miniflow_map_init(struct miniflow *flow, const struct flow *src)
 size_t
 miniflow_alloc(struct miniflow *dsts[], size_t n, const struct miniflow *src)
 {
-    size_t data_size = MINIFLOW_VALUES_SIZE(count_1bits(src->map));
-    size_t size = sizeof *src + data_size;
-    struct miniflow *dst = xmalloc(n * size);
+    size_t n_values = miniflow_n_values(src);
+    size_t data_size = MINIFLOW_VALUES_SIZE(n_values);
+    struct miniflow *dst = xmalloc(n * (sizeof *src + data_size));
     unsigned int i;
 
     COVERAGE_INC(miniflow_malloc);
 
     for (i = 0; i < n; i++) {
-        dst->map = src->map;
+        *dst = *src;   /* Copy maps. */
         dsts[i] = dst;
-        dst += size / sizeof *dst;
+        dst += 1;      /* Just past the maps. */
+        dst = (struct miniflow *)((uint64_t *)dst + n_values); /* Skip data. */
     }
     return data_size;
 }
@@ -2257,7 +2288,7 @@ void
 miniflow_clone(struct miniflow *dst, const struct miniflow *src,
                size_t n_values)
 {
-    dst->map = src->map;
+    *dst = *src;   /* Copy maps. */
     memcpy(miniflow_values(dst), miniflow_get_values(src),
            MINIFLOW_VALUES_SIZE(n_values));
 }
@@ -2277,17 +2308,26 @@ miniflow_equal(const struct miniflow *a, const struct miniflow *b)
     const uint64_t *ap = miniflow_get_values(a);
     const uint64_t *bp = miniflow_get_values(b);
 
-    if (OVS_LIKELY(a->map == b->map)) {
-        int count = miniflow_n_values(a);
-
-        return !memcmp(ap, bp, count * sizeof *ap);
+    if (OVS_LIKELY(a->tnl_map == b->tnl_map && a->pkt_map == b->pkt_map)) {
+        return !memcmp(ap, bp, miniflow_n_values(a) * sizeof *ap);
     } else {
         uint64_t map;
 
-        for (map = a->map | b->map; map; map = zero_rightmost_1bit(map)) {
+        map = a->tnl_map | b->tnl_map;
+        for (; map; map = zero_rightmost_1bit(map)) {
+            uint64_t bit = rightmost_1bit(map);
+
+            if ((a->tnl_map & bit ? *ap++ : 0)
+                != (b->tnl_map & bit ? *bp++ : 0)) {
+                return false;
+            }
+        }
+        map = a->pkt_map | b->pkt_map;
+        for (; map; map = zero_rightmost_1bit(map)) {
             uint64_t bit = rightmost_1bit(map);
 
-            if ((a->map & bit ? *ap++ : 0) != (b->map & bit ? *bp++ : 0)) {
+            if ((a->pkt_map & bit ? *ap++ : 0)
+                != (b->pkt_map & bit ? *bp++ : 0)) {
                 return false;
             }
         }
@@ -2303,9 +2343,9 @@ miniflow_equal_in_minimask(const struct miniflow *a, const struct miniflow *b,
                            const struct minimask *mask)
 {
     const uint64_t *p = miniflow_get_values(&mask->masks);
-    int idx;
+    size_t idx;
 
-    MAP_FOR_EACH_INDEX(idx, mask->masks.map) {
+    MAPS_FOR_EACH_INDEX(idx, mask->masks) {
         if ((miniflow_get(a, idx) ^ miniflow_get(b, idx)) & *p++) {
             return false;
         }
@@ -2322,9 +2362,9 @@ miniflow_equal_flow_in_minimask(const struct miniflow *a, const struct flow *b,
 {
     const uint64_t *b_u64 = (const uint64_t *) b;
     const uint64_t *p = miniflow_get_values(&mask->masks);
-    int idx;
+    size_t idx;
 
-    MAP_FOR_EACH_INDEX(idx, mask->masks.map) {
+    MAPS_FOR_EACH_INDEX(idx, mask->masks) {
         if ((miniflow_get(a, idx) ^ b_u64[idx]) & *p++) {
             return false;
         }
@@ -2362,15 +2402,31 @@ minimask_combine(struct minimask *dst_,
     uint64_t *dst_values = storage;
     const struct miniflow *a = &a_->masks;
     const struct miniflow *b = &b_->masks;
-    int idx;
+    const uint64_t *ap = miniflow_get_values(a);
+    const uint64_t *bp = miniflow_get_values(b);
+    size_t idx;
 
-    dst->map = 0;
-    MAP_FOR_EACH_INDEX(idx, a->map & b->map) {
+    dst->tnl_map = 0;
+    MAP_FOR_EACH_INDEX(idx, a->tnl_map & b->tnl_map) {
         /* Both 'a' and 'b' have non-zero data at 'idx'. */
-        uint64_t mask = miniflow_get__(a, idx) & miniflow_get__(b, idx);
+        uint64_t mask = *miniflow_values_get__(ap, a->tnl_map, idx)
+            & *miniflow_values_get__(bp, b->tnl_map, idx);
 
         if (mask) {
-            dst->map |= UINT64_C(1) << idx;
+            dst->tnl_map |= UINT64_C(1) << idx;
+            *dst_values++ = mask;
+        }
+    }
+    dst->pkt_map = 0;
+    ap += count_1bits(a->tnl_map);   /* Skip tnl_map values. */
+    bp += count_1bits(b->tnl_map);   /* Skip tnl_map values. */
+    MAP_FOR_EACH_INDEX(idx, a->pkt_map & b->pkt_map) {
+        /* Both 'a' and 'b' have non-zero data at 'idx'. */
+        uint64_t mask = *miniflow_values_get__(ap, a->pkt_map, idx)
+            & *miniflow_values_get__(bp, b->pkt_map, idx);
+
+        if (mask) {
+            dst->pkt_map |= UINT64_C(1) << idx;
             *dst_values++ = mask;
         }
     }
@@ -2389,9 +2445,10 @@ minimask_expand(const struct minimask *mask, struct flow_wildcards *wc)
 bool
 minimask_equal(const struct minimask *a, const struct minimask *b)
 {
-    return a->masks.map == b->masks.map &&
+    return a->masks.tnl_map == b->masks.tnl_map
+        && a->masks.pkt_map == b->masks.pkt_map &&
         !memcmp(miniflow_get_values(&a->masks), miniflow_get_values(&b->masks),
-                MINIFLOW_VALUES_SIZE(count_1bits(a->masks.map)));
+                MINIFLOW_VALUES_SIZE(miniflow_n_values(&a->masks)));
 }
 
 /* Returns true if at least one bit matched by 'b' is wildcarded by 'a',
@@ -2401,15 +2458,25 @@ minimask_has_extra(const struct minimask *a, const struct minimask *b)
 {
     const uint64_t *ap = miniflow_get_values(&a->masks);
     const uint64_t *bp = miniflow_get_values(&b->masks);
-    int idx;
+    size_t idx;
 
-    MAP_FOR_EACH_INDEX(idx, b->masks.map) {
+    MAP_FOR_EACH_INDEX(idx, b->masks.tnl_map) {
         uint64_t b_u64 = *bp++;
 
         /* 'b_u64' is non-zero, check if the data in 'a' is either zero
          * or misses some of the bits in 'b_u64'. */
-        if (!(a->masks.map & (UINT64_C(1) << idx))
-            || ((miniflow_values_get__(ap, a->masks.map, idx) & b_u64)
+        if (!(a->masks.tnl_map & (UINT64_C(1) << idx))
+            || ((*miniflow_values_get__(ap, a->masks.tnl_map, idx) & b_u64)
+                != b_u64)) {
+            return true; /* 'a' wildcards some bits 'b' doesn't. */
+        }
+    }
+    ap += count_1bits(a->masks.tnl_map);   /* Skip tnl_map values. */
+    MAP_FOR_EACH_INDEX(idx, b->masks.pkt_map) {
+        uint64_t b_u64 = *bp++;
+
+        if (!(a->masks.pkt_map & (UINT64_C(1) << idx))
+            || ((*miniflow_values_get__(ap, a->masks.pkt_map, idx) & b_u64)
                 != b_u64)) {
             return true; /* 'a' wildcards some bits 'b' doesn't. */
         }
diff --git a/lib/flow.h b/lib/flow.h
index 8a63af1..85a9792 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -149,6 +149,9 @@ struct flow {
                                  * Keep last for BUILD_ASSERT_DECL below. */
 };
 BUILD_ASSERT_DECL(sizeof(struct flow) % sizeof(uint64_t) == 0);
+BUILD_ASSERT_DECL(sizeof(struct flow_tnl) % sizeof(uint64_t) == 0);
+/* Number of uint64_t units in flow tunnel metadata. */
+#define FLOW_TNL_U64S (sizeof(struct flow_tnl) / sizeof(uint64_t))
 
 #define FLOW_U64S (sizeof(struct flow) / sizeof(uint64_t))
 
@@ -329,7 +332,6 @@ void flow_wildcards_init_catchall(struct flow_wildcards *);
 
 void flow_wildcards_init_for_packet(struct flow_wildcards *,
                                     const struct flow *);
-uint64_t flow_wc_map(const struct flow *);
 
 void flow_wildcards_clear_non_packet_fields(struct flow_wildcards *);
 
@@ -374,8 +376,11 @@ bool flow_equal_except(const struct flow *a, const struct flow *b,
 
 /* Compressed flow. */
 
-/* Maximum number of 64-bit words supported. */
-BUILD_ASSERT_DECL(FLOW_U64S <= 64);
+/* Check that all tunnel fields fit into a single map. */
+BUILD_ASSERT_DECL(FLOW_TNL_U64S <= 64);
+
+/* Check that all non-tunnel fields fit into a single map. */
+BUILD_ASSERT_DECL(FLOW_U64S - FLOW_TNL_U64S <= 64);
 
 /* A sparse representation of a "struct flow".
  *
@@ -384,27 +389,29 @@ BUILD_ASSERT_DECL(FLOW_U64S <= 64);
  * importantly, minimizes the number of accessed cache lines.  Second, it saves
  * time when the goal is to iterate over only the nonzero parts of the struct.
  *
- * The 'map' member holds one bit for each uint64_t in a "struct flow".  Each
+ * The map members hold one bit for each uint64_t in a "struct flow".  Each
  * 0-bit indicates that the corresponding uint64_t is zero, each 1-bit that it
  * *may* be nonzero (see below how this applies to minimasks).
  *
- * The values indicated by 'map' always follow the 'map' in memory.  The user
- * of the miniflow is responsible for always having enough storage after 'map'
- * corresponding to the number of 1-bits in 'map'.
-
+ * The values indicated by 'tnl_map' and 'pkt_map' always follow the maps in
+ * memory.  The user of the miniflow is responsible for always having enough
+ * storage after the struct miniflow corresponding to the number of 1-bits in
+ * the maps.
+ *
  * Elements in values array are allowed to be zero.  This is useful for "struct
  * minimatch", for which ensuring that the miniflow and minimask members have
- * same 'map' allows optimization.  This allowance applies only to a miniflow
+ * the same maps allows optimization.  This allowance applies only to a miniflow
  * that is not a mask.  That is, a minimask may NOT have zero elements in its
- * 'values'.
+ * values.
  *
- * A miniflow is always dynamically allocated so that the 'values' array has as
- * many elements as there are 1-bits in 'map'. */
+ * A miniflow is always dynamically allocated so that the maps are followed by
+ * at least as many elements as there are 1-bits in maps. */
 struct miniflow {
-    uint64_t map;
+    uint64_t tnl_map;
+    uint64_t pkt_map;
     /* uint64_t values[];   Storage follows 'map' in memory. */
 };
-BUILD_ASSERT_DECL(sizeof(struct miniflow) == sizeof(uint64_t));
+BUILD_ASSERT_DECL(sizeof(struct miniflow) == 2 * sizeof(uint64_t));
 
 #define MINIFLOW_VALUES_SIZE(COUNT) ((COUNT) * sizeof(uint64_t))
 
@@ -425,6 +432,7 @@ struct pkt_metadata;
  * were extracted. */
 void miniflow_extract(struct dp_packet *packet, struct miniflow *dst);
 void miniflow_map_init(struct miniflow *, const struct flow *);
+void flow_wc_map(const struct flow *, struct miniflow *);
 size_t miniflow_alloc(struct miniflow *dsts[], size_t n,
                       const struct miniflow *src);
 void miniflow_init(struct miniflow *, const struct flow *);
@@ -436,29 +444,56 @@ void miniflow_expand(const struct miniflow *, struct flow *);
 
 static inline uint64_t flow_u64_value(const struct flow *flow, size_t index)
 {
-    return ((uint64_t *)(flow))[index];
+    return ((uint64_t *)flow)[index];
 }
 
 static inline uint64_t *flow_u64_lvalue(struct flow *flow, size_t index)
 {
-    return &((uint64_t *)(flow))[index];
+    return &((uint64_t *)flow)[index];
+}
+
+static inline size_t
+miniflow_n_values(const struct miniflow *flow)
+{
+    return count_1bits(flow->tnl_map) + count_1bits(flow->pkt_map);
+}
+
+struct flow_for_each_in_maps_aux {
+    const uint64_t *values;
+    struct miniflow maps;
+};
+
+static inline uint64_t
+flow_values_get_next_in_map(const uint64_t *values, uint64_t *map)
+{
+    uint64_t value = values[raw_ctz(*map)];
+
+    *map = zero_rightmost_1bit(*map);
+
+    return value;
 }
 
 static inline bool
-flow_get_next_in_map(const struct flow *flow, uint64_t map, uint64_t *value)
+flow_values_get_next_in_maps(struct flow_for_each_in_maps_aux *aux,
+                             uint64_t *value)
 {
-    if (map) {
-        *value = flow_u64_value(flow, raw_ctz(map));
+    if (aux->maps.tnl_map) {
+        *value = flow_values_get_next_in_map(aux->values, &aux->maps.tnl_map);
+        return true;
+    }
+    if (aux->maps.pkt_map) {
+        *value = flow_values_get_next_in_map(aux->values + FLOW_TNL_U64S,
+                                             &aux->maps.pkt_map);
         return true;
     }
     return false;
 }
 
-/* Iterate through all flow u64 values specified by 'MAP'. */
-#define FLOW_FOR_EACH_IN_MAP(VALUE, FLOW, MAP)         \
-    for (uint64_t map__ = (MAP);                       \
-         flow_get_next_in_map(FLOW, map__, &(VALUE));  \
-         map__ = zero_rightmost_1bit(map__))
+/* Iterate through all flow u64 values specified by 'MAPS'. */
+#define FLOW_FOR_EACH_IN_MAPS(VALUE, FLOW, MAPS)            \
+    for (struct flow_for_each_in_maps_aux aux__             \
+             = { (const uint64_t *)(FLOW), (MAPS) };        \
+         flow_values_get_next_in_maps(&aux__, &(VALUE));)
 
 /* Iterate through all struct flow u64 indices specified by 'MAP'. */
 #define MAP_FOR_EACH_INDEX(U64IDX, MAP)                 \
@@ -466,12 +501,27 @@ flow_get_next_in_map(const struct flow *flow, uint64_t map, uint64_t *value)
          map__ && ((U64IDX) = raw_ctz(map__), true);    \
          map__ = zero_rightmost_1bit(map__))
 
+/* Iterate through all struct flow u64 indices specified by 'MAPS'. */
+#define MAPS_FOR_EACH_INDEX(U64IDX, MAPS)                               \
+    for (struct miniflow maps__ = (MAPS);                               \
+         maps__.tnl_map                                                 \
+             ? ((U64IDX) = raw_ctz(maps__.tnl_map),                     \
+                maps__.tnl_map = zero_rightmost_1bit(maps__.tnl_map),   \
+                true)                                                   \
+             : (maps__.pkt_map &&                                       \
+                ((U64IDX) = FLOW_TNL_U64S + raw_ctz(maps__.pkt_map),    \
+                 maps__.pkt_map = zero_rightmost_1bit(maps__.pkt_map),  \
+                 true));)
+
 #define FLOW_U64_SIZE(FIELD)                                            \
     DIV_ROUND_UP(sizeof(((struct flow *)0)->FIELD), sizeof(uint64_t))
 
-#define MINIFLOW_MAP(FIELD)                       \
-    (((UINT64_C(1) << FLOW_U64_SIZE(FIELD)) - 1)  \
+#define MINIFLOW_TNL_MAP(FIELD)                                         \
+    (((UINT64_C(1) << FLOW_U64_SIZE(FIELD)) - 1)                        \
      << (offsetof(struct flow, FIELD) / sizeof(uint64_t)))
+#define MINIFLOW_PKT_MAP(FIELD)                                         \
+    (((UINT64_C(1) << FLOW_U64_SIZE(FIELD)) - 1)                        \
+     << ((offsetof(struct flow, FIELD) / sizeof(uint64_t)) - FLOW_TNL_U64S))
 
 struct mf_for_each_in_map_aux {
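MAPS_FOR_EACH_INDEX yields absolute struct flow u64 indices across both
maps, so a caller can index the full struct directly (cf.
minimatch_matches_flow() in lib/match.c below):

    size_t idx;

    MAPS_FOR_EACH_INDEX(idx, mask->masks) {
        /* tnl_map indices come first, then pkt_map indices offset
         * by FLOW_TNL_U64S; 'idx' indexes u64 units of struct flow. */
    }
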
     const uint64_t *values;
@@ -480,64 +530,76 @@ struct mf_for_each_in_map_aux {
 };
 
 static inline bool
-mf_get_next_in_map(struct mf_for_each_in_map_aux *aux, uint64_t *value)
+mf_get_next_in_map(struct mf_for_each_in_map_aux *aux,
+                   uint64_t *value)
 {
     if (aux->map) {
         uint64_t rm1bit = rightmost_1bit(aux->map);
+
         aux->map -= rm1bit;
 
         if (aux->fmap & rm1bit) {
-            /* Advance 'aux->values' to point to the value for 'rm1bit'. */
             uint64_t trash = aux->fmap & (rm1bit - 1);
-            if (trash) {
-                aux->fmap -= trash;
-                aux->values += count_1bits(trash);
-            }
-
-            /* Retrieve the value for 'rm1bit' then advance past it. */
-            aux->fmap -= rm1bit;
-            *value = *aux->values++;
+
+            aux->fmap -= trash;
+            /* count_1bits() is fast for systems where speed matters (e.g.,
+             * DPDK), so we don't try to avoid using it.
+             * Advance 'aux->values' to point to the value for 'rm1bit'. */
+            aux->values += count_1bits(trash);
+
+            *value = *aux->values;
         } else {
             *value = 0;
         }
         return true;
-    } else {
-        return false;
     }
+    return false;
 }
 
-/* Iterate through all miniflow u64 values specified by 'MAP'. */
-#define MINIFLOW_FOR_EACH_IN_MAP(VALUE, FLOW, MAP)           \
-    for (struct mf_for_each_in_map_aux aux__                 \
-             = { miniflow_get_values(FLOW), (FLOW)->map, MAP }; \
-         mf_get_next_in_map(&aux__, &(VALUE));               \
-        )
+/* Iterate through miniflow TNL u64 values specified by 'MAPS'. */
+#define MINIFLOW_FOR_EACH_IN_TNL_MAP(VALUE, FLOW, MAPS)                 \
+    for (struct mf_for_each_in_map_aux aux__ =                          \
+        { miniflow_get_values(FLOW), (FLOW)->tnl_map, (MAPS).tnl_map }; \
+         mf_get_next_in_map(&aux__, &(VALUE));)
+
+/* Iterate through miniflow PKT u64 values specified by 'MAPS'. */
+#define MINIFLOW_FOR_EACH_IN_PKT_MAP(VALUE, FLOW, MAPS)             \
+    for (struct mf_for_each_in_map_aux aux__ =                      \
+        { miniflow_get_values(FLOW) + count_1bits((FLOW)->tnl_map), \
+                (FLOW)->pkt_map, (MAPS).pkt_map };                  \
+         mf_get_next_in_map(&aux__, &(VALUE));)
 
 /* This can be used when it is known that 'u64_idx' is set in 'map'. */
-static inline uint64_t
-miniflow_values_get__(const uint64_t *values, uint64_t map, int u64_idx)
+static inline const uint64_t *
+miniflow_values_get__(const uint64_t *values, uint64_t map, size_t u64_idx)
 {
-    return values[count_1bits(map & ((UINT64_C(1) << u64_idx) - 1))];
+    return values + count_1bits(map & ((UINT64_C(1) << u64_idx) - 1));
 }
 
 /* This can be used when it is known that 'u64_idx' is set in
  * the map of 'mf'. */
-static inline uint64_t
-miniflow_get__(const struct miniflow *mf, int u64_idx)
+static inline const uint64_t *
+miniflow_get__(const struct miniflow *mf, size_t u64_idx)
 {
-    return miniflow_values_get__(miniflow_get_values(mf), mf->map, u64_idx);
+    return OVS_LIKELY(u64_idx >= FLOW_TNL_U64S)
+        ? miniflow_values_get__(miniflow_get_values(mf)
+                                + count_1bits(mf->tnl_map),
+                                mf->pkt_map, u64_idx - FLOW_TNL_U64S)
+        : miniflow_values_get__(miniflow_get_values(mf), mf->tnl_map, u64_idx);
 }
 
+#define MINIFLOW_IN_MAP(MF, U64_IDX)                                \
+    (OVS_LIKELY((U64_IDX) >= FLOW_TNL_U64S)                         \
+     ? (MF)->pkt_map & (UINT64_C(1) << ((U64_IDX) - FLOW_TNL_U64S)) \
+     : (MF)->tnl_map & (UINT64_C(1) << (U64_IDX)))
+
 /* Get the value of 'FIELD' of an up to 8 byte wide integer type 'TYPE' of
  * a miniflow. */
 #define MINIFLOW_GET_TYPE(MF, TYPE, OFS)                                \
-    (((MF)->map & (UINT64_C(1) << (OFS) / sizeof(uint64_t)))            \
-     ? ((OVS_FORCE const TYPE *)                                        \
-        (miniflow_get_values(MF)                                        \
-         + count_1bits((MF)->map &                                      \
-                       ((UINT64_C(1) << (OFS) / sizeof(uint64_t)) - 1)))) \
+    (MINIFLOW_IN_MAP(MF, (OFS) / sizeof(uint64_t))                      \
+     ? ((OVS_FORCE const TYPE *)miniflow_get__(MF, (OFS) / sizeof(uint64_t))) \
      [(OFS) % sizeof(uint64_t) / sizeof(TYPE)]                          \
-     : 0)                                                               \
+     : 0)
 
 #define MINIFLOW_GET_U8(FLOW, FIELD)                                \
     MINIFLOW_GET_TYPE(FLOW, uint8_t, offsetof(struct flow, FIELD))
@@ -613,7 +675,7 @@ minimask_is_catchall(const struct minimask *mask)
     /* For every 1-bit in mask's map, the corresponding value is non-zero,
      * so the only way the mask can not fix any bits or fields is for the
      * map to be zero. */
-    return mask->masks.map == 0;
+    return mask->masks.tnl_map == 0 && mask->masks.pkt_map == 0;
 }
 
 /* Returns the uint64_t that would be at byte offset '8 * u64_ofs' if 'flow'
@@ -621,8 +683,8 @@ minimask_is_catchall(const struct minimask *mask)
 static inline uint64_t miniflow_get(const struct miniflow *flow,
                                     unsigned int u64_ofs)
 {
-    return flow->map & (UINT64_C(1) << u64_ofs)
-        ? miniflow_get__(flow, u64_ofs) : 0;
+    return MINIFLOW_IN_MAP(flow, u64_ofs)
+        ? *miniflow_get__(flow, u64_ofs) : 0;
 }
 
 static inline uint32_t miniflow_get_u32(const struct miniflow *flow,
@@ -707,9 +769,13 @@ flow_union_with_miniflow(struct flow *dst, const struct miniflow *src)
 {
     uint64_t *dst_u64 = (uint64_t *) dst;
     const uint64_t *p = miniflow_get_values(src);
-    int idx;
+    size_t idx;
 
-    MAP_FOR_EACH_INDEX(idx, src->map) {
+    MAP_FOR_EACH_INDEX(idx, src->tnl_map) {
+        dst_u64[idx] |= *p++;
+    }
+    dst_u64 += FLOW_TNL_U64S;
+    MAP_FOR_EACH_INDEX(idx, src->pkt_map) {
         dst_u64[idx] |= *p++;
     }
 }
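
flow_union_with_miniflow() is also the natural building block for
expansion; miniflow_expand() (declared earlier, body not in this patch) can
presumably be written as:

    void
    miniflow_expand(const struct miniflow *src, struct flow *dst)
    {
        memset(dst, 0, sizeof *dst);          /* 0-bits mean zero values. */
        flow_union_with_miniflow(dst, src);   /* OR in the 1-bit values.  */
    }
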
diff --git a/lib/match.c b/lib/match.c
index 1585a1f..5175868 100644
--- a/lib/match.c
+++ b/lib/match.c
@@ -1259,9 +1259,9 @@ minimatch_matches_flow(const struct minimatch *match,
     const uint64_t *target_u64 = (const uint64_t *) target;
     const uint64_t *flowp = miniflow_get_values(match->flow);
     const uint64_t *maskp = miniflow_get_values(&match->mask->masks);
-    int idx;
+    size_t idx;
 
-    MAP_FOR_EACH_INDEX(idx, match->flow->map) {
+    MAPS_FOR_EACH_INDEX(idx, *match->flow) {
         if ((*flowp++ ^ target_u64[idx]) & *maskp++) {
             return false;
         }
-- 
1.7.10.4