[ovs-dev] [PATCH 2/2] miniflow: Use 64-bit data.

Madhu Challa challa at noironetworks.com
Wed Dec 17 20:07:06 UTC 2014


Jarno,

I did some simple tests increasing the flow_tnl size and matching on it,
and the results look good. Thanks for solving this. I have a question
inline around miniflow_extract. Other than that the diff looks good to me.

Thanks.

On Wed, Dec 17, 2014 at 10:30 AM, Jarno Rajahalme <jrajahalme at nicira.com>
wrote:
>
> So far the compressed flow data in struct miniflow has been in 32-bit
> words with a 63-bit map, allowing for a maximum size of struct flow of
> 252 bytes.  With the forthcoming Geneve options this is not sufficient
> any more.
>
> This patch solves the problem by changing the miniflow data to 64-bit
> words, doubling the flow max size to 504 bytes.  Since the word size
> is doubled, there is some loss in compression efficiency.  To counter
> this some of the flow fields have been reordered to keep related
> fields together (e.g., the source and destination IP addresses share
> the same 64-bit word).
>
> This change should speed up flow data processing on 64-bit CPUs, which
> may help counterbalance the impact of making the struct flow bigger in
> the future.
>
> Classifier lookup stage boundaries are also changed to 64-bit
> alignment, as the current algorithm depends on each miniflow word to
> not be split between ranges.  This has resulted in new padding (part
> of the 'mpls_lse' field).
>
> The 'dp_hash' field is also moved to packet metadata to eliminate
> otherwise needed padding there.  This allows the L4 to fit into one
> 64-bit word, and also makes matches on 'dp_hash' more efficient as
> misses can be found already on stage 1.
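
(A quick sanity check of the sizing math for other readers -- my own
back-of-the-envelope sketch, not anything from the patch: one map bit
covers one data word, and with 63 usable map bits the limit doubles
along with the word size.)

    /* Reviewer's sanity check, not from the patch: 63 map bits, one
     * data word each. */
    #include <assert.h>
    #include <stdint.h>

    int
    main(void)
    {
        assert(63 * sizeof(uint32_t) == 252); /* old limit, 32-bit words */
        assert(63 * sizeof(uint64_t) == 504); /* new limit, 64-bit words */
        return 0;
    }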
>
> Signed-off-by: Jarno Rajahalme <jrajahalme at nicira.com>
> Summary:
> ---
>  lib/classifier-private.h      |   60 +++---
>  lib/classifier.c              |   72 ++++----
>  lib/dpif-netdev.c             |   48 ++---
>  lib/flow.c                    |  402 +++++++++++++++++++++++------------------
>  lib/flow.h                    |  270 +++++++++++++++------------
>  lib/match.c                   |   10 +-
>  lib/nx-match.c                |    2 +-
>  lib/odp-util.h                |    2 +-
>  lib/ofp-util.c                |    2 +-
>  lib/tnl-ports.c               |    2 +-
>  ofproto/ofproto-dpif-upcall.c |   10 +-
>  ofproto/ofproto-dpif-xlate.c  |    2 +-
>  ofproto/ofproto.c             |    2 +-
>  tests/ofproto-dpif.at         |    2 +-
>  tests/test-classifier.c       |   23 +--
>  15 files changed, 499 insertions(+), 410 deletions(-)
>
> diff --git a/lib/classifier-private.h b/lib/classifier-private.h
> index 17eed2c..cd64fed 100644
> --- a/lib/classifier-private.h
> +++ b/lib/classifier-private.h
> @@ -42,7 +42,7 @@ struct cls_subtable {
>      /* These fields are accessed by readers who care about wildcarding. */
>      const tag_type tag;       /* Tag generated from mask for partitioning. */
>      const uint8_t n_indices;                   /* How many indices to use. */
> -    const uint8_t index_ofs[CLS_MAX_INDICES];  /* u32 segment boundaries. */
> +    const uint8_t index_ofs[CLS_MAX_INDICES];  /* u64 segment boundaries. */
>      unsigned int trie_plen[CLS_MAX_TRIES];  /* Trie prefix length in 'mask'
>                                               * (runtime configurable). */
>      const int ports_mask_len;
> @@ -112,7 +112,7 @@ miniflow_get_map_in_range(const struct miniflow *miniflow,
>          *offset = count_1bits(map & msk);
>          map &= ~msk;
>      }
> -    if (end < FLOW_U32S) {
> +    if (end < FLOW_U64S) {
>          uint64_t msk = (UINT64_C(1) << end) - 1; /* 'end' LSBs set */
>          map &= msk;
>      }
> @@ -128,18 +128,18 @@ static inline uint32_t
>  flow_hash_in_minimask(const struct flow *flow, const struct minimask *mask,
>                        uint32_t basis)
>  {
> -    const uint32_t *mask_values = miniflow_get_u32_values(&mask->masks);
> -    const uint32_t *flow_u32 = (const uint32_t *)flow;
> -    const uint32_t *p = mask_values;
> +    const uint64_t *mask_values = miniflow_get_values(&mask->masks);
> +    const uint64_t *flow_u64 = (const uint64_t *)flow;
> +    const uint64_t *p = mask_values;
>      uint32_t hash;
>      int idx;
>
>      hash = basis;
>      MAP_FOR_EACH_INDEX(idx, mask->masks.map) {
> -        hash = hash_add(hash, flow_u32[idx] & *p++);
> +        hash = hash_add64(hash, flow_u64[idx] & *p++);
>      }
>
> -    return hash_finish(hash, (p - mask_values) * 4);
> +    return hash_finish(hash, (p - mask_values) * 8);
>  }
>
>  /* Returns a hash value for the bits of 'flow' where there are 1-bits in
> @@ -151,16 +151,16 @@ static inline uint32_t
>  miniflow_hash_in_minimask(const struct miniflow *flow,
>                            const struct minimask *mask, uint32_t basis)
>  {
> -    const uint32_t *mask_values = miniflow_get_u32_values(&mask->masks);
> -    const uint32_t *p = mask_values;
> +    const uint64_t *mask_values = miniflow_get_values(&mask->masks);
> +    const uint64_t *p = mask_values;
>      uint32_t hash = basis;
> -    uint32_t flow_u32;
> +    uint64_t flow_u64;
>
> -    MINIFLOW_FOR_EACH_IN_MAP(flow_u32, flow, mask->masks.map) {
> -        hash = hash_add(hash, flow_u32 & *p++);
> +    MINIFLOW_FOR_EACH_IN_MAP(flow_u64, flow, mask->masks.map) {
> +        hash = hash_add64(hash, flow_u64 & *p++);
>      }
>
> -    return hash_finish(hash, (p - mask_values) * 4);
> +    return hash_finish(hash, (p - mask_values) * 8);
>  }
>
>  /* Returns a hash value for the bits of range [start, end) in 'flow',
> @@ -173,22 +173,22 @@ flow_hash_in_minimask_range(const struct flow *flow,
>                              const struct minimask *mask,
>                              uint8_t start, uint8_t end, uint32_t *basis)
>  {
> -    const uint32_t *mask_values = miniflow_get_u32_values(&mask->masks);
> -    const uint32_t *flow_u32 = (const uint32_t *)flow;
> +    const uint64_t *mask_values = miniflow_get_values(&mask->masks);
> +    const uint64_t *flow_u64 = (const uint64_t *)flow;
>      unsigned int offset;
>      uint64_t map;
> -    const uint32_t *p;
> +    const uint64_t *p;
>      uint32_t hash = *basis;
>      int idx;
>
>      map = miniflow_get_map_in_range(&mask->masks, start, end, &offset);
>      p = mask_values + offset;
>      MAP_FOR_EACH_INDEX(idx, map) {
> -        hash = hash_add(hash, flow_u32[idx] & *p++);
> +        hash = hash_add64(hash, flow_u64[idx] & *p++);
>      }
>
>      *basis = hash; /* Allow continuation from the unfinished value. */
> -    return hash_finish(hash, (p - mask_values) * 4);
> +    return hash_finish(hash, (p - mask_values) * 8);
>  }
>
>  /* Fold minimask 'mask''s wildcard mask into 'wc's wildcard mask. */
> @@ -206,16 +206,16 @@ flow_wildcards_fold_minimask_range(struct flow_wildcards *wc,
>                                     const struct minimask *mask,
>                                     uint8_t start, uint8_t end)
>  {
> -    uint32_t *dst_u32 = (uint32_t *)&wc->masks;
> +    uint64_t *dst_u64 = (uint64_t *)&wc->masks;
>      unsigned int offset;
>      uint64_t map;
> -    const uint32_t *p;
> +    const uint64_t *p;
>      int idx;
>
>      map = miniflow_get_map_in_range(&mask->masks, start, end, &offset);
> -    p = miniflow_get_u32_values(&mask->masks) + offset;
> +    p = miniflow_get_values(&mask->masks) + offset;
>      MAP_FOR_EACH_INDEX(idx, map) {
> -        dst_u32[idx] |= *p++;
> +        dst_u64[idx] |= *p++;
>      }
>  }
>
> @@ -223,15 +223,15 @@ flow_wildcards_fold_minimask_range(struct flow_wildcards *wc,
>  static inline uint32_t
>  miniflow_hash(const struct miniflow *flow, uint32_t basis)
>  {
> -    const uint32_t *values = miniflow_get_u32_values(flow);
> -    const uint32_t *p = values;
> +    const uint64_t *values = miniflow_get_values(flow);
> +    const uint64_t *p = values;
>      uint32_t hash = basis;
>      uint64_t hash_map = 0;
>      uint64_t map;
>
>      for (map = flow->map; map; map = zero_rightmost_1bit(map)) {
>          if (*p) {
> -            hash = hash_add(hash, *p);
> +            hash = hash_add64(hash, *p);
>              hash_map |= rightmost_1bit(map);
>          }
>          p++;
> @@ -265,20 +265,20 @@ minimatch_hash_range(const struct minimatch *match, uint8_t start, uint8_t end,
>                       uint32_t *basis)
>  {
>      unsigned int offset;
> -    const uint32_t *p, *q;
> +    const uint64_t *p, *q;
>      uint32_t hash = *basis;
>      int n, i;
>
>      n = count_1bits(miniflow_get_map_in_range(&match->mask.masks, start, end,
>                                                &offset));
> -    q = miniflow_get_u32_values(&match->mask.masks) + offset;
> -    p = miniflow_get_u32_values(&match->flow) + offset;
> +    q = miniflow_get_values(&match->mask.masks) + offset;
> +    p = miniflow_get_values(&match->flow) + offset;
>
>      for (i = 0; i < n; i++) {
> -        hash = hash_add(hash, p[i] & q[i]);
> +        hash = hash_add64(hash, p[i] & q[i]);
>      }
>      *basis = hash; /* Allow continuation from the unfinished value. */
> -    return hash_finish(hash, (offset + n) * 4);
> +    return hash_finish(hash, (offset + n) * 8);
>  }
>
>  #endif
> diff --git a/lib/classifier.c b/lib/classifier.c
> index bbc5a4a..0ac5356 100644
> --- a/lib/classifier.c
> +++ b/lib/classifier.c
> @@ -34,6 +34,8 @@ struct trie_ctx;
>  /* Ports trie depends on both ports sharing the same ovs_be32. */
>  #define TP_PORTS_OFS32 (offsetof(struct flow, tp_src) / 4)
>  BUILD_ASSERT_DECL(TP_PORTS_OFS32 == offsetof(struct flow, tp_dst) / 4);
> +BUILD_ASSERT_DECL(TP_PORTS_OFS32 % 2 == 0);
> +#define TP_PORTS_OFS64 (TP_PORTS_OFS32 / 2)
>
>  static struct cls_match *
>  cls_match_alloc(const struct cls_rule *rule)
> @@ -240,7 +242,7 @@ classifier_init(struct classifier *cls, const uint8_t *flow_segments)
>      cls->n_flow_segments = 0;
>      if (flow_segments) {
>          while (cls->n_flow_segments < CLS_MAX_INDICES
> -               && *flow_segments < FLOW_U32S) {
> +               && *flow_segments < FLOW_U64S) {
>              cls->flow_segments[cls->n_flow_segments++] = *flow_segments++;
>          }
>      }
> @@ -409,10 +411,9 @@ classifier_count(const struct classifier *cls)
>  }
>
>  static uint32_t
> -hash_metadata(ovs_be64 metadata_)
> +hash_metadata(ovs_be64 metadata)
>  {
> -    uint64_t metadata = (OVS_FORCE uint64_t) metadata_;
> -    return hash_uint64(metadata);
> +    return hash_uint64((OVS_FORCE uint64_t) metadata);
>  }
>
>  static struct cls_partition *
> @@ -491,7 +492,7 @@ classifier_replace(struct classifier *cls, const struct cls_rule *rule)
>      struct cls_match *new = cls_match_alloc(rule);
>      struct cls_subtable *subtable;
>      uint32_t ihash[CLS_MAX_INDICES];
> -    uint8_t prev_be32ofs = 0;
> +    uint8_t prev_be64ofs = 0;
>      struct cls_match *head;
>      size_t n_rules = 0;
>      uint32_t basis;
> @@ -508,11 +509,11 @@ classifier_replace(struct classifier *cls, const struct cls_rule *rule)
>      /* Compute hashes in segments. */
>      basis = 0;
>      for (i = 0; i < subtable->n_indices; i++) {
> -        ihash[i] = minimatch_hash_range(&rule->match, prev_be32ofs,
> +        ihash[i] = minimatch_hash_range(&rule->match, prev_be64ofs,
>                                          subtable->index_ofs[i], &basis);
> -        prev_be32ofs = subtable->index_ofs[i];
> +        prev_be64ofs = subtable->index_ofs[i];
>      }
> -    hash = minimatch_hash_range(&rule->match, prev_be32ofs, FLOW_U32S, &basis);
> +    hash = minimatch_hash_range(&rule->match, prev_be64ofs, FLOW_U64S, &basis);
>
>      head = find_equal(subtable, &rule->match.flow, hash);
>      if (!head) {
> @@ -674,7 +675,7 @@ classifier_remove(struct classifier *cls, const struct cls_rule *rule)
>      struct cls_match *next;
>      int i;
>      uint32_t basis = 0, hash, ihash[CLS_MAX_INDICES];
> -    uint8_t prev_be32ofs = 0;
> +    uint8_t prev_be64ofs = 0;
>      size_t n_rules;
>
>      cls_match = rule->cls_match;
> @@ -704,11 +705,11 @@ classifier_remove(struct classifier *cls, const struct cls_rule *rule)
>      ovs_assert(subtable);
>
>      for (i = 0; i < subtable->n_indices; i++) {
> -        ihash[i] = minimatch_hash_range(&rule->match, prev_be32ofs,
> +        ihash[i] = minimatch_hash_range(&rule->match, prev_be64ofs,
>                                          subtable->index_ofs[i], &basis);
> -        prev_be32ofs = subtable->index_ofs[i];
> +        prev_be64ofs = subtable->index_ofs[i];
>      }
> -    hash = minimatch_hash_range(&rule->match, prev_be32ofs, FLOW_U32S, &basis);
> +    hash = minimatch_hash_range(&rule->match, prev_be64ofs, FLOW_U64S, &basis);
>
>      /* Head rule.  Check if 'next' is an identical, lower-priority rule that
>       * will replace 'rule' in the data structures. */
> @@ -943,7 +944,7 @@ classifier_rule_overlaps(const struct classifier *cls,
>      /* Iterate subtables in the descending max priority order. */
>      PVECTOR_FOR_EACH_PRIORITY (subtable, target->priority - 1, 2,
>                                 sizeof(struct cls_subtable), &cls->subtables) {
> -        uint32_t storage[FLOW_U32S];
> +        uint64_t storage[FLOW_U64S];
>          struct minimask mask;
>          const struct cls_rule *rule;
>
> @@ -1148,7 +1149,7 @@ insert_subtable(struct classifier *cls, const struct minimask *mask)
>      /* Check if the rest of the subtable's mask adds any bits,
>       * and remove the last index if it doesn't. */
>      if (index > 0) {
> -        flow_wildcards_fold_minimask_range(&new, mask, prev, FLOW_U32S);
> +        flow_wildcards_fold_minimask_range(&new, mask, prev, FLOW_U64S);
>          if (flow_wildcards_equal(&new, &old)) {
>              --index;
>              *CONST_CAST(uint8_t *, &subtable->index_ofs[index]) = 0;
> @@ -1227,9 +1228,10 @@ check_tries(struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries,
>          if (field_plen[j]) {
>              struct trie_ctx *ctx = &trie_ctx[j];
>              uint8_t be32ofs = ctx->be32ofs;
> +            uint8_t be64ofs = be32ofs / 2;
>
>              /* Is the trie field within the current range of fields? */
> -            if (be32ofs >= ofs.start && be32ofs < ofs.end) {
> +            if (be64ofs >= ofs.start && be64ofs < ofs.end) {
>                  /* On-demand trie lookup. */
>                  if (!ctx->lookup_done) {
>                      memset(&ctx->match_plens, 0, sizeof ctx->match_plens);
> @@ -1281,12 +1283,12 @@ miniflow_and_mask_matches_flow(const struct miniflow *flow,
>                                 const struct minimask *mask,
>                                 const struct flow *target)
>  {
> -    const uint32_t *flowp = miniflow_get_u32_values(flow);
> -    const uint32_t *maskp = miniflow_get_u32_values(&mask->masks);
> +    const uint64_t *flowp = miniflow_get_values(flow);
> +    const uint64_t *maskp = miniflow_get_values(&mask->masks);
>      int idx;
>
>      MAP_FOR_EACH_INDEX(idx, mask->masks.map) {
> -        uint32_t diff = (*flowp++ ^ flow_u32_value(target, idx)) & *maskp++;
> +        uint64_t diff = (*flowp++ ^ flow_u64_value(target, idx)) & *maskp++;
>
>          if (diff) {
>              return false;
> @@ -1324,26 +1326,26 @@ miniflow_and_mask_matches_flow_wc(const struct miniflow *flow,
>                                    const struct flow *target,
>                                    struct flow_wildcards *wc)
>  {
> -    const uint32_t *flowp = miniflow_get_u32_values(flow);
> -    const uint32_t *maskp = miniflow_get_u32_values(&mask->masks);
> +    const uint64_t *flowp = miniflow_get_values(flow);
> +    const uint64_t *maskp = miniflow_get_values(&mask->masks);
>      int idx;
>
>      MAP_FOR_EACH_INDEX(idx, mask->masks.map) {
> -        uint32_t mask = *maskp++;
> -        uint32_t diff = (*flowp++ ^ flow_u32_value(target, idx)) & mask;
> +        uint64_t mask = *maskp++;
> +        uint64_t diff = (*flowp++ ^ flow_u64_value(target, idx)) & mask;
>
>          if (diff) {
>              /* Only unwildcard if none of the differing bits is already
>               * exact-matched. */
> -            if (!(flow_u32_value(&wc->masks, idx) & diff)) {
> +            if (!(flow_u64_value(&wc->masks, idx) & diff)) {
>              /* Keep one bit of the difference.  The selected bit may be
>                   * different in big-endian v.s. little-endian systems. */
> -                *flow_u32_lvalue(&wc->masks, idx) |= rightmost_1bit(diff);
> +                *flow_u64_lvalue(&wc->masks, idx) |= rightmost_1bit(diff);
>              }
>              return false;
>          }
>          /* Fill in the bits that were looked at. */
> -        *flow_u32_lvalue(&wc->masks, idx) |= mask;
> +        *flow_u64_lvalue(&wc->masks, idx) |= mask;
>      }
>
>      return true;
> @@ -1413,7 +1415,7 @@ find_match_wc(const struct cls_subtable *subtable, const struct flow *flow,
>          }
>          ofs.start = ofs.end;
>      }
> -    ofs.end = FLOW_U32S;
> +    ofs.end = FLOW_U64S;
>      /* Trie check for the final range. */
>      if (check_tries(trie_ctx, n_tries, subtable->trie_plen, ofs, flow, wc)) {
>          fill_range_wc(subtable, wc, ofs.start);
> @@ -1438,7 +1440,7 @@ find_match_wc(const struct cls_subtable *subtable, const struct flow *flow,
>
>          /* Unwildcard all bits in the mask upto the ports, as they were used
>           * to determine there is no match. */
> -        fill_range_wc(subtable, wc, TP_PORTS_OFS32);
> +        fill_range_wc(subtable, wc, TP_PORTS_OFS64);
>          return NULL;
>      }
>
> @@ -1727,12 +1729,11 @@ minimask_get_prefix_len(const struct minimask *minimask,
>                          const struct mf_field *mf)
>  {
>      unsigned int n_bits = 0, mask_tz = 0; /* Non-zero when end of mask seen. */
> -    uint8_t u32_ofs = mf->flow_be32ofs;
> -    uint8_t u32_end = u32_ofs + mf->n_bytes / 4;
> +    uint8_t be32_ofs = mf->flow_be32ofs;
> +    uint8_t be32_end = be32_ofs + mf->n_bytes / 4;
>
> -    for (; u32_ofs < u32_end; ++u32_ofs) {
> -        uint32_t mask;
> -        mask = ntohl((OVS_FORCE ovs_be32)minimask_get(minimask, u32_ofs));
> +    for (; be32_ofs < be32_end; ++be32_ofs) {
> +        uint32_t mask = ntohl(minimask_get_be32(minimask, be32_ofs));
>
>          /* Validate mask, count the mask length. */
>          if (mask_tz) {
> @@ -1760,8 +1761,11 @@ minimask_get_prefix_len(const struct minimask *minimask,
>  static const ovs_be32 *
>  minimatch_get_prefix(const struct minimatch *match, const struct mf_field *mf)
>  {
> -    return miniflow_get_be32_values(&match->flow) +
> -        count_1bits(match->flow.map & ((UINT64_C(1) << mf->flow_be32ofs) - 1));
> +    return (OVS_FORCE const ovs_be32 *)
> +        (miniflow_get_values(&match->flow)
> +         + count_1bits(match->flow.map &
> +                       ((UINT64_C(1) << mf->flow_be32ofs / 2) - 1)))
> +        + (mf->flow_be32ofs & 1);
>  }
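
The be32-to-u64 offset conversion above took me a moment to follow, so
here is my model of it (illustrative values only, not code from the
patch): the 64-bit word index is flow_be32ofs / 2, the value index is
the count of map bits below that word, and flow_be32ofs & 1 then picks
the half of the word.

    /* Reviewer's model of the new offset math; 'map' and 'ofs32' are
     * made up for illustration. */
    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        uint64_t map = 0x5;     /* 64-bit words 0 and 2 are present. */
        unsigned int ofs32 = 5; /* be32 offset: the odd half of word 2. */
        unsigned int word = ofs32 / 2;
        int idx = __builtin_popcountll(map & ((UINT64_C(1) << word) - 1));

        printf("u64 value index %d, be32 half %u\n", idx, ofs32 & 1);
        return 0;
    }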
>
>  /* Insert rule in to the prefix tree.
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index c23decb..18864f9 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -93,7 +93,7 @@ struct netdev_flow_key {
>      uint32_t hash;       /* Hash function differs for different users. */
>      uint32_t len;        /* Length of the following miniflow (incl. map). */
>      struct miniflow mf;
> -    uint32_t buf[FLOW_MAX_PACKET_U32S - MINI_N_INLINE];
> +    uint64_t buf[FLOW_MAX_PACKET_U64S - MINI_N_INLINE];
>  };
>
>  /* Exact match cache for frequently used flows
> @@ -1365,8 +1365,8 @@ static inline void
>  netdev_flow_mask_init(struct netdev_flow_key *mask,
>                        const struct match *match)
>  {
> -    const uint32_t *mask_u32 = (const uint32_t *) &match->wc.masks;
> -    uint32_t *dst = mask->mf.inline_values;
> +    const uint64_t *mask_u64 = (const uint64_t *) &match->wc.masks;
> +    uint64_t *dst = mask->mf.inline_values;
>      uint64_t map, mask_map = 0;
>      uint32_t hash = 0;
>      int n;
> @@ -1378,10 +1378,10 @@ netdev_flow_mask_init(struct netdev_flow_key *mask,
>          uint64_t rm1bit = rightmost_1bit(map);
>          int i = raw_ctz(map);
>
> -        if (mask_u32[i]) {
> +        if (mask_u64[i]) {
>              mask_map |= rm1bit;
> -            *dst++ = mask_u32[i];
> -            hash = hash_add(hash, mask_u32[i]);
> +            *dst++ = mask_u64[i];
> +            hash = hash_add64(hash, mask_u64[i]);
>          }
>          map -= rm1bit;
>      }
> @@ -1393,7 +1393,7 @@ netdev_flow_mask_init(struct netdev_flow_key *mask,
>
>      n = dst - mask->mf.inline_values;
>
> -    mask->hash = hash_finish(hash, n * 4);
> +    mask->hash = hash_finish(hash, n * 8);
>      mask->len = netdev_flow_key_size(n);
>  }
>
> @@ -1403,23 +1403,23 @@ netdev_flow_key_init_masked(struct netdev_flow_key *dst,
>                              const struct flow *flow,
>                              const struct netdev_flow_key *mask)
>  {
> -    uint32_t *dst_u32 = dst->mf.inline_values;
> -    const uint32_t *mask_u32 = mask->mf.inline_values;
> +    uint64_t *dst_u64 = dst->mf.inline_values;
> +    const uint64_t *mask_u64 = mask->mf.inline_values;
>      uint32_t hash = 0;
> -    uint32_t value;
> +    uint64_t value;
>
>      dst->len = mask->len;
>      dst->mf.values_inline = true;
>      dst->mf.map = mask->mf.map;
>
>      FLOW_FOR_EACH_IN_MAP(value, flow, mask->mf.map) {
> -        *dst_u32 = value & *mask_u32++;
> -        hash = hash_add(hash, *dst_u32++);
> +        *dst_u64 = value & *mask_u64++;
> +        hash = hash_add64(hash, *dst_u64++);
>      }
> -    dst->hash = hash_finish(hash, (dst_u32 - dst->mf.inline_values) * 4);
> +    dst->hash = hash_finish(hash, (dst_u64 - dst->mf.inline_values) * 8);
>  }
>
> -/* Iterate through all netdev_flow_key u32 values specified by 'MAP' */
> +/* Iterate through all netdev_flow_key u64 values specified by 'MAP' */
>  #define NETDEV_FLOW_KEY_FOR_EACH_IN_MAP(VALUE, KEY, MAP)           \
>      for (struct mf_for_each_in_map_aux aux__                       \
>               = { (KEY)->mf.inline_values, (KEY)->mf.map, MAP };    \
> @@ -1432,15 +1432,15 @@ static inline uint32_t
>  netdev_flow_key_hash_in_mask(const struct netdev_flow_key *key,
>                               const struct netdev_flow_key *mask)
>  {
> -    const uint32_t *p = mask->mf.inline_values;
> +    const uint64_t *p = mask->mf.inline_values;
>      uint32_t hash = 0;
> -    uint32_t key_u32;
> +    uint64_t key_u64;
>
> -    NETDEV_FLOW_KEY_FOR_EACH_IN_MAP(key_u32, key, mask->mf.map) {
> -        hash = hash_add(hash, key_u32 & *p++);
> +    NETDEV_FLOW_KEY_FOR_EACH_IN_MAP(key_u64, key, mask->mf.map) {
> +        hash = hash_add64(hash, key_u64 & *p++);
>      }
>
> -    return hash_finish(hash, (p - mask->mf.inline_values) * 4);
> +    return hash_finish(hash, (p - mask->mf.inline_values) * 8);
>  }
>
>  static inline bool
> @@ -3492,12 +3492,12 @@ static inline bool
>  dpcls_rule_matches_key(const struct dpcls_rule *rule,
>                         const struct netdev_flow_key *target)
>  {
> -    const uint32_t *keyp = rule->flow.mf.inline_values;
> -    const uint32_t *maskp = rule->mask->mf.inline_values;
> -    uint32_t target_u32;
> +    const uint64_t *keyp = rule->flow.mf.inline_values;
> +    const uint64_t *maskp = rule->mask->mf.inline_values;
> +    uint64_t target_u64;
>
> -    NETDEV_FLOW_KEY_FOR_EACH_IN_MAP(target_u32, target, rule->flow.mf.map) {
> -        if (OVS_UNLIKELY((target_u32 & *maskp++) != *keyp++)) {
> +    NETDEV_FLOW_KEY_FOR_EACH_IN_MAP(target_u64, target, rule->flow.mf.map) {
> +        if (OVS_UNLIKELY((target_u64 & *maskp++) != *keyp++)) {
>              return false;
>          }
>      }
> diff --git a/lib/flow.c b/lib/flow.c
> index 521ee82..3c42ec1 100644
> --- a/lib/flow.c
> +++ b/lib/flow.c
> @@ -42,12 +42,12 @@
>  COVERAGE_DEFINE(flow_extract);
>  COVERAGE_DEFINE(miniflow_malloc);
>
> -/* U32 indices for segmented flow classification. */
> -const uint8_t flow_segment_u32s[4] = {
> -    FLOW_SEGMENT_1_ENDS_AT / 4,
> -    FLOW_SEGMENT_2_ENDS_AT / 4,
> -    FLOW_SEGMENT_3_ENDS_AT / 4,
> -    FLOW_U32S
> +/* U64 indices for segmented flow classification. */
> +const uint8_t flow_segment_u64s[4] = {
> +    FLOW_SEGMENT_1_ENDS_AT / sizeof(uint64_t),
> +    FLOW_SEGMENT_2_ENDS_AT / sizeof(uint64_t),
> +    FLOW_SEGMENT_3_ENDS_AT / sizeof(uint64_t),
> +    FLOW_U64S
>  };
>
>  /* miniflow_extract() assumes the following to be true to optimize the
> @@ -70,11 +70,9 @@ BUILD_ASSERT_DECL(offsetof(struct flow, nw_frag) + 3
>                    offsetof(struct flow, nw_proto) / 4
>                    == offsetof(struct flow, nw_tos) / 4);
>
> -/* TCP flags in the first half of a BE32, zeroes in the other half. */
> -BUILD_ASSERT_DECL(offsetof(struct flow, tcp_flags) + 2
> -                  == offsetof(struct flow, pad2) &&
> -                  offsetof(struct flow, tcp_flags) / 4
> -                  == offsetof(struct flow, pad2) / 4);
> +/* TCP flags in the middle of a BE64, zeroes in the other half. */
> +BUILD_ASSERT_DECL(offsetof(struct flow, tcp_flags) % 8 == 4);
> +
>  #if WORDS_BIGENDIAN
>  #define TCP_FLAGS_BE32(tcp_ctl) ((OVS_FORCE ovs_be32)TCP_FLAGS_BE16(tcp_ctl) \
>                                   << 16)
> @@ -111,8 +109,8 @@ data_try_pull(void **datap, size_t *sizep, size_t size)
>  /* Context for pushing data to a miniflow. */
>  struct mf_ctx {
>      uint64_t map;
> -    uint32_t *data;
> -    uint32_t * const end;
> +    uint64_t *data;
> +    uint64_t * const end;
>  };
>
>  /* miniflow_push_* macros allow filling in a miniflow data values in order.
> @@ -121,7 +119,7 @@ struct mf_ctx {
>   * away.  Some GCC versions gave warnings on ALWAYS_INLINE, so these are
>   * defined as macros. */
>
> -#if (FLOW_WC_SEQ != 28)
> +#if (FLOW_WC_SEQ != 29)
>  #define MINIFLOW_ASSERT(X) ovs_assert(X)
>  BUILD_MESSAGE("FLOW_WC_SEQ changed: miniflow_extract() will have runtime "
>                 "assertions enabled. Consider updating FLOW_WC_SEQ after "
> @@ -130,76 +128,137 @@ BUILD_MESSAGE("FLOW_WC_SEQ changed: miniflow_extract() will have runtime "
>  #define MINIFLOW_ASSERT(X)
>  #endif
>
> -#define miniflow_push_uint32_(MF, OFS, VALUE)                   \
> +#define miniflow_push_uint64_(MF, OFS, VALUE)                   \
>  {                                                               \
> -    MINIFLOW_ASSERT(MF.data < MF.end && (OFS) % 4 == 0          \
> -                    && !(MF.map & (UINT64_MAX << (OFS) / 4)));  \
> +    MINIFLOW_ASSERT(MF.data < MF.end && (OFS) % 8 == 0          \
> +                    && !(MF.map & (UINT64_MAX << (OFS) / 8)));  \
>      *MF.data++ = VALUE;                                         \
> -    MF.map |= UINT64_C(1) << (OFS) / 4;                         \
> +    MF.map |= UINT64_C(1) << (OFS) / 8;                         \
>  }
>
> -#define miniflow_push_be32_(MF, OFS, VALUE) \
> -    miniflow_push_uint32_(MF, OFS, (OVS_FORCE uint32_t)(VALUE))
> +#define miniflow_push_be64_(MF, OFS, VALUE) \
> +    miniflow_push_uint64_(MF, OFS, (OVS_FORCE uint64_t)(VALUE))
>
> -#define miniflow_push_uint16_(MF, OFS, VALUE)                   \
> +#define miniflow_push_uint32_(MF, OFS, VALUE)                   \
>  {                                                               \
>      MINIFLOW_ASSERT(MF.data < MF.end &&                                 \
> -                    (((OFS) % 4 == 0 && !(MF.map & (UINT64_MAX << (OFS) / 4))) \
> -                     || ((OFS) % 4 == 2 && MF.map & (UINT64_C(1) << (OFS) / 4) \
> -                         && !(MF.map & (UINT64_MAX << ((OFS) / 4 + 1)))))); \
> +                    (((OFS) % 8 == 0 && !(MF.map & (UINT64_MAX << (OFS) / 8))) \
> +                     || ((OFS) % 8 == 4 && MF.map & (UINT64_C(1) << (OFS) / 8) \
> +                         && !(MF.map & (UINT64_MAX << ((OFS) / 8 + 1)))))); \
> +                                                                        \
> +    if ((OFS) % 8 == 0) {                                               \
> +        *(uint32_t *)MF.data = VALUE;                                   \
> +        MF.map |= UINT64_C(1) << (OFS) / 8;                             \
> +    } else if ((OFS) % 8 == 4) {                                        \
> +        *((uint32_t *)MF.data + 1) = VALUE;                             \
> +        MF.data++;                                                      \
> +    }                                                                   \
> +}
> +
> +#define miniflow_push_be32_(MF, OFS, VALUE)                     \
> +    miniflow_push_uint32_(MF, OFS, (OVS_FORCE uint32_t)(VALUE))
> +
> +#define miniflow_push_uint16_(MF, OFS, VALUE)                           \
> +{                                                                       \
> +    MINIFLOW_ASSERT(MF.data < MF.end &&                                 \
> +                    (((OFS) % 8 == 0 && !(MF.map & (UINT64_MAX << (OFS) / 8))) \
> +                     || ((OFS) % 2 == 0 && MF.map & (UINT64_C(1) << (OFS) / 8) \
> +                         && !(MF.map & (UINT64_MAX << ((OFS) / 8 + 1)))))); \
>                                                                          \
> -    if ((OFS) % 4 == 0) {                                               \
> +    if ((OFS) % 8 == 0) {                                               \
>          *(uint16_t *)MF.data = VALUE;                                   \
> -        MF.map |= UINT64_C(1) << (OFS) / 4;                             \
> -    } else if ((OFS) % 4 == 2) {                                        \
> +        MF.map |= UINT64_C(1) << (OFS) / 8;                             \
> +    } else if ((OFS) % 8 == 2) {                                        \
>          *((uint16_t *)MF.data + 1) = VALUE;                             \
> +    } else if ((OFS) % 8 == 4) {                                        \
> +        *((uint16_t *)MF.data + 2) = VALUE;                             \
> +    } else if ((OFS) % 8 == 6) {                                        \
> +        *((uint16_t *)MF.data + 3) = VALUE;                             \
>          MF.data++;                                                      \
>      }                                                                   \
>  }
>

So am I right that these calls should come in groups of four for the
increment to work? This does not show up in the diff, but I was
wondering how the data pointer gets incremented and the map bit set
for, say, the ethernet header, where we have:

        vlan_tci = parse_vlan(&data, &size);
        dl_type = parse_ethertype(&data, &size);
        miniflow_push_be16(mf, dl_type, dl_type);
        miniflow_push_be16(mf, vlan_tci, vlan_tci);
    }
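
(Partially answering my own question after staring at the macros a bit
more: if I read them right, the pushes do not have to come in groups of
four, they just have to end on a 64-bit boundary. MF.data is only
incremented by the final halfword of a word, i.e. at OFS % 8 == 6, and
the map bit is set when the word is first touched. For the ethernet
header, miniflow_push_macs below has already set both map bits and left
the second word half-filled, so dl_type lands at OFS % 8 == 4 and the
vlan_tci push at OFS % 8 == 6 completes the word and advances MF.data.
A sequence that stops mid-word has to be closed with
miniflow_pad_to_64(). A toy model of the 16-bit case, with a push16
helper and offsets of my own making, not the patch's code:)

    /* Toy model: the value lands in halfword (OFS % 8) / 2 of the
     * current word; the data pointer only advances once the last
     * halfword of the word is written. */
    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    static void
    push16(uint64_t **data, unsigned int ofs, uint16_t value)
    {
        memcpy((uint16_t *) *data + ofs % 8 / 2, &value, sizeof value);
        if (ofs % 8 == 6) {
            (*data)++;          /* Word complete. */
        }
    }

    int
    main(void)
    {
        uint64_t buf[1] = { 0 };
        uint64_t *p = buf;

        push16(&p, 12, 0x0800); /* dl_type at OFS % 8 == 4: p stays. */
        assert(p == buf);
        push16(&p, 14, 0x1001); /* vlan_tci at OFS % 8 == 6: p++. */
        assert(p == buf + 1);
        return 0;
    }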


>
> -#define miniflow_push_be16_(MF, OFS, VALUE)             \
> +#define miniflow_pad_to_64_(MF, OFS)                                    \
> +{                                                                   \
> +    MINIFLOW_ASSERT((OFS) % 8 != 0);                                    \
> +    MINIFLOW_ASSERT(MF.map & (UINT64_C(1) << (OFS) / 8));               \
> +    MINIFLOW_ASSERT(!(MF.map & (UINT64_MAX << ((OFS) / 8 + 1))));       \
> +                                                                        \
> +    memset((uint8_t *)MF.data + (OFS) % 8, 0, 8 - (OFS) % 8);           \
> +    MF.data++;                                                          \
> +}
> +
> +#define miniflow_push_be16_(MF, OFS, VALUE)                     \
>      miniflow_push_uint16_(MF, OFS, (OVS_FORCE uint16_t)VALUE);
>
>  /* Data at 'valuep' may be unaligned. */
>  #define miniflow_push_words_(MF, OFS, VALUEP, N_WORDS)          \
>  {                                                               \
> -    int ofs32 = (OFS) / 4;                                      \
> +    int ofs64 = (OFS) / 8;                                      \
>                                                                          \
> -    MINIFLOW_ASSERT(MF.data + (N_WORDS) <= MF.end && (OFS) % 4 == 0     \
> -                    && !(MF.map & (UINT64_MAX << ofs32)));              \
> +    MINIFLOW_ASSERT(MF.data + (N_WORDS) <= MF.end && (OFS) % 8 == 0     \
> +                    && !(MF.map & (UINT64_MAX << ofs64)));              \
>                                                                          \
>      memcpy(MF.data, (VALUEP), (N_WORDS) * sizeof *MF.data);             \
>      MF.data += (N_WORDS);                                               \
> -    MF.map |= ((UINT64_MAX >> (64 - (N_WORDS))) << ofs32);              \
> +    MF.map |= ((UINT64_MAX >> (64 - (N_WORDS))) << ofs64);              \
>  }
>
> -#define miniflow_push_uint32(MF, FIELD, VALUE)                          \
> -    miniflow_push_uint32_(MF, offsetof(struct flow, FIELD), VALUE)
> +/* Push 32-bit words padded to 64-bits. */
> +#define miniflow_push_words_32_(MF, OFS, VALUEP, N_WORDS)               \
> +{                                                                       \
> +    int ofs64 = (OFS) / 8;                                              \
> +                                                                        \
> +    MINIFLOW_ASSERT(MF.data + DIV_ROUND_UP(N_WORDS, 2) <= MF.end        \
> +                    && (OFS) % 8 == 0                                   \
> +                    && !(MF.map & (UINT64_MAX << ofs64)));              \
> +                                                                        \
> +    memcpy(MF.data, (VALUEP), (N_WORDS) * sizeof(uint32_t));            \
> +    MF.data += DIV_ROUND_UP(N_WORDS, 2);                                \
> +    MF.map |= ((UINT64_MAX >> (64 - DIV_ROUND_UP(N_WORDS, 2))) << ofs64); \
> +    if ((N_WORDS) & 1) {                                                \
> +        *((uint32_t *)MF.data - 1) = 0;                                 \
> +    }                                                                   \
> +}
>
> -#define miniflow_push_be32(MF, FIELD, VALUE)                            \
> -    miniflow_push_be32_(MF, offsetof(struct flow, FIELD), VALUE)
> +/* Data at 'valuep' may be unaligned. */
> +/* MACs start 64-aligned, and must be followed by other data or padding. */
> +#define miniflow_push_macs_(MF, OFS, VALUEP)                    \
> +{                                                               \
> +    int ofs64 = (OFS) / 8;                                      \
> +                                                                \
> +    MINIFLOW_ASSERT(MF.data + 2 <= MF.end && (OFS) % 8 == 0     \
> +                    && !(MF.map & (UINT64_MAX << ofs64)));      \
> +                                                                \
> +    memcpy(MF.data, (VALUEP), 2 * ETH_ADDR_LEN);                \
> +    MF.data += 1;                   /* First word only. */      \
> +    MF.map |= UINT64_C(3) << ofs64; /* Both words. */           \
> +}
>
> -#define miniflow_push_uint32_check(MF, FIELD, VALUE)                    \
> -    { if (OVS_LIKELY(VALUE)) {                                          \
> -            miniflow_push_uint32_(MF, offsetof(struct flow, FIELD), VALUE); \
> -        }                                                               \
> -    }
> +#define miniflow_push_uint32(MF, FIELD, VALUE)                      \
> +    miniflow_push_uint32_(MF, offsetof(struct flow, FIELD), VALUE)
>
> -#define miniflow_push_be32_check(MF, FIELD, VALUE)                      \
> -    { if (OVS_LIKELY(VALUE)) {                                          \
> -            miniflow_push_be32_(MF, offsetof(struct flow, FIELD), VALUE); \
> -        }                                                               \
> -    }
> +#define miniflow_push_be32(MF, FIELD, VALUE)                        \
> +    miniflow_push_be32_(MF, offsetof(struct flow, FIELD), VALUE)
>
> -#define miniflow_push_uint16(MF, FIELD, VALUE)                          \
> +#define miniflow_push_uint16(MF, FIELD, VALUE)                      \
>      miniflow_push_uint16_(MF, offsetof(struct flow, FIELD), VALUE)
>
> -#define miniflow_push_be16(MF, FIELD, VALUE)                            \
> +#define miniflow_push_be16(MF, FIELD, VALUE)                        \
>      miniflow_push_be16_(MF, offsetof(struct flow, FIELD), VALUE)
>
> +#define miniflow_pad_to_64(MF, FIELD)                       \
> +    miniflow_pad_to_64_(MF, offsetof(struct flow, FIELD))
> +
>  #define miniflow_push_words(MF, FIELD, VALUEP, N_WORDS)                 \
>      miniflow_push_words_(MF, offsetof(struct flow, FIELD), VALUEP, N_WORDS)
>
> +#define miniflow_push_words_32(MF, FIELD, VALUEP, N_WORDS)              \
> +    miniflow_push_words_32_(MF, offsetof(struct flow, FIELD), VALUEP, N_WORDS)
> +
> +#define miniflow_push_macs(MF, FIELD, VALUEP)                       \
> +    miniflow_push_macs_(MF, offsetof(struct flow, FIELD), VALUEP)
> +
>  /* Pulls the MPLS headers at '*datap' and returns the count of them. */
>  static inline int
>  parse_mpls(void **datap, size_t *sizep)
> @@ -349,7 +408,7 @@ flow_extract(struct ofpbuf *packet, const struct pkt_metadata *md,
>  {
>      struct {
>          struct miniflow mf;
> -        uint32_t buf[FLOW_U32S];
> +        uint64_t buf[FLOW_U64S];
>      } m;
>
>      COVERAGE_INC(flow_extract);
> @@ -360,15 +419,15 @@ flow_extract(struct ofpbuf *packet, const struct pkt_metadata *md,
>  }
>
>  /* Caller is responsible for initializing 'dst' with enough storage for
> - * FLOW_U32S * 4 bytes. */
> + * FLOW_U64S * 8 bytes. */
>  void
>  miniflow_extract(struct ofpbuf *packet, const struct pkt_metadata *md,
>                   struct miniflow *dst)
>  {
>      void *data = ofpbuf_data(packet);
>      size_t size = ofpbuf_size(packet);
> -    uint32_t *values = miniflow_values(dst);
> -    struct mf_ctx mf = { 0, values, values + FLOW_U32S };
> +    uint64_t *values = miniflow_values(dst);
> +    struct mf_ctx mf = { 0, values, values + FLOW_U64S };
>      char *l2;
>      ovs_be16 dl_type;
>      uint8_t nw_frag, nw_tos, nw_ttl, nw_proto;
> @@ -377,12 +436,18 @@ miniflow_extract(struct ofpbuf *packet, const struct pkt_metadata *md,
>      if (md) {
>          if (md->tunnel.ip_dst) {
>              miniflow_push_words(mf, tunnel, &md->tunnel,
> -                                sizeof md->tunnel / 4);
> +                                sizeof md->tunnel / sizeof(uint64_t));
> +        }
> +        if (md->skb_priority || md->pkt_mark) {
> +            miniflow_push_uint32(mf, skb_priority, md->skb_priority);
> +            miniflow_push_uint32(mf, pkt_mark, md->pkt_mark);
>          }
> -        miniflow_push_uint32_check(mf, skb_priority, md->skb_priority);
> -        miniflow_push_uint32_check(mf, pkt_mark, md->pkt_mark);
> -        miniflow_push_uint32_check(mf, recirc_id, md->recirc_id);
> +        miniflow_push_uint32(mf, dp_hash, md->dp_hash);
>          miniflow_push_uint32(mf, in_port, odp_to_u32(md->in_port.odp_port));
> +        if (md->recirc_id) {
> +            miniflow_push_uint32(mf, recirc_id, md->recirc_id);
> +            miniflow_pad_to_64(mf, actset_output);
> +        }
>      }
>
>      /* Initialize packet's layer pointer and offsets. */
> @@ -398,7 +463,7 @@ miniflow_extract(struct ofpbuf *packet, const struct pkt_metadata *md,
>          /* Link layer. */
>          BUILD_ASSERT(offsetof(struct flow, dl_dst) + 6
>                       == offsetof(struct flow, dl_src));
> -        miniflow_push_words(mf, dl_dst, data, ETH_ADDR_LEN * 2 / 4);
> +        miniflow_push_macs(mf, dl_dst, data);
>          /* dl_type, vlan_tci. */
>          vlan_tci = parse_vlan(&data, &size);
>          dl_type = parse_ethertype(&data, &size);
> @@ -413,7 +478,7 @@ miniflow_extract(struct ofpbuf *packet, const struct pkt_metadata *md,
>
>          packet->l2_5_ofs = (char *)data - l2;
>          count = parse_mpls(&data, &size);
> -        miniflow_push_words(mf, mpls_lse, mpls, count);
> +        miniflow_push_words_32(mf, mpls_lse, mpls, count);
>      }
>
>      /* Network layer. */
> @@ -447,7 +512,9 @@ miniflow_extract(struct ofpbuf *packet, const struct pkt_metadata *md,
>          size = tot_len;   /* Never pull padding. */
>
>          /* Push both source and destination address at once. */
> -        miniflow_push_words(mf, nw_src, &nh->ip_src, 2);
> +        miniflow_push_words(mf, nw_src, &nh->ip_src, 1);
> +
> +        miniflow_push_be32(mf, ipv6_label, 0); /* Padding for IPv4. */
>
>          nw_tos = nh->ip_tos;
>          nw_ttl = nh->ip_ttl;
> @@ -481,14 +548,14 @@ miniflow_extract(struct ofpbuf *packet, const struct pkt_metadata *md,
>          size = plen;   /* Never pull padding. */
>
>          miniflow_push_words(mf, ipv6_src, &nh->ip6_src,
> -                            sizeof nh->ip6_src / 4);
> +                            sizeof nh->ip6_src / 8);
>          miniflow_push_words(mf, ipv6_dst, &nh->ip6_dst,
> -                            sizeof nh->ip6_dst / 4);
> +                            sizeof nh->ip6_dst / 8);
>
>          tc_flow = get_16aligned_be32(&nh->ip6_flow);
>          {
>              ovs_be32 label = tc_flow & htonl(IPV6_LABEL_MASK);
> -            miniflow_push_be32_check(mf, ipv6_label, label);
> +            miniflow_push_be32(mf, ipv6_label, label);
>          }
>
>          nw_tos = ntohl(tc_flow) >> 20;
> @@ -569,11 +636,14 @@ miniflow_extract(struct ofpbuf *packet, const struct pkt_metadata *md,
>                  && OVS_LIKELY(arp->ar_pro == htons(ETH_TYPE_IP))
>                  && OVS_LIKELY(arp->ar_hln == ETH_ADDR_LEN)
>                  && OVS_LIKELY(arp->ar_pln == 4)) {
> -                miniflow_push_words(mf, nw_src, &arp->ar_spa, 1);
> -                miniflow_push_words(mf, nw_dst, &arp->ar_tpa, 1);
> +                miniflow_push_be32(mf, nw_src,
> +                                   get_16aligned_be32(&arp->ar_spa));
> +                miniflow_push_be32(mf, nw_dst,
> +                                   get_16aligned_be32(&arp->ar_tpa));
>
>                  /* We only match on the lower 8 bits of the opcode. */
>                  if (OVS_LIKELY(ntohs(arp->ar_op) <= 0xff)) {
> +                    miniflow_push_be32(mf, ipv6_label, 0); /* Pad with ARP. */
>                      miniflow_push_be32(mf, nw_frag, htonl(ntohs(arp->ar_op)));
>                  }
>
> @@ -583,8 +653,8 @@ miniflow_extract(struct ofpbuf *packet, const struct pkt_metadata *md,
>
>                  memcpy(arp_buf[0], arp->ar_sha, ETH_ADDR_LEN);
>                  memcpy(arp_buf[1], arp->ar_tha, ETH_ADDR_LEN);
> -                miniflow_push_words(mf, arp_sha, arp_buf,
> -                                    ETH_ADDR_LEN * 2 / 4);
> +                miniflow_push_macs(mf, arp_sha, arp_buf);
> +                miniflow_pad_to_64(mf, tcp_flags);
>              }
>          }
>          goto out;
> @@ -599,21 +669,25 @@ miniflow_extract(struct ofpbuf *packet, const struct pkt_metadata *md,
>              if (OVS_LIKELY(size >= TCP_HEADER_LEN)) {
>                  const struct tcp_header *tcp = data;
>
> +                miniflow_push_be32(mf, arp_tha[2], 0);
>                  miniflow_push_be32(mf, tcp_flags,
>                                     TCP_FLAGS_BE32(tcp->tcp_ctl));
>                  miniflow_push_words(mf, tp_src, &tcp->tcp_src, 1);
> +                miniflow_pad_to_64(mf, igmp_group_ip4);
>              }
>          } else if (OVS_LIKELY(nw_proto == IPPROTO_UDP)) {
>              if (OVS_LIKELY(size >= UDP_HEADER_LEN)) {
>                  const struct udp_header *udp = data;
>
>                  miniflow_push_words(mf, tp_src, &udp->udp_src, 1);
> +                miniflow_pad_to_64(mf, igmp_group_ip4);
>              }
>          } else if (OVS_LIKELY(nw_proto == IPPROTO_SCTP)) {
>              if (OVS_LIKELY(size >= SCTP_HEADER_LEN)) {
>                  const struct sctp_header *sctp = data;
>
>                  miniflow_push_words(mf, tp_src, &sctp->sctp_src, 1);
> +                miniflow_pad_to_64(mf, igmp_group_ip4);
>              }
>          } else if (OVS_LIKELY(nw_proto == IPPROTO_ICMP)) {
>              if (OVS_LIKELY(size >= ICMP_HEADER_LEN)) {
> @@ -621,6 +695,7 @@ miniflow_extract(struct ofpbuf *packet, const struct pkt_metadata *md,
>
>                  miniflow_push_be16(mf, tp_src, htons(icmp->icmp_type));
>                  miniflow_push_be16(mf, tp_dst, htons(icmp->icmp_code));
> +                miniflow_pad_to_64(mf, igmp_group_ip4);
>              }
>          } else if (OVS_LIKELY(nw_proto == IPPROTO_IGMP)) {
>              if (OVS_LIKELY(size >= IGMP_HEADER_LEN)) {
> @@ -640,21 +715,19 @@ miniflow_extract(struct ofpbuf *packet, const struct pkt_metadata *md,
>                  memset(arp_buf, 0, sizeof arp_buf);
>                  if (OVS_LIKELY(parse_icmpv6(&data, &size, icmp, &nd_target,
>                                              arp_buf))) {
> -                    miniflow_push_words(mf, arp_sha, arp_buf,
> -                                             ETH_ADDR_LEN * 2 / 4);
>                      if (nd_target) {
>                          miniflow_push_words(mf, nd_target, nd_target,
> -                                            sizeof *nd_target / 4);
> +                                            sizeof *nd_target / 8);
>                      }
> +                    miniflow_push_macs(mf, arp_sha, arp_buf);
> +                    miniflow_pad_to_64(mf, tcp_flags);
>                      miniflow_push_be16(mf, tp_src, htons(icmp->icmp6_type));
>                      miniflow_push_be16(mf, tp_dst, htons(icmp->icmp6_code));
> +                    miniflow_pad_to_64(mf, igmp_group_ip4);
>                  }
>              }
>          }
>      }
> -    if (md) {
> -        miniflow_push_uint32_check(mf, dp_hash, md->dp_hash);
> -    }
>   out:
>      dst->map = mf.map;
>  }
> @@ -664,12 +737,12 @@ miniflow_extract(struct ofpbuf *packet, const struct pkt_metadata *md,
>  void
>  flow_zero_wildcards(struct flow *flow, const struct flow_wildcards *wildcards)
>  {
> -    uint32_t *flow_u32 = (uint32_t *) flow;
> -    const uint32_t *wc_u32 = (const uint32_t *) &wildcards->masks;
> +    uint64_t *flow_u64 = (uint64_t *) flow;
> +    const uint64_t *wc_u64 = (const uint64_t *) &wildcards->masks;
>      size_t i;
>
> -    for (i = 0; i < FLOW_U32S; i++) {
> -        flow_u32[i] &= wc_u32[i];
> +    for (i = 0; i < FLOW_U64S; i++) {
> +        flow_u64[i] &= wc_u64[i];
>      }
>  }
>
> @@ -689,7 +762,7 @@ flow_unwildcard_tp_ports(const struct flow *flow, struct flow_wildcards *wc)
>  void
>  flow_get_metadata(const struct flow *flow, struct flow_metadata *fmd)
>  {
> -    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28);
> +    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 29);
>
>      fmd->dp_hash = flow->dp_hash;
>      fmd->recirc_id = flow->recirc_id;
> @@ -836,7 +909,7 @@ void flow_wildcards_init_for_packet(struct flow_wildcards *wc,
>      memset(&wc->masks, 0x0, sizeof wc->masks);
>
>      /* Update this function whenever struct flow changes. */
> -    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28);
> +    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 29);
>
>      if (flow->tunnel.ip_dst) {
>          if (flow->tunnel.flags & FLOW_TNL_F_KEY) {
> @@ -933,7 +1006,7 @@ uint64_t
>  flow_wc_map(const struct flow *flow)
>  {
>      /* Update this function whenever struct flow changes. */
> -    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28);
> +    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 29);
>
>      uint64_t map = (flow->tunnel.ip_dst) ? MINIFLOW_MAP(tunnel) : 0;
>
> @@ -985,7 +1058,7 @@ void
>  flow_wildcards_clear_non_packet_fields(struct flow_wildcards *wc)
>  {
>      /* Update this function whenever struct flow changes. */
> -    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28);
> +    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 29);
>
>      memset(&wc->masks.metadata, 0, sizeof wc->masks.metadata);
>      memset(&wc->masks.regs, 0, sizeof wc->masks.regs);
> @@ -997,11 +1070,11 @@ flow_wildcards_clear_non_packet_fields(struct flow_wildcards *wc)
>  bool
>  flow_wildcards_is_catchall(const struct flow_wildcards *wc)
>  {
> -    const uint32_t *wc_u32 = (const uint32_t *) &wc->masks;
> +    const uint64_t *wc_u64 = (const uint64_t *) &wc->masks;
>      size_t i;
>
> -    for (i = 0; i < FLOW_U32S; i++) {
> -        if (wc_u32[i]) {
> +    for (i = 0; i < FLOW_U64S; i++) {
> +        if (wc_u64[i]) {
>              return false;
>          }
>      }
> @@ -1016,13 +1089,13 @@ flow_wildcards_and(struct flow_wildcards *dst,
>                     const struct flow_wildcards *src1,
>                     const struct flow_wildcards *src2)
>  {
> -    uint32_t *dst_u32 = (uint32_t *) &dst->masks;
> -    const uint32_t *src1_u32 = (const uint32_t *) &src1->masks;
> -    const uint32_t *src2_u32 = (const uint32_t *) &src2->masks;
> +    uint64_t *dst_u64 = (uint64_t *) &dst->masks;
> +    const uint64_t *src1_u64 = (const uint64_t *) &src1->masks;
> +    const uint64_t *src2_u64 = (const uint64_t *) &src2->masks;
>      size_t i;
>
> -    for (i = 0; i < FLOW_U32S; i++) {
> -        dst_u32[i] = src1_u32[i] & src2_u32[i];
> +    for (i = 0; i < FLOW_U64S; i++) {
> +        dst_u64[i] = src1_u64[i] & src2_u64[i];
>      }
>  }
>
> @@ -1034,13 +1107,13 @@ flow_wildcards_or(struct flow_wildcards *dst,
>                    const struct flow_wildcards *src1,
>                    const struct flow_wildcards *src2)
>  {
> -    uint32_t *dst_u32 = (uint32_t *) &dst->masks;
> -    const uint32_t *src1_u32 = (const uint32_t *) &src1->masks;
> -    const uint32_t *src2_u32 = (const uint32_t *) &src2->masks;
> +    uint64_t *dst_u64 = (uint64_t *) &dst->masks;
> +    const uint64_t *src1_u64 = (const uint64_t *) &src1->masks;
> +    const uint64_t *src2_u64 = (const uint64_t *) &src2->masks;
>      size_t i;
>
> -    for (i = 0; i < FLOW_U32S; i++) {
> -        dst_u32[i] = src1_u32[i] | src2_u32[i];
> +    for (i = 0; i < FLOW_U64S; i++) {
> +        dst_u64[i] = src1_u64[i] | src2_u64[i];
>      }
>  }
>
> @@ -1066,12 +1139,12 @@ bool
>  flow_wildcards_has_extra(const struct flow_wildcards *a,
>                           const struct flow_wildcards *b)
>  {
> -    const uint32_t *a_u32 = (const uint32_t *) &a->masks;
> -    const uint32_t *b_u32 = (const uint32_t *) &b->masks;
> +    const uint64_t *a_u64 = (const uint64_t *) &a->masks;
> +    const uint64_t *b_u64 = (const uint64_t *) &b->masks;
>      size_t i;
>
> -    for (i = 0; i < FLOW_U32S; i++) {
> -        if ((a_u32[i] & b_u32[i]) != b_u32[i]) {
> +    for (i = 0; i < FLOW_U64S; i++) {
> +        if ((a_u64[i] & b_u64[i]) != b_u64[i]) {
>              return true;
>          }
>      }
> @@ -1084,13 +1157,13 @@ bool
>  flow_equal_except(const struct flow *a, const struct flow *b,
>                    const struct flow_wildcards *wc)
>  {
> -    const uint32_t *a_u32 = (const uint32_t *) a;
> -    const uint32_t *b_u32 = (const uint32_t *) b;
> -    const uint32_t *wc_u32 = (const uint32_t *) &wc->masks;
> +    const uint64_t *a_u64 = (const uint64_t *) a;
> +    const uint64_t *b_u64 = (const uint64_t *) b;
> +    const uint64_t *wc_u64 = (const uint64_t *) &wc->masks;
>      size_t i;
>
> -    for (i = 0; i < FLOW_U32S; i++) {
> -        if ((a_u32[i] ^ b_u32[i]) & wc_u32[i]) {
> +    for (i = 0; i < FLOW_U64S; i++) {
> +        if ((a_u64[i] ^ b_u64[i]) & wc_u64[i]) {
>              return false;
>          }
>      }
> @@ -1128,22 +1201,18 @@ miniflow_hash_5tuple(const struct miniflow *flow, uint32_t basis)
>
>          /* Separate loops for better optimization. */
>          if (dl_type == htons(ETH_TYPE_IPV6)) {
> -            uint64_t map = MINIFLOW_MAP(ipv6_src) | MINIFLOW_MAP(ipv6_dst)
> -                | MINIFLOW_MAP(tp_src); /* Covers both ports */
> -            uint32_t value;
> +            uint64_t map = MINIFLOW_MAP(ipv6_src) | MINIFLOW_MAP(ipv6_dst);
> +            uint64_t value;
>
>              MINIFLOW_FOR_EACH_IN_MAP(value, flow, map) {
> -                hash = hash_add(hash, value);
> +                hash = hash_add64(hash, value);
>              }
>          } else {
> -            uint64_t map = MINIFLOW_MAP(nw_src) | MINIFLOW_MAP(nw_dst)
> -                | MINIFLOW_MAP(tp_src); /* Covers both ports */
> -            uint32_t value;
> -
> -            MINIFLOW_FOR_EACH_IN_MAP(value, flow, map) {
> -                hash = hash_add(hash, value);
> -            }
> +            hash = hash_add(hash, MINIFLOW_GET_U32(flow, nw_src));
> +            hash = hash_add(hash, MINIFLOW_GET_U32(flow, nw_dst));
>          }
> +        /* Add both ports at once. */
> +        hash = hash_add(hash, MINIFLOW_GET_U32(flow, tp_src));
>          hash = hash_finish(hash, 42); /* Arbitrary number. */
>      }
>      return hash;
> @@ -1163,23 +1232,24 @@ flow_hash_5tuple(const struct flow *flow, uint32_t basis)
>      uint32_t hash = basis;
>
>      if (flow) {
> -        const uint32_t *flow_u32 = (const uint32_t *)flow;
> -
>          hash = hash_add(hash, flow->nw_proto);
>
>          if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
> -            int ofs = offsetof(struct flow, ipv6_src) / 4;
> -            int end = ofs + 2 * sizeof flow->ipv6_src / 4;
> +            const uint64_t *flow_u64 = (const uint64_t *)flow;
> +            int ofs = offsetof(struct flow, ipv6_src) / 8;
> +            int end = ofs + 2 * sizeof flow->ipv6_src / 8;
>
> -            while (ofs < end) {
> -                hash = hash_add(hash, flow_u32[ofs++]);
> +            for (;ofs < end; ofs++) {
> +                hash = hash_add64(hash, flow_u64[ofs]);
>              }
>          } else {
>              hash = hash_add(hash, (OVS_FORCE uint32_t) flow->nw_src);
>              hash = hash_add(hash, (OVS_FORCE uint32_t) flow->nw_dst);
>          }
> -        hash = hash_add(hash, flow_u32[offsetof(struct flow, tp_src) / 4]);
> -
> +        /* Add both ports at once. */
> +        hash = hash_add(hash,
> +                        ((const uint32_t *)flow)[offsetof(struct flow, tp_src)
> +                                                 / sizeof(uint32_t)]);
>          hash = hash_finish(hash, 42); /* Arbitrary number. */
>      }
>      return hash;
> @@ -1348,16 +1418,16 @@ uint32_t
>  flow_hash_in_wildcards(const struct flow *flow,
>                         const struct flow_wildcards *wc, uint32_t basis)
>  {
> -    const uint32_t *wc_u32 = (const uint32_t *) &wc->masks;
> -    const uint32_t *flow_u32 = (const uint32_t *) flow;
> +    const uint64_t *wc_u64 = (const uint64_t *) &wc->masks;
> +    const uint64_t *flow_u64 = (const uint64_t *) flow;
>      uint32_t hash;
>      size_t i;
>
>      hash = basis;
> -    for (i = 0; i < FLOW_U32S; i++) {
> -        hash = hash_add(hash, flow_u32[i] & wc_u32[i]);
> +    for (i = 0; i < FLOW_U64S; i++) {
> +        hash = hash_add64(hash, flow_u64[i] & wc_u64[i]);
>      }
> -    return hash_finish(hash, 4 * FLOW_U32S);
> +    return hash_finish(hash, 8 * FLOW_U64S);
>  }
>
>  /* Sets the VLAN VID that 'flow' matches to 'vid', which is interpreted as an
> @@ -1542,10 +1612,11 @@ flow_push_mpls(struct flow *flow, int n, ovs_be16 mpls_eth_type,
>
>          flow->mpls_lse[0] = set_mpls_lse_values(ttl, tc, 1, htonl(label));
>
> -        /* Clear all L3 and L4 fields. */
> -        BUILD_ASSERT(FLOW_WC_SEQ == 28);
> +        /* Clear all L3 and L4 fields and dp_hash. */
> +        BUILD_ASSERT(FLOW_WC_SEQ == 29);
>          memset((char *) flow + FLOW_SEGMENT_2_ENDS_AT, 0,
>                 sizeof(struct flow) - FLOW_SEGMENT_2_ENDS_AT);
> +        flow->dp_hash = 0;
>      }
>      flow->dl_type = mpls_eth_type;
>  }
> @@ -1820,7 +1891,7 @@ miniflow_n_values(const struct miniflow *flow)
>      return count_1bits(flow->map);
>  }
>
> -static uint32_t *
> +static uint64_t *
>  miniflow_alloc_values(struct miniflow *flow, int n)
>  {
>      int size = MINIFLOW_VALUES_SIZE(n);
> @@ -1838,7 +1909,7 @@ miniflow_alloc_values(struct miniflow *flow, int n)
>
>  /* Completes an initialization of 'dst' as a miniflow copy of 'src' begun by
>   * the caller.  The caller must have already initialized 'dst->map' properly
> - * to indicate the significant uint32_t elements of 'src'.  'n' must be the
> + * to indicate the significant uint64_t elements of 'src'.  'n' must be the
>   * number of 1-bits in 'dst->map'.
>   *
>   * Normally the significant elements are the ones that are non-zero.  However,
> @@ -1846,17 +1917,17 @@ miniflow_alloc_values(struct miniflow *flow, int n)
>   * so that the flow and mask always have the same maps.
>   *
>   * This function initializes values (either inline if possible or with
> - * malloc() otherwise) and copies the uint32_t elements of 'src' indicated by
> + * malloc() otherwise) and copies the uint64_t elements of 'src' indicated by
>   * 'dst->map' into it. */
>  static void
>  miniflow_init__(struct miniflow *dst, const struct flow *src, int n)
>  {
> -    const uint32_t *src_u32 = (const uint32_t *) src;
> -    uint32_t *dst_u32 = miniflow_alloc_values(dst, n);
> +    const uint64_t *src_u64 = (const uint64_t *) src;
> +    uint64_t *dst_u64 = miniflow_alloc_values(dst, n);
>      int idx;
>
>      MAP_FOR_EACH_INDEX(idx, dst->map) {
> -        *dst_u32++ = src_u32[idx];
> +        *dst_u64++ = src_u64[idx];
>      }
>  }
>
> @@ -1866,7 +1937,7 @@ miniflow_init__(struct miniflow *dst, const struct flow *src, int n)
>  void
>  miniflow_init(struct miniflow *dst, const struct flow *src)
>  {
> -    const uint32_t *src_u32 = (const uint32_t *) src;
> +    const uint64_t *src_u64 = (const uint64_t *) src;
>      unsigned int i;
>      int n;
>
> @@ -1874,8 +1945,8 @@ miniflow_init(struct miniflow *dst, const struct flow *src)
>      n = 0;
>      dst->map = 0;
>
> -    for (i = 0; i < FLOW_U32S; i++) {
> -        if (src_u32[i]) {
> +    for (i = 0; i < FLOW_U64S; i++) {
> +        if (src_u64[i]) {
>              dst->map |= UINT64_C(1) << i;
>              n++;
>          }
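
The map construction is easy to follow now that one bit covers a whole
uint64_t.  A minimal sketch of what I understand the loop to do, with a
plain array standing in for the struct flow words:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t flow_words[6] = { 7, 0, 0, 42, 0, 1 };
        uint64_t map = 0;
        int n = 0;

        /* One bit per nonzero 64-bit word, as in miniflow_init(). */
        for (int i = 0; i < 6; i++) {
            if (flow_words[i]) {
                map |= UINT64_C(1) << i;
                n++;
            }
        }
        printf("map=%#llx, %d values to copy\n",
               (unsigned long long) map, n);   /* map=0x29, 3 values */
        return 0;
    }
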
> @@ -1900,7 +1971,7 @@ void
>  miniflow_clone(struct miniflow *dst, const struct miniflow *src)
>  {
>      int size = MINIFLOW_VALUES_SIZE(miniflow_n_values(src));
> -    uint32_t *values;
> +    uint64_t *values;
>
>      dst->map = src->map;
>      if (size <= sizeof dst->inline_values) {
> @@ -1971,21 +2042,12 @@ miniflow_expand(const struct miniflow *src, struct flow *dst)
>      flow_union_with_miniflow(dst, src);
>  }
>
> -/* Returns the uint32_t that would be at byte offset '4 * u32_ofs' if 'flow'
> - * were expanded into a "struct flow". */
> -static uint32_t
> -miniflow_get(const struct miniflow *flow, unsigned int u32_ofs)
> -{
> -    return flow->map & (UINT64_C(1) << u32_ofs)
> -        ? miniflow_get__(flow, u32_ofs) : 0;
> -}
> -
>  /* Returns true if 'a' and 'b' are the equal miniflow, false otherwise. */
>  bool
>  miniflow_equal(const struct miniflow *a, const struct miniflow *b)
>  {
> -    const uint32_t *ap = miniflow_get_u32_values(a);
> -    const uint32_t *bp = miniflow_get_u32_values(b);
> +    const uint64_t *ap = miniflow_get_values(a);
> +    const uint64_t *bp = miniflow_get_values(b);
>
>      if (OVS_LIKELY(a->map == b->map)) {
>          int count = miniflow_n_values(a);
> @@ -2012,7 +2074,7 @@ bool
>  miniflow_equal_in_minimask(const struct miniflow *a, const struct miniflow *b,
>                             const struct minimask *mask)
>  {
> -    const uint32_t *p = miniflow_get_u32_values(&mask->masks);
> +    const uint64_t *p = miniflow_get_values(&mask->masks);
>      int idx;
>
>      MAP_FOR_EACH_INDEX(idx, mask->masks.map) {
> @@ -2030,12 +2092,12 @@ bool
>  miniflow_equal_flow_in_minimask(const struct miniflow *a, const struct flow *b,
>                                  const struct minimask *mask)
>  {
> -    const uint32_t *b_u32 = (const uint32_t *) b;
> -    const uint32_t *p = miniflow_get_u32_values(&mask->masks);
> +    const uint64_t *b_u64 = (const uint64_t *) b;
> +    const uint64_t *p = miniflow_get_values(&mask->masks);
>      int idx;
>
>      MAP_FOR_EACH_INDEX(idx, mask->masks.map) {
> -        if ((miniflow_get(a, idx) ^ b_u32[idx]) & *p++) {
> +        if ((miniflow_get(a, idx) ^ b_u64[idx]) & *p++) {
>              return false;
>          }
>      }
> @@ -2070,15 +2132,15 @@ minimask_move(struct minimask *dst, struct minimask *src)
>
>  /* Initializes 'dst_' as the bit-wise "and" of 'a_' and 'b_'.
>   *
> - * The caller must provide room for FLOW_U32S "uint32_t"s in 'storage', for use
> + * The caller must provide room for FLOW_U64S "uint64_t"s in 'storage', for use
>   * by 'dst_'.  The caller must *not* free 'dst_' with minimask_destroy(). */
>  void
>  minimask_combine(struct minimask *dst_,
>                   const struct minimask *a_, const struct minimask *b_,
> -                 uint32_t storage[FLOW_U32S])
> +                 uint64_t storage[FLOW_U64S])
>  {
>      struct miniflow *dst = &dst_->masks;
> -    uint32_t *dst_values = storage;
> +    uint64_t *dst_values = storage;
>      const struct miniflow *a = &a_->masks;
>      const struct miniflow *b = &b_->masks;
>      int idx;
> @@ -2089,7 +2151,7 @@ minimask_combine(struct minimask *dst_,
>      dst->map = 0;
>      MAP_FOR_EACH_INDEX(idx, a->map & b->map) {
>          /* Both 'a' and 'b' have non-zero data at 'idx'. */
> -        uint32_t mask = miniflow_get__(a, idx) & miniflow_get__(b, idx);
> +        uint64_t mask = miniflow_get__(a, idx) & miniflow_get__(b, idx);
>
>          if (mask) {
>              dst->map |= UINT64_C(1) << idx;
> @@ -2113,14 +2175,6 @@ minimask_expand(const struct minimask *mask, struct flow_wildcards *wc)
>      miniflow_expand(&mask->masks, &wc->masks);
>  }
>
> -/* Returns the uint32_t that would be at byte offset '4 * u32_ofs' if 'mask'
> - * were expanded into a "struct flow_wildcards". */
> -uint32_t
> -minimask_get(const struct minimask *mask, unsigned int u32_ofs)
> -{
> -    return miniflow_get(&mask->masks, u32_ofs);
> -}
> -
>  /* Returns true if 'a' and 'b' are the same flow mask, false otherwise.
>   * Minimasks may not have zero data values, so for the minimasks to be the
>   * same, they need to have the same map and the same data values. */
> @@ -2128,8 +2182,8 @@ bool
>  minimask_equal(const struct minimask *a, const struct minimask *b)
>  {
>      return a->masks.map == b->masks.map &&
> -        !memcmp(miniflow_get_u32_values(&a->masks),
> -                miniflow_get_u32_values(&b->masks),
> +        !memcmp(miniflow_get_values(&a->masks),
> +                miniflow_get_values(&b->masks),
>                  count_1bits(a->masks.map) * sizeof *a->masks.inline_values);
>  }
>
> @@ -2138,18 +2192,18 @@ minimask_equal(const struct minimask *a, const struct minimask *b)
>  bool
>  minimask_has_extra(const struct minimask *a, const struct minimask *b)
>  {
> -    const uint32_t *ap = miniflow_get_u32_values(&a->masks);
> -    const uint32_t *bp = miniflow_get_u32_values(&b->masks);
> +    const uint64_t *ap = miniflow_get_values(&a->masks);
> +    const uint64_t *bp = miniflow_get_values(&b->masks);
>      int idx;
>
>      MAP_FOR_EACH_INDEX(idx, b->masks.map) {
> -        uint32_t b_u32 = *bp++;
> +        uint64_t b_u64 = *bp++;
>
> -        /* 'b_u32' is non-zero, check if the data in 'a' is either zero
> -         * or misses some of the bits in 'b_u32'. */
> +        /* 'b_u64' is non-zero, check if the data in 'a' is either zero
> +         * or misses some of the bits in 'b_u64'. */
>          if (!(a->masks.map & (UINT64_C(1) << idx))
> -            || ((miniflow_values_get__(ap, a->masks.map, idx) & b_u32)
> -                != b_u32)) {
> +            || ((miniflow_values_get__(ap, a->masks.map, idx) & b_u64)
> +                != b_u64)) {
>              return true; /* 'a' wildcards some bits 'b' doesn't. */
>          }
>      }
> diff --git a/lib/flow.h b/lib/flow.h
> index 8e56d05..17b9b86 100644
> --- a/lib/flow.h
> +++ b/lib/flow.h
> @@ -38,11 +38,12 @@ struct pkt_metadata;
>  /* This sequence number should be incremented whenever anything involving flows
>   * or the wildcarding of flows changes.  This will cause build assertion
>   * failures in places which likely need to be updated. */
> -#define FLOW_WC_SEQ 28
> +#define FLOW_WC_SEQ 29
>
>  /* Number of Open vSwitch extension 32-bit registers. */
>  #define FLOW_N_REGS 8
>  BUILD_ASSERT_DECL(FLOW_N_REGS <= NXM_NX_MAX_REGS);
> +BUILD_ASSERT_DECL(FLOW_N_REGS % 2 == 0); /* Even. */
>
>  /* Number of OpenFlow 1.5+ 64-bit registers.
>   *
> @@ -100,85 +101,82 @@ struct flow {
>      uint32_t regs[FLOW_N_REGS]; /* Registers. */
>      uint32_t skb_priority;      /* Packet priority for QoS. */
>      uint32_t pkt_mark;          /* Packet mark. */
> -    uint32_t recirc_id;         /* Must be exact match. */
> +    uint32_t dp_hash;           /* Datapath computed hash value. The exact
> +                                 * computation is opaque to the user space. */
>      union flow_in_port in_port; /* Input port.*/
> +    uint32_t recirc_id;         /* Must be exact match. */
>      ofp_port_t actset_output;   /* Output port in action set. */
> -    ovs_be16 pad1;              /* Pad to 32 bits. */
> +    ovs_be16 pad1;              /* Pad to 64 bits. */
>
> -    /* L2, Order the same as in the Ethernet header! */
> +    /* L2, Order the same as in the Ethernet header! (64-bit aligned) */
>      uint8_t dl_dst[ETH_ADDR_LEN]; /* Ethernet destination address. */
>      uint8_t dl_src[ETH_ADDR_LEN]; /* Ethernet source address. */
>      ovs_be16 dl_type;           /* Ethernet frame type. */
>      ovs_be16 vlan_tci;          /* If 802.1Q, TCI | VLAN_CFI; otherwise 0. */
> -    ovs_be32 mpls_lse[FLOW_MAX_MPLS_LABELS]; /* MPLS label stack entry. */
> -
> -    /* L3 */
> +    ovs_be32 mpls_lse[ROUND_UP(FLOW_MAX_MPLS_LABELS, 2)]; /* MPLS label stack
> +                                                             (with padding). */
> +    /* L3 (64-bit aligned) */
> +    ovs_be32 nw_src;            /* IPv4 source address. */
> +    ovs_be32 nw_dst;            /* IPv4 destination address. */
>      struct in6_addr ipv6_src;   /* IPv6 source address. */
>      struct in6_addr ipv6_dst;   /* IPv6 destination address. */
>      ovs_be32 ipv6_label;        /* IPv6 flow label. */
> -    ovs_be32 nw_src;            /* IPv4 source address. */
> -    ovs_be32 nw_dst;            /* IPv4 destination address. */
>      uint8_t nw_frag;            /* FLOW_FRAG_* flags. */
>      uint8_t nw_tos;             /* IP ToS (including DSCP and ECN). */
>      uint8_t nw_ttl;             /* IP TTL/Hop Limit. */
>      uint8_t nw_proto;           /* IP protocol or low 8 bits of ARP opcode. */
> +    struct in6_addr nd_target;  /* IPv6 neighbor discovery (ND) target. */
>      uint8_t arp_sha[ETH_ADDR_LEN]; /* ARP/ND source hardware address. */
>      uint8_t arp_tha[ETH_ADDR_LEN]; /* ARP/ND target hardware address. */
> -    struct in6_addr nd_target;  /* IPv6 neighbor discovery (ND) target. */
>      ovs_be16 tcp_flags;         /* TCP flags. With L3 to avoid matching L4. */
> -    ovs_be16 pad2;              /* Pad to 32 bits. */
> +    ovs_be16 pad2;              /* Pad to 64 bits. */
>
> -    /* L4 */
> +    /* L4 (64-bit aligned) */
>      ovs_be16 tp_src;            /* TCP/UDP/SCTP source port. */
>      ovs_be16 tp_dst;            /* TCP/UDP/SCTP destination port. */
> -    ovs_be32 igmp_group_ip4;    /* IGMP group IPv4 address */
> -    uint32_t dp_hash;           /* Datapath computed hash value. The exact
> -                                 * computation is opaque to the user space.
> +    ovs_be32 igmp_group_ip4;    /* IGMP group IPv4 address.
>                                   * Keep last for BUILD_ASSERT_DECL below. */
>  };
> -BUILD_ASSERT_DECL(sizeof(struct flow) % 4 == 0);
> +BUILD_ASSERT_DECL(sizeof(struct flow) % sizeof(uint64_t) == 0);
>
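
Pairing nw_src/nw_dst into one 64-bit word is a nice win for map density.
A toy struct with C11 _Static_asserts (my own example, not the real struct
flow) captures the property the reordering relies on:

    #include <stdint.h>
    #include <stddef.h>

    struct toy_l3 {
        uint64_t pad;       /* keeps the next pair 8-byte aligned */
        uint32_t nw_src;
        uint32_t nw_dst;
    };

    _Static_assert(offsetof(struct toy_l3, nw_src) % 8 == 0,
                   "pair starts on a u64 boundary");
    _Static_assert(offsetof(struct toy_l3, nw_src) / 8
                   == offsetof(struct toy_l3, nw_dst) / 8,
                   "nw_src and nw_dst share one u64 map word");

    int main(void) { return 0; }
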
> -#define FLOW_U32S (sizeof(struct flow) / 4)
> +#define FLOW_U64S (sizeof(struct flow) / sizeof(uint64_t))
>
>  /* Some flow fields are mutually exclusive or only appear within the flow
>   * pipeline.  IPv6 headers are bigger than IPv4 and MPLS, and IPv6 ND packets
>   * are bigger than TCP,UDP and IGMP packets. */
> -#define FLOW_MAX_PACKET_U32S (FLOW_U32S                                  \
> -    /* Unused in datapath */  - FLOW_U32_SIZE(regs)                      \
> -                              - FLOW_U32_SIZE(metadata)                  \
> -                              - FLOW_U32_SIZE(actset_output)             \
> -    /* L2.5/3 */              - FLOW_U32_SIZE(nw_src)                    \
> -                              - FLOW_U32_SIZE(nw_dst)                    \
> -                              - FLOW_U32_SIZE(mpls_lse)                  \
> -    /* L4 */                  - FLOW_U32_SIZE(tcp_flags) /* incl. pad. */\
> -                              - FLOW_U32_SIZE(igmp_group_ip4)            \
> +#define FLOW_MAX_PACKET_U64S (FLOW_U64S                                  \
> +    /* Unused in datapath */  - FLOW_U64_SIZE(regs)                      \
> +                              - FLOW_U64_SIZE(metadata)                  \
> +    /* L2.5/3 */              - FLOW_U64_SIZE(nw_src)  /* incl. nw_dst */\
> +                              - FLOW_U64_SIZE(mpls_lse)                  \
> +    /* L4 */                  - FLOW_U64_SIZE(tp_src)                    \
>                               )
>
>  /* Remember to update FLOW_WC_SEQ when changing 'struct flow'. */
> -BUILD_ASSERT_DECL(offsetof(struct flow, dp_hash) + sizeof(uint32_t)
> -                  == sizeof(struct flow_tnl) + 180
> -                  && FLOW_WC_SEQ == 28);
> +BUILD_ASSERT_DECL(offsetof(struct flow, igmp_group_ip4) + sizeof(uint32_t)
> +                  == sizeof(struct flow_tnl) + 184
> +                  && FLOW_WC_SEQ == 29);
>
>  /* Incremental points at which flow classification may be performed in
>   * segments.
>   * This is located here since this is dependent on the structure of the
>   * struct flow defined above:
> - * Each offset must be on a distinct, successive U32 boundary strictly
> + * Each offset must be on a distinct, successive U64 boundary strictly
>   * within the struct flow. */
>  enum {
>      FLOW_SEGMENT_1_ENDS_AT = offsetof(struct flow, dl_dst),
> -    FLOW_SEGMENT_2_ENDS_AT = offsetof(struct flow, ipv6_src),
> +    FLOW_SEGMENT_2_ENDS_AT = offsetof(struct flow, nw_src),
>      FLOW_SEGMENT_3_ENDS_AT = offsetof(struct flow, tp_src),
>  };
> -BUILD_ASSERT_DECL(FLOW_SEGMENT_1_ENDS_AT % 4 == 0);
> -BUILD_ASSERT_DECL(FLOW_SEGMENT_2_ENDS_AT % 4 == 0);
> -BUILD_ASSERT_DECL(FLOW_SEGMENT_3_ENDS_AT % 4 == 0);
> +BUILD_ASSERT_DECL(FLOW_SEGMENT_1_ENDS_AT % sizeof(uint64_t) == 0);
> +BUILD_ASSERT_DECL(FLOW_SEGMENT_2_ENDS_AT % sizeof(uint64_t) == 0);
> +BUILD_ASSERT_DECL(FLOW_SEGMENT_3_ENDS_AT % sizeof(uint64_t) == 0);
>  BUILD_ASSERT_DECL(                     0 < FLOW_SEGMENT_1_ENDS_AT);
>  BUILD_ASSERT_DECL(FLOW_SEGMENT_1_ENDS_AT < FLOW_SEGMENT_2_ENDS_AT);
>  BUILD_ASSERT_DECL(FLOW_SEGMENT_2_ENDS_AT < FLOW_SEGMENT_3_ENDS_AT);
>  BUILD_ASSERT_DECL(FLOW_SEGMENT_3_ENDS_AT < sizeof(struct flow));
>
> -extern const uint8_t flow_segment_u32s[];
> +extern const uint8_t flow_segment_u64s[];
>
>  /* Represents the metadata fields of struct flow. */
>  struct flow_metadata {
> @@ -261,7 +259,8 @@ flow_equal(const struct flow *a, const struct flow *b)
>  static inline size_t
>  flow_hash(const struct flow *flow, uint32_t basis)
>  {
> -    return hash_words((const uint32_t *) flow, sizeof *flow / 4, basis);
> +    return hash_words64((const uint64_t *)flow,
> +                        sizeof *flow / sizeof(uint64_t), basis);
>  }
>
>  static inline uint16_t
> @@ -373,11 +372,11 @@ bool flow_equal_except(const struct flow *a, const struct flow *b,
>
>  /* Compressed flow. */
>
> -/* Number of 32-bit words present in struct miniflow. */
> -#define MINI_N_INLINE 8
> +/* Number of 64-bit words present in struct miniflow. */
> +#define MINI_N_INLINE 4
>
> -/* Maximum number of 32-bit words supported. */
> -BUILD_ASSERT_DECL(FLOW_U32S <= 63);
> +/* Maximum number of 64-bit words supported. */
> +BUILD_ASSERT_DECL(FLOW_U64S <= 63);
>
>  /* A sparse representation of a "struct flow".
>   *
> @@ -386,8 +385,8 @@ BUILD_ASSERT_DECL(FLOW_U32S <= 63);
>   * saves time when the goal is to iterate over only the nonzero parts of
> the
>   * struct.
>   *
> - * The 'map' member holds one bit for each uint32_t in a "struct flow".  Each
> - * 0-bit indicates that the corresponding uint32_t is zero, each 1-bit that it
> + * The 'map' member holds one bit for each uint64_t in a "struct flow".  Each
> + * 0-bit indicates that the corresponding uint64_t is zero, each 1-bit that it
>   * *may* be nonzero (see below how this applies to minimasks).
>   *
>   * The 'values_inline' boolean member indicates that the values are at
> @@ -400,7 +399,7 @@ BUILD_ASSERT_DECL(FLOW_U32S <= 63);
>   * MINI_N_INLINE is the default number of inline words.  When a miniflow is
>   * dynamically allocated the actual amount of inline storage may be different.
>   * In that case 'inline_values' contains storage at least for the number
> - * of words indicated by 'map' (one uint32_t for each 1-bit in the map).
> + * of words indicated by 'map' (one uint64_t for each 1-bit in the map).
>   *
>   * Elements in values array are allowed to be zero.  This is useful for "struct
>   * minimatch", for which ensuring that the miniflow and minimask members have
> @@ -412,43 +411,33 @@ struct miniflow {
>      uint64_t map:63;
>      uint64_t values_inline:1;
>      union {
> -        uint32_t *offline_values;
> -        uint32_t inline_values[MINI_N_INLINE]; /* Minimum inline size. */
> +        uint64_t *offline_values;
> +        uint64_t inline_values[MINI_N_INLINE]; /* Minimum inline size. */
>      };
>  };
>  BUILD_ASSERT_DECL(sizeof(struct miniflow)
> -                  == sizeof(uint64_t) + MINI_N_INLINE * sizeof(uint32_t));
> +                  == sizeof(uint64_t) + MINI_N_INLINE * sizeof(uint64_t));
>
> -#define MINIFLOW_VALUES_SIZE(COUNT) ((COUNT) * sizeof(uint32_t))
> +#define MINIFLOW_VALUES_SIZE(COUNT) ((COUNT) * sizeof(uint64_t))
>
> -static inline uint32_t *miniflow_values(struct miniflow *mf)
> +static inline uint64_t *miniflow_values(struct miniflow *mf)
>  {
>      return OVS_LIKELY(mf->values_inline)
>          ? mf->inline_values : mf->offline_values;
>  }
>
> -static inline const uint32_t *miniflow_get_values(const struct miniflow *mf)
> +static inline const uint64_t *miniflow_get_values(const struct miniflow *mf)
>  {
>      return OVS_LIKELY(mf->values_inline)
>          ? mf->inline_values : mf->offline_values;
>  }
>
> -static inline const uint32_t *miniflow_get_u32_values(const struct miniflow *mf)
> -{
> -    return miniflow_get_values(mf);
> -}
> -
> -static inline const ovs_be32 *miniflow_get_be32_values(const struct miniflow *mf)
> -{
> -    return (OVS_FORCE const ovs_be32 *)miniflow_get_values(mf);
> -}
> -
>  /* This is useful for initializing a miniflow for a miniflow_extract() call. */
>  static inline void miniflow_initialize(struct miniflow *mf,
> -                                       uint32_t buf[FLOW_U32S])
> +                                       uint64_t buf[FLOW_U64S])
>  {
>      mf->map = 0;
> -    mf->values_inline = (buf == (uint32_t *)(mf + 1));
> +    mf->values_inline = (buf == (uint64_t *)(mf + 1));
>      if (!mf->values_inline) {
>          mf->offline_values = buf;
>      }
> @@ -457,7 +446,7 @@ static inline void miniflow_initialize(struct miniflow *mf,
>  struct pkt_metadata;
>
>  /* The 'dst->values' must be initialized with a buffer with space for
> - * FLOW_U32S.  'dst->map' is ignored on input and set on output to
> + * FLOW_U64S.  'dst->map' is ignored on input and set on output to
>   * indicate which fields were extracted. */
>  void miniflow_extract(struct ofpbuf *packet, const struct pkt_metadata *,
>                        struct miniflow *dst);
> @@ -472,53 +461,53 @@ void miniflow_destroy(struct miniflow *);
>
>  void miniflow_expand(const struct miniflow *, struct flow *);
>
> -static inline uint32_t flow_u32_value(const struct flow *flow, size_t index)
> +static inline uint64_t flow_u64_value(const struct flow *flow, size_t index)
>  {
> -    return ((uint32_t *)(flow))[index];
> +    return ((uint64_t *)(flow))[index];
>  }
>
> -static inline uint32_t *flow_u32_lvalue(struct flow *flow, size_t index)
> +static inline uint64_t *flow_u64_lvalue(struct flow *flow, size_t index)
>  {
> -    return &((uint32_t *)(flow))[index];
> +    return &((uint64_t *)(flow))[index];
>  }
>
>  static inline bool
> -flow_get_next_in_map(const struct flow *flow, uint64_t map, uint32_t *value)
> +flow_get_next_in_map(const struct flow *flow, uint64_t map, uint64_t *value)
>  {
>      if (map) {
> -        *value = flow_u32_value(flow, raw_ctz(map));
> +        *value = flow_u64_value(flow, raw_ctz(map));
>          return true;
>      }
>      return false;
>  }
>
> -/* Iterate through all flow u32 values specified by 'MAP'. */
> +/* Iterate through all flow u64 values specified by 'MAP'. */
>  #define FLOW_FOR_EACH_IN_MAP(VALUE, FLOW, MAP)         \
>      for (uint64_t map__ = (MAP);                       \
>           flow_get_next_in_map(FLOW, map__, &(VALUE));  \
>           map__ = zero_rightmost_1bit(map__))
>
> -/* Iterate through all struct flow u32 indices specified by 'MAP'. */
> -#define MAP_FOR_EACH_INDEX(U32IDX, MAP)                 \
> +/* Iterate through all struct flow u64 indices specified by 'MAP'. */
> +#define MAP_FOR_EACH_INDEX(U64IDX, MAP)                 \
>      for (uint64_t map__ = (MAP);                        \
> -         map__ && ((U32IDX) = raw_ctz(map__), true);    \
> +         map__ && ((U64IDX) = raw_ctz(map__), true);    \
>           map__ = zero_rightmost_1bit(map__))
>
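
For anyone new to this pattern, here is how I read the iteration;
__builtin_ctzll() and the map &= map - 1 step are stand-ins for raw_ctz()
and zero_rightmost_1bit():

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t map = (1ULL << 1) | (1ULL << 4) | (1ULL << 9);

        while (map) {
            int idx = __builtin_ctzll(map); /* index of rightmost 1-bit */
            printf("u64 word %d is present\n", idx);
            map &= map - 1;                 /* drop that 1-bit */
        }
        return 0;
    }
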
> -#define FLOW_U32_SIZE(FIELD)                                            \
> -    DIV_ROUND_UP(sizeof(((struct flow *)0)->FIELD), sizeof(uint32_t))
> +#define FLOW_U64_SIZE(FIELD)                                            \
> +    DIV_ROUND_UP(sizeof(((struct flow *)0)->FIELD), sizeof(uint64_t))
>
>  #define MINIFLOW_MAP(FIELD)                       \
> -    (((UINT64_C(1) << FLOW_U32_SIZE(FIELD)) - 1)  \
> -     << (offsetof(struct flow, FIELD) / 4))
> +    (((UINT64_C(1) << FLOW_U64_SIZE(FIELD)) - 1)  \
> +     << (offsetof(struct flow, FIELD) / sizeof(uint64_t)))
>
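
To convince myself about the new divisor I reproduced the macro on a toy
struct (again, not the real struct flow); the run of 1-bits lands exactly
on the 64-bit words the field occupies:

    #include <stdint.h>
    #include <stddef.h>
    #include <stdio.h>

    struct toy_flow {
        uint64_t a;         /* word 0 */
        uint32_t b, c;      /* word 1 */
        uint64_t d[2];      /* words 2-3 */
    };

    #define TOY_U64_SIZE(FIELD) \
        ((sizeof(((struct toy_flow *) 0)->FIELD) + 7) / 8)
    #define TOY_MAP(FIELD)                            \
        (((UINT64_C(1) << TOY_U64_SIZE(FIELD)) - 1)   \
         << (offsetof(struct toy_flow, FIELD) / 8))

    int main(void)
    {
        printf("map(b)=%#llx\n", (unsigned long long) TOY_MAP(b)); /* 0x2 */
        printf("map(d)=%#llx\n", (unsigned long long) TOY_MAP(d)); /* 0xc */
        return 0;
    }
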
>  struct mf_for_each_in_map_aux {
> -    const uint32_t *values;
> +    const uint64_t *values;
>      uint64_t fmap;
>      uint64_t map;
>  };
>
>  static inline bool
> -mf_get_next_in_map(struct mf_for_each_in_map_aux *aux, uint32_t *value)
> +mf_get_next_in_map(struct mf_for_each_in_map_aux *aux, uint64_t *value)
>  {
>      if (aux->map) {
>          uint64_t rm1bit = rightmost_1bit(aux->map);
> @@ -544,50 +533,60 @@ mf_get_next_in_map(struct mf_for_each_in_map_aux *aux, uint32_t *value)
>      }
>  }
>
> -/* Iterate through all miniflow u32 values specified by 'MAP'. */
> +/* Iterate through all miniflow u64 values specified by 'MAP'. */
>  #define MINIFLOW_FOR_EACH_IN_MAP(VALUE, FLOW, MAP)                      \
>      for (struct mf_for_each_in_map_aux aux__                            \
> -             = { miniflow_get_u32_values(FLOW), (FLOW)->map, MAP };     \
> +             = { miniflow_get_values(FLOW), (FLOW)->map, MAP };         \
>           mf_get_next_in_map(&aux__, &(VALUE));                          \
>          )
>
> -/* This can be used when it is known that 'u32_idx' is set in 'map'. */
> -static inline uint32_t
> -miniflow_values_get__(const uint32_t *values, uint64_t map, int u32_idx)
> +/* This can be used when it is known that 'u64_idx' is set in 'map'. */
> +static inline uint64_t
> +miniflow_values_get__(const uint64_t *values, uint64_t map, int u64_idx)
>  {
> -    return values[count_1bits(map & ((UINT64_C(1) << u32_idx) - 1))];
> +    return values[count_1bits(map & ((UINT64_C(1) << u64_idx) - 1))];
>  }
>
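
The popcount indexing is the subtle bit, so here is a stand-alone sketch
of it; __builtin_popcountll() stands in for count_1bits():

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t map = (1ULL << 0) | (1ULL << 3) | (1ULL << 5);
        uint64_t packed[3] = { 7, 42, 1 };  /* values of words 0, 3, 5 */
        unsigned idx = 5;                   /* ask for flow word 5 */

        if (map & (UINT64_C(1) << idx)) {
            /* Packed position = number of 1-bits below bit 'idx'. */
            unsigned pos =
                __builtin_popcountll(map & ((UINT64_C(1) << idx) - 1));
            printf("word %u -> packed[%u] = %llu\n",
                   idx, pos, (unsigned long long) packed[pos]);
        }
        return 0;
    }
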
> -/* This can be used when it is known that 'u32_idx' is set in
> +/* This can be used when it is known that 'u64_idx' is set in
>   * the map of 'mf'. */
> -static inline uint32_t
> -miniflow_get__(const struct miniflow *mf, int u32_idx)
> +static inline uint64_t
> +miniflow_get__(const struct miniflow *mf, int u64_idx)
>  {
> -    return miniflow_values_get__(miniflow_get_u32_values(mf), mf->map,
> -                                 u32_idx);
> +    return miniflow_values_get__(miniflow_get_values(mf), mf->map, u64_idx);
>  }
>
> -/* Get the value of 'FIELD' of an up to 4 byte wide integer type 'TYPE' of
> +/* Get the value of 'FIELD' of an up to 8 byte wide integer type 'TYPE' of
>   * a miniflow. */
>  #define MINIFLOW_GET_TYPE(MF, TYPE, OFS)                                \
> -    (((MF)->map & (UINT64_C(1) << (OFS) / 4))                           \
> +    (((MF)->map & (UINT64_C(1) << (OFS) / sizeof(uint64_t)))            \
>       ? ((OVS_FORCE const TYPE *)                                        \
> -        (miniflow_get_u32_values(MF)                                    \
> -         + count_1bits((MF)->map & ((UINT64_C(1) << (OFS) / 4) - 1))))  \
> -       [(OFS) % 4 / sizeof(TYPE)]                                       \
> +        (miniflow_get_values(MF)                                        \
> +         + count_1bits((MF)->map &                                      \
> +                       ((UINT64_C(1) << (OFS) / sizeof(uint64_t)) - 1)))) \
> +     [(OFS) % sizeof(uint64_t) / sizeof(TYPE)]                          \
>       : 0)                                                               \
>
> -#define MINIFLOW_GET_U8(FLOW, FIELD)                                    \
> +#define MINIFLOW_GET_U8(FLOW, FIELD)                                \
>      MINIFLOW_GET_TYPE(FLOW, uint8_t, offsetof(struct flow, FIELD))
> -#define MINIFLOW_GET_U16(FLOW, FIELD)                                    \
> +#define MINIFLOW_GET_U16(FLOW, FIELD)                               \
>      MINIFLOW_GET_TYPE(FLOW, uint16_t, offsetof(struct flow, FIELD))
> -#define MINIFLOW_GET_BE16(FLOW, FIELD)                                   \
> +#define MINIFLOW_GET_BE16(FLOW, FIELD)                              \
>      MINIFLOW_GET_TYPE(FLOW, ovs_be16, offsetof(struct flow, FIELD))
> -#define MINIFLOW_GET_U32(FLOW, FIELD)                                    \
> +#define MINIFLOW_GET_U32(FLOW, FIELD)                               \
>      MINIFLOW_GET_TYPE(FLOW, uint32_t, offsetof(struct flow, FIELD))
> -#define MINIFLOW_GET_BE32(FLOW, FIELD)                                   \
> +#define MINIFLOW_GET_BE32(FLOW, FIELD)                              \
>      MINIFLOW_GET_TYPE(FLOW, ovs_be32, offsetof(struct flow, FIELD))
> -
> +#define MINIFLOW_GET_U64(FLOW, FIELD)                               \
> +    MINIFLOW_GET_TYPE(FLOW, uint64_t, offsetof(struct flow, FIELD))
> +#define MINIFLOW_GET_BE64(FLOW, FIELD)                              \
> +    MINIFLOW_GET_TYPE(FLOW, ovs_be64, offsetof(struct flow, FIELD))
> +
> +static inline uint64_t miniflow_get(const struct miniflow *,
> +                                    unsigned int u64_ofs);
> +static inline uint32_t miniflow_get_u32(const struct miniflow *,
> +                                        unsigned int u32_ofs);
> +static inline ovs_be32 miniflow_get_be32(const struct miniflow *,
> +                                         unsigned int be32_ofs);
>  static inline uint16_t miniflow_get_vid(const struct miniflow *);
>  static inline uint16_t miniflow_get_tcp_flags(const struct miniflow *);
>  static inline ovs_be64 miniflow_get_metadata(const struct miniflow *);
> @@ -619,12 +618,15 @@ void minimask_clone(struct minimask *, const struct minimask *);
>  void minimask_move(struct minimask *dst, struct minimask *src);
>  void minimask_combine(struct minimask *dst,
>                        const struct minimask *a, const struct minimask *b,
> -                      uint32_t storage[FLOW_U32S]);
> +                      uint64_t storage[FLOW_U64S]);
>  void minimask_destroy(struct minimask *);
>
>  void minimask_expand(const struct minimask *, struct flow_wildcards *);
>
> -uint32_t minimask_get(const struct minimask *, unsigned int u32_ofs);
> +static inline uint32_t minimask_get_u32(const struct minimask *,
> +                                        unsigned int u32_ofs);
> +static inline ovs_be32 minimask_get_be32(const struct minimask *,
> +                                         unsigned int be32_ofs);
>  static inline uint16_t minimask_get_vid_mask(const struct minimask *);
>  static inline ovs_be64 minimask_get_metadata_mask(const struct minimask *);
>
> @@ -643,6 +645,33 @@ minimask_is_catchall(const struct minimask *mask)
>      return mask->masks.map == 0;
>  }
>
> +/* Returns the uint64_t that would be at byte offset '8 * u64_ofs' if 'flow'
> + * were expanded into a "struct flow". */
> +static inline uint64_t miniflow_get(const struct miniflow *flow,
> +                                    unsigned int u64_ofs)
> +{
> +    return flow->map & (UINT64_C(1) << u64_ofs)
> +        ? miniflow_get__(flow, u64_ofs) : 0;
> +}
> +
> +static inline uint32_t miniflow_get_u32(const struct miniflow *flow,
> +                                        unsigned int u32_ofs)
> +{
> +    uint64_t value = miniflow_get(flow, u32_ofs / 2);
> +
> +#if WORDS_BIGENDIAN
> +    return (u32_ofs & 1) ? value : value >> 32;
> +#else
> +    return (u32_ofs & 1) ? value >> 32 : value;
> +#endif
> +}
> +
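
The endianness split looks right.  A little-endian-only sketch of the same
half-word extraction (get_u32_of() is my own helper name, not OVS code):

    #include <stdint.h>
    #include <stdio.h>

    /* On a little-endian host, the u32 at byte offset 4 * u32_ofs sits in
     * the low half of u64 word u32_ofs / 2 when u32_ofs is even. */
    static uint32_t get_u32_of(uint64_t word, unsigned int u32_ofs)
    {
        return (u32_ofs & 1) ? (uint32_t) (word >> 32) : (uint32_t) word;
    }

    int main(void)
    {
        uint64_t word = 0xdeadbeef01020304ULL;
        printf("%08x %08x\n", get_u32_of(word, 0), get_u32_of(word, 1));
        /* prints: 01020304 deadbeef */
        return 0;
    }
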
> +static inline ovs_be32 miniflow_get_be32(const struct miniflow *flow,
> +                                         unsigned int be32_ofs)
> +{
> +    return (OVS_FORCE ovs_be32)miniflow_get_u32(flow, be32_ofs);
> +}
> +
>  /* Returns the VID within the vlan_tci member of the "struct flow" represented
>   * by 'flow'. */
>  static inline uint16_t
> @@ -652,6 +681,20 @@ miniflow_get_vid(const struct miniflow *flow)
>      return vlan_tci_to_vid(tci);
>  }
>
> +/* Returns the uint32_t that would be at byte offset '4 * u32_ofs' if 'mask'
> + * were expanded into a "struct flow_wildcards". */
> +static inline uint32_t
> +minimask_get_u32(const struct minimask *mask, unsigned int u32_ofs)
> +{
> +    return miniflow_get_u32(&mask->masks, u32_ofs);
> +}
> +
> +static inline ovs_be32
> +minimask_get_be32(const struct minimask *mask, unsigned int be32_ofs)
> +{
> +    return (OVS_FORCE ovs_be32)minimask_get_u32(mask, be32_ofs);
> +}
> +
>  /* Returns the VID mask within the vlan_tci member of the "struct
>   * flow_wildcards" represented by 'mask'. */
>  static inline uint16_t
> @@ -671,20 +714,7 @@ miniflow_get_tcp_flags(const struct miniflow *flow)
>  static inline ovs_be64
>  miniflow_get_metadata(const struct miniflow *flow)
>  {
> -    union {
> -        ovs_be64 be64;
> -        struct {
> -            ovs_be32 hi;
> -            ovs_be32 lo;
> -        };
> -    } value;
> -
> -    enum { MD_OFS = offsetof(struct flow, metadata) };
> -    BUILD_ASSERT_DECL(MD_OFS % sizeof(uint32_t) == 0);
> -    value.hi = MINIFLOW_GET_TYPE(flow, ovs_be32, MD_OFS);
> -    value.lo = MINIFLOW_GET_TYPE(flow, ovs_be32, MD_OFS + 4);
> -
> -    return value.be64;
> +    return MINIFLOW_GET_BE64(flow, metadata);
>  }
>
>  /* Returns the mask for the OpenFlow 1.1+ "metadata" field in 'mask'.
> @@ -696,7 +726,7 @@ miniflow_get_metadata(const struct miniflow *flow)
>  static inline ovs_be64
>  minimask_get_metadata_mask(const struct minimask *mask)
>  {
> -    return miniflow_get_metadata(&mask->masks);
> +    return MINIFLOW_GET_BE64(&mask->masks, metadata);
>  }
>
>  /* Perform a bitwise OR of miniflow 'src' flow data with the equivalent
> @@ -704,12 +734,12 @@ minimask_get_metadata_mask(const struct minimask *mask)
>  static inline void
>  flow_union_with_miniflow(struct flow *dst, const struct miniflow *src)
>  {
> -    uint32_t *dst_u32 = (uint32_t *) dst;
> -    const uint32_t *p = miniflow_get_u32_values(src);
> +    uint64_t *dst_u64 = (uint64_t *) dst;
> +    const uint64_t *p = miniflow_get_values(src);
>      int idx;
>
>      MAP_FOR_EACH_INDEX(idx, src->map) {
> -        dst_u32[idx] |= *p++;
> +        dst_u64[idx] |= *p++;
>      }
>  }
>
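
Expansion is then just the scatter inverse of miniflow_init().  My sketch
of the OR-scatter, with __builtin_ctzll() again standing in for raw_ctz():

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t dst[6] = { 0 };
        uint64_t packed[3] = { 7, 42, 1 };  /* values for words 0, 3, 5 */
        uint64_t map = (1ULL << 0) | (1ULL << 3) | (1ULL << 5);
        const uint64_t *p = packed;

        while (map) {
            dst[__builtin_ctzll(map)] |= *p++;
            map &= map - 1;
        }
        for (int i = 0; i < 6; i++) {
            printf("dst[%d]=%llu\n", i, (unsigned long long) dst[i]);
        }
        return 0;
    }
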
> diff --git a/lib/match.c b/lib/match.c
> index 480b972..b5bea5d 100644
> --- a/lib/match.c
> +++ b/lib/match.c
> @@ -870,7 +870,7 @@ match_format(const struct match *match, struct ds *s, int priority)
>
>      int i;
>
> -    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28);
> +    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 29);
>
>      if (priority != OFP_DEFAULT_PRIORITY) {
>          ds_put_format(s, "priority=%d,", priority);
> @@ -1200,13 +1200,13 @@ bool
>  minimatch_matches_flow(const struct minimatch *match,
>                         const struct flow *target)
>  {
> -    const uint32_t *target_u32 = (const uint32_t *) target;
> -    const uint32_t *flowp = miniflow_get_u32_values(&match->flow);
> -    const uint32_t *maskp = miniflow_get_u32_values(&match->mask.masks);
> +    const uint64_t *target_u64 = (const uint64_t *) target;
> +    const uint64_t *flowp = miniflow_get_values(&match->flow);
> +    const uint64_t *maskp = miniflow_get_values(&match->mask.masks);
>      int idx;
>
>      MAP_FOR_EACH_INDEX(idx, match->flow.map) {
> -        if ((*flowp++ ^ target_u32[idx]) & *maskp++) {
> +        if ((*flowp++ ^ target_u64[idx]) & *maskp++) {
>              return false;
>          }
>      }
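
The masked-compare idiom is unchanged, just on wider words.  A one-word
sketch of the test each iteration performs:

    #include <stdint.h>
    #include <stdbool.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t flow = 0x0a000001, target = 0x0a0000ff;
        uint64_t mask = 0xffffff00;

        /* XOR exposes differing bits; AND keeps only the masked ones. */
        bool match = !((flow ^ target) & mask);
        printf("%s\n", match ? "match" : "no match"); /* low byte masked */
        return 0;
    }
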
> diff --git a/lib/nx-match.c b/lib/nx-match.c
> index 2ad3cf2..1f72a84 100644
> --- a/lib/nx-match.c
> +++ b/lib/nx-match.c
> @@ -817,7 +817,7 @@ nx_put_raw(struct ofpbuf *b, enum ofp_version oxm, const struct match *match,
>      int match_len;
>      int i;
>
> -    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28);
> +    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 29);
>
>      /* Metadata. */
>      if (match->wc.masks.dp_hash) {
> diff --git a/lib/odp-util.h b/lib/odp-util.h
> index 9c990cd..b361795 100644
> --- a/lib/odp-util.h
> +++ b/lib/odp-util.h
> @@ -133,7 +133,7 @@ void odp_portno_names_destroy(struct hmap *portno_names);
>   * add another field and forget to adjust this value.
>   */
>  #define ODPUTIL_FLOW_KEY_BYTES 512
> -BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28);
> +BUILD_ASSERT_DECL(FLOW_WC_SEQ == 29);
>
>  /* A buffer with sufficient size and alignment to hold an nlattr-formatted flow
>   * key.  An array of "struct nlattr" might not, in theory, be sufficiently
> diff --git a/lib/ofp-util.c b/lib/ofp-util.c
> index 986659e..c4daec6 100644
> --- a/lib/ofp-util.c
> +++ b/lib/ofp-util.c
> @@ -186,7 +186,7 @@ ofputil_netmask_to_wcbits(ovs_be32 netmask)
>  void
>  ofputil_wildcard_from_ofpfw10(uint32_t ofpfw, struct flow_wildcards *wc)
>  {
> -    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28);
> +    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 29);
>
>      /* Initialize most of wc. */
>      flow_wildcards_init_catchall(wc);
> diff --git a/lib/tnl-ports.c b/lib/tnl-ports.c
> index 9832ef4..7ab25eb 100644
> --- a/lib/tnl-ports.c
> +++ b/lib/tnl-ports.c
> @@ -188,6 +188,6 @@ tnl_port_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
>  void
>  tnl_port_map_init(void)
>  {
> -    classifier_init(&cls, flow_segment_u32s);
> +    classifier_init(&cls, flow_segment_u64s);
>      unixctl_command_register("tnl/ports/show", "", 0, 0, tnl_port_show, NULL);
>  }
> diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
> index 89de528..bc59ffb 100644
> --- a/ofproto/ofproto-dpif-upcall.c
> +++ b/ofproto/ofproto-dpif-upcall.c
> @@ -1551,7 +1551,7 @@ revalidate_ukey(struct udpif *udpif, struct udpif_key *ukey,
>      struct dpif_flow_stats push;
>      struct ofpbuf xout_actions;
>      struct flow flow, dp_mask;
> -    uint32_t *dp32, *xout32;
> +    uint64_t *dp64, *xout64;
>      ofp_port_t ofp_in_port;
>      struct xlate_in xin;
>      long long int last_used;
> @@ -1651,10 +1651,10 @@ revalidate_ukey(struct udpif *udpif, struct udpif_key *ukey,
>       * mask in the kernel is more specific i.e. less wildcarded, than what
>       * we've calculated here.  This guarantees we don't catch any packets we
>       * shouldn't with the megaflow. */
> -    dp32 = (uint32_t *) &dp_mask;
> -    xout32 = (uint32_t *) &xout.wc.masks;
> -    for (i = 0; i < FLOW_U32S; i++) {
> -        if ((dp32[i] | xout32[i]) != dp32[i]) {
> +    dp64 = (uint64_t *) &dp_mask;
> +    xout64 = (uint64_t *) &xout.wc.masks;
> +    for (i = 0; i < FLOW_U64S; i++) {
> +        if ((dp64[i] | xout64[i]) != dp64[i]) {
>              goto exit;
>          }
>      }
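
And the subset test reads the same on 64-bit words.  A self-contained
sketch of the property being checked (mask_covers() is my name, not OVS's):

    #include <stdint.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* 'dp' is acceptable only if it already covers every bit 'xout'
     * needs, i.e. OR-ing 'xout' in changes nothing. */
    static bool mask_covers(const uint64_t *dp, const uint64_t *xout,
                            int n)
    {
        for (int i = 0; i < n; i++) {
            if ((dp[i] | xout[i]) != dp[i]) {
                return false;
            }
        }
        return true;
    }

    int main(void)
    {
        uint64_t dp[2]   = { 0xffff, 0xff00 };
        uint64_t xout[2] = { 0x00ff, 0xff00 };
        printf("%s\n", mask_covers(dp, xout, 2) ? "ok" : "too wildcarded");
        return 0;
    }
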
> diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
> index c1327a6..50ed418 100644
> --- a/ofproto/ofproto-dpif-xlate.c
> +++ b/ofproto/ofproto-dpif-xlate.c
> @@ -2611,7 +2611,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
>
>      /* If 'struct flow' gets additional metadata, we'll need to zero it out
>       * before traversing a patch port. */
> -    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28);
> +    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 29);
>      memset(&flow_tnl, 0, sizeof flow_tnl);
>
>      if (!xport) {
> diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
> index 75f0b54..cbf8434 100644
> --- a/ofproto/ofproto.c
> +++ b/ofproto/ofproto.c
> @@ -6682,7 +6682,7 @@ static void
>  oftable_init(struct oftable *table)
>  {
>      memset(table, 0, sizeof *table);
> -    classifier_init(&table->cls, flow_segment_u32s);
> +    classifier_init(&table->cls, flow_segment_u64s);
>      table->max_flows = UINT_MAX;
>      atomic_init(&table->miss_config, OFPUTIL_TABLE_MISS_DEFAULT);
>
> diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
> index baa942f..064253e 100644
> --- a/tests/ofproto-dpif.at
> +++ b/tests/ofproto-dpif.at
> @@ -5389,7 +5389,7 @@ AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:
>  sleep 1
>  AT_CHECK([cat ovs-vswitchd.log | FILTER_FLOW_INSTALL | STRIP_XOUT], [0], [dnl
>  recirc_id=0,ipv6,in_port=1,ipv6_src=2001:db8:3c4d:1:2:3:4:5,nw_frag=no, actions: <del>
> -recirc_id=0,ipv6,in_port=1,ipv6_src=2001:db8:3c4d:5:4:3:2:1/ffff:ffff:0:4::,nw_frag=no, actions: <del>
> +recirc_id=0,ipv6,in_port=1,ipv6_src=2001:db8:3c4d:5:4:3:2:1/0:0:0:4::,nw_frag=no, actions: <del>
>  ])
>  OVS_VSWITCHD_STOP
>  AT_CLEANUP
> diff --git a/tests/test-classifier.c b/tests/test-classifier.c
> index e4eb0f4..a15a612 100644
> --- a/tests/test-classifier.c
> +++ b/tests/test-classifier.c
> @@ -700,7 +700,7 @@ test_empty(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
>      struct classifier cls;
>      struct tcls tcls;
>
> -    classifier_init(&cls, flow_segment_u32s);
> +    classifier_init(&cls, flow_segment_u64s);
>      set_prefix_fields(&cls);
>      tcls_init(&tcls);
>      assert(classifier_is_empty(&cls));
> @@ -731,7 +731,7 @@ test_single_rule(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
>          rule = make_rule(wc_fields,
>                           hash_bytes(&wc_fields, sizeof wc_fields, 0), 0);
>
> -        classifier_init(&cls, flow_segment_u32s);
> +        classifier_init(&cls, flow_segment_u64s);
>          set_prefix_fields(&cls);
>          tcls_init(&tcls);
>
> @@ -769,7 +769,7 @@ test_rule_replacement(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
>          rule2->aux += 5;
>          rule2->aux += 5;
>
> -        classifier_init(&cls, flow_segment_u32s);
> +        classifier_init(&cls, flow_segment_u64s);
>          set_prefix_fields(&cls);
>          tcls_init(&tcls);
>          tcls_insert(&tcls, rule1);
> @@ -884,7 +884,7 @@ test_many_rules_in_one_list (int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
>                  pri_rules[i] = -1;
>              }
>
> -            classifier_init(&cls, flow_segment_u32s);
> +            classifier_init(&cls, flow_segment_u64s);
>              set_prefix_fields(&cls);
>              tcls_init(&tcls);
>
> @@ -986,7 +986,7 @@ test_many_rules_in_one_table(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
>              value_mask = ~wcf & ((1u << CLS_N_FIELDS) - 1);
>          } while ((1 << count_ones(value_mask)) < N_RULES);
>
> -        classifier_init(&cls, flow_segment_u32s);
> +        classifier_init(&cls, flow_segment_u64s);
>          set_prefix_fields(&cls);
>          tcls_init(&tcls);
>
> @@ -1048,7 +1048,7 @@ test_many_rules_in_n_tables(int n_tables)
>          }
>          shuffle(priorities, ARRAY_SIZE(priorities));
>
> -        classifier_init(&cls, flow_segment_u32s);
> +        classifier_init(&cls, flow_segment_u64s);
>          set_prefix_fields(&cls);
>          tcls_init(&tcls);
>
> @@ -1122,6 +1122,8 @@ choose(unsigned int n, unsigned int *idxp)
>      }
>  }
>
> +#define FLOW_U32S (FLOW_U64S * 2)
> +
>  static bool
>  init_consecutive_values(int n_consecutive, struct flow *flow,
>                          unsigned int *idxp)
> @@ -1259,7 +1261,7 @@ test_miniflow(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
>
>      random_set_seed(0xb3faca38);
>      for (idx = 0; next_random_flow(&flow, idx); idx++) {
> -        const uint32_t *flow_u32 = (const uint32_t *) &flow;
> +        const uint64_t *flow_u64 = (const uint64_t *) &flow;
>          struct miniflow miniflow, miniflow2, miniflow3;
>          struct flow flow2, flow3;
>          struct flow_wildcards mask;
> @@ -1271,9 +1273,8 @@ test_miniflow(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
>
>          /* Check that the flow equals its miniflow. */
>          assert(miniflow_get_vid(&miniflow) == vlan_tci_to_vid(flow.vlan_tci));
> -        for (i = 0; i < FLOW_U32S; i++) {
> -            assert(MINIFLOW_GET_TYPE(&miniflow, uint32_t, i * 4)
> -                   == flow_u32[i]);
> +        for (i = 0; i < FLOW_U64S; i++) {
> +            assert(miniflow_get(&miniflow, i) == flow_u64[i]);
>          }
>
>          /* Check that the miniflow equals itself. */
> @@ -1372,7 +1373,7 @@ test_minimask_combine(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
>      for (idx = 0; next_random_flow(&flow, idx); idx++) {
>          struct minimask minimask, minimask2, minicombined;
>          struct flow_wildcards mask, mask2, combined, combined2;
> -        uint32_t storage[FLOW_U32S];
> +        uint64_t storage[FLOW_U64S];
>          struct flow flow2;
>
>          mask.masks = flow;
> --
> 1.7.10.4
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
>


