[ovs-dev] [PATCH 10/10] lib/classifier: Support variable sized miniflows.

Ethan Jackson ethan at nicira.com
Tue Apr 29 02:07:58 UTC 2014


The comment on miniflow_and_mask_matches_flow() needs to be reworded.
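Maybe something along these lines would be clearer (just a suggestion):

    /* Returns true if 'target' matches 'flow'/'mask', that is, if 'target'
     * has the same value as 'flow' in every bit position where 'mask' has
     * a 1-bit.  'flow' and 'mask' must have the same 'map'. */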

Do we really need that function?  Is it really that much faster?
Having two functions that do the exact same thing is a bit confusing.
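For example, could find_match() just use the existing helper, something
like (untested, only to illustrate the question):

    HMAP_FOR_EACH_WITH_HASH (rule, hmap_node, hash, &subtable->rules) {
        if (miniflow_equal_flow_in_minimask(&rule->flow, flow,
                                            &subtable->mask)) {
            return rule;
        }
    }

unless the specialized loop really does show up in profiles?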

So I suppose the only reason for a non-zero inline_values array is for
those callers that can't allocate something variable sized but still
want to avoid the extra memory allocation.  However, I don't think any
of those callers are performance critical.  I'm wondering if we could
simplify this significantly by always having the offline values passed
in by the caller.  Callers that can make it variable sized would do so;
everyone else would make it FLOW_U32S sized?
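Roughly the kind of thing I have in mind (just a sketch; the name
miniflow_clone_into() and its exact shape are made up):

    /* Like miniflow_clone(), but the caller provides 'values' as storage
     * for the copied data, so no inline/offline decision is needed here.
     * 'values' must hold at least MINIFLOW_VALUES_SIZE(n) bytes, where
     * n = miniflow_n_values(src).  Callers that know the count can size
     * it exactly; everyone else passes a FLOW_U32S-sized buffer. */
    static void
    miniflow_clone_into(struct miniflow *dst, uint32_t *values,
                        const struct miniflow *src)
    {
        int n = miniflow_n_values(src);

        dst->map = src->map;
        dst->values_inline = false;
        dst->offline_values = values;
        memcpy(values, miniflow_get_values(src), MINIFLOW_VALUES_SIZE(n));
    }

    /* A caller that doesn't care about exact sizing: */
    uint32_t buf[FLOW_U32S];
    struct miniflow copy;

    miniflow_clone_into(&copy, buf, src);

That would let the copy paths drop the inline/offline branching
entirely, at the cost of callers owning the storage.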

What do you think?

Ethan

On Sat, Apr 19, 2014 at 9:51 PM, Kmindg G <kmindg at gmail.com> wrote:
> On Sat, Apr 19, 2014 at 3:42 AM, Jarno Rajahalme <jrajahalme at nicira.com> wrote:
>> Change the classifier to allocate variable sized miniflows and
>> minimasks in cls_match and cls_subtable, respectively.  Do not
>> duplicate the mask in cls_rule any more.
>>
>> miniflow_clone and miniflow_move can now take variably sized miniflows
>> as source.  The destination is assumed to be regularly sized miniflow.
>>
>> Inlining miniflow and mask values reduces memory indirection and helps
>> reduce cache misses.
>>
>> Signed-off-by: Jarno Rajahalme <jrajahalme at nicira.com>
>> ---
>>  lib/classifier.c |   82 +++++++++++++++++++++++++++++++++++++++---------------
>>  lib/flow.c       |   55 +++++++++++++++++++++++++++++++-----
>>  lib/flow.h       |   30 ++++++++++++++++++--
>>  3 files changed, 134 insertions(+), 33 deletions(-)
>>
>> diff --git a/lib/classifier.c b/lib/classifier.c
>> index 75befad..1198a76 100644
>> --- a/lib/classifier.c
>> +++ b/lib/classifier.c
>> @@ -40,7 +40,6 @@ struct cls_trie {
>>
>>  struct cls_subtable_entry {
>>      struct cls_subtable *subtable;
>> -    const uint32_t *mask_values;
>>      tag_type tag;
>>      unsigned int max_priority;
>>  };
>> @@ -72,7 +71,6 @@ struct cls_subtable {
>>      struct hmap_node hmap_node; /* Within struct cls_classifier 'subtables'
>>                                   * hmap. */
>>      struct hmap rules;          /* Contains "struct cls_rule"s. */
>> -    struct minimask mask;       /* Wildcards for fields. */
>>      int n_rules;                /* Number of rules, including duplicates. */
>>      unsigned int max_priority;  /* Max priority of any rule in the subtable. */
>>      unsigned int max_count;     /* Count of max_priority rules. */
>> @@ -81,6 +79,8 @@ struct cls_subtable {
>>      uint8_t index_ofs[CLS_MAX_INDICES]; /* u32 flow segment boundaries. */
>>      struct hindex indices[CLS_MAX_INDICES]; /* Staged lookup indices. */
>>      unsigned int trie_plen[CLS_MAX_TRIES];  /* Trie prefix length in 'mask'. */
>> +    struct minimask mask;       /* Wildcards for fields. */
>> +    /* 'mask' must be the last field. */
>>  };
>>
>>  /* Associates a metadata value (that is, a value of the OpenFlow 1.1+ metadata
>> @@ -103,16 +103,21 @@ struct cls_match {
>>      unsigned int priority;      /* Larger numbers are higher priorities. */
>>      struct cls_partition *partition;
>>      struct list list;           /* List of identical, lower-priority rules. */
>> -    struct minimatch match;     /* Matching rule. */
>> +    struct miniflow flow;       /* Matching rule. Mask is in the subtable. */
>> +    /* 'flow' must be the last field. */
>>  };
>>
>>  static struct cls_match *
>>  cls_match_alloc(struct cls_rule *rule)
>>  {
>> -    struct cls_match *cls_match = xmalloc(sizeof *cls_match);
>> +    int count = count_1bits(rule->match.flow.map);
>> +
>> +    struct cls_match *cls_match
>> +        = xmalloc(sizeof *cls_match - sizeof cls_match->flow.inline_values
>> +                  + MINIFLOW_VALUES_SIZE(count));
>
> Would it lead to a potential array access violation problem when
> 'sizeof cls_match->flow.inline_values' is bigger than
> 'MINIFLOW_VALUES_SIZE(count)'?
>
>>
>>      cls_match->cls_rule = rule;
>> -    minimatch_clone(&cls_match->match, &rule->match);
>> +    miniflow_clone_inline(&cls_match->flow, &rule->match.flow, count);
>>      cls_match->priority = rule->priority;
>>      rule->cls_match = cls_match;
>>
>> @@ -875,7 +880,6 @@ static inline void
>>  lookahead_subtable(const struct cls_subtable_entry *subtables)
>>  {
>>      ovs_prefetch_range(subtables->subtable, sizeof *subtables->subtable);
>> -    ovs_prefetch_range(subtables->mask_values, 1);
>>  }
>>
>>  /* Finds and returns the highest-priority rule in 'cls' that matches 'flow'.
>> @@ -969,16 +973,19 @@ classifier_lookup(const struct classifier *cls_, const struct flow *flow,
>>  }
>>
>>  /* Returns true if 'target' satisifies 'match', that is, if each bit for which
>> - * 'match' specifies a particular value has the correct value in 'target'. */
>> + * 'match' specifies a particular value has the correct value in 'target'.
>> + *
>> + * 'flow' and 'mask' have the same mask! */
>>  static bool
>> -minimatch_matches_miniflow(const struct minimatch *match,
>> -                           const struct miniflow *target)
>> +miniflow_and_mask_matches_miniflow(const struct miniflow *flow,
>> +                                   const struct minimask *mask,
>> +                                   const struct miniflow *target)
>>  {
>> -    const uint32_t *flowp = miniflow_get_u32_values(&match->flow);
>> -    const uint32_t *maskp = miniflow_get_u32_values(&match->mask.masks);
>> +    const uint32_t *flowp = miniflow_get_u32_values(flow);
>> +    const uint32_t *maskp = miniflow_get_u32_values(&mask->masks);
>>      uint32_t target_u32;
>>
>> -    MINIFLOW_FOR_EACH_IN_MAP(target_u32, target, match->mask.masks.map) {
>> +    MINIFLOW_FOR_EACH_IN_MAP(target_u32, target, mask->masks.map) {
>>          if ((*flowp++ ^ target_u32) & *maskp++) {
>>              return false;
>>          }
>> @@ -995,7 +1002,8 @@ find_match_miniflow(const struct cls_subtable *subtable,
>>      struct cls_match *rule;
>>
>>      HMAP_FOR_EACH_WITH_HASH (rule, hmap_node, hash, &subtable->rules) {
>> -        if (minimatch_matches_miniflow(&rule->match, flow)) {
>> +        if (miniflow_and_mask_matches_miniflow(&rule->flow, &subtable->mask,
>> +                                               flow)) {
>>              return rule;
>>          }
>>      }
>> @@ -1113,7 +1121,7 @@ classifier_rule_overlaps(const struct classifier *cls_,
>>                  }
>>                  if (rule->priority == target->priority
>>                      && miniflow_equal_in_minimask(&target->match.flow,
>> -                                                  &rule->match.flow, &mask)) {
>> +                                                  &rule->flow, &mask)) {
>>                      return true;
>>                  }
>>              }
>> @@ -1171,7 +1179,7 @@ static bool
>>  rule_matches(const struct cls_match *rule, const struct cls_rule *target)
>>  {
>>      return (!target
>> -            || miniflow_equal_in_minimask(&rule->match.flow,
>> +            || miniflow_equal_in_minimask(&rule->flow,
>>                                            &target->match.flow,
>>                                            &target->match.mask));
>>  }
>> @@ -1285,10 +1293,12 @@ insert_subtable(struct cls_classifier *cls, const struct minimask *mask)
>>      struct flow_wildcards old, new;
>>      uint8_t prev;
>>      struct cls_subtable_entry elem;
>> +    int count = count_1bits(mask->masks.map);
>>
>> -    subtable = xzalloc(sizeof *subtable);
>> +    subtable = xzalloc(sizeof *subtable - sizeof mask->masks.inline_values
>> +                       + MINIFLOW_VALUES_SIZE(count));
>>      hmap_init(&subtable->rules);
>> -    minimask_clone(&subtable->mask, mask);
>> +    miniflow_clone_inline(&subtable->mask.masks, &mask->masks, count);
>>
>>      /* Init indices for segmented lookup, if any. */
>>      flow_wildcards_init_catchall(&new);
>> @@ -1329,7 +1339,6 @@ insert_subtable(struct cls_classifier *cls, const struct minimask *mask)
>>
>>      hmap_insert(&cls->subtables, &subtable->hmap_node, hash);
>>      elem.subtable = subtable;
>> -    elem.mask_values = miniflow_get_values(&subtable->mask.masks);
>>      elem.tag = subtable->tag;
>>      elem.max_priority = subtable->max_priority;
>>      cls_subtable_cache_push_back(&cls->subtables_priority, elem);
>> @@ -1525,6 +1534,31 @@ check_tries(struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries,
>>      return false;
>>  }
>>
>> +/* Returns true if 'target' satisifies 'flow'/'mask', that is, if each bit
>> + * for which 'flow', for which 'mask' has a bit set, specifies a particular
>> + * value has the correct value in 'target'.
>> + *
>> + * This function is equivalent to miniflow_equal_flow_in_minimask(flow,
>> + * target, mask) but it is faster because of the invariant that
>> + * flow->map and mask->masks.map are the same. */
>> +static inline bool
>> +miniflow_and_mask_matches_flow(const struct miniflow *flow,
>> +                               const struct minimask *mask,
>> +                               const struct flow *target)
>> +{
>> +    const uint32_t *flowp = miniflow_get_u32_values(flow);
>> +    const uint32_t *maskp = miniflow_get_u32_values(&mask->masks);
>> +    uint32_t target_u32;
>> +
>> +    FLOW_FOR_EACH_IN_MAP(target_u32, target, mask->masks.map) {
>> +        if ((*flowp++ ^ target_u32) & *maskp++) {
>> +            return false;
>> +        }
>> +    }
>> +
>> +    return true;
>> +}
>> +
>>  static inline struct cls_match *
>>  find_match(const struct cls_subtable *subtable, const struct flow *flow,
>>             uint32_t hash)
>> @@ -1532,7 +1566,8 @@ find_match(const struct cls_subtable *subtable, const struct flow *flow,
>>      struct cls_match *rule;
>>
>>      HMAP_FOR_EACH_WITH_HASH (rule, hmap_node, hash, &subtable->rules) {
>> -        if (minimatch_matches_flow(&rule->match, flow)) {
>> +        if (miniflow_and_mask_matches_flow(&rule->flow, &subtable->mask,
>> +                                           flow)) {
>>              return rule;
>>          }
>>      }
>> @@ -1580,8 +1615,8 @@ find_match_wc(const struct cls_subtable *subtable, const struct flow *flow,
>>           * not match, then we know that we will never get a match, but we do
>>           * not yet know how many wildcards we need to fold into 'wc' so we
>>           * continue iterating through indices to find that out.  (We won't
>> -         * waste time calling minimatch_matches_flow() again because we've set
>> -         * 'rule' nonnull.)
>> +         * waste time calling miniflow_and_mask_matches_flow() again because
>> +         * we've set 'rule' nonnull.)
>>           *
>>           * This check shows a measurable benefit with non-trivial flow tables.
>>           *
>> @@ -1589,7 +1624,8 @@ find_match_wc(const struct cls_subtable *subtable, const struct flow *flow,
>>           * optimization. */
>>          if (!inode->s && !rule) {
>>              ASSIGN_CONTAINER(rule, inode - i, index_nodes);
>> -            if (minimatch_matches_flow(&rule->match, flow)) {
>> +            if (miniflow_and_mask_matches_flow(&rule->flow, &subtable->mask,
>> +                                               flow)) {
>>                  goto out;
>>              }
>>          }
>> @@ -1629,7 +1665,7 @@ find_equal(struct cls_subtable *subtable, const struct miniflow *flow,
>>      struct cls_match *head;
>>
>>      HMAP_FOR_EACH_WITH_HASH (head, hmap_node, hash, &subtable->rules) {
>> -        if (miniflow_equal(&head->match.flow, flow)) {
>> +        if (miniflow_equal(&head->flow, flow)) {
>>              return head;
>>          }
>>      }
>> diff --git a/lib/flow.c b/lib/flow.c
>> index 8e8e67e..1f65001 100644
>> --- a/lib/flow.c
>> +++ b/lib/flow.c
>> @@ -1581,13 +1581,15 @@ miniflow_n_values(const struct miniflow *flow)
>>  static uint32_t *
>>  miniflow_alloc_values(struct miniflow *flow, int n)
>>  {
>> -    if (n <= sizeof flow->inline_values / sizeof(uint32_t)) {
>> +    int size = MINIFLOW_VALUES_SIZE(n);
>> +
>> +    if (size <= sizeof flow->inline_values) {
>>          flow->values_inline = true;
>>          return flow->inline_values;
>>      } else {
>>          COVERAGE_INC(miniflow_malloc);
>>          flow->values_inline = false;
>> -        flow->offline_values = xmalloc(n * sizeof(uint32_t));
>> +        flow->offline_values = xmalloc(size);
>>          return flow->offline_values;
>>      }
>>  }
>> @@ -1655,18 +1657,57 @@ miniflow_init_with_minimask(struct miniflow *dst, const struct flow *src,
>>  void
>>  miniflow_clone(struct miniflow *dst, const struct miniflow *src)
>>  {
>> -    int n = miniflow_n_values(src);
>> +    int size = MINIFLOW_VALUES_SIZE(miniflow_n_values(src));
>> +    uint32_t *values;
>> +
>>      dst->map = src->map;
>> -    memcpy(miniflow_alloc_values(dst, n), miniflow_get_values(src),
>> -           n * sizeof(uint32_t));
>> +    if (size <= sizeof dst->inline_values) {
>> +        dst->values_inline = true;
>> +        values = dst->inline_values;
>> +    } else {
>> +        dst->values_inline = false;
>> +        COVERAGE_INC(miniflow_malloc);
>> +        dst->offline_values = xmalloc(size);
>> +        values = dst->offline_values;
>> +    }
>> +    memcpy(values, miniflow_get_values(src), size);
>> +}
>> +
>> +/* Initializes 'dst' as a copy of 'src'.  The caller must have allocated
>> + * 'dst' to have inline space all data in 'src'. */
>> +void
>> +miniflow_clone_inline(struct miniflow *dst, const struct miniflow *src,
>> +                      size_t n_values)
>> +{
>> +    dst->values_inline = true;
>> +    dst->map = src->map;
>> +    memcpy(dst->inline_values, miniflow_get_values(src),
>> +           MINIFLOW_VALUES_SIZE(n_values));
>>  }
>>
>>  /* Initializes 'dst' with the data in 'src', destroying 'src'.
>> - * The caller must eventually free 'dst' with miniflow_destroy(). */
>> + * The caller must eventually free 'dst' with miniflow_destroy().
>> + * 'dst' must be regularly sized miniflow, but 'src' can have
>> + * larger than default inline values. */
>>  void
>>  miniflow_move(struct miniflow *dst, struct miniflow *src)
>>  {
>> -    *dst = *src;
>> +    int size = MINIFLOW_VALUES_SIZE(miniflow_n_values(src));
>> +
>> +    dst->map = src->map;
>> +    if (size <= sizeof dst->inline_values) {
>> +        dst->values_inline = true;
>> +        memcpy(dst->inline_values, miniflow_get_values(src), size);
>> +        miniflow_destroy(src);
>> +    } else if (src->values_inline) {
>> +        dst->values_inline = false;
>> +        COVERAGE_INC(miniflow_malloc);
>> +        dst->offline_values = xmalloc(size);
>> +        memcpy(dst->offline_values, src->inline_values, size);
>> +    } else {
>> +        dst->values_inline = false;
>> +        dst->offline_values = src->offline_values;
>> +    }
>>  }
>>
>>  /* Frees any memory owned by 'flow'.  Does not free the storage in which 'flow'
>> diff --git a/lib/flow.h b/lib/flow.h
>> index c508e1e..324997f 100644
>> --- a/lib/flow.h
>> +++ b/lib/flow.h
>> @@ -358,14 +358,18 @@ struct miniflow {
>>      };
>>  };
>>
>> +#define MINIFLOW_VALUES_SIZE(COUNT) ((COUNT) * sizeof(uint32_t))
>> +
>>  static inline uint32_t *miniflow_values(struct miniflow *mf)
>>  {
>> -    return mf->values_inline ? mf->inline_values : mf->offline_values;
>> +    return OVS_LIKELY(mf->values_inline)
>> +        ? mf->inline_values : mf->offline_values;
>>  }
>>
>>  static inline const uint32_t *miniflow_get_values(const struct miniflow *mf)
>>  {
>> -    return mf->values_inline ? mf->inline_values : mf->offline_values;
>> +    return OVS_LIKELY(mf->values_inline)
>> +        ? mf->inline_values : mf->offline_values;
>>  }
>>
>>  static inline const uint32_t *miniflow_get_u32_values(const struct miniflow *mf)
>> @@ -400,11 +404,31 @@ void miniflow_init(struct miniflow *, const struct flow *);
>>  void miniflow_init_with_minimask(struct miniflow *, const struct flow *,
>>                                   const struct minimask *);
>>  void miniflow_clone(struct miniflow *, const struct miniflow *);
>> +void miniflow_clone_inline(struct miniflow *, const struct miniflow *,
>> +                           size_t n_values);
>>  void miniflow_move(struct miniflow *dst, struct miniflow *);
>>  void miniflow_destroy(struct miniflow *);
>>
>>  void miniflow_expand(const struct miniflow *, struct flow *);
>>
>> +static inline uint32_t
>> +flow_get_next_in_map(const struct flow *flow, uint64_t map, uint32_t *value)
>> +{
>> +    if (map) {
>> +        *value = ((const uint32_t *)flow)[raw_ctz(map)];
>> +        return true;
>> +    }
>> +    return false;
>> +}
>> +
>> +/* Iterate through all flow u32 values specified by 'MAP'.
>> + * This works as the first statement in a block.*/
>> +#define FLOW_FOR_EACH_IN_MAP(VALUE, FLOW, MAP)                          \
>> +    uint64_t map_;                                                      \
>> +    for (map_ = (MAP);                                                  \
>> +         flow_get_next_in_map(FLOW, map_, &(VALUE));                    \
>> +         map_ = zero_rightmost_1bit(map_))
>> +
>>  #define FLOW_U32_SIZE(FIELD)                                            \
>>      DIV_ROUND_UP(sizeof(((struct flow *)0)->FIELD), sizeof(uint32_t))
>>
>> @@ -429,7 +453,7 @@ mf_get_next_in_map(uint64_t *fmap, uint64_t rm1bit, const uint32_t **fp,
>>      return rm1bit != 0;
>>  }
>>
>> -/* Iterate through all miniflow u32 values specified by the 'MAP'.
>> +/* Iterate through all miniflow u32 values specified by 'MAP'.
>>   * This works as the first statement in a block.*/
>>  #define MINIFLOW_FOR_EACH_IN_MAP(VALUE, FLOW, MAP)                      \
>>      const uint32_t *fp_ = miniflow_get_u32_values(FLOW);                \
>> --
>> 1.7.10.4
>>