[ovs-dev] [PATCH v3] classifier: Prevent tries vs n_tries race leading to NULL dereference.

Eiichi Tsukata eiichi.tsukata at nutanix.com
Mon May 11 23:01:59 UTC 2020


Hello Ilya,

Could you review the patch?

Thanks

Eiichi

> On Apr 30, 2020, at 14:16, Eiichi Tsukata <eiichi.tsukata at nutanix.com> wrote:
> 
> Currently classifier tries and n_tries can be updated not atomically,
> there is a race condition which can lead to NULL dereference.
> The race can happen when main thread updates a classifier tries and
> n_tries in classifier_set_prefix_fields() and at the same time revalidator
> or handler thread try to lookup them in classifier_lookup__(). Such race
> can be triggered when user changes prefixes of flow_table.
> 
> Race(user changes flow_table prefixes: ip_dst,ip_src => none):
> 
>  [main thread]             [revalidator/handler thread]
>  ===========================================================
>                            /* cls->n_tries == 2 */
>                            for (int i = 0; i < cls->n_tries; i++) {
>  trie_init(cls, i, NULL);
>  /* n_tries == 0 */
>  cls->n_tries = n_tries;
>                            /* cls->tries[i]->feild is NULL */
>                            trie_ctx_init(&trie_ctx[i],&cls->tries[i]);
>                            /* trie->field is NULL */
>                            ctx->be32ofs = trie->field->flow_be32ofs;
> 
> To prevent the race, instead of re-introducing internal mutex
> implemented in the commit fccd7c092e09 ("classifier: Remove internal
> mutex."), this patch makes trie field RCU protected and checks it after
> read.
> 
> Fixes: fccd7c092e09 ("classifier: Remove internal mutex.")
> Signed-off-by: Eiichi Tsukata <eiichi.tsukata at nutanix.com>
> ---
> lib/classifier.c        | 37 ++++++++++++++++++++++---------------
> lib/classifier.h        |  6 ++++--
> tests/test-classifier.c |  5 +++--
> 3 files changed, 29 insertions(+), 19 deletions(-)
> 
> diff --git a/lib/classifier.c b/lib/classifier.c
> index 0fad953..10909a6 100644
> --- a/lib/classifier.c
> +++ b/lib/classifier.c
> @@ -393,7 +393,9 @@ classifier_set_prefix_fields(struct classifier *cls,
>         bitmap_set1(fields.bm, trie_fields[i]);
> 
>         new_fields[n_tries] = NULL;
> -        if (n_tries >= cls->n_tries || field != cls->tries[n_tries].field) {
> +        const struct mf_field *cls_field
> +            = ovsrcu_get(struct mf_field *, &cls->tries[n_tries].field);
> +        if (n_tries >= cls->n_tries || field != cls_field) {
>             new_fields[n_tries] = field;
>             changed = true;
>         }
> @@ -454,7 +456,7 @@ trie_init(struct classifier *cls, int trie_idx, const struct mf_field *field)
>     } else {
>         ovsrcu_set_hidden(&trie->root, NULL);
>     }
> -    trie->field = field;
> +    ovsrcu_set_hidden(&trie->field, field);
> 
>     /* Add existing rules to the new trie. */
>     CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) {
> @@ -839,7 +841,6 @@ classifier_remove_assert(struct classifier *cls,
> struct trie_ctx {
>     const struct cls_trie *trie;
>     bool lookup_done;        /* Status of the lookup. */
> -    uint8_t be32ofs;         /* U32 offset of the field in question. */
>     unsigned int maskbits;   /* Prefix length needed to avoid false matches. */
>     union trie_prefix match_plens;  /* Bitmask of prefix lengths with possible
>                                      * matches. */
> @@ -849,7 +850,6 @@ static void
> trie_ctx_init(struct trie_ctx *ctx, const struct cls_trie *trie)
> {
>     ctx->trie = trie;
> -    ctx->be32ofs = trie->field->flow_be32ofs;
>     ctx->lookup_done = false;
> }
> 
> @@ -1531,8 +1531,10 @@ insert_subtable(struct classifier *cls, const struct minimask *mask)
>     *CONST_CAST(uint8_t *, &subtable->n_indices) = index;
> 
>     for (i = 0; i < cls->n_tries; i++) {
> -        subtable->trie_plen[i] = minimask_get_prefix_len(mask,
> -                                                         cls->tries[i].field);
> +        const struct mf_field *field
> +            = ovsrcu_get(struct mf_field *, &cls->tries[i].field);
> +        subtable->trie_plen[i]
> +            = field ? minimask_get_prefix_len(mask, field) : 0;
>     }
> 
>     /* Ports trie. */
> @@ -1577,8 +1579,10 @@ check_tries(struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries,
>     for (j = 0; j < n_tries; j++) {
>         /* Is the trie field relevant for this subtable, and
>            is the trie field within the current range of fields? */
> -        if (field_plen[j] &&
> -            flowmap_is_set(&range_map, trie_ctx[j].be32ofs / 2)) {
> +        const struct mf_field *ctx_field
> +            = ovsrcu_get(struct mf_field *, &trie_ctx[j].trie->field);
> +        if (field_plen[j] && ctx_field &&
> +            flowmap_is_set(&range_map, ctx_field->flow_be32ofs / 2)) {
>             struct trie_ctx *ctx = &trie_ctx[j];
> 
>             /* On-demand trie lookup. */
> @@ -1601,14 +1605,16 @@ check_tries(struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries,
>                  * than this subtable would otherwise. */
>                 if (ctx->maskbits <= field_plen[j]) {
>                     /* Unwildcard the bits and skip the rest. */
> -                    mask_set_prefix_bits(wc, ctx->be32ofs, ctx->maskbits);
> +                    mask_set_prefix_bits(wc, ctx_field->flow_be32ofs,
> +                                         ctx->maskbits);
>                     /* Note: Prerequisite already unwildcarded, as the only
>                      * prerequisite of the supported trie lookup fields is
>                      * the ethertype, which is always unwildcarded. */
>                     return true;
>                 }
>                 /* Can skip if the field is already unwildcarded. */
> -                if (mask_prefix_bits_set(wc, ctx->be32ofs, ctx->maskbits)) {
> +                if (mask_prefix_bits_set(wc, ctx_field->flow_be32ofs,
> +                                         ctx->maskbits)) {
>                     return true;
>                 }
>             }
> @@ -2001,12 +2007,12 @@ static unsigned int
> trie_lookup(const struct cls_trie *trie, const struct flow *flow,
>             union trie_prefix *plens)
> {
> -    const struct mf_field *mf = trie->field;
> +    const struct mf_field *mf = ovsrcu_get(struct mf_field *, &trie->field);
> 
>     /* Check that current flow matches the prerequisites for the trie
>      * field.  Some match fields are used for multiple purposes, so we
>      * must check that the trie is relevant for this flow. */
> -    if (mf_are_prereqs_ok(mf, flow, NULL)) {
> +    if (mf && mf_are_prereqs_ok(mf, flow, NULL)) {
>         return trie_lookup_value(&trie->root,
>                                  &((ovs_be32 *)flow)[mf->flow_be32ofs],
>                                  &plens->be32, mf->n_bits);
> @@ -2053,8 +2059,9 @@ minimask_get_prefix_len(const struct minimask *minimask,
>  * happened to be zeros.
>  */
> static const ovs_be32 *
> -minimatch_get_prefix(const struct minimatch *match, const struct mf_field *mf)
> +minimatch_get_prefix(const struct minimatch *match, rcu_field_ptr *field)
> {
> +    struct mf_field *mf = ovsrcu_get_protected(struct mf_field *, field);
>     size_t u64_ofs = mf->flow_be32ofs / 2;
> 
>     return (OVS_FORCE const ovs_be32 *)miniflow_get__(match->flow, u64_ofs)
> @@ -2068,7 +2075,7 @@ static void
> trie_insert(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
> {
>     trie_insert_prefix(&trie->root,
> -                       minimatch_get_prefix(&rule->match, trie->field), mlen);
> +                       minimatch_get_prefix(&rule->match, &trie->field), mlen);
> }
> 
> static void
> @@ -2123,7 +2130,7 @@ static void
> trie_remove(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
> {
>     trie_remove_prefix(&trie->root,
> -                       minimatch_get_prefix(&rule->match, trie->field), mlen);
> +                       minimatch_get_prefix(&rule->match, &trie->field), mlen);
> }
> 
> /* 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
> diff --git a/lib/classifier.h b/lib/classifier.h
> index d1bd4aa..f646a8f 100644
> --- a/lib/classifier.h
> +++ b/lib/classifier.h
> @@ -314,13 +314,15 @@ extern "C" {
> struct cls_subtable;
> struct cls_match;
> 
> +struct mf_field;
> +typedef OVSRCU_TYPE(struct mf_field *) rcu_field_ptr;
> struct trie_node;
> typedef OVSRCU_TYPE(struct trie_node *) rcu_trie_ptr;
> 
> /* Prefix trie for a 'field' */
> struct cls_trie {
> -    const struct mf_field *field; /* Trie field, or NULL. */
> -    rcu_trie_ptr root;            /* NULL if none. */
> +    rcu_field_ptr field;   /* Trie field, or NULL. */
> +    rcu_trie_ptr root;     /* NULL if none. */
> };
> 
> enum {
> diff --git a/tests/test-classifier.c b/tests/test-classifier.c
> index 6d53d01..2d98fad 100644
> --- a/tests/test-classifier.c
> +++ b/tests/test-classifier.c
> @@ -512,8 +512,9 @@ verify_tries(struct classifier *cls)
>     int i;
> 
>     for (i = 0; i < cls->n_tries; i++) {
> -        n_rules += trie_verify(&cls->tries[i].root, 0,
> -                               cls->tries[i].field->n_bits);
> +        const struct mf_field * cls_field
> +            = ovsrcu_get(struct mf_field *, &cls->tries[i].field);
> +        n_rules += trie_verify(&cls->tries[i].root, 0, cls_field->n_bits);
>     }
>     assert(n_rules <= cls->n_rules);
> }
> -- 
> 1.8.3.1
> 



More information about the dev mailing list