[ovs-dev] [patch v4 1/2] conntrack: Fix race for NAT cleanup.

Darrell Ball dlu998 at gmail.com
Thu Mar 14 16:08:48 UTC 2019


On Thu, Mar 14, 2019 at 8:25 AM Ilya Maximets <i.maximets at samsung.com>
wrote:

> > Reference lists are not fully protected during cleanup of
> > NAT connections where the bucket lock is transiently not held during
> > list traversal.  This can lead to referencing freed memory during
> > cleaning from multiple contexts.  Fix this by protecting with
> > the existing 'cleanup' mutex in the missed cases where 'conn_clean()'
> > is called.  'conntrack_flush()' is converted to expiry list traversal
> > to support the proper bucket level protection with the 'cleanup' mutex.
> >
> > Fixes: 286de2729955 ("dpdk: Userspace Datapath: Introduce NAT Support.")
> > Reported-by: solomon <liwei.solomon at gmail.com>
> > Reported-at:
> https://mail.openvswitch.org/pipermail/ovs-dev/2019-March/357056.html
> > Tested-by: solomon <liwei.solomon at gmail.com>
> > Signed-off-by: Darrell Ball <dlu998 at gmail.com>
> > ---
> >
> > This patch is targeted for earlier releases as new RCU patches
> > inherently don't have this race.
> >
> > v4: Fix exhaustion case cleanup race in conn_not_found() as well.
> >     At the same time, simplify the related code.
> >
> > v3: Use cleanup_mutex in conntrack_destroy().
> >
> > v2: Fix typo.
> >
> >  lib/conntrack.c | 147
> ++++++++++++++++++++++++++++++++++++++------------------
> >  1 file changed, 99 insertions(+), 48 deletions(-)
> >
> > diff --git a/lib/conntrack.c b/lib/conntrack.c
> > index 691782c..96d7db8 100644
> > --- a/lib/conntrack.c
> > +++ b/lib/conntrack.c
> > @@ -355,7 +355,7 @@ conntrack_destroy(struct conntrack *ct)
> >          struct conntrack_bucket *ctb = &ct->buckets[i];
> >          struct conn *conn;
> >
> > -        ovs_mutex_destroy(&ctb->cleanup_mutex);
> > +        ovs_mutex_lock(&ctb->cleanup_mutex);
> >          ct_lock_lock(&ctb->lock);
> >          HMAP_FOR_EACH_POP (conn, node, &ctb->connections) {
> >              if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
> > @@ -365,7 +365,9 @@ conntrack_destroy(struct conntrack *ct)
> >          }
> >          hmap_destroy(&ctb->connections);
> >          ct_lock_unlock(&ctb->lock);
> > +        ovs_mutex_unlock(&ctb->cleanup_mutex);
> >          ct_lock_destroy(&ctb->lock);
> > +        ovs_mutex_destroy(&ctb->cleanup_mutex);
> >      }
> >      ct_rwlock_wrlock(&ct->resources_lock);
> >      struct nat_conn_key_node *nat_conn_key_node;
> > @@ -753,6 +755,24 @@ conn_lookup(struct conntrack *ct, const struct
> conn_key *key, long long now)
> >      return ctx.conn;
> >  }
> >
> > +/* This function is called with the bucket lock held. */
> > +static struct conn *
> > +conn_lookup_any(const struct conn_key *key,
> > +                const struct conntrack_bucket *ctb, uint32_t hash)
> > +{
> > +    struct conn *conn = NULL;
> > +
> > +    HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
> > +        if (!conn_key_cmp(&conn->key, key)) {
> > +            break;
> > +        }
> > +        if (!conn_key_cmp(&conn->rev_key, key)) {
> > +            break;
> > +        }
> > +    }
> > +    return conn;
> > +}
> > +
> >  static void
> >  conn_seq_skew_set(struct conntrack *ct, const struct conn_key *key,
> >                    long long now, int seq_skew, bool seq_skew_dir)
> > @@ -823,6 +843,20 @@ conn_clean(struct conntrack *ct, struct conn *conn,
> >      }
> >  }
> >
> > +static void
> > +conn_clean_safe(struct conntrack *ct, struct conn *conn,
> > +                struct conntrack_bucket *ctb, uint32_t hash)
> > +{
> > +    ovs_mutex_lock(&ctb->cleanup_mutex);
> > +    ct_lock_lock(&ctb->lock);
> > +    conn = conn_lookup_any(&conn->key, ctb, hash);
> > +    if (conn) {
> > +        conn_clean(ct, conn, ctb);
> > +    }
> > +    ct_lock_unlock(&ctb->lock);
> > +    ovs_mutex_unlock(&ctb->cleanup_mutex);
> > +}
> > +
> >  static bool
> >  ct_verify_helper(const char *helper, enum ct_alg_ctl_type ct_alg_ctl)
> >  {
> > @@ -876,9 +910,11 @@ conn_not_found(struct conntrack *ct, struct
> dp_packet *pkt,
> >          }
> >
> >          unsigned bucket = hash_to_bucket(ctx->hash);
> > -        nc = new_conn(&ct->buckets[bucket], pkt, &ctx->key, now);
> > -        ctx->conn = nc;
> > -        nc->rev_key = nc->key;
> > +        struct conn connl;
> > +        nc = &connl;
> > +        memset(nc, 0, sizeof *nc);
> > +        memcpy(&nc->key, &ctx->key, sizeof nc->key);
> > +        memcpy(&nc->rev_key, &nc->key, sizeof nc->rev_key);
> >          conn_key_reverse(&nc->rev_key);
> >
> >          if (ct_verify_helper(helper, ct_alg_ctl)) {
> > @@ -921,6 +957,7 @@ conn_not_found(struct conntrack *ct, struct
> dp_packet *pkt,
> >                  ct_rwlock_wrlock(&ct->resources_lock);
> >                  bool nat_res = nat_select_range_tuple(ct, nc,
> >
> conn_for_un_nat_copy);
> > +                ct_rwlock_unlock(&ct->resources_lock);
> >
> >                  if (!nat_res) {
> >                      goto nat_res_exhaustion;
> > @@ -928,15 +965,25 @@ conn_not_found(struct conntrack *ct, struct
> dp_packet *pkt,
> >
> >                  /* Update nc with nat adjustments made to
> >                   * conn_for_un_nat_copy by nat_select_range_tuple(). */
> > -                *nc = *conn_for_un_nat_copy;
> > -                ct_rwlock_unlock(&ct->resources_lock);
> > +                memcpy(nc, conn_for_un_nat_copy, sizeof *nc);
> >              }
> >              conn_for_un_nat_copy->conn_type = CT_CONN_TYPE_UN_NAT;
> >              conn_for_un_nat_copy->nat_info = NULL;
> >              conn_for_un_nat_copy->alg = NULL;
> >              nat_packet(pkt, nc, ctx->icmp_related);
> >          }
> > -        hmap_insert(&ct->buckets[bucket].connections, &nc->node,
> ctx->hash);
> > +        struct conn *nconn = new_conn(&ct->buckets[bucket], pkt,
> &ctx->key,
> > +                                      now);
> > +        memcpy(&nconn->key, &nc->key, sizeof nconn->key);
> > +        memcpy(&nconn->rev_key, &nc->rev_key, sizeof nconn->rev_key);
> > +        memcpy(&nconn->master_key, &nc->master_key, sizeof
> nconn->master_key);
> > +        nconn->alg_related = nc->alg_related;
> > +        nconn->alg = nc->alg;
> > +        nconn->mark = nc->mark;
> > +        nconn->label = nc->label;
> > +        nconn->nat_info = nc->nat_info;
> > +        ctx->conn = nc = nconn;
> > +        hmap_insert(&ct->buckets[bucket].connections, &nconn->node,
> ctx->hash);
> >          atomic_count_inc(&ct->n_conn);
> >      }
> >
> > @@ -949,13 +996,8 @@ conn_not_found(struct conntrack *ct, struct
> dp_packet *pkt,
> >       * against with firewall rules or a separate firewall.
> >       * Also using zone partitioning can limit DoS impact. */
> >  nat_res_exhaustion:
> > -    ovs_list_remove(&nc->exp_node);
> > -    delete_conn(nc);
> > -    /* conn_for_un_nat_copy is a local variable in process_one; this
> > -     * memset() serves to document that conn_for_un_nat_copy is from
> > -     * this point on unused. */
> > -    memset(conn_for_un_nat_copy, 0, sizeof *conn_for_un_nat_copy);
> > -    ct_rwlock_unlock(&ct->resources_lock);
> > +    free(nc->alg);
>
> Newer GCC complains here:
>
> lib/conntrack.c:1016:12: error: 'connl.alg' may be used uninitialized in
> this function [-Werror=maybe-uninitialized]
>      free(nc->alg);
>           ~~^~~~~


> "struct conn connl" is allocated on the stack, but you're using a
> pointer to it after jumping out of its enclosing block.
>

Thanks.
Theoretically, it is also possible for a compiler implementation not to
allocate enough stack for the deepest level of block nesting in a
function, so the object's storage cannot be relied upon after jumping
out of its block.


>
> Best regards, Ilya Maximets.
>


More information about the dev mailing list