[ovs-dev] [patch v1 1/2] conntrack: Fix race for NAT cleanup.

Darrell Ball dlu998 at gmail.com
Wed Mar 13 15:51:39 UTC 2019


V1 is superseded by the following change in V2: forcibly deleting an
entry that is found in the reply direction is the rare case, so the
branch hint should be OVS_UNLIKELY rather than OVS_LIKELY.

-    if (OVS_LIKELY(force && ctx->reply && conn)) {
+    if (OVS_UNLIKELY(force && ctx->reply && conn)) {
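
For reference, these branch hints are thin wrappers around the GCC/Clang
builtin; condensed from OVS's compiler.h (fallback definitions for other
compilers omitted):

    #define OVS_LIKELY(CONDITION)   __builtin_expect(!!(CONDITION), 1)
    #define OVS_UNLIKELY(CONDITION) __builtin_expect(!!(CONDITION), 0)

The hints only steer block layout and branch prediction; a wrong hint
costs cycles, not correctness, so V1 remains functionally equivalent.
(An annotated sketch of the new cleanup locking pattern follows the
quoted patch below.)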

On Wed, Mar 13, 2019 at 1:08 AM Darrell Ball <dlu998 at gmail.com> wrote:

> Reference lists are not fully protected during cleanup of
> NAT connections where the bucket lock is transiently not held during
> list traversal.  This can lead to referencing freed memory during
> cleaning from multiple contexts.  Fix this by protecting with
> the existing 'cleanup' mutex in the missed cases where 'conn_clean()'
> is called.  'conntrack_flush()' is converted to expiry list traversal
> to support the proper bucket level protection with the 'cleanup' mutex.
>
> Fixes: 286de2729955 ("dpdk: Userspace Datapath: Introduce NAT Support.")
> Reported-by: solomon <liwei.solomon at gmail.com>
> Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2019-March/357056.html
> Tested-by: solomon <liwei.solomon at gmail.com>
> Signed-off-by: Darrell Ball <dlu998 at gmail.com>
> ---
>  lib/conntrack.c | 96 +++++++++++++++++++++++++++++++++++++++++----------------
>  1 file changed, 70 insertions(+), 26 deletions(-)
>
> diff --git a/lib/conntrack.c b/lib/conntrack.c
> index 691782c..40a6021 100644
> --- a/lib/conntrack.c
> +++ b/lib/conntrack.c
> @@ -753,6 +753,24 @@ conn_lookup(struct conntrack *ct, const struct conn_key *key, long long now)
>      return ctx.conn;
>  }
>
> +/* This function is called with the bucket lock held. */
> +static struct conn *
> +conn_lookup_any(const struct conn_key *key,
> +                const struct conntrack_bucket *ctb, uint32_t hash)
> +{
> +    struct conn *conn = NULL;
> +
> +    HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
> +        if (!conn_key_cmp(&conn->key, key)) {
> +            break;
> +        }
> +        if (!conn_key_cmp(&conn->rev_key, key)) {
> +            break;
> +        }
> +    }
> +    return conn;
> +}
> +
>  static void
>  conn_seq_skew_set(struct conntrack *ct, const struct conn_key *key,
>                    long long now, int seq_skew, bool seq_skew_dir)
> @@ -823,6 +841,20 @@ conn_clean(struct conntrack *ct, struct conn *conn,
>      }
>  }
>
> +static void
> +conn_clean_safe(struct conntrack *ct, struct conn *conn,
> +                struct conntrack_bucket *ctb, uint32_t hash)
> +{
> +    ovs_mutex_lock(&ctb->cleanup_mutex);
> +    ct_lock_lock(&ctb->lock);
> +    conn = conn_lookup_any(&conn->key, ctb, hash);
> +    if (conn) {
> +        conn_clean(ct, conn, ctb);
> +    }
> +    ct_lock_unlock(&ctb->lock);
> +    ovs_mutex_unlock(&ctb->cleanup_mutex);
> +}
> +
>  static bool
>  ct_verify_helper(const char *helper, enum ct_alg_ctl_type ct_alg_ctl)
>  {
> @@ -969,6 +1001,7 @@ conn_update_state(struct conntrack *ct, struct dp_packet *pkt,
>      OVS_REQUIRES(ct->buckets[bucket].lock)
>  {
>      bool create_new_conn = false;
> +    struct conn lconn;
>
>      if (ctx->icmp_related) {
>          pkt->md.ct_state |= CS_RELATED;
> @@ -995,7 +1028,10 @@ conn_update_state(struct conntrack *ct, struct dp_packet *pkt,
>              pkt->md.ct_state = CS_INVALID;
>              break;
>          case CT_UPDATE_NEW:
> -            conn_clean(ct, *conn, &ct->buckets[bucket]);
> +            memcpy(&lconn, *conn, sizeof lconn);
> +            ct_lock_unlock(&ct->buckets[bucket].lock);
> +            conn_clean_safe(ct, &lconn, &ct->buckets[bucket], ctx->hash);
> +            ct_lock_lock(&ct->buckets[bucket].lock);
>              create_new_conn = true;
>              break;
>          default:
> @@ -1184,8 +1220,12 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
>      conn = ctx->conn;
>
>      /* Delete found entry if in wrong direction. 'force' implies commit. */
> -    if (conn && force && ctx->reply) {
> -        conn_clean(ct, conn, &ct->buckets[bucket]);
> +    if (OVS_LIKELY(force && ctx->reply && conn)) {
> +        struct conn lconn;
> +        memcpy(&lconn, conn, sizeof lconn);
> +        ct_lock_unlock(&ct->buckets[bucket].lock);
> +        conn_clean_safe(ct, &lconn, &ct->buckets[bucket], ctx->hash);
> +        ct_lock_lock(&ct->buckets[bucket].lock);
>          conn = NULL;
>      }
>
> @@ -1391,19 +1431,17 @@ sweep_bucket(struct conntrack *ct, struct conntrack_bucket *ctb,
>
>      for (unsigned i = 0; i < N_CT_TM; i++) {
>          LIST_FOR_EACH_SAFE (conn, next, exp_node, &ctb->exp_lists[i]) {
> -            if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
> -                if (!conn_expired(conn, now) || count >= limit) {
> -                    min_expiration = MIN(min_expiration, conn->expiration);
> -                    if (count >= limit) {
> -                        /* Do not check other lists. */
> -                        COVERAGE_INC(conntrack_long_cleanup);
> -                        return min_expiration;
> -                    }
> -                    break;
> +            if (!conn_expired(conn, now) || count >= limit) {
> +                min_expiration = MIN(min_expiration, conn->expiration);
> +                if (count >= limit) {
> +                    /* Do not check other lists. */
> +                    COVERAGE_INC(conntrack_long_cleanup);
> +                    return min_expiration;
>                  }
> -                conn_clean(ct, conn, ctb);
> -                count++;
> +                break;
>              }
> +            conn_clean(ct, conn, ctb);
> +            count++;
>          }
>      }
>      return min_expiration;
> @@ -2547,16 +2585,19 @@ int
>  conntrack_flush(struct conntrack *ct, const uint16_t *zone)
>  {
>      for (unsigned i = 0; i < CONNTRACK_BUCKETS; i++) {
> -        struct conn *conn, *next;
> -
> -        ct_lock_lock(&ct->buckets[i].lock);
> -        HMAP_FOR_EACH_SAFE (conn, next, node, &ct->buckets[i].connections) {
> -            if ((!zone || *zone == conn->key.zone) &&
> -                (conn->conn_type == CT_CONN_TYPE_DEFAULT)) {
> -                conn_clean(ct, conn, &ct->buckets[i]);
> +        struct conntrack_bucket *ctb = &ct->buckets[i];
> +        ovs_mutex_lock(&ctb->cleanup_mutex);
> +        ct_lock_lock(&ctb->lock);
> +        for (unsigned j = 0; j < N_CT_TM; j++) {
> +            struct conn *conn, *next;
> +            LIST_FOR_EACH_SAFE (conn, next, exp_node, &ctb->exp_lists[j]) {
> +                if (!zone || *zone == conn->key.zone) {
> +                    conn_clean(ct, conn, ctb);
> +                }
>              }
>          }
> -        ct_lock_unlock(&ct->buckets[i].lock);
> +        ct_lock_unlock(&ctb->lock);
> +        ovs_mutex_unlock(&ctb->cleanup_mutex);
>      }
>
>      return 0;
> @@ -2573,16 +2614,19 @@ conntrack_flush_tuple(struct conntrack *ct, const struct ct_dpif_tuple *tuple,
>      tuple_to_conn_key(tuple, zone, &ctx.key);
>      ctx.hash = conn_key_hash(&ctx.key, ct->hash_basis);
>      unsigned bucket = hash_to_bucket(ctx.hash);
> +    struct conntrack_bucket *ctb = &ct->buckets[bucket];
>
> -    ct_lock_lock(&ct->buckets[bucket].lock);
> -    conn_key_lookup(&ct->buckets[bucket], &ctx, time_msec());
> +    ovs_mutex_lock(&ctb->cleanup_mutex);
> +    ct_lock_lock(&ctb->lock);
> +    conn_key_lookup(ctb, &ctx, time_msec());
>      if (ctx.conn && ctx.conn->conn_type == CT_CONN_TYPE_DEFAULT) {
> -        conn_clean(ct, ctx.conn, &ct->buckets[bucket]);
> +        conn_clean(ct, ctx.conn, ctb);
>      } else {
>          VLOG_WARN("Must flush tuple using the original pre-NATed tuple");
>          error = ENOENT;
>      }
> -    ct_lock_unlock(&ct->buckets[bucket].lock);
> +    ct_lock_unlock(&ctb->lock);
> +    ovs_mutex_unlock(&ctb->cleanup_mutex);
>      return error;
>  }
>
> --
> 1.9.1
>
>
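
For readers skimming the archive, the heart of the fix is the locking
pattern below: take the per-bucket 'cleanup' mutex before the bucket
lock, and look the connection up again once the bucket lock is held,
since another cleaner may have freed it in the window where the lock
was dropped.  A condensed, annotated sketch of conn_clean_safe() and a
typical call site, with names as in the patch above and comments added
here:

    static void
    conn_clean_safe(struct conntrack *ct, struct conn *conn,
                    struct conntrack_bucket *ctb, uint32_t hash)
    {
        ovs_mutex_lock(&ctb->cleanup_mutex);  /* Serialize all cleaners. */
        ct_lock_lock(&ctb->lock);
        /* 'conn' may point at freed memory by now; trust only a fresh
         * lookup performed under the bucket lock. */
        conn = conn_lookup_any(&conn->key, ctb, hash);
        if (conn) {
            conn_clean(ct, conn, ctb);
        }
        ct_lock_unlock(&ctb->lock);
        ovs_mutex_unlock(&ctb->cleanup_mutex);
    }

    /* Call site sketch (cf. process_one()): the caller holds only the
     * bucket lock, so it snapshots the connection keys, drops the lock
     * to honor the mutex-before-lock order, and cleans via the helper;
     * the stack copy keeps '&lconn.key' valid even if the original
     * 'conn' is freed concurrently. */
    struct conn lconn;
    memcpy(&lconn, conn, sizeof lconn);
    ct_lock_unlock(&ct->buckets[bucket].lock);
    conn_clean_safe(ct, &lconn, &ct->buckets[bucket], ctx->hash);
    ct_lock_lock(&ct->buckets[bucket].lock);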

