[ovs-dev] [crash] trigger a panic when flush-conntrack

Li Wei liwei.solomon at gmail.com
Mon Mar 11 03:49:06 UTC 2019


hi  Darrell:

  With your new patch(see attached file), still panic both in conntrack_flush() and sweep_bucket().

(gdb) bt
#0  __GI_raise (sig=sig at entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
#1  0x00007f2d9af7942a in __GI_abort () at abort.c:89
#2  0x00007f2d9afb5c00 in __libc_message (do_abort=do_abort at entry=2, fmt=fmt at entry=0x7f2d9b0aad98 "*** Error in `%s': %s: 0x%s ***\n")
    at ../sysdeps/posix/libc_fatal.c:175
#3  0x00007f2d9afbbfc6 in malloc_printerr (action=3, str=0x7f2d9b0aaef0 "double free or corruption (fasttop)", ptr=<optimized out>, ar_ptr=<optimized out>)
    at malloc.c:5049
#4  0x00007f2d9afbc80e in _int_free (av=0x7f2698000020, p=0x7f1fcbb78440, have_lock=0) at malloc.c:3905
#5  0x000055a2c3e0df90 in delete_conn (conn=conn at entry=0x7f1fcbb78340) at lib/conntrack.c:2419
#6  0x000055a2c3e0f31c in nat_clean (ctb=0x7f2d9ca2cd90, conn=0x7f1fcbb78340, ct=0x7f2d9ca24d98) at lib/conntrack.c:843
#7  conn_clean (ct=ct at entry=0x7f2d9ca24d98, conn=0x7f1fcbb78340, ctb=ctb at entry=0x7f2d9ca2cd90) at lib/conntrack.c:869
#8  0x000055a2c3e14820 in conntrack_flush (ct=0x7f2d9ca24d98, zone=0x0) at lib/conntrack.c:2613
#9  0x000055a2c3cf9b39 in ct_dpif_flush (dpif=0x55a2c4e7b6f0, zone=zone at entry=0x0, tuple=tuple at entry=0x0) at lib/ct-dpif.c:140
#10 0x000055a2c3e18970 in dpctl_flush_conntrack (argc=argc at entry=1, argv=argv at entry=0x55a2c4dd2550, dpctl_p=dpctl_p at entry=0x7ffd9fda7600) at lib/dpctl.c:1388
#11 0x000055a2c3e15d48 in dpctl_unixctl_handler (conn=0x55a2c4d9cf50, argc=1, argv=0x55a2c4dd2550, aux=0x55a2c3e187e0 <dpctl_flush_conntrack>) at lib/dpctl.c:2312
#12 0x000055a2c3db76ea in process_command (request=<optimized out>, conn=0x55a2c4d9cf50) at lib/unixctl.c:308
#13 run_connection (conn=0x55a2c4d9cf50) at lib/unixctl.c:342
#14 unixctl_server_run (server=0x55a2c4d94230) at lib/unixctl.c:393
#15 0x000055a2c393b217 in main (argc=<optimized out>, argv=<optimized out>) at vswitchd/ovs-vswitchd.c:126

# cat /var/log/openvswitch/ovs-vswitchd.log|grep conntrack
2019-03-11T03:36:31.293Z|00001|conntrack(pmd61)|WARN|Unusual condition for un_nat conn create: nat_conn_key_node/rev_conn (nil)/(nil): src ip 222.15.63.163 dst ip 172.89.78.140 rev src ip 2.128.173.3 rev dst ip 222.15.63.163 src/dst ports 80/10623 rev src/dst ports 10623/80 zone/rev zone 0/0 nw_proto/rev nw_proto 6/6
2019-03-11T03:36:33.599Z|00001|conntrack(pmd82)|WARN|Unusual condition for un_nat conn create: nat_conn_key_node/rev_conn (nil)/(nil): src ip 222.15.63.163 dst ip 172.31.235.224 rev src ip 2.143.150.201 rev dst ip 222.15.63.163 src/dst ports 80/17937 rev src/dst ports 17937/80 zone/rev zone 0/0 nw_proto/rev nw_proto 6/6
2019-03-11T03:36:33.907Z|00054|coverage(ct_clean3)|INFO|conntrack_full           13374.0/sec 61885.550/sec     1031.4258/sec   total: 3718506
2019-03-11T03:36:33.907Z|00055|coverage(ct_clean3)|INFO|conntrack_long_cleanup     0.0/sec     0.000/sec        0.0000/sec   total: 188
2019-03-11T03:36:33.907Z|00059|poll_loop(ct_clean3)|INFO|wakeup due to 0-ms timeout at lib/conntrack.c:1554 (64% CPU usage)
2019-03-11T03:36:34.004Z|00221|conntrack|WARN|conn_clean: conn not present in hmap: src ip 240.132.183.203 dst ip 222.15.63.163 rev src ip 222.15.63.163 rev dst ip 172.68.216.36 src/dst ports 31344/80 rev src/dst ports 80/31344 zone/rev zone 0/0 nw_proto/rev nw_proto 6/6


Darrell Ball wrote:
> Hi Solomon
> 
> There was a typo in nat_clean(...)
> 
>      ct_rwlock_unlock(&ct->resources_lock);
>      ct_lock_unlock(&ctb->lock);
>      uint32_t hash = conn_key_hash(&conn->rev_key, ct->hash_basis);
> -    unsigned bucket_rev_conn =
> -        hash_to_bucket(conn_key_hash(&conn->rev_key, hash));
> +    unsigned bucket_rev_conn = hash_to_bucket(hash);
>      ct_lock_lock(&ct->buckets[bucket_rev_conn].lock);
>      ct_rwlock_wrlock(&ct->resources_lock);
> 
> I also cleaned up the patch a little more and ended up with:
> 
> diff --git a/lib/conntrack.c b/lib/conntrack.c
> index 5410ab4..68d9816 100644
> --- a/lib/conntrack.c
> +++ b/lib/conntrack.c
> @@ -250,6 +250,22 @@ conn_key_cmp(const struct conn_key *key1, const struct
> conn_key *key2)
>      return 1;
>  }
> 
> +static bool
> +conn_key_present(struct conntrack *ct, struct conntrack_bucket *ctb,
> +                 const struct conn_key *key)
> +    OVS_REQUIRES(ctb->lock)
> +{
> +    struct conn *conn;
> +    uint32_t hash = conn_key_hash(key, ct->hash_basis);
> +
> +    HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
> +        if (!conn_key_cmp(&conn->key, key)) {
> +            return true;
> +        }
> +    }
> +    return false;
> +}
> +
>  static void
>  ct_print_conn_info(const struct conn *c, const char *log_msg,
>                     enum vlog_level vll, bool force, bool rl_on)
> @@ -738,29 +754,53 @@ un_nat_packet(struct dp_packet *pkt, const struct
> conn *conn,
>      }
>  }
> 
> -/* Typical usage of this helper is in non per-packet code;
> - * this is because the bucket lock needs to be held for lookup
> - * and a hash would have already been needed. Hence, this function
> - * is just intended for code clarity. */
> +/* This function is called with the bucket lock held. */
>  static struct conn *
> -conn_lookup(struct conntrack *ct, const struct conn_key *key, long long
> now)
> +conn_lookup_any(const struct conn_key *key,
> +                const struct conntrack_bucket *ctb, uint32_t hash)
>  {
> -    struct conn_lookup_ctx ctx;
> -    ctx.conn = NULL;
> -    ctx.key = *key;
> -    ctx.hash = conn_key_hash(key, ct->hash_basis);
> -    unsigned bucket = hash_to_bucket(ctx.hash);
> -    conn_key_lookup(&ct->buckets[bucket], &ctx, now);
> -    return ctx.conn;
> +    struct conn *conn = NULL;
> +
> +    HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
> +        if (!conn_key_cmp(&conn->key, key)) {
> +            break;
> +        }
> +        if (!conn_key_cmp(&conn->rev_key, key)) {
> +            break;
> +        }
> +    }
> +    return conn;
> +}
> +
> +/* This function is called with the bucket lock held. */
> +static struct conn *
> +conn_lookup_unnat(const struct conn_key *key,
> +                  const struct conntrack_bucket *ctb, uint32_t hash)
> +{
> +    struct conn *conn = NULL;
> +
> +    HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
> +        if (!conn_key_cmp(&conn->key, key)
> +            && conn->conn_type == CT_CONN_TYPE_UN_NAT) {
> +            break;
> +        }
> +    }
> +    return conn;
>  }
> 
>  static void
>  conn_seq_skew_set(struct conntrack *ct, const struct conn_key *key,
>                    long long now, int seq_skew, bool seq_skew_dir)
>  {
> -    unsigned bucket = hash_to_bucket(conn_key_hash(key, ct->hash_basis));
> +    uint32_t hash = conn_key_hash(key, ct->hash_basis);
> +    unsigned bucket = hash_to_bucket(hash);
>      ct_lock_lock(&ct->buckets[bucket].lock);
> -    struct conn *conn = conn_lookup(ct, key, now);
> +    struct conn_lookup_ctx ctx;
> +    ctx.key = *key;
> +    ctx.hash = hash;
> +    conn_key_lookup(&ct->buckets[bucket], &ctx, now);
> +    struct conn *conn = ctx.conn;
> +
>      if (conn && seq_skew) {
>          conn->seq_skew = seq_skew;
>          conn->seq_skew_dir = seq_skew_dir;
> @@ -777,12 +817,13 @@ nat_clean(struct conntrack *ct, struct conn *conn,
>      nat_conn_keys_remove(&ct->nat_conn_keys, &conn->rev_key,
> ct->hash_basis);
>      ct_rwlock_unlock(&ct->resources_lock);
>      ct_lock_unlock(&ctb->lock);
> -    unsigned bucket_rev_conn =
> -        hash_to_bucket(conn_key_hash(&conn->rev_key, ct->hash_basis));
> +    uint32_t hash = conn_key_hash(&conn->rev_key, ct->hash_basis);
> +    unsigned bucket_rev_conn = hash_to_bucket(hash);
>      ct_lock_lock(&ct->buckets[bucket_rev_conn].lock);
>      ct_rwlock_wrlock(&ct->resources_lock);
> -    long long now = time_msec();
> -    struct conn *rev_conn = conn_lookup(ct, &conn->rev_key, now);
> +    struct conn *rev_conn = conn_lookup_unnat(&conn->rev_key,
> +
> &ct->buckets[bucket_rev_conn],
> +                                              hash);
>      struct nat_conn_key_node *nat_conn_key_node =
>          nat_conn_keys_lookup(&ct->nat_conn_keys, &conn->rev_key,
>                               ct->hash_basis);
> @@ -812,7 +853,16 @@ conn_clean(struct conntrack *ct, struct conn *conn,
>          expectation_clean(ct, &conn->key, ct->hash_basis);
>      }
>      ovs_list_remove(&conn->exp_node);
> -    hmap_remove(&ctb->connections, &conn->node);
> +
> +    /* Temporary debug check. */
> +    if (conn_key_present(ct, ctb, &conn->key)) {
> +        hmap_remove(&ctb->connections, &conn->node);
> +    } else {
> +        char *log_msg = xasprintf("conn_clean: conn not present in hmap");
> +        ct_print_conn_info(conn, log_msg, VLL_WARN, true, false);
> +        free(log_msg);
> +    }
> +
>      atomic_count_dec(&ct->n_conn);
>      if (conn->nat_info) {
>          nat_clean(ct, conn, ctb);
> @@ -1005,7 +1055,7 @@ conn_update_state(struct conntrack *ct, struct
> dp_packet *pkt,
> 
>  static void
>  create_un_nat_conn(struct conntrack *ct, struct conn *conn_for_un_nat_copy,
> -                   long long now, bool alg_un_nat)
> +                   bool alg_un_nat)
>  {
>      struct conn *nc = xmemdup(conn_for_un_nat_copy, sizeof *nc);
>      nc->key = conn_for_un_nat_copy->rev_key;
> @@ -1013,7 +1063,9 @@ create_un_nat_conn(struct conntrack *ct, struct conn
> *conn_for_un_nat_copy,
>      uint32_t un_nat_hash = conn_key_hash(&nc->key, ct->hash_basis);
>      unsigned un_nat_conn_bucket = hash_to_bucket(un_nat_hash);
>      ct_lock_lock(&ct->buckets[un_nat_conn_bucket].lock);
> -    struct conn *rev_conn = conn_lookup(ct, &nc->key, now);
> +    struct conn *rev_conn = conn_lookup_any(&nc->key,
> +
> &ct->buckets[un_nat_conn_bucket],
> +                                            un_nat_hash);
> 
>      if (alg_un_nat) {
>          if (!rev_conn) {
> @@ -1022,7 +1074,7 @@ create_un_nat_conn(struct conntrack *ct, struct conn
> *conn_for_un_nat_copy,
>          } else {
>              char *log_msg = xasprintf("Unusual condition for un_nat conn "
>                                        "create for alg: rev_conn %p",
> rev_conn);
> -            ct_print_conn_info(nc, log_msg, VLL_INFO, true, false);
> +            ct_print_conn_info(nc, log_msg, VLL_WARN, true, false);
>              free(log_msg);
>              free(nc);
>          }
> @@ -1030,16 +1082,26 @@ create_un_nat_conn(struct conntrack *ct, struct
> conn *conn_for_un_nat_copy,
>          ct_rwlock_rdlock(&ct->resources_lock);
> 
>          struct nat_conn_key_node *nat_conn_key_node =
> -            nat_conn_keys_lookup(&ct->nat_conn_keys, &nc->key,
> ct->hash_basis);
> +           nat_conn_keys_lookup(&ct->nat_conn_keys, &nc->key,
> ct->hash_basis);
>          if (nat_conn_key_node && !conn_key_cmp(&nat_conn_key_node->value,
> -            &nc->rev_key) && !rev_conn) {
> -            hmap_insert(&ct->buckets[un_nat_conn_bucket].connections,
> -                        &nc->node, un_nat_hash);
> +                                               &nc->rev_key)) {
> +            if (!rev_conn) {
> +                hmap_insert(&ct->buckets[un_nat_conn_bucket].connections,
> +                            &nc->node, un_nat_hash);
> +            } else {
> +                char *log_msg = xasprintf("NAT conflict; un-nat will not"
> +                                          "happen; likely DOS");
> +                ct_print_conn_info(nc, log_msg, VLL_WARN, true, false);
> +                free(log_msg);
> +                nat_conn_keys_remove(&ct->nat_conn_keys, &nc->key,
> +                                     ct->hash_basis);
> +                free(nc);
> +            }
>          } else {
>              char *log_msg = xasprintf("Unusual condition for un_nat conn "
>                                        "create: nat_conn_key_node/rev_conn "
> -                                      "%p/%p", nat_conn_key_node,
> rev_conn);
> -            ct_print_conn_info(nc, log_msg, VLL_INFO, true, false);
> +                                      "%p", nat_conn_key_node);
> +            ct_print_conn_info(nc, log_msg, VLL_WARN, true, false);
>              free(log_msg);
>              free(nc);
>          }
> @@ -1286,7 +1348,7 @@ process_one(struct conntrack *ct, struct dp_packet
> *pkt,
>      ct_lock_unlock(&ct->buckets[bucket].lock);
> 
>      if (is_un_nat_conn_valid(&conn_for_un_nat_copy)) {
> -        create_un_nat_conn(ct, &conn_for_un_nat_copy, now, !!alg_exp);
> +        create_un_nat_conn(ct, &conn_for_un_nat_copy, !!alg_exp);
>      }
> 
>      handle_alg_ctl(ct, ctx, pkt, ct_alg_ctl, conn, now, !!nat_action_info,
> @@ -1383,19 +1445,18 @@ sweep_bucket(struct conntrack *ct, struct
> conntrack_bucket *ctb,
> 
>      for (unsigned i = 0; i < N_CT_TM; i++) {
>          LIST_FOR_EACH_SAFE (conn, next, exp_node, &ctb->exp_lists[i]) {
> -            if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
> -                if (!conn_expired(conn, now) || count >= limit) {
> -                    min_expiration = MIN(min_expiration, conn->expiration);
> -                    if (count >= limit) {
> -                        /* Do not check other lists. */
> -                        COVERAGE_INC(conntrack_long_cleanup);
> -                        return min_expiration;
> -                    }
> -                    break;
> +            ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
> +            if (!conn_expired(conn, now) || count >= limit) {
> +                min_expiration = MIN(min_expiration, conn->expiration);
> +                if (count >= limit) {
> +                    /* Do not check other lists. */
> +                    COVERAGE_INC(conntrack_long_cleanup);
> +                    return min_expiration;
>                  }
> -                conn_clean(ct, conn, ctb);
> -                count++;
> +                break;
>              }
> +            conn_clean(ct, conn, ctb);
> +            count++;
>          }
>      }
>      return min_expiration;
> @@ -2540,16 +2601,18 @@ int
>  conntrack_flush(struct conntrack *ct, const uint16_t *zone)
>  {
>      for (unsigned i = 0; i < CONNTRACK_BUCKETS; i++) {
> -        struct conn *conn, *next;
> -
> -        ct_lock_lock(&ct->buckets[i].lock);
> -        HMAP_FOR_EACH_SAFE (conn, next, node, &ct->buckets[i].connections)
> {
> -            if ((!zone || *zone == conn->key.zone) &&
> -                (conn->conn_type == CT_CONN_TYPE_DEFAULT)) {
> -                conn_clean(ct, conn, &ct->buckets[i]);
> +        struct conntrack_bucket *ctb = &ct->buckets[i];
> +        ct_lock_lock(&ctb->lock);
> +        for (unsigned j = 0; j < N_CT_TM; j++) {
> +            struct conn *conn, *next;
> +            LIST_FOR_EACH_SAFE (conn, next, exp_node, &ctb->exp_lists[j]) {
> +                if (!zone || *zone == conn->key.zone) {
> +                    ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
> +                    conn_clean(ct, conn, ctb);
> +                }
>              }
>          }
> -        ct_lock_unlock(&ct->buckets[i].lock);
> +        ct_lock_unlock(&ctb->lock);
>      }
> 
>      return 0;
> (END)
> 
> 
> 
> 
> On Fri, Mar 8, 2019 at 8:07 AM Darrell Ball <dlu998 at gmail.com> wrote:
> 
>> Thanks for confirming Solomon; can you check the following patch on your
>> side as well ?
>>
>> Darrell
>>
>> diff --git a/lib/conntrack.c b/lib/conntrack.c
>> index 5410ab4..c6b06d6 100644
>> --- a/lib/conntrack.c
>> +++ b/lib/conntrack.c
>> @@ -250,6 +250,22 @@ conn_key_cmp(const struct conn_key *key1, const
>> struct conn_key *key2)
>>      return 1;
>>  }
>>
>> +static bool
>> +conn_key_present(struct conntrack *ct, struct conntrack_bucket *ctb,
>> +                 const struct conn_key *key)
>> +    OVS_REQUIRES(ctb->lock)
>> +{
>> +    struct conn *conn;
>> +    uint32_t hash = conn_key_hash(key, ct->hash_basis);
>> +
>> +    HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
>> +        if (!conn_key_cmp(&conn->key, key)) {
>> +            return true;
>> +        }
>> +    }
>> +    return false;
>> +}
>> +
>>  static void
>>  ct_print_conn_info(const struct conn *c, const char *log_msg,
>>                     enum vlog_level vll, bool force, bool rl_on)
>> @@ -738,29 +754,55 @@ un_nat_packet(struct dp_packet *pkt, const struct
>> conn *conn,
>>      }
>>  }
>>
>> -/* Typical usage of this helper is in non per-packet code;
>> - * this is because the bucket lock needs to be held for lookup
>> - * and a hash would have already been needed. Hence, this function
>> - * is just intended for code clarity. */
>>  static struct conn *
>> -conn_lookup(struct conntrack *ct, const struct conn_key *key, long long
>> now)
>> +conn_lookup_any(struct conntrack *ct, const struct conn_key *key,
>> +                uint32_t hash)
>>  {
>> -    struct conn_lookup_ctx ctx;
>> -    ctx.conn = NULL;
>> -    ctx.key = *key;
>> -    ctx.hash = conn_key_hash(key, ct->hash_basis);
>> -    unsigned bucket = hash_to_bucket(ctx.hash);
>> -    conn_key_lookup(&ct->buckets[bucket], &ctx, now);
>> -    return ctx.conn;
>> +    unsigned bucket = hash_to_bucket(hash);
>> +    struct conntrack_bucket *ctb = &ct->buckets[bucket];
>> +    struct conn *conn = NULL;
>> +
>> +    HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
>> +        if (!conn_key_cmp(&conn->key, key)) {
>> +            break;
>> +        }
>> +        if (!conn_key_cmp(&conn->rev_key, key)) {
>> +            break;
>> +        }
>> +    }
>> +    return conn;
>> +}
>> +
>> +static struct conn *
>> +conn_lookup_unnat(struct conntrack *ct, const struct conn_key *key,
>> +                  uint32_t hash)
>> +{
>> +    unsigned bucket = hash_to_bucket(hash);
>> +    struct conntrack_bucket *ctb = &ct->buckets[bucket];
>> +    struct conn *conn = NULL;
>> +
>> +    HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
>> +        if (!conn_key_cmp(&conn->key, key)
>> +            && conn->conn_type == CT_CONN_TYPE_UN_NAT) {
>> +            break;
>> +        }
>> +    }
>> +    return conn;
>>  }
>>
>>  static void
>>  conn_seq_skew_set(struct conntrack *ct, const struct conn_key *key,
>>                    long long now, int seq_skew, bool seq_skew_dir)
>>  {
>> -    unsigned bucket = hash_to_bucket(conn_key_hash(key, ct->hash_basis));
>> +    uint32_t hash = conn_key_hash(key, ct->hash_basis);
>> +    unsigned bucket = hash_to_bucket(hash);
>>      ct_lock_lock(&ct->buckets[bucket].lock);
>> -    struct conn *conn = conn_lookup(ct, key, now);
>> +    struct conn_lookup_ctx ctx;
>> +    ctx.key = *key;
>> +    ctx.hash = hash;
>> +    conn_key_lookup(&ct->buckets[bucket], &ctx, now);
>> +    struct conn *conn = ctx.conn;
>> +
>>      if (conn && seq_skew) {
>>          conn->seq_skew = seq_skew;
>>          conn->seq_skew_dir = seq_skew_dir;
>> @@ -777,12 +819,12 @@ nat_clean(struct conntrack *ct, struct conn *conn,
>>      nat_conn_keys_remove(&ct->nat_conn_keys, &conn->rev_key,
>> ct->hash_basis);
>>      ct_rwlock_unlock(&ct->resources_lock);
>>      ct_lock_unlock(&ctb->lock);
>> +    uint32_t hash = conn_key_hash(&conn->rev_key, ct->hash_basis);
>>      unsigned bucket_rev_conn =
>> -        hash_to_bucket(conn_key_hash(&conn->rev_key, ct->hash_basis));
>> +        hash_to_bucket(conn_key_hash(&conn->rev_key, hash));
>>      ct_lock_lock(&ct->buckets[bucket_rev_conn].lock);
>>      ct_rwlock_wrlock(&ct->resources_lock);
>> -    long long now = time_msec();
>> -    struct conn *rev_conn = conn_lookup(ct, &conn->rev_key, now);
>> +    struct conn *rev_conn = conn_lookup_unnat(ct, &conn->rev_key, hash);
>>      struct nat_conn_key_node *nat_conn_key_node =
>>          nat_conn_keys_lookup(&ct->nat_conn_keys, &conn->rev_key,
>>                               ct->hash_basis);
>> @@ -812,7 +854,16 @@ conn_clean(struct conntrack *ct, struct conn *conn,
>>          expectation_clean(ct, &conn->key, ct->hash_basis);
>>      }
>>      ovs_list_remove(&conn->exp_node);
>> -    hmap_remove(&ctb->connections, &conn->node);
>> +
>> +    /* Temporary debug check. */
>> +    if (conn_key_present(ct, ctb, &conn->key)) {
>> +        hmap_remove(&ctb->connections, &conn->node);
>> +    } else {
>> +        char *log_msg = xasprintf("conn_clean: conn not present in hmap");
>> +        ct_print_conn_info(conn, log_msg, VLL_WARN, true, false);
>> +        free(log_msg);
>> +    }
>> +
>>      atomic_count_dec(&ct->n_conn);
>>      if (conn->nat_info) {
>>          nat_clean(ct, conn, ctb);
>> @@ -1005,7 +1056,7 @@ conn_update_state(struct conntrack *ct, struct
>> dp_packet *pkt,
>>
>>  static void
>>  create_un_nat_conn(struct conntrack *ct, struct conn
>> *conn_for_un_nat_copy,
>> -                   long long now, bool alg_un_nat)
>> +                   bool alg_un_nat)
>>  {
>>      struct conn *nc = xmemdup(conn_for_un_nat_copy, sizeof *nc);
>>      nc->key = conn_for_un_nat_copy->rev_key;
>> @@ -1013,7 +1064,7 @@ create_un_nat_conn(struct conntrack *ct, struct conn
>> *conn_for_un_nat_copy,
>>      uint32_t un_nat_hash = conn_key_hash(&nc->key, ct->hash_basis);
>>      unsigned un_nat_conn_bucket = hash_to_bucket(un_nat_hash);
>>      ct_lock_lock(&ct->buckets[un_nat_conn_bucket].lock);
>> -    struct conn *rev_conn = conn_lookup(ct, &nc->key, now);
>> +    struct conn *rev_conn = conn_lookup_any(ct, &nc->key, un_nat_hash);
>>
>>      if (alg_un_nat) {
>>          if (!rev_conn) {
>> @@ -1022,7 +1073,7 @@ create_un_nat_conn(struct conntrack *ct, struct conn
>> *conn_for_un_nat_copy,
>>          } else {
>>              char *log_msg = xasprintf("Unusual condition for un_nat conn "
>>                                        "create for alg: rev_conn %p",
>> rev_conn);
>> -            ct_print_conn_info(nc, log_msg, VLL_INFO, true, false);
>> +            ct_print_conn_info(nc, log_msg, VLL_WARN, true, false);
>>              free(log_msg);
>>              free(nc);
>>          }
>> @@ -1030,16 +1081,26 @@ create_un_nat_conn(struct conntrack *ct, struct
>> conn *conn_for_un_nat_copy,
>>          ct_rwlock_rdlock(&ct->resources_lock);
>>
>>          struct nat_conn_key_node *nat_conn_key_node =
>> -            nat_conn_keys_lookup(&ct->nat_conn_keys, &nc->key,
>> ct->hash_basis);
>> +           nat_conn_keys_lookup(&ct->nat_conn_keys, &nc->key,
>> ct->hash_basis);
>>          if (nat_conn_key_node && !conn_key_cmp(&nat_conn_key_node->value,
>> -            &nc->rev_key) && !rev_conn) {
>> -            hmap_insert(&ct->buckets[un_nat_conn_bucket].connections,
>> -                        &nc->node, un_nat_hash);
>> +                                               &nc->rev_key)) {
>> +            if (!rev_conn) {
>> +                hmap_insert(&ct->buckets[un_nat_conn_bucket].connections,
>> +                            &nc->node, un_nat_hash);
>> +            } else {
>> +                char *log_msg = xasprintf("NAT conflict; un-nat will not"
>> +                                          "happen; likely DOS");
>> +                ct_print_conn_info(nc, log_msg, VLL_WARN, true, false);
>> +                free(log_msg);
>> +                nat_conn_keys_remove(&ct->nat_conn_keys, &nc->key,
>> +                                     ct->hash_basis);
>> +                free(nc);
>> +            }
>>          } else {
>>              char *log_msg = xasprintf("Unusual condition for un_nat conn "
>>                                        "create: nat_conn_key_node/rev_conn
>> "
>> -                                      "%p/%p", nat_conn_key_node,
>> rev_conn);
>> -            ct_print_conn_info(nc, log_msg, VLL_INFO, true, false);
>> +                                      "%p", nat_conn_key_node);
>> +            ct_print_conn_info(nc, log_msg, VLL_WARN, true, false);
>>              free(log_msg);
>>              free(nc);
>>          }
>> @@ -1286,7 +1347,7 @@ process_one(struct conntrack *ct, struct dp_packet
>> *pkt,
>>      ct_lock_unlock(&ct->buckets[bucket].lock);
>>
>>      if (is_un_nat_conn_valid(&conn_for_un_nat_copy)) {
>> -        create_un_nat_conn(ct, &conn_for_un_nat_copy, now, !!alg_exp);
>> +        create_un_nat_conn(ct, &conn_for_un_nat_copy, !!alg_exp);
>>      }
>>
>>      handle_alg_ctl(ct, ctx, pkt, ct_alg_ctl, conn, now, !!nat_action_info,
>> @@ -1383,19 +1444,18 @@ sweep_bucket(struct conntrack *ct, struct
>> conntrack_bucket *ctb,
>>
>>      for (unsigned i = 0; i < N_CT_TM; i++) {
>>          LIST_FOR_EACH_SAFE (conn, next, exp_node, &ctb->exp_lists[i]) {
>> -            if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
>> -                if (!conn_expired(conn, now) || count >= limit) {
>> -                    min_expiration = MIN(min_expiration,
>> conn->expiration);
>> -                    if (count >= limit) {
>> -                        /* Do not check other lists. */
>> -                        COVERAGE_INC(conntrack_long_cleanup);
>> -                        return min_expiration;
>> -                    }
>> -                    break;
>> +            ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
>> +            if (!conn_expired(conn, now) || count >= limit) {
>> +                min_expiration = MIN(min_expiration, conn->expiration);
>> +                if (count >= limit) {
>> +                    /* Do not check other lists. */
>> +                    COVERAGE_INC(conntrack_long_cleanup);
>> +                    return min_expiration;
>>                  }
>> -                conn_clean(ct, conn, ctb);
>> -                count++;
>> +                break;
>>              }
>> +            conn_clean(ct, conn, ctb);
>> +            count++;
>>          }
>>      }
>>      return min_expiration;
>> @@ -2540,16 +2600,18 @@ int
>>  conntrack_flush(struct conntrack *ct, const uint16_t *zone)
>>  {
>>      for (unsigned i = 0; i < CONNTRACK_BUCKETS; i++) {
>> -        struct conn *conn, *next;
>> -
>> -        ct_lock_lock(&ct->buckets[i].lock);
>> -        HMAP_FOR_EACH_SAFE (conn, next, node,
>> &ct->buckets[i].connections) {
>> -            if ((!zone || *zone == conn->key.zone) &&
>> -                (conn->conn_type == CT_CONN_TYPE_DEFAULT)) {
>> -                conn_clean(ct, conn, &ct->buckets[i]);
>> +        struct conntrack_bucket *ctb = &ct->buckets[i];
>> +        ct_lock_lock(&ctb->lock);
>> +        for (unsigned j = 0; j < N_CT_TM; j++) {
>> +            struct conn *conn, *next;
>> +            LIST_FOR_EACH_SAFE (conn, next, exp_node, &ctb->exp_lists[j])
>> {
>> +                if (!zone || *zone == conn->key.zone) {
>> +                    ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
>> +                    conn_clean(ct, conn, ctb);
>> +                }
>>              }
>>          }
>> -        ct_lock_unlock(&ct->buckets[i].lock);
>> +        ct_lock_unlock(&ctb->lock);
>>      }
>>
>>      return 0;
>> (END)
>>
>>
>> On Thu, Mar 7, 2019 at 5:30 PM Li Wei <liwei.solomon at gmail.com> wrote:
>>
>>> hi darrell:
>>>
>>> I set nat action with
>>> actions=ct(nat(src=172.16.1.1-172.255.255.255),commit,table=40)
>>>
>>> With your patch, new double free panic happens in conntrack_flush() and
>>> sweep_bucket():
>>>
>>> ==1st panic==
>>> [Thread debugging using libthread_db enabled]
>>> Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
>>> Core was generated by `ovs-vswitchd unix:/var/run/openvswitch/db.sock
>>> -vconsole:emer -vsyslog:err -vfi'.
>>> Program terminated with signal SIGABRT, Aborted.
>>> #0  __GI_raise (sig=sig at entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
>>> 51      ../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
>>> [Current thread is 1 (Thread 0x7f92b122cb00 (LWP 2387347))]
>>> (gdb) bt
>>> #0  __GI_raise (sig=sig at entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
>>> #1  0x00007f92af62a42a in __GI_abort () at abort.c:89
>>> #2  0x00007f92af666c00 in __libc_message (do_abort=do_abort at entry=2,
>>> fmt=fmt at entry=0x7f92af75bd98 "*** Error in `%s': %s: 0x%s ***\n")
>>>     at ../sysdeps/posix/libc_fatal.c:175
>>> #3  0x00007f92af66cfc6 in malloc_printerr (action=3, str=0x7f92af75bef0
>>> "double free or corruption (fasttop)", ptr=<optimized out>,
>>> ar_ptr=<optimized out>)
>>>     at malloc.c:5049
>>> #4  0x00007f92af66d80e in _int_free (av=0x7f8bb4000020, p=0x7f8bb70ef770,
>>> have_lock=0) at malloc.c:3905
>>> #5  0x00005622735d3f90 in delete_conn (conn=conn at entry=0x7f8bb70ef670)
>>> at lib/conntrack.c:2380
>>> #6  0x00005622735d52ad in nat_clean (ctb=0x7f92b10da7f0,
>>> conn=0x7f8bb70ef670, ct=0x7f92b10d5d98) at lib/conntrack.c:816
>>> #7  conn_clean (ct=ct at entry=0x7f92b10d5d98, conn=0x7f8bb70ef670,
>>> ctb=ctb at entry=0x7f92b10da7f0) at lib/conntrack.c:842
>>> #8  0x00005622735da710 in conntrack_flush (ct=0x7f92b10d5d98, zone=0x0)
>>> at lib/conntrack.c:2574
>>> #9  0x00005622734bfb39 in ct_dpif_flush (dpif=0x5622758671d0,
>>> zone=zone at entry=0x0, tuple=tuple at entry=0x0) at lib/ct-dpif.c:140
>>> #10 0x00005622735de860 in dpctl_flush_conntrack (argc=argc at entry=1,
>>> argv=argv at entry=0x56227589cc40, dpctl_p=dpctl_p at entry=0x7fff9087fe90) at
>>> lib/dpctl.c:1388
>>> #11 0x00005622735dbc38 in dpctl_unixctl_handler (conn=0x56227589bc90,
>>> argc=1, argv=0x56227589cc40, aux=0x5622735de6d0 <dpctl_flush_conntrack>) at
>>> lib/dpctl.c:2312
>>> #12 0x000056227357d6ea in process_command (request=<optimized out>,
>>> conn=0x56227589bc90) at lib/unixctl.c:308
>>> #13 run_connection (conn=0x56227589bc90) at lib/unixctl.c:342
>>> #14 unixctl_server_run (server=0x5622757af230) at lib/unixctl.c:393
>>> #15 0x0000562273101217 in main (argc=<optimized out>, argv=<optimized
>>> out>) at vswitchd/ovs-vswitchd.c:126
>>>
>>> The debug info in /var/log/openvswitch/ovs-vswitchd.log:
>>> 70 2019-03-08T00:54:31.278Z|00227|conntrack|WARN|conn_clean: conn not
>>> present in hmap: src ip 32.248.14.183 dst ip 222.15.63.163 rev src ip
>>> 222.15.63.163 rev dst ip     172.112.98.138 src/dst ports 54957/80 rev
>>> src/dst ports 80/54957 zone/rev zone 0/0 nw_proto/rev nw_proto 6/6
>>>
>>> ==sec panic==
>>> (gdb) bt
>>> #0  __GI_raise (sig=sig at entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
>>> #1  0x00007faece4b642a in __GI_abort () at abort.c:89
>>> #2  0x00007faece4f2c00 in __libc_message (do_abort=do_abort at entry=2,
>>> fmt=fmt at entry=0x7faece5e7d98 "*** Error in `%s': %s: 0x%s ***\n")
>>>     at ../sysdeps/posix/libc_fatal.c:175
>>> #3  0x00007faece4f8fc6 in malloc_printerr (action=3, str=0x7faece5e7e60
>>> "double free or corruption (out)", ptr=<optimized out>, ar_ptr=<optimized
>>> out>)
>>>     at malloc.c:5049
>>> #4  0x00007faece4f980e in _int_free (av=0x7faece81bb00 <main_arena>,
>>> p=0x7fa11c50ee30, have_lock=0) at malloc.c:3905
>>> #5  0x0000562c63ac82ad in nat_clean (ctb=0x7faecff65cf8,
>>> conn=0x7fa11c50ee40, ct=0x7faecff61d98) at lib/conntrack.c:816
>>> #6  conn_clean (ct=ct at entry=0x7faecff61d98, conn=0x7fa11c50ee40,
>>> ctb=ctb at entry=0x7faecff65cf8) at lib/conntrack.c:842
>>> #7  0x0000562c63ac8639 in sweep_bucket (limit=3906, now=1287899232,
>>> ctb=<optimized out>, ct=0x7faecff61d98) at lib/conntrack.c:1421
>>> #8  conntrack_clean (now=1287899232, ct=0x7faecff61d98) at
>>> lib/conntrack.c:1457
>>> #9  clean_thread_main (f_=0x7faecff61d98) at lib/conntrack.c:1512
>>> #10 0x0000562c63a3f48f in ovsthread_wrapper (aux_=<optimized out>) at
>>> lib/ovs-thread.c:354
>>> #11 0x00007faecef76494 in start_thread (arg=0x7faec77fe700) at
>>> pthread_create.c:333
>>> #12 0x00007faece56aacf in clone () at
>>> ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
>>>
>>> 2019-03-08T01:15:16.929Z|00055|conntrack(ct_clean3)|WARN|conn_clean: conn
>>> not present in hmap: src ip 2.92.142.188 dst ip 222.15.63.163 rev src ip
>>> 222.15.63.163 rev dst ip 172.116.154.125 src/dst ports 23446/80 rev src/dst
>>> ports 80/23446 zone/rev zone 0/0 nw_proto/rev nw_proto 6/6
>>>
>>>
>>> Darrell Ball wrote:
>>>> Thanks for your help Solomon
>>>>
>>>> Can you give the following debug patch a spin ?
>>>> I will continue to try to repo locally.
>>>>
>>>> diff --git a/lib/conntrack.c b/lib/conntrack.c
>>>> index 5410ab4..6968c03 100644
>>>> --- a/lib/conntrack.c
>>>> +++ b/lib/conntrack.c
>>>> @@ -250,6 +250,22 @@ conn_key_cmp(const struct conn_key *key1, const
>>> struct
>>>> conn_key *key2)
>>>>      return 1;
>>>>  }
>>>>
>>>> +static bool
>>>> +conn_key_present(struct conntrack *ct, struct conntrack_bucket *ctb,
>>>> +                 const struct conn_key *key)
>>>> +    OVS_REQUIRES(ctb->lock)
>>>> +{
>>>> +    struct conn *conn;
>>>> +    uint32_t hash = conn_key_hash(key, ct->hash_basis);
>>>> +
>>>> +    HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
>>>> +        if (!conn_key_cmp(&conn->key, key)) {
>>>> +            return true;
>>>> +        }
>>>> +    }
>>>> +    return false;
>>>> +}
>>>> +
>>>>  static void
>>>>  ct_print_conn_info(const struct conn *c, const char *log_msg,
>>>>                     enum vlog_level vll, bool force, bool rl_on)
>>>> @@ -812,7 +828,14 @@ conn_clean(struct conntrack *ct, struct conn *conn,
>>>>          expectation_clean(ct, &conn->key, ct->hash_basis);
>>>>      }
>>>>      ovs_list_remove(&conn->exp_node);
>>>> -    hmap_remove(&ctb->connections, &conn->node);
>>>> +
>>>> +    if (conn_key_present(ct, ctb, &conn->key)) {
>>>> +        hmap_remove(&ctb->connections, &conn->node);
>>>> +    } else {
>>>> +        char *log_msg = xasprintf("conn_clean: conn not present in
>>> hmap");
>>>> +        ct_print_conn_info(conn, log_msg, VLL_WARN, true, false);
>>>> +        free(log_msg);
>>>> +    }
>>>>      atomic_count_dec(&ct->n_conn);
>>>>      if (conn->nat_info) {
>>>>          nat_clean(ct, conn, ctb);
>>>> @@ -1383,19 +1406,18 @@ sweep_bucket(struct conntrack *ct, struct
>>>> conntrack_bucket *ctb,
>>>>
>>>>      for (unsigned i = 0; i < N_CT_TM; i++) {
>>>>          LIST_FOR_EACH_SAFE (conn, next, exp_node, &ctb->exp_lists[i]) {
>>>> -            if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
>>>> -                if (!conn_expired(conn, now) || count >= limit) {
>>>> -                    min_expiration = MIN(min_expiration,
>>> conn->expiration);
>>>> -                    if (count >= limit) {
>>>> -                        /* Do not check other lists. */
>>>> -                        COVERAGE_INC(conntrack_long_cleanup);
>>>> -                        return min_expiration;
>>>> -                    }
>>>> -                    break;
>>>> +            ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
>>>> +            if (!conn_expired(conn, now) || count >= limit) {
>>>> +                min_expiration = MIN(min_expiration, conn->expiration);
>>>> +                if (count >= limit) {
>>>> +                    /* Do not check other lists. */
>>>> +                    COVERAGE_INC(conntrack_long_cleanup);
>>>> +                    return min_expiration;
>>>>                  }
>>>> -                conn_clean(ct, conn, ctb);
>>>> -                count++;
>>>> +                break;
>>>>              }
>>>> +            conn_clean(ct, conn, ctb);
>>>> +            count++;
>>>>          }
>>>>      }
>>>>      return min_expiration;
>>>> @@ -2540,16 +2562,18 @@ int
>>>>  conntrack_flush(struct conntrack *ct, const uint16_t *zone)
>>>>  {
>>>>      for (unsigned i = 0; i < CONNTRACK_BUCKETS; i++) {
>>>> -        struct conn *conn, *next;
>>>> -
>>>> -        ct_lock_lock(&ct->buckets[i].lock);
>>>> -        HMAP_FOR_EACH_SAFE (conn, next, node,
>>> &ct->buckets[i].connections)
>>>> {
>>>> -            if ((!zone || *zone == conn->key.zone) &&
>>>> -                (conn->conn_type == CT_CONN_TYPE_DEFAULT)) {
>>>> -                conn_clean(ct, conn, &ct->buckets[i]);
>>>> +        struct conntrack_bucket *ctb = &ct->buckets[i];
>>>> +        ct_lock_lock(&ctb->lock);
>>>> +        for (unsigned j = 0; j < N_CT_TM; j++) {
>>>> +            struct conn *conn, *next;
>>>> +            LIST_FOR_EACH_SAFE (conn, next, exp_node,
>>> &ctb->exp_lists[j]) {
>>>> +                if (!zone || *zone == conn->key.zone) {
>>>> +                    ovs_assert(conn->conn_type ==
>>> CT_CONN_TYPE_DEFAULT);
>>>> +                    conn_clean(ct, conn, ctb);
>>>> +                }
>>>>              }
>>>>          }
>>>> -        ct_lock_unlock(&ct->buckets[i].lock);
>>>> +        ct_lock_unlock(&ctb->lock);
>>>>      }
>>>>
>>>>      return 0;
>>>> (END)
>>>>
>>>>
>>>> On Wed, Mar 6, 2019 at 11:33 PM solomon <liwei.solomon at gmail.com>
>>> wrote:
>>>>
>>>>> Darrell Ball wrote:
>>>>>> +            LIST_FOR_EACH_SAFE (conn, next, exp_node,
>>>>> &ctb->exp_lists[j]) {
>>>>>> +                if (!zone || *zone == conn->key.zone) {
>>>>>> +                    ovs_assert(conn->conn_type ==
>>> CT_CONN_TYPE_DEFAULT);
>>>>>
>>>>> why do we need this assert?
>>>>> Clean the CT_CONN_TYPE_DEFAULT type in conntrack_flush(), and clean
>>>>> CT_CONN_TYPE_UN_NAT in nat_clean() like following:
>>>>> +                if ((!zone || *zone == conn->key.zone) &&
>>>>> +                    (conn->conn_type == CT_CONN_TYPE_DEFAULT)) {
>>>>> +                    //ovs_assert(conn->conn_type ==
>>> CT_CONN_TYPE_DEFAULT);
>>>>>
>>>>>
>>>>> with the above code, catch an another panic which in ct_clean thread.
>>>>> I have see the same panic without changeing the code.
>>>>> Both ct_clean and flush-conntrack, can catch the bad bucket value.
>>>>>
>>>>> #0  0x0000562ae7402553 in hmap_remove (node=0x7f871bdc4258,
>>>>> hmap=0x7f9498701c68) at ./include/openvswitch/hmap.h:287
>>>>> 287         while (*bucket != node) {
>>>>> [Current thread is 1 (Thread 0x7f948ffff700 (LWP 2085796))]
>>>>> (gdb) bt
>>>>> #0  0x0000562ae7402553 in hmap_remove (node=0x7f871bdc4258,
>>>>> hmap=0x7f9498701c68) at ./include/openvswitch/hmap.h:287
>>>>> #1  conn_clean (ct=ct at entry=0x7f9498700d98, conn=0x7f871bdc41b0,
>>>>> ctb=ctb at entry=0x7f9498701c38) at lib/conntrack.c:815
>>>>> #2  0x0000562ae7402a28 in sweep_bucket (limit=3906, now=1223168469,
>>>>> ctb=<optimized out>, ct=0x7f9498700d98) at lib/conntrack.c:1396
>>>>> #3  conntrack_clean (now=1223168469, ct=0x7f9498700d98) at
>>>>> lib/conntrack.c:1433
>>>>> #4  clean_thread_main (f_=0x7f9498700d98) at lib/conntrack.c:1488
>>>>> #5  0x0000562ae737a48f in ovsthread_wrapper (aux_=<optimized out>) at
>>>>> lib/ovs-thread.c:354
>>>>> #6  0x00007f9497715494 in start_thread (arg=0x7f948ffff700) at
>>>>> pthread_create.c:333
>>>>> #7  0x00007f9496d09acf in clone () at
>>>>> ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
>>>>> (gdb) p bucket
>>>>> $1 = (struct hmap_node **) 0x8  <====== why is bucket not a point
>>> value?
>>>>> (gdb) p *(struct hmap *) 0x7f9498701c68
>>>>> $2 = {buckets = 0x7f8609f8fc00, one = 0x0, mask = 32767, n = 31040}
>>>>> (gdb) p *(struct hmap_node *) 0x7f871bdc4258
>>>>> $3 = {hash = 203059667, next = 0x7f8707cfe6c8}
>>>>> (gdb) p 203059667&32767
>>>>> $4 = 29139
>>>>> (gdb) p &hmap->buckets[29139]
>>>>> $5 = (struct hmap_node **) 0x7f8609fc8a98
>>>>>
>>>>>
>>>>>> +                    conn_clean(ct, conn, ctb);
>>>>>> +                }
>>>>>>              }
>>>>>>          }
>>>>>> -        ct_lock_unlock(&ct->buckets[i].lock);
>>>>>> +        ct_lock_unlock(&ctb->lock);
>>>>>>      }
>>>>>>
>>>>>>      return 0;
>>>>>>
>>>>>>
>>>>>> which yields conntrack_flush(...) as
>>>>>>
>>>>>> int
>>>>>> conntrack_flush(struct conntrack *ct, const uint16_t *zone)
>>>>>> {
>>>>>>     for (unsigned i = 0; i < CONNTRACK_BUCKETS; i++) {
>>>>>>         struct conntrack_bucket *ctb = &ct->buckets[i];
>>>>>>         ct_lock_lock(&ctb->lock);
>>>>>>         for (unsigned j = 0; j < N_CT_TM; j++) {
>>>>>>             struct conn *conn, *next;
>>>>>>             LIST_FOR_EACH_SAFE (conn, next, exp_node,
>>>>> &ctb->exp_lists[j]) {
>>>>>>                 if (!zone || *zone == conn->key.zone) {
>>>>>>                     ovs_assert(conn->conn_type ==
>>> CT_CONN_TYPE_DEFAULT);
>>>>>>                     conn_clean(ct, conn, ctb);
>>>>>>                 }
>>>>>>             }
>>>>>>         }
>>>>>>         ct_lock_unlock(&ctb->lock);
>>>>>>     }
>>>>>>
>>>>>>     return 0;
>>>>>> }
>>>>>>
>>>>>> Thanks Darrell
>>>>>>
>>>>>>
>>>>>>
>>>>>> On Wed, Mar 6, 2019 at 8:06 PM solomon <liwei.solomon at gmail.com>
>>> wrote:
>>>>>>
>>>>>>>
>>>>>>>    When i test conntrack, i catch a panic of ovs.
>>>>>>>
>>>>>>> Core was generated by `ovs-vswitchd unix:/var/run/openvswitch/db.sock
>>>>>>> -vconsole:emer -vsyslog:err -vfi'.
>>>>>>> Program terminated with signal SIGSEGV, Segmentation fault.
>>>>>>> #0  0x00005605c5cd7553 in hmap_remove (node=0x7f734cde0218,
>>>>>>> hmap=0x7f8178c7fd70) at ./include/openvswitch/hmap.h:287
>>>>>>> 287         while (*bucket != node) {
>>>>>>> [Current thread is 1 (Thread 0x7f8178dccb00 (LWP 2024338))]
>>>>>>> (gdb) bt
>>>>>>> #0  0x00005605c5cd7553 in hmap_remove (node=0x7f734cde0218,
>>>>>>> hmap=0x7f8178c7fd70) at ./include/openvswitch/hmap.h:287
>>>>>>> #1  conn_clean (ct=ct at entry=0x7f8178c75d98, conn=0x7f734cde0170,
>>>>>>> ctb=ctb at entry=0x7f8178c7fd40) at lib/conntrack.c:815
>>>>>>> #2  0x00005605c5cdd66a in conntrack_flush (ct=0x7f8178c75d98,
>>> zone=0x0)
>>>>> at
>>>>>>> lib/conntrack.c:2549
>>>>>>> #3  0x00005605c5bc2b39 in ct_dpif_flush (dpif=0x5605c68a6430,
>>>>>>> zone=zone at entry=0x0, tuple=tuple at entry=0x0) at lib/ct-dpif.c:140
>>>>>>> #4  0x00005605c5ce17a0 in dpctl_flush_conntrack (argc=argc at entry=1,
>>>>>>> argv=argv at entry=0x5605c697ec30, dpctl_p=dpctl_p at entry
>>> =0x7fffee718110)
>>>>> at
>>>>>>> lib/dpctl.c:1388
>>>>>>> #5  0x00005605c5cdeb78 in dpctl_unixctl_handler (conn=0x5605c6959ca0,
>>>>>>> argc=1, argv=0x5605c697ec30, aux=0x5605c5ce1610
>>>>> <dpctl_flush_conntrack>) at
>>>>>>> lib/dpctl.c:2312
>>>>>>> #6  0x00005605c5c806ea in process_command (request=<optimized out>,
>>>>>>> conn=0x5605c6959ca0) at lib/unixctl.c:308
>>>>>>> #7  run_connection (conn=0x5605c6959ca0) at lib/unixctl.c:342
>>>>>>> #8  unixctl_server_run (server=0x5605c6868230) at lib/unixctl.c:393
>>>>>>> #9  0x00005605c5804217 in main (argc=<optimized out>, argv=<optimized
>>>>>>> out>) at vswitchd/ovs-vswitchd.c:126
>>>>>>>
>>>>>>>
>>>>>>> Environment:
>>>>>>> ovs-2.10.1
>>>>>>> dpdk-18.0.2.2
>>>>>>>
>>>>>>> How-To-Repeat:
>>>>>>> 1. configure ovs with snat aciton.
>>>>>>>
>>>>>>> ovs-ofctl  -O OpenFlow15 add-group $br_name "group_id=1, type=select,
>>>>>>> selection_method=hash
>>>>>>>
>>>>>
>>> bucket=bucket_id=1,weight:100,actions=ct(nat(src=172.16.1.1-172.255.255.255),commit,table=40)
>>>>>>> "
>>>>>>>
>>>>>>> 2. syn-ddos send tcp syn packet to generate connection tracks.
>>>>>>> 3.
>>>>>>> #   ovs-appctl dpctl/ct-get-nconns
>>>>>>> 2063993
>>>>>>> #   ovs-appctl dpctl/flush-conntrack
>>>>>>>
>>>>>>> 2019-03-07T03:52:24Z|00001|unixctl|WARN|error communicating with
>>>>>>> unix:/var/run/openvswitch/ovs-vswitchd.2024338.ctl: End of file
>>>>>>> ovs-appctl: ovs-vswitchd: transaction error (End of file)
>>>>>>>
>>>>>>>
>>>>>>> --
>>>>>>> Thanks
>>>>>>> Solomon
>>>>>>>
>>>>>>
>>>>>
>>>>> --
>>>>>
>>>>> Thanks
>>>>> Solomon
>>>>>
>>>>
>>>
>>> --
>>>
>>> Thanks
>>> Li Wei
>>>
>>
> 

-- 

Thanks
Li Wei
-------------- next part --------------
diff --git a/lib/conntrack.c b/lib/conntrack.c
index 974f985bd..a4a3d177c 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -249,6 +249,22 @@ conn_key_cmp(const struct conn_key *key1, const struct conn_key *key2)
     return 1;
 }
 
+static bool
+conn_key_present(struct conntrack *ct, struct conntrack_bucket *ctb,
+                 const struct conn_key *key)
+    OVS_REQUIRES(ctb->lock)
+{
+    struct conn *conn;
+    uint32_t hash = conn_key_hash(key, ct->hash_basis);
+
+    HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
+        if (!conn_key_cmp(&conn->key, key)) {
+            return true;
+        }
+    }
+    return false;
+}
+
 static void
 ct_print_conn_info(const struct conn *c, const char *log_msg,
                    enum vlog_level vll, bool force, bool rl_on)
@@ -738,29 +754,54 @@ un_nat_packet(struct dp_packet *pkt, const struct conn *conn,
     }
 }
 
-/* Typical usage of this helper is in non per-packet code;
- * this is because the bucket lock needs to be held for lookup
- * and a hash would have already been needed. Hence, this function
- * is just intended for code clarity. */
+/* This function is called with the bucket lock held. */
 static struct conn *
-conn_lookup(struct conntrack *ct, const struct conn_key *key, long long now)
+conn_lookup_any(const struct conn_key *key,
+                const struct conntrack_bucket *ctb, uint32_t hash)
 {
-    struct conn_lookup_ctx ctx;
-    ctx.conn = NULL;
-    ctx.key = *key;
-    ctx.hash = conn_key_hash(key, ct->hash_basis);
-    unsigned bucket = hash_to_bucket(ctx.hash);
-    conn_key_lookup(&ct->buckets[bucket], &ctx, now);
-    return ctx.conn;
+    
+    struct conn *conn = NULL;
+    HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
+        if (!conn_key_cmp(&conn->key, key)) {
+            break;
+        }
+        if (!conn_key_cmp(&conn->rev_key, key)) {
+            break;
+        }
+    }
+    return conn;
+}
+
+/* This function is called with the bucket lock held. */
+static struct conn *
+conn_lookup_unnat(const struct conn_key *key,
+                  const struct conntrack_bucket *ctb, uint32_t hash)
+{
+    struct conn *conn = NULL;
+
+    HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
+        if (!conn_key_cmp(&conn->key, key)
+            && conn->conn_type == CT_CONN_TYPE_UN_NAT) {
+            break;
+        }
+    }
+    return conn;
 }
 
 static void
 conn_seq_skew_set(struct conntrack *ct, const struct conn_key *key,
                   long long now, int seq_skew, bool seq_skew_dir)
 {
-    unsigned bucket = hash_to_bucket(conn_key_hash(key, ct->hash_basis));
+    uint32_t hash = conn_key_hash(key, ct->hash_basis);
+    unsigned bucket = hash_to_bucket(hash);
+
     ct_lock_lock(&ct->buckets[bucket].lock);
-    struct conn *conn = conn_lookup(ct, key, now);
+    struct conn_lookup_ctx ctx;
+    ctx.key = *key;
+    ctx.hash = hash;
+    conn_key_lookup(&ct->buckets[bucket], &ctx, now);
+    struct conn *conn = ctx.conn;
+
     if (conn && seq_skew) {
         conn->seq_skew = seq_skew;
         conn->seq_skew_dir = seq_skew_dir;
@@ -777,12 +818,14 @@ nat_clean(struct conntrack *ct, struct conn *conn,
     nat_conn_keys_remove(&ct->nat_conn_keys, &conn->rev_key, ct->hash_basis);
     ct_rwlock_unlock(&ct->resources_lock);
     ct_lock_unlock(&ctb->lock);
-    unsigned bucket_rev_conn =
-        hash_to_bucket(conn_key_hash(&conn->rev_key, ct->hash_basis));
+    uint32_t hash = conn_key_hash(&conn->rev_key, ct->hash_basis);
+    unsigned bucket_rev_conn = hash_to_bucket(hash);
+
     ct_lock_lock(&ct->buckets[bucket_rev_conn].lock);
     ct_rwlock_wrlock(&ct->resources_lock);
-    long long now = time_msec();
-    struct conn *rev_conn = conn_lookup(ct, &conn->rev_key, now);
+    struct conn *rev_conn = conn_lookup_unnat(&conn->rev_key,
+                                              &ct->buckets[bucket_rev_conn],
+                                              hash);
     struct nat_conn_key_node *nat_conn_key_node =
         nat_conn_keys_lookup(&ct->nat_conn_keys, &conn->rev_key,
                              ct->hash_basis);
@@ -812,7 +855,15 @@ conn_clean(struct conntrack *ct, struct conn *conn,
         expectation_clean(ct, &conn->key, ct->hash_basis);
     }
     ovs_list_remove(&conn->exp_node);
-    hmap_remove(&ctb->connections, &conn->node);
+    //hmap_remove(&ctb->connections, &conn->node);
+
+    if (conn_key_present(ct, ctb, &conn->key)) {
+        hmap_remove(&ctb->connections, &conn->node);
+    } else {
+        char *log_msg = xasprintf("conn_clean: conn not present in hmap");
+        ct_print_conn_info(conn, log_msg, VLL_WARN, true, false);
+        free(log_msg);
+    }
     atomic_count_dec(&ct->n_conn);
     if (conn->nat_info) {
         nat_clean(ct, conn, ctb);
@@ -1005,7 +1056,7 @@ conn_update_state(struct conntrack *ct, struct dp_packet *pkt,
 
 static void
 create_un_nat_conn(struct conntrack *ct, struct conn *conn_for_un_nat_copy,
-                   long long now, bool alg_un_nat)
+                   bool alg_un_nat)
 {
     struct conn *nc = xmemdup(conn_for_un_nat_copy, sizeof *nc);
     nc->key = conn_for_un_nat_copy->rev_key;
@@ -1013,7 +1064,8 @@ create_un_nat_conn(struct conntrack *ct, struct conn *conn_for_un_nat_copy,
     uint32_t un_nat_hash = conn_key_hash(&nc->key, ct->hash_basis);
     unsigned un_nat_conn_bucket = hash_to_bucket(un_nat_hash);
     ct_lock_lock(&ct->buckets[un_nat_conn_bucket].lock);
-    struct conn *rev_conn = conn_lookup(ct, &nc->key, now);
+    struct conn *rev_conn = conn_lookup_any(&nc->key, &ct->buckets[un_nat_conn_bucket],
+                                            un_nat_hash);
 
     if (alg_un_nat) {
         if (!rev_conn) {
@@ -1022,7 +1074,7 @@ create_un_nat_conn(struct conntrack *ct, struct conn *conn_for_un_nat_copy,
         } else {
             char *log_msg = xasprintf("Unusual condition for un_nat conn "
                                       "create for alg: rev_conn %p", rev_conn);
-            ct_print_conn_info(nc, log_msg, VLL_INFO, true, false);
+            ct_print_conn_info(nc, log_msg, VLL_WARN, true, false);
             free(log_msg);
             free(nc);
         }
@@ -1032,14 +1084,25 @@ create_un_nat_conn(struct conntrack *ct, struct conn *conn_for_un_nat_copy,
         struct nat_conn_key_node *nat_conn_key_node =
             nat_conn_keys_lookup(&ct->nat_conn_keys, &nc->key, ct->hash_basis);
         if (nat_conn_key_node && !conn_key_cmp(&nat_conn_key_node->value,
-            &nc->rev_key) && !rev_conn) {
-            hmap_insert(&ct->buckets[un_nat_conn_bucket].connections,
-                        &nc->node, un_nat_hash);
+            &nc->rev_key)) {
+
+            if (!rev_conn) {
+                hmap_insert(&ct->buckets[un_nat_conn_bucket].connections,
+                            &nc->node, un_nat_hash);
+            } else {
+                char *log_msg = xasprintf("NAT conflict; un-nat will not"
+                                          "happen; likely DOS");
+                ct_print_conn_info(nc, log_msg, VLL_WARN, true, false);
+                free(log_msg);
+                nat_conn_keys_remove(&ct->nat_conn_keys, &nc->key,
+                                     ct->hash_basis);
+                free(nc);
+            }
         } else {
             char *log_msg = xasprintf("Unusual condition for un_nat conn "
                                       "create: nat_conn_key_node/rev_conn "
                                       "%p/%p", nat_conn_key_node, rev_conn);
-            ct_print_conn_info(nc, log_msg, VLL_INFO, true, false);
+            ct_print_conn_info(nc, log_msg, VLL_WARN, true, false);
             free(log_msg);
             free(nc);
         }
@@ -1286,7 +1349,7 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
     ct_lock_unlock(&ct->buckets[bucket].lock);
 
     if (is_un_nat_conn_valid(&conn_for_un_nat_copy)) {
-        create_un_nat_conn(ct, &conn_for_un_nat_copy, now, !!alg_exp);
+        create_un_nat_conn(ct, &conn_for_un_nat_copy, !!alg_exp);
     }
 
     handle_alg_ctl(ct, ctx, pkt, ct_alg_ctl, conn, now, !!nat_action_info,
@@ -1383,19 +1446,19 @@ sweep_bucket(struct conntrack *ct, struct conntrack_bucket *ctb,
 
     for (unsigned i = 0; i < N_CT_TM; i++) {
         LIST_FOR_EACH_SAFE (conn, next, exp_node, &ctb->exp_lists[i]) {
-            if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
-                if (!conn_expired(conn, now) || count >= limit) {
-                    min_expiration = MIN(min_expiration, conn->expiration);
-                    if (count >= limit) {
-                        /* Do not check other lists. */
-                        COVERAGE_INC(conntrack_long_cleanup);
-                        return min_expiration;
-                    }
-                    break;
+            ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
+            if (!conn_expired(conn, now) || count >= limit) {
+                min_expiration = MIN(min_expiration, conn->expiration);
+                if (count >= limit) {
+                    /* Do not check other lists. */
+                    COVERAGE_INC(conntrack_long_cleanup);
+                    return min_expiration;
+
                 }
-                conn_clean(ct, conn, ctb);
-                count++;
-            }
+                break;
+             }
+             conn_clean(ct, conn, ctb);
+             count++;
         }
     }
     return min_expiration;
@@ -2540,16 +2603,18 @@ int
 conntrack_flush(struct conntrack *ct, const uint16_t *zone)
 {
     for (unsigned i = 0; i < CONNTRACK_BUCKETS; i++) {
-        struct conn *conn, *next;
-
-        ct_lock_lock(&ct->buckets[i].lock);
-        HMAP_FOR_EACH_SAFE (conn, next, node, &ct->buckets[i].connections) {
-            if ((!zone || *zone == conn->key.zone) &&
-                (conn->conn_type == CT_CONN_TYPE_DEFAULT)) {
-                conn_clean(ct, conn, &ct->buckets[i]);
+        struct conntrack_bucket *ctb = &ct->buckets[i];
+        ct_lock_lock(&ctb->lock);
+        for (unsigned j = 0; j < N_CT_TM; j++) {
+            struct conn *conn, *next;
+            LIST_FOR_EACH_SAFE (conn, next, exp_node, &ctb->exp_lists[j]) {
+                if (!zone || *zone == conn->key.zone) {
+                    ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
+                    conn_clean(ct, conn, ctb);
+                }
             }
         }
-        ct_lock_unlock(&ct->buckets[i].lock);
+        ct_lock_unlock(&ctb->lock);
     }
 
     return 0;


More information about the dev mailing list