[ovs-dev] [patch v3] conntrack: Add rcu support.

Darrell Ball dlu998 at gmail.com
Tue Sep 18 07:52:52 UTC 2018


Add RCU support for conntrack. At the same time, the array of
hmaps is replaced by a cmap.  Using a single map also simplifies
the handling of NAT and allows the removal of the nat_conn map.
There is still some cleanup to do and a few things to check. I'll
probably split out into several patches later, as some cosmetic
changes were made at the same time, like getting rid of the
private conntrack structure contents exported outside of conntrack;
these fields are made static to conntrack now.

Signed-off-by: Darrell Ball <dlu998 at gmail.com>
---

v2->v3: Fixed expiry list locks for ct_sweep().

        Remove extra calls to ct_lock.

        Further changes to stop exporting conntrack private members
        and methods; deprecate conntrack struct (splice out later as
        a separate patch).

        Unwrapped ct lock wrappers for easier debugging (Ben's suggestion)

v1->v2: Some synchronization changes; new expiry lists lock.
        Fix the 4 Travis builds that were failing.

 lib/conntrack-icmp.c    |   20 +-
 lib/conntrack-other.c   |   14 +-
 lib/conntrack-private.h |   80 +++-
 lib/conntrack-tcp.c     |   33 +-
 lib/conntrack.c         | 1156 ++++++++++++++++++++---------------------------
 lib/conntrack.h         |  180 +-------
 6 files changed, 594 insertions(+), 889 deletions(-)

diff --git a/lib/conntrack-icmp.c b/lib/conntrack-icmp.c
index 40fd1d8..4c7960b 100644
--- a/lib/conntrack-icmp.c
+++ b/lib/conntrack-icmp.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, 2016 Nicira, Inc.
+ * Copyright (c) 2015, 2016, 2017, 2018 Nicira, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -46,16 +46,17 @@ conn_icmp_cast(const struct conn *conn)
 }
 
 static enum ct_update_res
-icmp_conn_update(struct conn *conn_, struct conntrack_bucket *ctb,
-                 struct dp_packet *pkt OVS_UNUSED, bool reply, long long now)
+icmp_conn_update(struct conn *conn_, struct dp_packet *pkt OVS_UNUSED,
+                 bool reply, long long now)
 {
     struct conn_icmp *conn = conn_icmp_cast(conn_);
 
-    if (reply && conn->state != ICMPS_REPLY) {
+    conn->state = ICMPS_FIRST;
+    if (reply) {
         conn->state = ICMPS_REPLY;
     }
 
-    conn_update_expiration(ctb, &conn->up, icmp_timeouts[conn->state], now);
+    conn_update_expiration(&conn->up, icmp_timeouts[conn->state], now);
 
     return CT_UPDATE_VALID;
 }
@@ -79,15 +80,12 @@ icmp6_valid_new(struct dp_packet *pkt)
 }
 
 static struct conn *
-icmp_new_conn(struct conntrack_bucket *ctb, struct dp_packet *pkt OVS_UNUSED,
-               long long now)
+icmp_new_conn(struct dp_packet *pkt OVS_UNUSED, long long now)
 {
-    struct conn_icmp *conn;
-
-    conn = xzalloc(sizeof *conn);
+    struct conn_icmp *conn = xzalloc(sizeof *conn);
     conn->state = ICMPS_FIRST;
 
-    conn_init_expiration(ctb, &conn->up, icmp_timeouts[conn->state], now);
+    conn_init_expiration(&conn->up, icmp_timeouts[conn->state], now);
 
     return &conn->up;
 }
diff --git a/lib/conntrack-other.c b/lib/conntrack-other.c
index 2920889..79efb5f 100644
--- a/lib/conntrack-other.c
+++ b/lib/conntrack-other.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, 2016 Nicira, Inc.
+ * Copyright (c) 2015, 2016, 2017, 2018 Nicira, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -43,8 +43,8 @@ conn_other_cast(const struct conn *conn)
 }
 
 static enum ct_update_res
-other_conn_update(struct conn *conn_, struct conntrack_bucket *ctb,
-                  struct dp_packet *pkt OVS_UNUSED, bool reply, long long now)
+other_conn_update(struct conn *conn_, struct dp_packet *pkt OVS_UNUSED,
+                  bool reply, long long now)
 {
     struct conn_other *conn = conn_other_cast(conn_);
 
@@ -54,7 +54,7 @@ other_conn_update(struct conn *conn_, struct conntrack_bucket *ctb,
         conn->state = OTHERS_MULTIPLE;
     }
 
-    conn_update_expiration(ctb, &conn->up, other_timeouts[conn->state], now);
+    conn_update_expiration(&conn->up, other_timeouts[conn->state], now);
 
     return CT_UPDATE_VALID;
 }
@@ -66,15 +66,13 @@ other_valid_new(struct dp_packet *pkt OVS_UNUSED)
 }
 
 static struct conn *
-other_new_conn(struct conntrack_bucket *ctb, struct dp_packet *pkt OVS_UNUSED,
-               long long now)
+other_new_conn(struct dp_packet *pkt OVS_UNUSED, long long now)
 {
     struct conn_other *conn;
 
     conn = xzalloc(sizeof *conn);
     conn->state = OTHERS_FIRST;
-
-    conn_init_expiration(ctb, &conn->up, other_timeouts[conn->state], now);
+    conn_init_expiration(&conn->up, other_timeouts[conn->state], now);
 
     return &conn->up;
 }
diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
index a344801..c4a546a 100644
--- a/lib/conntrack-private.h
+++ b/lib/conntrack-private.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, 2016, 2017 Nicira, Inc.
+ * Copyright (c) 2015, 2016, 2017, 2018 Nicira, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -21,6 +21,7 @@
 #include <netinet/in.h>
 #include <netinet/ip6.h>
 
+#include "cmap.h"
 #include "conntrack.h"
 #include "ct-dpif.h"
 #include "openvswitch/hmap.h"
@@ -57,12 +58,6 @@ struct conn_key {
     uint8_t nw_proto;
 };
 
-struct nat_conn_key_node {
-    struct hmap_node node;
-    struct conn_key key;
-    struct conn_key value;
-};
-
 /* This is used for alg expectations; an expectation is a
  * context created in preparation for establishing a data
  * connection. The expectation is created by the control
@@ -87,18 +82,29 @@ struct alg_exp_node {
     bool nat_rpl_dst;
 };
 
+struct OVS_LOCKABLE ct_ce_lock {
+    struct ovs_mutex lock;
+};
+
+struct OVS_LOCKABLE ct_expiry_lock {
+    struct ovs_mutex lock;
+};
+
 struct conn {
     struct conn_key key;
     struct conn_key rev_key;
     /* Only used for orig_tuple support. */
     struct conn_key master_key;
+    struct ct_ce_lock lock;
     long long expiration;
     struct ovs_list exp_node;
     struct hmap_node node;
+    struct cmap_node cm_node;
     ovs_u128 label;
     /* XXX: consider flattening. */
     struct nat_action_info_t *nat_info;
     char *alg;
+    struct conn *nat_conn;
     int seq_skew;
     uint32_t mark;
     uint8_t conn_type;
@@ -119,39 +125,73 @@ enum ct_conn_type {
     CT_CONN_TYPE_UN_NAT,
 };
 
+extern struct ct_l4_proto ct_proto_tcp;
+extern struct ct_l4_proto ct_proto_other;
+extern struct ct_l4_proto ct_proto_icmp4;
+extern struct ct_l4_proto ct_proto_icmp6;
+
 struct ct_l4_proto {
-    struct conn *(*new_conn)(struct conntrack_bucket *, struct dp_packet *pkt,
-                             long long now);
+    struct conn *(*new_conn)(struct dp_packet *pkt, long long now);
     bool (*valid_new)(struct dp_packet *pkt);
     enum ct_update_res (*conn_update)(struct conn *conn,
-                                      struct conntrack_bucket *,
                                       struct dp_packet *pkt, bool reply,
                                       long long now);
     void (*conn_get_protoinfo)(const struct conn *,
                                struct ct_dpif_protoinfo *);
 };
 
-extern struct ct_l4_proto ct_proto_tcp;
-extern struct ct_l4_proto ct_proto_other;
-extern struct ct_l4_proto ct_proto_icmp4;
-extern struct ct_l4_proto ct_proto_icmp6;
+/* Timeouts: all the possible timeout states passed to update_expiration()
+ * are listed here. The name will be prefixed by CT_TM_ and the value is in
+ * milliseconds */
+#define CT_TIMEOUTS \
+    CT_TIMEOUT(TCP_FIRST_PACKET, 30 * 1000) \
+    CT_TIMEOUT(TCP_OPENING, 30 * 1000) \
+    CT_TIMEOUT(TCP_ESTABLISHED, 24 * 60 * 60 * 1000) \
+    CT_TIMEOUT(TCP_CLOSING, 15 * 60 * 1000) \
+    CT_TIMEOUT(TCP_FIN_WAIT, 45 * 1000) \
+    CT_TIMEOUT(TCP_CLOSED, 30 * 1000) \
+    CT_TIMEOUT(OTHER_FIRST, 60 * 1000) \
+    CT_TIMEOUT(OTHER_MULTIPLE, 60 * 1000) \
+    CT_TIMEOUT(OTHER_BIDIR, 30 * 1000) \
+    CT_TIMEOUT(ICMP_FIRST, 60 * 1000) \
+    CT_TIMEOUT(ICMP_REPLY, 30 * 1000)
+
+/* The smallest of the above values: it is used as an upper bound for the
+ * interval between two rounds of cleanup of expired entries */
+#define CT_TM_MIN (30 * 1000)
+
+#define CT_TIMEOUT(NAME, VAL) BUILD_ASSERT_DECL(VAL >= CT_TM_MIN);
+    CT_TIMEOUTS
+#undef CT_TIMEOUT
+
+enum ct_timeout {
+#define CT_TIMEOUT(NAME, VALUE) CT_TM_##NAME,
+    CT_TIMEOUTS
+#undef CT_TIMEOUT
+    N_CT_TM
+};
 
 extern long long ct_timeout_val[];
+extern struct ovs_list cm_exp_lists[N_CT_TM];
+extern struct ct_expiry_lock ex_lock;
 
 static inline void
-conn_init_expiration(struct conntrack_bucket *ctb, struct conn *conn,
-                        enum ct_timeout tm, long long now)
+conn_init_expiration(struct conn *conn, enum ct_timeout tm, long long now)
 {
     conn->expiration = now + ct_timeout_val[tm];
-    ovs_list_push_back(&ctb->exp_lists[tm], &conn->exp_node);
+    ovs_mutex_lock(&ex_lock.lock);
+    ovs_list_push_back(&cm_exp_lists[tm], &conn->exp_node);
+    ovs_mutex_unlock(&ex_lock.lock);
 }
 
 static inline void
-conn_update_expiration(struct conntrack_bucket *ctb, struct conn *conn,
-                       enum ct_timeout tm, long long now)
+conn_update_expiration(struct conn *conn, enum ct_timeout tm, long long now)
 {
+    ovs_mutex_lock(&ex_lock.lock);
     ovs_list_remove(&conn->exp_node);
-    conn_init_expiration(ctb, conn, tm, now);
+    conn->expiration = now + ct_timeout_val[tm];
+    ovs_list_push_back(&cm_exp_lists[tm], &conn->exp_node);
+    ovs_mutex_unlock(&ex_lock.lock);
 }
 
 static inline uint32_t
diff --git a/lib/conntrack-tcp.c b/lib/conntrack-tcp.c
index 86d313d..e0b4d11 100644
--- a/lib/conntrack-tcp.c
+++ b/lib/conntrack-tcp.c
@@ -145,8 +145,8 @@ tcp_get_wscale(const struct tcp_header *tcp)
 }
 
 static enum ct_update_res
-tcp_conn_update(struct conn *conn_, struct conntrack_bucket *ctb,
-                struct dp_packet *pkt, bool reply, long long now)
+tcp_conn_update(struct conn *conn_, struct dp_packet *pkt, bool reply,
+                long long now)
 {
     struct conn_tcp *conn = conn_tcp_cast(conn_);
     struct tcp_header *tcp = dp_packet_l4(pkt);
@@ -156,20 +156,23 @@ tcp_conn_update(struct conn *conn_, struct conntrack_bucket *ctb,
     struct tcp_peer *dst = &conn->peer[reply ? 0 : 1];
     uint8_t sws = 0, dws = 0;
     uint16_t tcp_flags = TCP_FLAGS(tcp->tcp_ctl);
+    enum ct_update_res rc = CT_UPDATE_VALID;
 
     uint16_t win = ntohs(tcp->tcp_winsz);
     uint32_t ack, end, seq, orig_seq;
     uint32_t p_len = tcp_payload_length(pkt);
 
     if (tcp_invalid_flags(tcp_flags)) {
-        return CT_UPDATE_INVALID;
+        rc = CT_UPDATE_INVALID;
+        goto out;
     }
 
     if (((tcp_flags & (TCP_SYN | TCP_ACK)) == TCP_SYN)
         && dst->state >= CT_DPIF_TCPS_FIN_WAIT_2
         && src->state >= CT_DPIF_TCPS_FIN_WAIT_2) {
         src->state = dst->state = CT_DPIF_TCPS_CLOSED;
-        return CT_UPDATE_NEW;
+        rc = CT_UPDATE_NEW;
+        goto out;
     }
 
     if (src->wscale & CT_WSCALE_FLAG
@@ -317,18 +320,18 @@ tcp_conn_update(struct conn *conn_, struct conntrack_bucket *ctb,
 
         if (src->state >= CT_DPIF_TCPS_FIN_WAIT_2
             && dst->state >= CT_DPIF_TCPS_FIN_WAIT_2) {
-            conn_update_expiration(ctb, &conn->up, CT_TM_TCP_CLOSED, now);
+            conn_update_expiration(&conn->up, CT_TM_TCP_CLOSED, now);
         } else if (src->state >= CT_DPIF_TCPS_CLOSING
                    && dst->state >= CT_DPIF_TCPS_CLOSING) {
-            conn_update_expiration(ctb, &conn->up, CT_TM_TCP_FIN_WAIT, now);
+            conn_update_expiration(&conn->up, CT_TM_TCP_FIN_WAIT, now);
         } else if (src->state < CT_DPIF_TCPS_ESTABLISHED
                    || dst->state < CT_DPIF_TCPS_ESTABLISHED) {
-            conn_update_expiration(ctb, &conn->up, CT_TM_TCP_OPENING, now);
+            conn_update_expiration(&conn->up, CT_TM_TCP_OPENING, now);
         } else if (src->state >= CT_DPIF_TCPS_CLOSING
                    || dst->state >= CT_DPIF_TCPS_CLOSING) {
-            conn_update_expiration(ctb, &conn->up, CT_TM_TCP_CLOSING, now);
+            conn_update_expiration(&conn->up, CT_TM_TCP_CLOSING, now);
         } else {
-            conn_update_expiration(ctb, &conn->up, CT_TM_TCP_ESTABLISHED, now);
+            conn_update_expiration(&conn->up, CT_TM_TCP_ESTABLISHED, now);
         }
     } else if ((dst->state < CT_DPIF_TCPS_SYN_SENT
                 || dst->state >= CT_DPIF_TCPS_FIN_WAIT_2
@@ -385,10 +388,12 @@ tcp_conn_update(struct conn *conn_, struct conntrack_bucket *ctb,
             src->state = dst->state = CT_DPIF_TCPS_TIME_WAIT;
         }
     } else {
-        return CT_UPDATE_INVALID;
+        rc = CT_UPDATE_INVALID;
+        goto out;
     }
 
-    return CT_UPDATE_VALID;
+out:
+    return rc;
 }
 
 static bool
@@ -412,8 +417,7 @@ tcp_valid_new(struct dp_packet *pkt)
 }
 
 static struct conn *
-tcp_new_conn(struct conntrack_bucket *ctb, struct dp_packet *pkt,
-             long long now)
+tcp_new_conn(struct dp_packet *pkt, long long now)
 {
     struct conn_tcp* newconn = NULL;
     struct tcp_header *tcp = dp_packet_l4(pkt);
@@ -449,8 +453,7 @@ tcp_new_conn(struct conntrack_bucket *ctb, struct dp_packet *pkt,
     src->state = CT_DPIF_TCPS_SYN_SENT;
     dst->state = CT_DPIF_TCPS_CLOSED;
 
-    conn_init_expiration(ctb, &newconn->up, CT_TM_TCP_FIRST_PACKET,
-                         now);
+    conn_init_expiration(&newconn->up, CT_TM_TCP_FIRST_PACKET, now);
 
     return &newconn->up;
 }
diff --git a/lib/conntrack.c b/lib/conntrack.c
index 974f985..ed9a300 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, 2016, 2017 Nicira, Inc.
+ * Copyright (c) 2015, 2016, 2017, 2018 Nicira, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -76,48 +76,67 @@ enum ct_alg_ctl_type {
     CT_ALG_CTL_SIP,
 };
 
-static bool conn_key_extract(struct conntrack *, struct dp_packet *,
-                             ovs_be16 dl_type, struct conn_lookup_ctx *,
-                             uint16_t zone);
+struct OVS_LOCKABLE ct_rwlock {
+    struct ovs_rwlock lock;
+};
+
+/* This lock is used to guard alg_expectations and
+ * alg_expectation_refs. */
+static struct ct_rwlock resources_lock;
+
+/* Hash table for alg expectations. Expectations are created
+ * by control connections to help create data connections. */
+static struct hmap alg_expectations OVS_GUARDED_BY(resources_lock);
+/* Only needed to be able to cleanup expectations from non-control
+ * connection context; otherwise a pointer to the expectation from
+ * the control connection would suffice. */
+static struct hindex alg_expectation_refs OVS_GUARDED_BY(resources_lock);
+
+struct OVS_LOCKABLE ct_lock {
+    struct ovs_mutex lock;
+};
+
+static struct ct_lock ct_lock;
+static struct cmap cm_conns OVS_GUARDED_BY(ct_lock);
+struct ct_expiry_lock ex_lock;
+struct ovs_list cm_exp_lists[N_CT_TM] OVS_GUARDED_BY(ex_lock);
+/* Salt for hashing a connection key. */
+static uint32_t hash_basis;
+/* The thread performing periodic cleanup of the connection
+ * tracker */
+static pthread_t clean_thread;
+/* Latch to destroy the 'clean_thread' */
+static struct latch clean_thread_exit;
+
+/* Number of connections currently in the connection tracker. */
+static atomic_count n_conn;
+/* Connections limit. When this limit is reached, no new connection
+ * will be accepted. */
+static atomic_uint n_conn_limit;
+
+static bool conn_key_extract(struct dp_packet *, ovs_be16 dl_type,
+                             struct conn_lookup_ctx *, uint16_t zone);
 static uint32_t conn_key_hash(const struct conn_key *, uint32_t basis);
 static void conn_key_reverse(struct conn_key *);
-static void conn_key_lookup(struct conntrack_bucket *ctb,
-                            struct conn_lookup_ctx *ctx,
-                            long long now);
 static bool valid_new(struct dp_packet *pkt, struct conn_key *);
-static struct conn *new_conn(struct conntrack_bucket *, struct dp_packet *pkt,
-                             struct conn_key *, long long now);
-static void delete_conn(struct conn *);
-static enum ct_update_res conn_update(struct conn *,
-                                      struct conntrack_bucket *ctb,
-                                      struct dp_packet *, bool reply,
+static struct conn *new_conn(struct dp_packet *pkt, struct conn_key *,
+                             long long now);
+static enum ct_update_res conn_update(struct dp_packet *pkt,
+                                      struct conn *conn,
+                                      struct conn_lookup_ctx *ctx,
                                       long long now);
+static void delete_conn(struct conn *);
+static void delete_conn_one(struct conn *conn);
 static bool conn_expired(struct conn *, long long now);
 static void set_mark(struct dp_packet *, struct conn *,
                      uint32_t val, uint32_t mask);
 static void set_label(struct dp_packet *, struct conn *,
                       const struct ovs_key_ct_labels *val,
                       const struct ovs_key_ct_labels *mask);
-static void *clean_thread_main(void *f_);
-
-static struct nat_conn_key_node *
-nat_conn_keys_lookup(struct hmap *nat_conn_keys,
-                     const struct conn_key *key,
-                     uint32_t basis);
+static void *clean_thread_main(void *);
 
 static bool
-nat_conn_keys_insert(struct hmap *nat_conn_keys,
-                     const struct conn *nat_conn,
-                     uint32_t hash_basis);
-
-static void
-nat_conn_keys_remove(struct hmap *nat_conn_keys,
-                     const struct conn_key *key,
-                     uint32_t basis);
-
-static bool
-nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
-                       struct conn *nat_conn);
+nat_select_range_tuple(const struct conn *conn, struct conn *nat_conn);
 
 static uint8_t
 reverse_icmp_type(uint8_t type);
@@ -139,8 +158,7 @@ repl_ftp_v4_addr(struct dp_packet *pkt, ovs_be32 v4_addr_rep,
                  size_t addr_offset_from_ftp_data_start);
 
 static enum ftp_ctl_pkt
-process_ftp_ctl_v4(struct conntrack *ct,
-                   struct dp_packet *pkt,
+process_ftp_ctl_v4(struct dp_packet *pkt,
                    const struct conn *conn_for_expectation,
                    ovs_be32 *v4_addr_rep,
                    char **ftp_data_v4_start,
@@ -151,8 +169,7 @@ detect_ftp_ctl_type(const struct conn_lookup_ctx *ctx,
                     struct dp_packet *pkt);
 
 static void
-expectation_clean(struct conntrack *ct, const struct conn_key *master_key,
-                  uint32_t basis);
+expectation_clean(const struct conn_key *master_key);
 
 static struct ct_l4_proto *l4_protos[] = {
     [IPPROTO_TCP] = &ct_proto_tcp,
@@ -162,21 +179,18 @@ static struct ct_l4_proto *l4_protos[] = {
 };
 
 static void
-handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
-               struct dp_packet *pkt,
+handle_ftp_ctl(const struct conn_lookup_ctx *ctx, struct dp_packet *pkt,
                const struct conn *conn_for_expectation,
                long long now, enum ftp_ctl_pkt ftp_ctl, bool nat);
 
 static void
-handle_tftp_ctl(struct conntrack *ct,
-                const struct conn_lookup_ctx *ctx OVS_UNUSED,
+handle_tftp_ctl(const struct conn_lookup_ctx *ctx OVS_UNUSED,
                 struct dp_packet *pkt,
                 const struct conn *conn_for_expectation,
                 long long now OVS_UNUSED,
                 enum ftp_ctl_pkt ftp_ctl OVS_UNUSED, bool nat OVS_UNUSED);
 
-typedef void (*alg_helper)(struct conntrack *ct,
-                           const struct conn_lookup_ctx *ctx,
+typedef void (*alg_helper)(const struct conn_lookup_ctx *ctx,
                            struct dp_packet *pkt,
                            const struct conn *conn_for_expectation,
                            long long now, enum ftp_ctl_pkt ftp_ctl,
@@ -250,6 +264,50 @@ conn_key_cmp(const struct conn_key *key1, const struct conn_key *key2)
 }
 
 static void
+conn_key_lookup(const struct conn_key *key, uint32_t hash, long long now,
+                struct conn **conn_out, bool *reply)
+{
+    struct conn *conn;
+    *conn_out = NULL;
+
+    CMAP_FOR_EACH_WITH_HASH (conn, cm_node, hash, &cm_conns) {
+        if (!conn_key_cmp(&conn->key, key) && !conn_expired(conn, now)) {
+            *conn_out = conn;
+            *reply = false;
+            break;
+        }
+        if (!conn_key_cmp(&conn->rev_key, key) && !conn_expired(conn, now)) {
+            *conn_out = conn;
+            *reply = true;
+            break;
+        }
+    }
+}
+
+static bool
+conn_available(const struct conn_key *key, uint32_t hash, long long now)
+{
+    struct conn *conn;
+    bool found = false;
+
+    CMAP_FOR_EACH_WITH_HASH (conn, cm_node, hash, &cm_conns) {
+        if (!conn_key_cmp(&conn->key, key)
+            && !conn_expired(conn, now)) {
+            found = true;
+            break;
+        }
+
+        if (!conn_key_cmp(&conn->rev_key, key)
+                && !conn_expired(conn, now)) {
+            found = true;
+            break;
+        }
+    }
+
+    return !found;
+}
+
+static void
 ct_print_conn_info(const struct conn *c, const char *log_msg,
                    enum vlog_level vll, bool force, bool rl_on)
 {
@@ -309,88 +367,131 @@ ct_print_conn_info(const struct conn *c, const char *log_msg,
 void
 conntrack_init(struct conntrack *ct)
 {
-    long long now = time_msec();
+    ovs_rwlock_init(&resources_lock.lock);
+    ovs_rwlock_wrlock(&resources_lock.lock);
+    hmap_init(&alg_expectations);
+    hindex_init(&alg_expectation_refs);
+    ovs_rwlock_unlock(&resources_lock.lock);
 
-    ct_rwlock_init(&ct->resources_lock);
-    ct_rwlock_wrlock(&ct->resources_lock);
-    hmap_init(&ct->nat_conn_keys);
-    hmap_init(&ct->alg_expectations);
-    hindex_init(&ct->alg_expectation_refs);
-    ovs_list_init(&ct->alg_exp_list);
-    ct_rwlock_unlock(&ct->resources_lock);
+    ovs_mutex_init_adaptive(&ct_lock.lock);
+    ovs_mutex_lock(&ct_lock.lock);
+    cmap_init(&cm_conns);
+    ovs_mutex_unlock(&ct_lock.lock);
 
-    for (unsigned i = 0; i < CONNTRACK_BUCKETS; i++) {
-        struct conntrack_bucket *ctb = &ct->buckets[i];
+    ovs_mutex_init_adaptive(&ex_lock.lock);
+    ovs_mutex_lock(&ex_lock.lock);
+    for (unsigned i = 0; i < ARRAY_SIZE(cm_exp_lists); i++) {
+        ovs_list_init(&cm_exp_lists[i]);
+    }
+    ovs_mutex_unlock(&ex_lock.lock);
 
-        ct_lock_init(&ctb->lock);
-        ct_lock_lock(&ctb->lock);
-        hmap_init(&ctb->connections);
-        for (unsigned j = 0; j < ARRAY_SIZE(ctb->exp_lists); j++) {
-            ovs_list_init(&ctb->exp_lists[j]);
-        }
-        ct_lock_unlock(&ctb->lock);
-        ovs_mutex_init(&ctb->cleanup_mutex);
-        ovs_mutex_lock(&ctb->cleanup_mutex);
-        ctb->next_cleanup = now + CT_TM_MIN;
-        ovs_mutex_unlock(&ctb->cleanup_mutex);
+    hash_basis = random_uint32();
+    atomic_count_init(&n_conn, 0);
+    atomic_init(&n_conn_limit, DEFAULT_N_CONN_LIMIT);
+    latch_init(&clean_thread_exit);
+    clean_thread = ovs_thread_create("ct_clean", clean_thread_main, ct);
+}
+
+/* Must be called with 'conn' of 'conn_type' CT_CONN_TYPE_DEFAULT.  Also
+ * removes the associated nat 'conn' from the lookup datastructures. */
+static void
+conn_clean(struct conn *conn)
+    OVS_NO_THREAD_SAFETY_ANALYSIS
+{
+    ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
+
+    if (conn->alg) {
+        expectation_clean(&conn->key);
+    }
+
+    uint32_t hash = conn_key_hash(&conn->key, hash_basis);
+    cmap_remove(&cm_conns, &conn->cm_node, hash);
+    ovs_mutex_lock(&ex_lock.lock);
+    ovs_list_remove(&conn->exp_node);
+    ovs_mutex_unlock(&ex_lock.lock);
+    if (conn->nat_conn) {
+        hash = conn_key_hash(&conn->nat_conn->key, hash_basis);
+        cmap_remove(&cm_conns, &conn->nat_conn->cm_node, hash);
+        ovs_mutex_lock(&ex_lock.lock);
+        ovs_list_remove(&conn->nat_conn->exp_node);
+        ovs_mutex_unlock(&ex_lock.lock);
+    }
+    ovsrcu_postpone(delete_conn, conn);
+    atomic_count_dec(&n_conn);
+}
+
+/* Similar to conn_clean(), except without the expiry list locking. */
+static void
+conn_clean_sweep(struct conn *conn)
+    OVS_NO_THREAD_SAFETY_ANALYSIS
+{
+    if (conn->alg) {
+        expectation_clean(&conn->key);
+    }
+
+    uint32_t hash = conn_key_hash(&conn->key, hash_basis);
+    cmap_remove(&cm_conns, &conn->cm_node, hash);
+    ovs_list_remove(&conn->exp_node);
+    if (conn->nat_conn) {
+        hash = conn_key_hash(&conn->nat_conn->key, hash_basis);
+        cmap_remove(&cm_conns, &conn->nat_conn->cm_node, hash);
+        ovs_list_remove(&conn->nat_conn->exp_node);
     }
-    ct->hash_basis = random_uint32();
-    atomic_count_init(&ct->n_conn, 0);
-    atomic_init(&ct->n_conn_limit, DEFAULT_N_CONN_LIMIT);
-    latch_init(&ct->clean_thread_exit);
-    ct->clean_thread = ovs_thread_create("ct_clean", clean_thread_main, ct);
+    ovsrcu_postpone(delete_conn, conn);
+    atomic_count_dec(&n_conn);
+}
+
+static void
+conn_clean_one(struct conn *conn)
+    OVS_NO_THREAD_SAFETY_ANALYSIS
+{
+    if (conn->alg) {
+        expectation_clean(&conn->key);
+    }
+
+    uint32_t hash = conn_key_hash(&conn->key, hash_basis);
+    cmap_remove(&cm_conns, &conn->cm_node, hash);
+    ovs_mutex_lock(&ex_lock.lock);
+    ovs_list_remove(&conn->exp_node);
+    ovs_mutex_unlock(&ex_lock.lock);
+    if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
+        atomic_count_dec(&n_conn);
+    }
+    ovsrcu_postpone(delete_conn_one, conn);
 }
 
 /* Destroys the connection tracker 'ct' and frees all the allocated memory. */
 void
-conntrack_destroy(struct conntrack *ct)
+conntrack_destroy(struct conntrack *ct OVS_UNUSED)
+    OVS_NO_THREAD_SAFETY_ANALYSIS
 {
-    latch_set(&ct->clean_thread_exit);
-    pthread_join(ct->clean_thread, NULL);
-    latch_destroy(&ct->clean_thread_exit);
-    for (unsigned i = 0; i < CONNTRACK_BUCKETS; i++) {
-        struct conntrack_bucket *ctb = &ct->buckets[i];
-        struct conn *conn;
+    struct conn *conn;
+    latch_set(&clean_thread_exit);
+    pthread_join(clean_thread, NULL);
+    latch_destroy(&clean_thread_exit);
 
-        ovs_mutex_destroy(&ctb->cleanup_mutex);
-        ct_lock_lock(&ctb->lock);
-        HMAP_FOR_EACH_POP (conn, node, &ctb->connections) {
-            if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
-                atomic_count_dec(&ct->n_conn);
-            }
-            delete_conn(conn);
-        }
-        hmap_destroy(&ctb->connections);
-        ct_lock_unlock(&ctb->lock);
-        ct_lock_destroy(&ctb->lock);
-    }
-    ct_rwlock_wrlock(&ct->resources_lock);
-    struct nat_conn_key_node *nat_conn_key_node;
-    HMAP_FOR_EACH_POP (nat_conn_key_node, node, &ct->nat_conn_keys) {
-        free(nat_conn_key_node);
+    ovs_mutex_lock(&ct_lock.lock);
+    CMAP_FOR_EACH (conn, cm_node, &cm_conns) {
+        conn_clean_one(conn);
     }
-    hmap_destroy(&ct->nat_conn_keys);
+    cmap_destroy(&cm_conns);
+    ovs_mutex_unlock(&ct_lock.lock);
+    ovs_mutex_destroy(&ct_lock.lock);
 
+    ovs_mutex_destroy(&ex_lock.lock);
+
+    ovs_rwlock_wrlock(&resources_lock.lock);
     struct alg_exp_node *alg_exp_node;
-    HMAP_FOR_EACH_POP (alg_exp_node, node, &ct->alg_expectations) {
+    HMAP_FOR_EACH_POP (alg_exp_node, node, &alg_expectations) {
         free(alg_exp_node);
     }
 
-    ovs_list_poison(&ct->alg_exp_list);
-    hmap_destroy(&ct->alg_expectations);
-    hindex_destroy(&ct->alg_expectation_refs);
-    ct_rwlock_unlock(&ct->resources_lock);
-    ct_rwlock_destroy(&ct->resources_lock);
+    hmap_destroy(&alg_expectations);
+    hindex_destroy(&alg_expectation_refs);
+    ovs_rwlock_unlock(&resources_lock.lock);
+    ovs_rwlock_destroy(&resources_lock.lock);
 }
 
-static unsigned hash_to_bucket(uint32_t hash)
-{
-    /* Extracts the most significant bits in hash. The least significant bits
-     * are already used internally by the hmap implementation. */
-    BUILD_ASSERT(CONNTRACK_BUCKETS_SHIFT < 32 && CONNTRACK_BUCKETS_SHIFT >= 1);
-
-    return (hash >> (32 - CONNTRACK_BUCKETS_SHIFT)) % CONNTRACK_BUCKETS;
-}
 
 static void
 write_ct_md(struct dp_packet *pkt, uint16_t zone, const struct conn *conn,
@@ -513,14 +614,14 @@ alg_src_ip_wc(enum ct_alg_ctl_type alg_ctl_type)
 }
 
 static void
-handle_alg_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
-               struct dp_packet *pkt, enum ct_alg_ctl_type ct_alg_ctl,
-               const struct conn *conn, long long now, bool nat,
+handle_alg_ctl(const struct conn_lookup_ctx *ctx, struct dp_packet *pkt,
+               enum ct_alg_ctl_type ct_alg_ctl, const struct conn *conn,
+               long long now, bool nat,
                const struct conn *conn_for_expectation)
 {
     /* ALG control packet handling with expectation creation. */
     if (OVS_UNLIKELY(alg_helpers[ct_alg_ctl] && conn && conn->alg)) {
-        alg_helpers[ct_alg_ctl](ct, ctx, pkt, conn_for_expectation, now,
+        alg_helpers[ct_alg_ctl](ctx, pkt, conn_for_expectation, now,
                                 CT_FTP_CTL_INTEREST, nat);
     }
 }
@@ -738,87 +839,19 @@ un_nat_packet(struct dp_packet *pkt, const struct conn *conn,
     }
 }
 
-/* Typical usage of this helper is in non per-packet code;
- * this is because the bucket lock needs to be held for lookup
- * and a hash would have already been needed. Hence, this function
- * is just intended for code clarity. */
-static struct conn *
-conn_lookup(struct conntrack *ct, const struct conn_key *key, long long now)
-{
-    struct conn_lookup_ctx ctx;
-    ctx.conn = NULL;
-    ctx.key = *key;
-    ctx.hash = conn_key_hash(key, ct->hash_basis);
-    unsigned bucket = hash_to_bucket(ctx.hash);
-    conn_key_lookup(&ct->buckets[bucket], &ctx, now);
-    return ctx.conn;
-}
-
 static void
-conn_seq_skew_set(struct conntrack *ct, const struct conn_key *key,
-                  long long now, int seq_skew, bool seq_skew_dir)
+conn_seq_skew_set(const struct conn_key *key, long long now, int seq_skew,
+                  bool seq_skew_dir)
 {
-    unsigned bucket = hash_to_bucket(conn_key_hash(key, ct->hash_basis));
-    ct_lock_lock(&ct->buckets[bucket].lock);
-    struct conn *conn = conn_lookup(ct, key, now);
+    struct conn *conn;
+    bool reply;
+    uint32_t hash = conn_key_hash(key, hash_basis);
+    conn_key_lookup(key, hash, now, &conn, &reply);
+
     if (conn && seq_skew) {
         conn->seq_skew = seq_skew;
         conn->seq_skew_dir = seq_skew_dir;
     }
-    ct_lock_unlock(&ct->buckets[bucket].lock);
-}
-
-static void
-nat_clean(struct conntrack *ct, struct conn *conn,
-          struct conntrack_bucket *ctb)
-    OVS_REQUIRES(ctb->lock)
-{
-    ct_rwlock_wrlock(&ct->resources_lock);
-    nat_conn_keys_remove(&ct->nat_conn_keys, &conn->rev_key, ct->hash_basis);
-    ct_rwlock_unlock(&ct->resources_lock);
-    ct_lock_unlock(&ctb->lock);
-    unsigned bucket_rev_conn =
-        hash_to_bucket(conn_key_hash(&conn->rev_key, ct->hash_basis));
-    ct_lock_lock(&ct->buckets[bucket_rev_conn].lock);
-    ct_rwlock_wrlock(&ct->resources_lock);
-    long long now = time_msec();
-    struct conn *rev_conn = conn_lookup(ct, &conn->rev_key, now);
-    struct nat_conn_key_node *nat_conn_key_node =
-        nat_conn_keys_lookup(&ct->nat_conn_keys, &conn->rev_key,
-                             ct->hash_basis);
-
-    /* In the unlikely event, rev conn was recreated, then skip
-     * rev_conn cleanup. */
-    if (rev_conn && (!nat_conn_key_node ||
-                     conn_key_cmp(&nat_conn_key_node->value,
-                                  &rev_conn->rev_key))) {
-        hmap_remove(&ct->buckets[bucket_rev_conn].connections,
-                    &rev_conn->node);
-        free(rev_conn);
-    }
-
-    delete_conn(conn);
-    ct_rwlock_unlock(&ct->resources_lock);
-    ct_lock_unlock(&ct->buckets[bucket_rev_conn].lock);
-    ct_lock_lock(&ctb->lock);
-}
-
-static void
-conn_clean(struct conntrack *ct, struct conn *conn,
-           struct conntrack_bucket *ctb)
-    OVS_REQUIRES(ctb->lock)
-{
-    if (conn->alg) {
-        expectation_clean(ct, &conn->key, ct->hash_basis);
-    }
-    ovs_list_remove(&conn->exp_node);
-    hmap_remove(&ctb->connections, &conn->node);
-    atomic_count_dec(&ct->n_conn);
-    if (conn->nat_info) {
-        nat_clean(ct, conn, ctb);
-    } else {
-        delete_conn(conn);
-    }
 }
 
 static bool
@@ -841,17 +874,15 @@ ct_verify_helper(const char *helper, enum ct_alg_ctl_type ct_alg_ctl)
     }
 }
 
-/* This function is called with the bucket lock held. */
 static struct conn *
-conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
-               struct conn_lookup_ctx *ctx, bool commit, long long now,
+conn_not_found(struct dp_packet *pkt, struct conn_lookup_ctx *ctx,
+               bool commit, long long now,
                const struct nat_action_info_t *nat_action_info,
-               struct conn *conn_for_un_nat_copy,
-               const char *helper,
-               const struct alg_exp_node *alg_exp,
+               const char *helper, const struct alg_exp_node *alg_exp,
                enum ct_alg_ctl_type ct_alg_ctl)
 {
     struct conn *nc = NULL;
+    struct conn *nat_conn = NULL;
 
     if (!valid_new(pkt, &ctx->key)) {
         pkt->md.ct_state = CS_INVALID;
@@ -865,16 +896,15 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
     }
 
     if (commit) {
-        unsigned int n_conn_limit;
-        atomic_read_relaxed(&ct->n_conn_limit, &n_conn_limit);
+        unsigned int n_conn_limit_;
+        atomic_read_relaxed(&n_conn_limit, &n_conn_limit_);
 
-        if (atomic_count_get(&ct->n_conn) >= n_conn_limit) {
+        if (atomic_count_get(&n_conn) >= n_conn_limit_) {
             COVERAGE_INC(conntrack_full);
             return nc;
         }
 
-        unsigned bucket = hash_to_bucket(ctx->hash);
-        nc = new_conn(&ct->buckets[bucket], pkt, &ctx->key, now);
+        nc = new_conn(pkt, &ctx->key, now);
         ctx->conn = nc;
         nc->rev_key = nc->key;
         conn_key_reverse(&nc->rev_key);
@@ -893,6 +923,8 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
         if (nat_action_info) {
             nc->nat_info = xmemdup(nat_action_info, sizeof *nc->nat_info);
 
+            nat_conn = xzalloc(sizeof *nat_conn);
+
             if (alg_exp) {
                 if (alg_exp->nat_rpl_dst) {
                     nc->rev_key.dst.addr = alg_exp->alg_nat_repl_addr;
@@ -901,70 +933,59 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
                     nc->rev_key.src.addr = alg_exp->alg_nat_repl_addr;
                     nc->nat_info->nat_action = NAT_ACTION_DST;
                 }
-                *conn_for_un_nat_copy = *nc;
-                ct_rwlock_wrlock(&ct->resources_lock);
-                bool new_insert = nat_conn_keys_insert(&ct->nat_conn_keys,
-                                                       conn_for_un_nat_copy,
-                                                       ct->hash_basis);
-                ct_rwlock_unlock(&ct->resources_lock);
-                if (!new_insert) {
-                    char *log_msg = xasprintf("Pre-existing alg "
-                                              "nat_conn_key");
-                    ct_print_conn_info(conn_for_un_nat_copy, log_msg, VLL_INFO,
-                                       true, false);
-                    free(log_msg);
-                }
+                *nat_conn = *nc;
             } else {
-                *conn_for_un_nat_copy = *nc;
-                ct_rwlock_wrlock(&ct->resources_lock);
-                bool nat_res = nat_select_range_tuple(ct, nc,
-                                                      conn_for_un_nat_copy);
+                *nat_conn = *nc;
+                bool nat_res = nat_select_range_tuple(nc, nat_conn);
 
                 if (!nat_res) {
                     goto nat_res_exhaustion;
                 }
 
-                /* Update nc with nat adjustments made to
-                 * conn_for_un_nat_copy by nat_select_range_tuple(). */
-                *nc = *conn_for_un_nat_copy;
-                ct_rwlock_unlock(&ct->resources_lock);
+                /* Update nc with nat adjustments. */
+                *nc = *nat_conn;
             }
-            conn_for_un_nat_copy->conn_type = CT_CONN_TYPE_UN_NAT;
-            conn_for_un_nat_copy->nat_info = NULL;
-            conn_for_un_nat_copy->alg = NULL;
             nat_packet(pkt, nc, ctx->icmp_related);
+
+            nat_conn->key = nc->rev_key;
+            nat_conn->rev_key = nc->key;
+            nat_conn->conn_type = CT_CONN_TYPE_UN_NAT;
+            nat_conn->nat_info = NULL;
+            nat_conn->alg = NULL;
+            nat_conn->nat_conn = NULL;
+            uint32_t nat_hash = conn_key_hash(&nat_conn->key,
+                                              hash_basis);
+            cmap_insert(&cm_conns, &nat_conn->cm_node, nat_hash);
         }
-        hmap_insert(&ct->buckets[bucket].connections, &nc->node, ctx->hash);
-        atomic_count_inc(&ct->n_conn);
+
+        nc->nat_conn = nat_conn;
+        ovs_mutex_init_adaptive(&nc->lock.lock);
+        nc->conn_type = CT_CONN_TYPE_DEFAULT;
+        cmap_insert(&cm_conns, &nc->cm_node, ctx->hash);
+        atomic_count_inc(&n_conn);
     }
 
     return nc;
 
-    /* This would be a user error or a DOS attack.
-     * A user error is prevented by allocating enough
-     * combinations of NAT addresses when combined with
-     * ephemeral ports.  A DOS attack should be protected
-     * against with firewall rules or a separate firewall.
-     * Also using zone partitioning can limit DoS impact. */
+    /* This would be a user error or a DoS attack.  A user error is prevented
+     * by allocating enough combinations of NAT addresses when combined with
+     * ephemeral ports.  A DoS attack should be protected against with
+     * firewall rules or a separate firewall.  Also using zone partitioning
+     * can limit DoS impact. */
 nat_res_exhaustion:
+    free(nat_conn);
     ovs_list_remove(&nc->exp_node);
     delete_conn(nc);
-    /* conn_for_un_nat_copy is a local variable in process_one; this
-     * memset() serves to document that conn_for_un_nat_copy is from
-     * this point on unused. */
-    memset(conn_for_un_nat_copy, 0, sizeof *conn_for_un_nat_copy);
-    ct_rwlock_unlock(&ct->resources_lock);
     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
     VLOG_WARN_RL(&rl, "Unable to NAT due to tuple space exhaustion - "
                  "if DoS attack, use firewalling and/or zone partitioning.");
     return NULL;
 }
 
+/* TODO(review): note looks stale — conn_update_state() below already takes
+ * 'struct conn *conn'; remove this reminder or retarget it. */
 static bool
-conn_update_state(struct conntrack *ct, struct dp_packet *pkt,
-                  struct conn_lookup_ctx *ctx, struct conn **conn,
-                  long long now, unsigned bucket)
-    OVS_REQUIRES(ct->buckets[bucket].lock)
+conn_update_state(struct dp_packet *pkt, struct conn_lookup_ctx *ctx,
+                  struct conn *conn, long long now)
 {
     bool create_new_conn = false;
 
@@ -974,12 +995,11 @@ conn_update_state(struct conntrack *ct, struct dp_packet *pkt,
             pkt->md.ct_state |= CS_REPLY_DIR;
         }
     } else {
-        if ((*conn)->alg_related) {
+        if (conn->alg_related) {
             pkt->md.ct_state |= CS_RELATED;
         }
 
-        enum ct_update_res res = conn_update(*conn, &ct->buckets[bucket],
-                                             pkt, ctx->reply, now);
+        enum ct_update_res res = conn_update(pkt, conn, ctx, now);
 
         switch (res) {
         case CT_UPDATE_VALID:
@@ -993,7 +1013,9 @@ conn_update_state(struct conntrack *ct, struct dp_packet *pkt,
             pkt->md.ct_state = CS_INVALID;
             break;
         case CT_UPDATE_NEW:
-            conn_clean(ct, *conn, &ct->buckets[bucket]);
+            ovs_mutex_lock(&ct_lock.lock);
+            conn_clean(conn);
+            ovs_mutex_unlock(&ct_lock.lock);
             create_new_conn = true;
             break;
         default:
@@ -1004,51 +1026,6 @@ conn_update_state(struct conntrack *ct, struct dp_packet *pkt,
 }
 
 static void
-create_un_nat_conn(struct conntrack *ct, struct conn *conn_for_un_nat_copy,
-                   long long now, bool alg_un_nat)
-{
-    struct conn *nc = xmemdup(conn_for_un_nat_copy, sizeof *nc);
-    nc->key = conn_for_un_nat_copy->rev_key;
-    nc->rev_key = conn_for_un_nat_copy->key;
-    uint32_t un_nat_hash = conn_key_hash(&nc->key, ct->hash_basis);
-    unsigned un_nat_conn_bucket = hash_to_bucket(un_nat_hash);
-    ct_lock_lock(&ct->buckets[un_nat_conn_bucket].lock);
-    struct conn *rev_conn = conn_lookup(ct, &nc->key, now);
-
-    if (alg_un_nat) {
-        if (!rev_conn) {
-            hmap_insert(&ct->buckets[un_nat_conn_bucket].connections,
-                        &nc->node, un_nat_hash);
-        } else {
-            char *log_msg = xasprintf("Unusual condition for un_nat conn "
-                                      "create for alg: rev_conn %p", rev_conn);
-            ct_print_conn_info(nc, log_msg, VLL_INFO, true, false);
-            free(log_msg);
-            free(nc);
-        }
-    } else {
-        ct_rwlock_rdlock(&ct->resources_lock);
-
-        struct nat_conn_key_node *nat_conn_key_node =
-            nat_conn_keys_lookup(&ct->nat_conn_keys, &nc->key, ct->hash_basis);
-        if (nat_conn_key_node && !conn_key_cmp(&nat_conn_key_node->value,
-            &nc->rev_key) && !rev_conn) {
-            hmap_insert(&ct->buckets[un_nat_conn_bucket].connections,
-                        &nc->node, un_nat_hash);
-        } else {
-            char *log_msg = xasprintf("Unusual condition for un_nat conn "
-                                      "create: nat_conn_key_node/rev_conn "
-                                      "%p/%p", nat_conn_key_node, rev_conn);
-            ct_print_conn_info(nc, log_msg, VLL_INFO, true, false);
-            free(log_msg);
-            free(nc);
-        }
-        ct_rwlock_unlock(&ct->resources_lock);
-    }
-    ct_lock_unlock(&ct->buckets[un_nat_conn_bucket].lock);
-}
-
-static void
 handle_nat(struct dp_packet *pkt, struct conn *conn,
            uint16_t zone, bool reply, bool related)
 {
@@ -1069,11 +1046,9 @@ handle_nat(struct dp_packet *pkt, struct conn *conn,
 }
 
 static bool
-check_orig_tuple(struct conntrack *ct, struct dp_packet *pkt,
-                 struct conn_lookup_ctx *ctx_in, long long now,
-                 unsigned *bucket, struct conn **conn,
+check_orig_tuple(struct dp_packet *pkt, struct conn_lookup_ctx *ctx_in,
+                 long long now, struct conn **conn,
                  const struct nat_action_info_t *nat_action_info)
-    OVS_REQUIRES(ct->buckets[*bucket].lock)
 {
     if ((ctx_in->key.dl_type == htons(ETH_TYPE_IP) &&
          !pkt->md.ct_orig_tuple.ipv4.ipv4_proto) ||
@@ -1084,82 +1059,74 @@ check_orig_tuple(struct conntrack *ct, struct dp_packet *pkt,
         return false;
     }
 
-    ct_lock_unlock(&ct->buckets[*bucket].lock);
-    struct conn_lookup_ctx ctx;
-    memset(&ctx, 0 , sizeof ctx);
-    ctx.conn = NULL;
+    struct conn_key key;
+    memset(&key, 0 , sizeof key);
 
     if (ctx_in->key.dl_type == htons(ETH_TYPE_IP)) {
-        ctx.key.src.addr.ipv4_aligned = pkt->md.ct_orig_tuple.ipv4.ipv4_src;
-        ctx.key.dst.addr.ipv4_aligned = pkt->md.ct_orig_tuple.ipv4.ipv4_dst;
+        key.src.addr.ipv4_aligned = pkt->md.ct_orig_tuple.ipv4.ipv4_src;
+        key.dst.addr.ipv4_aligned = pkt->md.ct_orig_tuple.ipv4.ipv4_dst;
 
         if (ctx_in->key.nw_proto == IPPROTO_ICMP) {
-            ctx.key.src.icmp_id = ctx_in->key.src.icmp_id;
-            ctx.key.dst.icmp_id = ctx_in->key.dst.icmp_id;
+            key.src.icmp_id = ctx_in->key.src.icmp_id;
+            key.dst.icmp_id = ctx_in->key.dst.icmp_id;
             uint16_t src_port = ntohs(pkt->md.ct_orig_tuple.ipv4.src_port);
-            ctx.key.src.icmp_type = (uint8_t) src_port;
-            ctx.key.dst.icmp_type = reverse_icmp_type(ctx.key.src.icmp_type);
+            key.src.icmp_type = (uint8_t) src_port;
+            key.dst.icmp_type = reverse_icmp_type(key.src.icmp_type);
         } else {
-            ctx.key.src.port = pkt->md.ct_orig_tuple.ipv4.src_port;
-            ctx.key.dst.port = pkt->md.ct_orig_tuple.ipv4.dst_port;
+            key.src.port = pkt->md.ct_orig_tuple.ipv4.src_port;
+            key.dst.port = pkt->md.ct_orig_tuple.ipv4.dst_port;
         }
-        ctx.key.nw_proto = pkt->md.ct_orig_tuple.ipv4.ipv4_proto;
+        key.nw_proto = pkt->md.ct_orig_tuple.ipv4.ipv4_proto;
     } else {
-        ctx.key.src.addr.ipv6_aligned = pkt->md.ct_orig_tuple.ipv6.ipv6_src;
-        ctx.key.dst.addr.ipv6_aligned = pkt->md.ct_orig_tuple.ipv6.ipv6_dst;
+        key.src.addr.ipv6_aligned = pkt->md.ct_orig_tuple.ipv6.ipv6_src;
+        key.dst.addr.ipv6_aligned = pkt->md.ct_orig_tuple.ipv6.ipv6_dst;
 
         if (ctx_in->key.nw_proto == IPPROTO_ICMPV6) {
-            ctx.key.src.icmp_id = ctx_in->key.src.icmp_id;
-            ctx.key.dst.icmp_id = ctx_in->key.dst.icmp_id;
+            key.src.icmp_id = ctx_in->key.src.icmp_id;
+            key.dst.icmp_id = ctx_in->key.dst.icmp_id;
             uint16_t src_port = ntohs(pkt->md.ct_orig_tuple.ipv6.src_port);
-            ctx.key.src.icmp_type = (uint8_t) src_port;
-            ctx.key.dst.icmp_type = reverse_icmp6_type(ctx.key.src.icmp_type);
+            key.src.icmp_type = (uint8_t) src_port;
+            key.dst.icmp_type = reverse_icmp6_type(key.src.icmp_type);
         } else {
-            ctx.key.src.port = pkt->md.ct_orig_tuple.ipv6.src_port;
-            ctx.key.dst.port = pkt->md.ct_orig_tuple.ipv6.dst_port;
+            key.src.port = pkt->md.ct_orig_tuple.ipv6.src_port;
+            key.dst.port = pkt->md.ct_orig_tuple.ipv6.dst_port;
         }
-        ctx.key.nw_proto = pkt->md.ct_orig_tuple.ipv6.ipv6_proto;
+        key.nw_proto = pkt->md.ct_orig_tuple.ipv6.ipv6_proto;
     }
 
-    ctx.key.dl_type = ctx_in->key.dl_type;
-    ctx.key.zone = pkt->md.ct_zone;
-    ctx.hash = conn_key_hash(&ctx.key, ct->hash_basis);
-    *bucket = hash_to_bucket(ctx.hash);
-    ct_lock_lock(&ct->buckets[*bucket].lock);
-    conn_key_lookup(&ct->buckets[*bucket], &ctx, now);
-    *conn = ctx.conn;
-    return *conn ? true : false;
-}
+    key.dl_type = ctx_in->key.dl_type;
+    key.zone = pkt->md.ct_zone;
+    uint32_t hash = conn_key_hash(&key, hash_basis);
+    bool reply;
+    conn_key_lookup(&key, hash, now, conn, &reply);
 
-static bool
-is_un_nat_conn_valid(const struct conn *un_nat_conn)
-{
-    return un_nat_conn->conn_type == CT_CONN_TYPE_UN_NAT;
+    return *conn ? true : false;
 }
 
 static bool
-conn_update_state_alg(struct conntrack *ct, struct dp_packet *pkt,
-                      struct conn_lookup_ctx *ctx, struct conn *conn,
+conn_update_state_alg(struct dp_packet *pkt, struct conn_lookup_ctx *ctx,
+                      struct conn *conn,
                       const struct nat_action_info_t *nat_action_info,
                       enum ct_alg_ctl_type ct_alg_ctl, long long now,
-                      unsigned bucket, bool *create_new_conn)
-    OVS_REQUIRES(ct->buckets[bucket].lock)
+                      bool *create_new_conn)
 {
     if (is_ftp_ctl(ct_alg_ctl)) {
         /* Keep sequence tracking in sync with the source of the
          * sequence skew. */
         if (ctx->reply != conn->seq_skew_dir) {
-            handle_ftp_ctl(ct, ctx, pkt, conn, now, CT_FTP_CTL_OTHER,
+            ovs_mutex_lock(&conn->lock.lock);
+            handle_ftp_ctl(ctx, pkt, conn, now, CT_FTP_CTL_OTHER,
                            !!nat_action_info);
-            *create_new_conn = conn_update_state(ct, pkt, ctx, &conn, now,
-                                                bucket);
+            ovs_mutex_unlock(&conn->lock.lock);
+            *create_new_conn = conn_update_state(pkt, ctx, conn, now);
         } else {
-            *create_new_conn = conn_update_state(ct, pkt, ctx, &conn, now,
-                                                bucket);
+            *create_new_conn = conn_update_state(pkt, ctx, conn, now);
 
             if (*create_new_conn == false) {
-                handle_ftp_ctl(ct, ctx, pkt, conn, now, CT_FTP_CTL_OTHER,
+                ovs_mutex_lock(&conn->lock.lock);
+                handle_ftp_ctl(ctx, pkt, conn, now, CT_FTP_CTL_OTHER,
                                !!nat_action_info);
+                ovs_mutex_unlock(&conn->lock.lock);
             }
         }
         return true;
@@ -1168,76 +1135,60 @@ conn_update_state_alg(struct conntrack *ct, struct dp_packet *pkt,
 }
 
 static void
-process_one(struct conntrack *ct, struct dp_packet *pkt,
-            struct conn_lookup_ctx *ctx, uint16_t zone,
-            bool force, bool commit, long long now, const uint32_t *setmark,
+process_one(struct dp_packet *pkt, struct conn_lookup_ctx *ctx, uint16_t zone,
+            bool force, bool commit, long long now,
+            const uint32_t *setmark,
             const struct ovs_key_ct_labels *setlabel,
             const struct nat_action_info_t *nat_action_info,
             ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper)
 {
-    struct conn *conn;
-    unsigned bucket = hash_to_bucket(ctx->hash);
-    ct_lock_lock(&ct->buckets[bucket].lock);
-    conn_key_lookup(&ct->buckets[bucket], ctx, now);
-    conn = ctx->conn;
+    bool create_new_conn = false;
+
+    conn_key_lookup(&ctx->key, ctx->hash, now, &ctx->conn, &ctx->reply);
+    struct conn *conn = ctx->conn;
 
     /* Delete found entry if in wrong direction. 'force' implies commit. */
     if (conn && force && ctx->reply) {
-        conn_clean(ct, conn, &ct->buckets[bucket]);
+        ovs_mutex_lock(&ct_lock.lock);
+        conn_clean(conn);
+        ovs_mutex_unlock(&ct_lock.lock);
         conn = NULL;
     }
 
     if (OVS_LIKELY(conn)) {
         if (conn->conn_type == CT_CONN_TYPE_UN_NAT) {
-
             ctx->reply = true;
+            struct conn *rev_conn = conn;  /* Save for debugging. */
+            uint32_t hash = conn_key_hash(&conn->rev_key, hash_basis);
+            conn_key_lookup(&ctx->key, hash, now, &conn, &ctx->reply);
 
-            struct conn_lookup_ctx ctx2;
-            ctx2.conn = NULL;
-            ctx2.key = conn->rev_key;
-            ctx2.hash = conn_key_hash(&conn->rev_key, ct->hash_basis);
-
-            ct_lock_unlock(&ct->buckets[bucket].lock);
-            bucket = hash_to_bucket(ctx2.hash);
-
-            ct_lock_lock(&ct->buckets[bucket].lock);
-            conn_key_lookup(&ct->buckets[bucket], &ctx2, now);
-
-            if (ctx2.conn) {
-                conn = ctx2.conn;
-            } else {
-                /* It is a race condition where conn has timed out and removed
-                 * between unlock of the rev_conn and lock of the forward conn;
-                 * nothing to do. */
+            if (!conn) {
                 pkt->md.ct_state |= CS_TRACKED | CS_INVALID;
-                ct_lock_unlock(&ct->buckets[bucket].lock);
+                char *log_msg = xasprintf("Missing master conn %p", rev_conn);
+                ct_print_conn_info(conn, log_msg, VLL_ERR, true, false);
+                free(log_msg);
                 return;
             }
         }
     }
 
-    bool create_new_conn = false;
-    struct conn conn_for_un_nat_copy;
-    conn_for_un_nat_copy.conn_type = CT_CONN_TYPE_DEFAULT;
-
     enum ct_alg_ctl_type ct_alg_ctl = get_alg_ctl_type(pkt, tp_src, tp_dst,
                                                        helper);
 
     if (OVS_LIKELY(conn)) {
-        if (OVS_LIKELY(!conn_update_state_alg(ct, pkt, ctx, conn,
+        if (OVS_LIKELY(!conn_update_state_alg(pkt, ctx, conn,
                                               nat_action_info,
-                                              ct_alg_ctl, now, bucket,
+                                              ct_alg_ctl, now,
                                               &create_new_conn))) {
-            create_new_conn = conn_update_state(ct, pkt, ctx, &conn, now,
-                                                bucket);
+
+            create_new_conn = conn_update_state(pkt, ctx, conn, now);
         }
         if (nat_action_info && !create_new_conn) {
             handle_nat(pkt, conn, zone, ctx->reply, ctx->icmp_related);
         }
 
-    } else if (check_orig_tuple(ct, pkt, ctx, now, &bucket, &conn,
-                               nat_action_info)) {
-        create_new_conn = conn_update_state(ct, pkt, ctx, &conn, now, bucket);
+    } else if (check_orig_tuple(pkt, ctx, now, &conn, nat_action_info)) {
+        create_new_conn = conn_update_state(pkt, ctx, conn, now);
     } else {
         if (ctx->icmp_related) {
             /* An icmp related conn should always be found; no new
@@ -1252,20 +1203,20 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
     struct alg_exp_node alg_exp_entry;
 
     if (OVS_UNLIKELY(create_new_conn)) {
-
-        ct_rwlock_rdlock(&ct->resources_lock);
-        alg_exp = expectation_lookup(&ct->alg_expectations, &ctx->key,
-                                     ct->hash_basis,
+        ovs_rwlock_rdlock(&resources_lock.lock);
+        alg_exp = expectation_lookup(&alg_expectations, &ctx->key,
+                                     hash_basis,
                                      alg_src_ip_wc(ct_alg_ctl));
         if (alg_exp) {
             alg_exp_entry = *alg_exp;
             alg_exp = &alg_exp_entry;
         }
-        ct_rwlock_unlock(&ct->resources_lock);
+        ovs_rwlock_unlock(&resources_lock.lock);
 
-        conn = conn_not_found(ct, pkt, ctx, commit, now, nat_action_info,
-                              &conn_for_un_nat_copy, helper, alg_exp,
-                              ct_alg_ctl);
+        ovs_mutex_lock(&ct_lock.lock);
+        conn = conn_not_found(pkt, ctx, commit, now, nat_action_info,
+                              helper, alg_exp, ct_alg_ctl);
+        ovs_mutex_unlock(&ct_lock.lock);
     }
 
     write_ct_md(pkt, zone, conn, &ctx->key, alg_exp);
@@ -1278,23 +1229,11 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
         set_label(pkt, conn, &setlabel[0], &setlabel[1]);
     }
 
-    struct conn conn_for_expectation;
-    if (OVS_UNLIKELY((ct_alg_ctl != CT_ALG_CTL_NONE) && conn)) {
-        conn_for_expectation = *conn;
-    }
-
-    ct_lock_unlock(&ct->buckets[bucket].lock);
-
-    if (is_un_nat_conn_valid(&conn_for_un_nat_copy)) {
-        create_un_nat_conn(ct, &conn_for_un_nat_copy, now, !!alg_exp);
-    }
-
-    handle_alg_ctl(ct, ctx, pkt, ct_alg_ctl, conn, now, !!nat_action_info,
-                   &conn_for_expectation);
+    handle_alg_ctl(ctx, pkt, ct_alg_ctl, conn, now, !!nat_action_info, conn);
 }
 
 /* Sends the packets in '*pkt_batch' through the connection tracker 'ct'.  All
- * the packets should have the same 'dl_type' (IPv4 or IPv6) and should have
+ * the packets must have the same 'dl_type' (IPv4 or IPv6) and should have
  * the l3 and and l4 offset properly set.
  *
  * If 'commit' is true, the packets are allowed to create new entries in the
@@ -1302,25 +1241,26 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
  * elements array containing a value and a mask to set the connection mark.
  * 'setlabel' behaves similarly for the connection label.*/
 int
-conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
-                  ovs_be16 dl_type, bool force, bool commit, uint16_t zone,
+conntrack_execute(struct conntrack *ct OVS_UNUSED,
+                  struct dp_packet_batch *pkt_batch, ovs_be16 dl_type,
+                  bool force, bool commit, uint16_t zone,
                   const uint32_t *setmark,
                   const struct ovs_key_ct_labels *setlabel,
                   ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper,
                   const struct nat_action_info_t *nat_action_info,
                   long long now)
 {
-
     struct dp_packet *packet;
     struct conn_lookup_ctx ctx;
 
     DP_PACKET_BATCH_FOR_EACH (i, packet, pkt_batch) {
-        if (!conn_key_extract(ct, packet, dl_type, &ctx, zone)) {
+        if (packet->md.ct_state == CS_INVALID
+            || !conn_key_extract(packet, dl_type, &ctx, zone)) {
             packet->md.ct_state = CS_INVALID;
             write_ct_md(packet, zone, NULL, NULL, NULL);
             continue;
         }
-        process_one(ct, packet, &ctx, zone, force, commit, now, setmark,
+        process_one(packet, &ctx, zone, force, commit, now, setmark,
                     setlabel, nat_action_info, tp_src, tp_dst, helper);
     }
 
@@ -1368,36 +1308,45 @@ set_label(struct dp_packet *pkt, struct conn *conn,
 }
 
 
-/* Delete the expired connections from 'ctb', up to 'limit'. Returns the
- * earliest expiration time among the remaining connections in 'ctb'.  Returns
- * LLONG_MAX if 'ctb' is empty.  The return value might be smaller than 'now',
- * if 'limit' is reached */
+/* Delete the expired connections, up to 'limit'. Returns the earliest
+ * expiration time among the remaining connections in all expiration lists.
+ * Returns LLONG_MAX if all expiration lists are empty.  The return value
+ * might be smaller than 'now', if 'limit' is reached */
 static long long
-sweep_bucket(struct conntrack *ct, struct conntrack_bucket *ctb,
-             long long now, size_t limit)
-    OVS_REQUIRES(ctb->lock)
+ct_sweep(long long now, size_t limit)
 {
     struct conn *conn, *next;
     long long min_expiration = LLONG_MAX;
     size_t count = 0;
 
+    ovs_mutex_lock(&ct_lock.lock);
+
     for (unsigned i = 0; i < N_CT_TM; i++) {
-        LIST_FOR_EACH_SAFE (conn, next, exp_node, &ctb->exp_lists[i]) {
+        /* TODO: LIST_FOR_EACH_SAFE may no longer be needed here; verify. */
+        ovs_mutex_lock(&ex_lock.lock);
+        LIST_FOR_EACH_SAFE (conn, next, exp_node, &cm_exp_lists[i]) {
             if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
                 if (!conn_expired(conn, now) || count >= limit) {
                     min_expiration = MIN(min_expiration, conn->expiration);
                     if (count >= limit) {
                         /* Do not check other lists. */
                         COVERAGE_INC(conntrack_long_cleanup);
-                        return min_expiration;
+                        ovs_mutex_unlock(&ex_lock.lock);
+                        goto out;
                     }
                     break;
                 }
-                conn_clean(ct, conn, ctb);
+                conn_clean_sweep(conn);
                 count++;
             }
         }
+        ovs_mutex_unlock(&ex_lock.lock);
     }
+
+out:
+    VLOG_DBG("conntrack cleanup %"PRIuSIZE" entries in %lld msec", count,
+             time_msec() - now);
+    ovs_mutex_unlock(&ct_lock.lock);
     return min_expiration;
 }
 
@@ -1406,52 +1355,17 @@ sweep_bucket(struct conntrack *ct, struct conntrack_bucket *ctb,
  * 'now', meaning that an internal limit has been reached, and some expired
  * connections have not been deleted. */
 static long long
-conntrack_clean(struct conntrack *ct, long long now)
+conntrack_clean(long long now)
 {
     long long next_wakeup = now + CT_TM_MIN;
-    unsigned int n_conn_limit;
-    size_t clean_count = 0;
+    unsigned int n_conn_limit_;
 
-    atomic_read_relaxed(&ct->n_conn_limit, &n_conn_limit);
-
-    for (unsigned i = 0; i < CONNTRACK_BUCKETS; i++) {
-        struct conntrack_bucket *ctb = &ct->buckets[i];
-        size_t prev_count;
-        long long min_exp;
-
-        ovs_mutex_lock(&ctb->cleanup_mutex);
-        if (ctb->next_cleanup > now) {
-            goto next_bucket;
-        }
-
-        ct_lock_lock(&ctb->lock);
-        prev_count = hmap_count(&ctb->connections);
-        /* If the connections are well distributed among buckets, we want to
-         * limit to 10% of the global limit equally split among buckets. If
-         * the bucket is busier than the others, we limit to 10% of its
-         * current size. */
-        min_exp = sweep_bucket(ct, ctb, now,
-                MAX(prev_count/10, n_conn_limit/(CONNTRACK_BUCKETS*10)));
-        clean_count += prev_count - hmap_count(&ctb->connections);
-
-        if (min_exp > now) {
-            /* We call hmap_shrink() only if sweep_bucket() managed to delete
-             * every expired connection. */
-            hmap_shrink(&ctb->connections);
-        }
-
-        ct_lock_unlock(&ctb->lock);
-
-        ctb->next_cleanup = MIN(min_exp, now + CT_TM_MIN);
-
-next_bucket:
-        next_wakeup = MIN(next_wakeup, ctb->next_cleanup);
-        ovs_mutex_unlock(&ctb->cleanup_mutex);
-    }
-
-    VLOG_DBG("conntrack cleanup %"PRIuSIZE" entries in %lld msec",
-             clean_count, time_msec() - now);
+    atomic_read_relaxed(&n_conn_limit, &n_conn_limit_);
 
+    long long cm_min_exp =
+        ct_sweep(now, MAX(cmap_count(&cm_conns) / 10,
+                 n_conn_limit_ / 10));
+    next_wakeup = MIN(cm_min_exp, next_wakeup);
     return next_wakeup;
 }
 
@@ -1469,30 +1383,28 @@ next_bucket:
  *   are coping with the current cleanup tasks, then we wait at least
  *   5 seconds to do further cleanup.
  *
- * - We don't want to keep the buckets locked too long, as we might prevent
+ * - We don't want to keep the map locked too long, as we might prevent
  *   traffic from flowing.  CT_CLEAN_MIN_INTERVAL ensures that if cleanup is
- *   behind, there is at least some 200ms blocks of time when buckets will be
+ *   behind, there is at least some 200ms blocks of time when the map will be
  *   left alone, so the datapath can operate unhindered.
  */
 #define CT_CLEAN_INTERVAL 5000 /* 5 seconds */
 #define CT_CLEAN_MIN_INTERVAL 200  /* 0.2 seconds */
 
 static void *
-clean_thread_main(void *f_)
+clean_thread_main(void *f OVS_UNUSED)
 {
-    struct conntrack *ct = f_;
-
-    while (!latch_is_set(&ct->clean_thread_exit)) {
+    while (!latch_is_set(&clean_thread_exit)) {
         long long next_wake;
         long long now = time_msec();
-        next_wake = conntrack_clean(ct, now);
+        next_wake = conntrack_clean(now);
 
         if (next_wake < now) {
             poll_timer_wait_until(now + CT_CLEAN_MIN_INTERVAL);
         } else {
             poll_timer_wait_until(MAX(next_wake, now + CT_CLEAN_INTERVAL));
         }
-        latch_wait(&ct->clean_thread_exit);
+        latch_wait(&clean_thread_exit);
         poll_block();
     }
 
@@ -1898,7 +1810,7 @@ extract_l4(struct conn_key *key, const void *data, size_t size, bool *related,
 }
 
 static bool
-conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type,
+conn_key_extract(struct dp_packet *pkt, ovs_be16 dl_type,
                  struct conn_lookup_ctx *ctx, uint16_t zone)
 {
     const struct eth_header *l2 = dp_packet_eth(pkt);
@@ -1972,7 +1884,7 @@ conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type,
             /* Validate the checksum only when hwol is not supported. */
             if (extract_l4(&ctx->key, l4, tail - l4, &ctx->icmp_related, l3,
                            !hwol_good_l4_csum)) {
-                ctx->hash = conn_key_hash(&ctx->key, ct->hash_basis);
+                ctx->hash = conn_key_hash(&ctx->key, hash_basis);
                 return true;
             }
         }
@@ -2113,8 +2025,7 @@ nat_range_hash(const struct conn *conn, uint32_t basis)
 }
 
 static bool
-nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
-                       struct conn *nat_conn)
+nat_select_range_tuple(const struct conn *conn, struct conn *nat_conn)
 {
     enum { MIN_NAT_EPHEMERAL_PORT = 1024,
            MAX_NAT_EPHEMERAL_PORT = 65535 };
@@ -2122,7 +2033,7 @@ nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
     uint16_t min_port;
     uint16_t max_port;
     uint16_t first_port;
-    uint32_t hash = nat_range_hash(conn, ct->hash_basis);
+    uint32_t hash = nat_range_hash(conn, hash_basis);
 
     if ((conn->nat_info->nat_action & NAT_ACTION_SRC) &&
         (!(conn->nat_info->nat_action & NAT_ACTION_SRC_PORT))) {
@@ -2172,7 +2083,9 @@ nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
 
     uint16_t port = first_port;
     bool all_ports_tried = false;
-    bool original_ports_tried = false;
+    /* For DNAT, we don't try ephemeral ports. */
+    bool ephemeral_ports_tried =
+        conn->nat_info->nat_action & NAT_ACTION_DST ? true : false;
     struct ct_addr first_addr = ct_addr;
 
     while (true) {
@@ -2191,8 +2104,10 @@ nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
             nat_conn->rev_key.src.port = htons(port);
         }
 
-        bool new_insert = nat_conn_keys_insert(&ct->nat_conn_keys, nat_conn,
-                                               ct->hash_basis);
+        uint32_t conn_hash = conn_key_hash(&nat_conn->rev_key, hash_basis);
+        bool new_insert = conn_available(&nat_conn->rev_key, conn_hash,
+                                         time_msec());
+
         if (new_insert) {
             return true;
         } else if (!all_ports_tried) {
@@ -2218,8 +2133,8 @@ nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
                 ct_addr = conn->nat_info->min_addr;
             }
             if (!memcmp(&ct_addr, &first_addr, sizeof ct_addr)) {
-                if (!original_ports_tried) {
-                    original_ports_tried = true;
+                if (!ephemeral_ports_tried) {
+                    ephemeral_ports_tried = true;
                     ct_addr = conn->nat_info->min_addr;
                     min_port = MIN_NAT_EPHEMERAL_PORT;
                     max_port = MAX_NAT_EPHEMERAL_PORT;
@@ -2235,94 +2150,6 @@ nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
     return false;
 }
 
-/* This function must be called with the ct->resources lock taken. */
-static struct nat_conn_key_node *
-nat_conn_keys_lookup(struct hmap *nat_conn_keys,
-                     const struct conn_key *key,
-                     uint32_t basis)
-{
-    struct nat_conn_key_node *nat_conn_key_node;
-
-    HMAP_FOR_EACH_WITH_HASH (nat_conn_key_node, node,
-                             conn_key_hash(key, basis), nat_conn_keys) {
-        if (!conn_key_cmp(&nat_conn_key_node->key, key)) {
-            return nat_conn_key_node;
-        }
-    }
-    return NULL;
-}
-
-/* This function must be called with the ct->resources lock taken. */
-static bool
-nat_conn_keys_insert(struct hmap *nat_conn_keys, const struct conn *nat_conn,
-                     uint32_t basis)
-{
-    struct nat_conn_key_node *nat_conn_key_node =
-        nat_conn_keys_lookup(nat_conn_keys, &nat_conn->rev_key, basis);
-
-    if (!nat_conn_key_node) {
-        struct nat_conn_key_node *nat_conn_key = xzalloc(sizeof *nat_conn_key);
-        nat_conn_key->key = nat_conn->rev_key;
-        nat_conn_key->value = nat_conn->key;
-        hmap_insert(nat_conn_keys, &nat_conn_key->node,
-                    conn_key_hash(&nat_conn_key->key, basis));
-        return true;
-    }
-    return false;
-}
-
-/* This function must be called with the ct->resources write lock taken. */
-static void
-nat_conn_keys_remove(struct hmap *nat_conn_keys,
-                     const struct conn_key *key,
-                     uint32_t basis)
-{
-    struct nat_conn_key_node *nat_conn_key_node;
-
-    HMAP_FOR_EACH_WITH_HASH (nat_conn_key_node, node,
-                             conn_key_hash(key, basis), nat_conn_keys) {
-        if (!conn_key_cmp(&nat_conn_key_node->key, key)) {
-            hmap_remove(nat_conn_keys, &nat_conn_key_node->node);
-            free(nat_conn_key_node);
-            return;
-        }
-    }
-}
-
-static void
-conn_key_lookup(struct conntrack_bucket *ctb, struct conn_lookup_ctx *ctx,
-                long long now)
-    OVS_REQUIRES(ctb->lock)
-{
-    uint32_t hash = ctx->hash;
-    struct conn *conn;
-
-    ctx->conn = NULL;
-
-    HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
-        if (!conn_key_cmp(&conn->key, &ctx->key)
-                && !conn_expired(conn, now)) {
-            ctx->conn = conn;
-            ctx->reply = false;
-            break;
-        }
-        if (!conn_key_cmp(&conn->rev_key, &ctx->key)
-                && !conn_expired(conn, now)) {
-            ctx->conn = conn;
-            ctx->reply = true;
-            break;
-        }
-    }
-}
-
-static enum ct_update_res
-conn_update(struct conn *conn, struct conntrack_bucket *ctb,
-            struct dp_packet *pkt, bool reply, long long now)
-{
-    return l4_protos[conn->key.nw_proto]->conn_update(conn, ctb, pkt,
-                                                      reply, now);
-}
-
 static bool
 conn_expired(struct conn *conn, long long now)
 {
@@ -2339,10 +2166,9 @@ valid_new(struct dp_packet *pkt, struct conn_key *key)
 }
 
 static struct conn *
-new_conn(struct conntrack_bucket *ctb, struct dp_packet *pkt,
-         struct conn_key *key, long long now)
+new_conn(struct dp_packet *pkt, struct conn_key *key, long long now)
 {
-    struct conn *newconn = l4_protos[key->nw_proto]->new_conn(ctb, pkt, now);
+    struct conn *newconn = l4_protos[key->nw_proto]->new_conn(pkt, now);
     if (newconn) {
         newconn->key = *key;
     }
@@ -2350,11 +2176,40 @@ new_conn(struct conntrack_bucket *ctb, struct dp_packet *pkt,
     return newconn;
 }
 
+static enum ct_update_res
+conn_update(struct dp_packet *pkt, struct conn *conn,
+            struct conn_lookup_ctx *ctx, long long now)
+{
+    ovs_mutex_lock(&conn->lock.lock);
+    enum ct_update_res update_res =
+        l4_protos[conn->key.nw_proto]->conn_update(conn, pkt, ctx->reply,
+                                                   now);
+    ovs_mutex_unlock(&conn->lock.lock);
+    return update_res;
+}
+
 static void
 delete_conn(struct conn *conn)
 {
-    free(conn->nat_info);
-    free(conn->alg);
+    if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
+        free(conn->nat_info);
+        free(conn->alg);
+        ovs_mutex_destroy(&conn->lock.lock);
+        if (conn->nat_conn) {
+            free(conn->nat_conn);
+        }
+        free(conn);
+    }
+}
+
+static void
+delete_conn_one(struct conn *conn)
+{
+    if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
+        free(conn->nat_info);
+        free(conn->alg);
+        ovs_mutex_destroy(&conn->lock.lock);
+    }
     free(conn);
 }
 
@@ -2487,46 +2342,30 @@ conntrack_dump_start(struct conntrack *ct, struct conntrack_dump *dump,
     }
 
     dump->ct = ct;
-    *ptot_bkts = CONNTRACK_BUCKETS;
+    *ptot_bkts = 1; /* Buckets are gone; TODO: update callers accordingly. */
     return 0;
 }
 
 int
 conntrack_dump_next(struct conntrack_dump *dump, struct ct_dpif_entry *entry)
 {
-    struct conntrack *ct = dump->ct;
     long long now = time_msec();
 
-    while (dump->bucket < CONNTRACK_BUCKETS) {
-        struct hmap_node *node;
-
-        ct_lock_lock(&ct->buckets[dump->bucket].lock);
-        for (;;) {
-            struct conn *conn;
-
-            node = hmap_at_position(&ct->buckets[dump->bucket].connections,
-                                    &dump->bucket_pos);
-            if (!node) {
-                break;
-            }
-            INIT_CONTAINER(conn, node, node);
-            if ((!dump->filter_zone || conn->key.zone == dump->zone) &&
-                 (conn->conn_type != CT_CONN_TYPE_UN_NAT)) {
-                conn_to_ct_dpif_entry(conn, entry, now, dump->bucket);
-                break;
-            }
-            /* Else continue, until we find an entry in the appropriate zone
-             * or the bucket has been scanned completely. */
+    for (;;) {
+        struct cmap_node *cm_node = cmap_next_position(&cm_conns,
+                                                       &dump->cm_pos);
+        if (!cm_node) {
+            break;
         }
-        ct_lock_unlock(&ct->buckets[dump->bucket].lock);
-
-        if (!node) {
-            memset(&dump->bucket_pos, 0, sizeof dump->bucket_pos);
-            dump->bucket++;
-        } else {
+        struct conn *conn;
+        INIT_CONTAINER(conn, cm_node, cm_node);
+        if ((!dump->filter_zone || conn->key.zone == dump->zone) &&
+             (conn->conn_type != CT_CONN_TYPE_UN_NAT)) {
+            conn_to_ct_dpif_entry(conn, entry, now, 0);
             return 0;
         }
     }
+
     return EOF;
 }
 
@@ -2537,71 +2376,72 @@ conntrack_dump_done(struct conntrack_dump *dump OVS_UNUSED)
 }
 
 int
-conntrack_flush(struct conntrack *ct, const uint16_t *zone)
+conntrack_flush(struct conntrack *ct OVS_UNUSED, const uint16_t *zone)
 {
-    for (unsigned i = 0; i < CONNTRACK_BUCKETS; i++) {
-        struct conn *conn, *next;
+    struct conn *conn;
 
-        ct_lock_lock(&ct->buckets[i].lock);
-        HMAP_FOR_EACH_SAFE (conn, next, node, &ct->buckets[i].connections) {
-            if ((!zone || *zone == conn->key.zone) &&
-                (conn->conn_type == CT_CONN_TYPE_DEFAULT)) {
-                conn_clean(ct, conn, &ct->buckets[i]);
-            }
+    ovs_mutex_lock(&ct_lock.lock);
+
+    CMAP_FOR_EACH (conn, cm_node, &cm_conns) {
+        if (!zone || *zone == conn->key.zone) {
+            conn_clean_one(conn);
         }
-        ct_lock_unlock(&ct->buckets[i].lock);
     }
 
+    ovs_mutex_unlock(&ct_lock.lock);
+
     return 0;
 }
 
 int
-conntrack_flush_tuple(struct conntrack *ct, const struct ct_dpif_tuple *tuple,
-                      uint16_t zone)
+conntrack_flush_tuple(struct conntrack *ct OVS_UNUSED,
+                      const struct ct_dpif_tuple *tuple, uint16_t zone)
 {
-    struct conn_lookup_ctx ctx;
     int error = 0;
+    struct conn_lookup_ctx ctx;
 
     memset(&ctx, 0, sizeof(ctx));
     tuple_to_conn_key(tuple, zone, &ctx.key);
-    ctx.hash = conn_key_hash(&ctx.key, ct->hash_basis);
-    unsigned bucket = hash_to_bucket(ctx.hash);
+    ctx.hash = conn_key_hash(&ctx.key, hash_basis);
+
+    ovs_mutex_lock(&ct_lock.lock);
+    conn_key_lookup(&ctx.key, ctx.hash, time_msec(), &ctx.conn, &ctx.reply);
 
-    ct_lock_lock(&ct->buckets[bucket].lock);
-    conn_key_lookup(&ct->buckets[bucket], &ctx, time_msec());
-    if (ctx.conn) {
-        conn_clean(ct, ctx.conn, &ct->buckets[bucket]);
+    if (ctx.conn && ctx.conn->conn_type == CT_CONN_TYPE_DEFAULT) {
+        conn_clean(ctx.conn);
     } else {
         error = ENOENT;
     }
-    ct_lock_unlock(&ct->buckets[bucket].lock);
+
+    ovs_mutex_unlock(&ct_lock.lock);
+
     return error;
 }
 
 int
-conntrack_set_maxconns(struct conntrack *ct, uint32_t maxconns)
+conntrack_set_maxconns(struct conntrack *ct OVS_UNUSED, uint32_t maxconns)
 {
-    atomic_store_relaxed(&ct->n_conn_limit, maxconns);
+    atomic_store_relaxed(&n_conn_limit, maxconns);
     return 0;
 }
 
 int
-conntrack_get_maxconns(struct conntrack *ct, uint32_t *maxconns)
+conntrack_get_maxconns(struct conntrack *ct OVS_UNUSED, uint32_t *maxconns)
 {
-    atomic_read_relaxed(&ct->n_conn_limit, maxconns);
+    atomic_read_relaxed(&n_conn_limit, maxconns);
     return 0;
 }
 
 int
-conntrack_get_nconns(struct conntrack *ct, uint32_t *nconns)
+conntrack_get_nconns(struct conntrack *ct OVS_UNUSED, uint32_t *nconns)
 {
-    *nconns = atomic_count_get(&ct->n_conn);
+    *nconns = atomic_count_get(&n_conn);
     return 0;
 }
 
 /* This function must be called with the ct->resources read lock taken. */
 static struct alg_exp_node *
-expectation_lookup(struct hmap *alg_expectations, const struct conn_key *key,
+expectation_lookup(struct hmap *alg_expectations_, const struct conn_key *key,
                    uint32_t basis, bool src_ip_wc)
 {
     struct conn_key check_key = *key;
@@ -2615,7 +2455,7 @@ expectation_lookup(struct hmap *alg_expectations, const struct conn_key *key,
 
     HMAP_FOR_EACH_WITH_HASH (alg_exp_node, node,
                              conn_key_hash(&check_key, basis),
-                             alg_expectations) {
+                             alg_expectations_) {
         if (!conn_key_cmp(&alg_exp_node->key, &check_key)) {
             return alg_exp_node;
         }
@@ -2625,15 +2465,15 @@ expectation_lookup(struct hmap *alg_expectations, const struct conn_key *key,
 
 /* This function must be called with the ct->resources write lock taken. */
 static void
-expectation_remove(struct hmap *alg_expectations,
+expectation_remove(struct hmap *alg_expectations_,
                    const struct conn_key *key, uint32_t basis)
 {
     struct alg_exp_node *alg_exp_node;
 
     HMAP_FOR_EACH_WITH_HASH (alg_exp_node, node, conn_key_hash(key, basis),
-                             alg_expectations) {
+                             alg_expectations_) {
         if (!conn_key_cmp(&alg_exp_node->key, key)) {
-            hmap_remove(alg_expectations, &alg_exp_node->node);
+            hmap_remove(alg_expectations_, &alg_exp_node->node);
             break;
         }
     }
@@ -2641,7 +2481,7 @@ expectation_remove(struct hmap *alg_expectations,
 
 /* This function must be called with the ct->resources read lock taken. */
 static struct alg_exp_node *
-expectation_ref_lookup_unique(const struct hindex *alg_expectation_refs,
+expectation_ref_lookup_unique(const struct hindex *alg_expectation_refs_,
                               const struct conn_key *master_key,
                               const struct conn_key *alg_exp_key,
                               uint32_t basis)
@@ -2650,7 +2490,7 @@ expectation_ref_lookup_unique(const struct hindex *alg_expectation_refs,
 
     HINDEX_FOR_EACH_WITH_HASH (alg_exp_node, node_ref,
                                conn_key_hash(master_key, basis),
-                               alg_expectation_refs) {
+                               alg_expectation_refs_) {
         if (!conn_key_cmp(&alg_exp_node->master_key, master_key) &&
             !conn_key_cmp(&alg_exp_node->key, alg_exp_key)) {
             return alg_exp_node;
@@ -2661,42 +2501,40 @@ expectation_ref_lookup_unique(const struct hindex *alg_expectation_refs,
 
 /* This function must be called with the ct->resources write lock taken. */
 static void
-expectation_ref_create(struct hindex *alg_expectation_refs,
+expectation_ref_create(struct hindex *alg_expectation_refs_,
                        struct alg_exp_node *alg_exp_node,
                        uint32_t basis)
 {
-    if (!expectation_ref_lookup_unique(alg_expectation_refs,
+    if (!expectation_ref_lookup_unique(alg_expectation_refs_,
                                        &alg_exp_node->master_key,
                                        &alg_exp_node->key, basis)) {
-        hindex_insert(alg_expectation_refs, &alg_exp_node->node_ref,
+        hindex_insert(alg_expectation_refs_, &alg_exp_node->node_ref,
                       conn_key_hash(&alg_exp_node->master_key, basis));
     }
 }
 
 static void
-expectation_clean(struct conntrack *ct, const struct conn_key *master_key,
-                  uint32_t basis)
+expectation_clean(const struct conn_key *master_key)
 {
-    ct_rwlock_wrlock(&ct->resources_lock);
+    ovs_rwlock_wrlock(&resources_lock.lock);
 
     struct alg_exp_node *node, *next;
     HINDEX_FOR_EACH_WITH_HASH_SAFE (node, next, node_ref,
-                                    conn_key_hash(master_key, basis),
-                                    &ct->alg_expectation_refs) {
+                                    conn_key_hash(master_key, hash_basis),
+                                    &alg_expectation_refs) {
         if (!conn_key_cmp(&node->master_key, master_key)) {
-            expectation_remove(&ct->alg_expectations, &node->key, basis);
-            hindex_remove(&ct->alg_expectation_refs, &node->node_ref);
+            expectation_remove(&alg_expectations, &node->key, hash_basis);
+            hindex_remove(&alg_expectation_refs, &node->node_ref);
             free(node);
         }
     }
 
-    ct_rwlock_unlock(&ct->resources_lock);
+    ovs_rwlock_unlock(&resources_lock.lock);
 }
 
 static void
-expectation_create(struct conntrack *ct, ovs_be16 dst_port,
-                   const struct conn *master_conn, bool reply, bool src_ip_wc,
-                   bool skip_nat)
+expectation_create(ovs_be16 dst_port, const struct conn *master_conn,
+                   bool reply, bool src_ip_wc, bool skip_nat)
 {
     struct ct_addr src_addr;
     struct ct_addr dst_addr;
@@ -2739,21 +2577,21 @@ expectation_create(struct conntrack *ct, ovs_be16 dst_port,
     /* Take the write lock here because it is almost 100%
      * likely that the lookup will fail and
      * expectation_create() will be called below. */
-    ct_rwlock_wrlock(&ct->resources_lock);
+    ovs_rwlock_wrlock(&resources_lock.lock);
     struct alg_exp_node *alg_exp = expectation_lookup(
-        &ct->alg_expectations, &alg_exp_node->key, ct->hash_basis, src_ip_wc);
+        &alg_expectations, &alg_exp_node->key, hash_basis, src_ip_wc);
     if (alg_exp) {
         free(alg_exp_node);
-        ct_rwlock_unlock(&ct->resources_lock);
+        ovs_rwlock_unlock(&resources_lock.lock);
         return;
     }
 
     alg_exp_node->alg_nat_repl_addr = alg_nat_repl_addr;
-    hmap_insert(&ct->alg_expectations, &alg_exp_node->node,
-                conn_key_hash(&alg_exp_node->key, ct->hash_basis));
-    expectation_ref_create(&ct->alg_expectation_refs, alg_exp_node,
-                           ct->hash_basis);
-    ct_rwlock_unlock(&ct->resources_lock);
+    hmap_insert(&alg_expectations, &alg_exp_node->node,
+                conn_key_hash(&alg_exp_node->key, hash_basis));
+    expectation_ref_create(&alg_expectation_refs, alg_exp_node,
+                           hash_basis);
+    ovs_rwlock_unlock(&resources_lock.lock);
 }
 
 static uint8_t
@@ -2881,8 +2719,7 @@ detect_ftp_ctl_type(const struct conn_lookup_ctx *ctx,
 }
 
 static enum ftp_ctl_pkt
-process_ftp_ctl_v4(struct conntrack *ct,
-                   struct dp_packet *pkt,
+process_ftp_ctl_v4(struct dp_packet *pkt,
                    const struct conn *conn_for_expectation,
                    ovs_be32 *v4_addr_rep,
                    char **ftp_data_v4_start,
@@ -3011,7 +2848,7 @@ process_ftp_ctl_v4(struct conntrack *ct,
         return CT_FTP_CTL_INVALID;
     }
 
-    expectation_create(ct, port, conn_for_expectation,
+    expectation_create(port, conn_for_expectation,
                        !!(pkt->md.ct_state & CS_REPLY_DIR), false, false);
     return CT_FTP_CTL_INTEREST;
 }
@@ -3026,8 +2863,7 @@ skip_ipv6_digits(char *str)
 }
 
 static enum ftp_ctl_pkt
-process_ftp_ctl_v6(struct conntrack *ct,
-                   struct dp_packet *pkt,
+process_ftp_ctl_v6(struct dp_packet *pkt,
                    const struct conn *conn_for_expectation,
                    struct ct_addr *v6_addr_rep,
                    char **ftp_data_start,
@@ -3114,7 +2950,7 @@ process_ftp_ctl_v6(struct conntrack *ct,
         OVS_NOT_REACHED();
     }
 
-    expectation_create(ct, port, conn_for_expectation,
+    expectation_create(port, conn_for_expectation,
                        !!(pkt->md.ct_state & CS_REPLY_DIR), false, false);
     return CT_FTP_CTL_INTEREST;
 }
@@ -3162,8 +2998,7 @@ repl_ftp_v6_addr(struct dp_packet *pkt, struct ct_addr v6_addr_rep,
 }
 
 static void
-handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
-               struct dp_packet *pkt,
+handle_ftp_ctl(const struct conn_lookup_ctx *ctx, struct dp_packet *pkt,
                const struct conn *conn_for_expectation,
                long long now, enum ftp_ctl_pkt ftp_ctl, bool nat)
 {
@@ -3192,12 +3027,12 @@ handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
     } else if (ftp_ctl == CT_FTP_CTL_INTEREST) {
         enum ftp_ctl_pkt rc;
         if (ctx->key.dl_type == htons(ETH_TYPE_IPV6)) {
-            rc = process_ftp_ctl_v6(ct, pkt, conn_for_expectation,
+            rc = process_ftp_ctl_v6(pkt, conn_for_expectation,
                                     &v6_addr_rep, &ftp_data_start,
                                     &addr_offset_from_ftp_data_start,
                                     &addr_size, &mode);
         } else {
-            rc = process_ftp_ctl_v4(ct, pkt, conn_for_expectation,
+            rc = process_ftp_ctl_v4(pkt, conn_for_expectation,
                                     &v4_addr_rep, &ftp_data_start,
                                     &addr_offset_from_ftp_data_start);
         }
@@ -3217,7 +3052,7 @@ handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
                     ip_len = ntohs(nh6->ip6_ctlun.ip6_un1.ip6_un1_plen);
                     ip_len += seq_skew;
                     nh6->ip6_ctlun.ip6_un1.ip6_un1_plen = htons(ip_len);
-                    conn_seq_skew_set(ct, &conn_for_expectation->key, now,
+                    conn_seq_skew_set(&conn_for_expectation->key, now,
                                       seq_skew, ctx->reply);
                 }
             } else {
@@ -3229,7 +3064,7 @@ handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
                     l3_hdr->ip_csum = recalc_csum16(l3_hdr->ip_csum,
                                           l3_hdr->ip_tot_len, htons(ip_len));
                     l3_hdr->ip_tot_len = htons(ip_len);
-                    conn_seq_skew_set(ct, &conn_for_expectation->key, now,
+                    conn_seq_skew_set(&conn_for_expectation->key, now,
                                       seq_skew, ctx->reply);
                 }
             }
@@ -3286,14 +3121,13 @@ handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
 }
 
 static void
-handle_tftp_ctl(struct conntrack *ct,
-                const struct conn_lookup_ctx *ctx OVS_UNUSED,
+handle_tftp_ctl(const struct conn_lookup_ctx *ctx OVS_UNUSED,
                 struct dp_packet *pkt,
                 const struct conn *conn_for_expectation,
                 long long now OVS_UNUSED,
                 enum ftp_ctl_pkt ftp_ctl OVS_UNUSED, bool nat OVS_UNUSED)
 {
-    expectation_create(ct, conn_for_expectation->key.src.port,
+    expectation_create(conn_for_expectation->key.src.port,
                        conn_for_expectation,
                        !!(pkt->md.ct_state & CS_REPLY_DIR), false, false);
 }
diff --git a/lib/conntrack.h b/lib/conntrack.h
index e3a5dcc..a9cc2eb 100644
--- a/lib/conntrack.h
+++ b/lib/conntrack.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, 2016, 2017 Nicira, Inc.
+ * Copyright (c) 2015, 2016, 2017, 2018 Nicira, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -19,6 +19,7 @@
 
 #include <stdbool.h>
 
+#include "cmap.h"
 #include "latch.h"
 #include "odp-netlink.h"
 #include "openvswitch/hmap.h"
@@ -44,12 +45,6 @@
  *
  *     conntrack_init(&ct);
  *
- * It is necessary to periodically issue a call to
- *
- *     conntrack_run(&ct);
- *
- * to allow the module to clean up expired connections.
- *
  * To send a group of packets through the connection tracker:
  *
  *     conntrack_execute(&ct, pkts, n_pkts, ...);
@@ -104,6 +99,7 @@ struct conntrack_dump {
     struct conntrack *ct;
     unsigned bucket;
     struct hmap_position bucket_pos;
+    struct cmap_position cm_pos;
     bool filter_zone;
     uint16_t zone;
 };
@@ -123,176 +119,12 @@ int conntrack_set_maxconns(struct conntrack *ct, uint32_t maxconns);
 int conntrack_get_maxconns(struct conntrack *ct, uint32_t *maxconns);
 int conntrack_get_nconns(struct conntrack *ct, uint32_t *nconns);
 
-/* 'struct ct_lock' is a wrapper for an adaptive mutex.  It's useful to try
- * different types of locks (e.g. spinlocks) */
-
-struct OVS_LOCKABLE ct_lock {
-    struct ovs_mutex lock;
-};
 
-struct OVS_LOCKABLE ct_rwlock {
-    struct ovs_rwlock lock;
+struct conntrack {
+    int empty;
+    /* No members are exported; 'empty' avoids a zero-size struct (MSVC). */
 };
 
-static inline void ct_lock_init(struct ct_lock *lock)
-{
-    ovs_mutex_init_adaptive(&lock->lock);
-}
-
-static inline void ct_lock_lock(struct ct_lock *lock)
-    OVS_ACQUIRES(lock)
-    OVS_NO_THREAD_SAFETY_ANALYSIS
-{
-    ovs_mutex_lock(&lock->lock);
-}
-
-static inline void ct_lock_unlock(struct ct_lock *lock)
-    OVS_RELEASES(lock)
-    OVS_NO_THREAD_SAFETY_ANALYSIS
-{
-    ovs_mutex_unlock(&lock->lock);
-}
-
-static inline void ct_lock_destroy(struct ct_lock *lock)
-{
-    ovs_mutex_destroy(&lock->lock);
-}
-
-static inline void ct_rwlock_init(struct ct_rwlock *lock)
-{
-    ovs_rwlock_init(&lock->lock);
-}
-
-
-static inline void ct_rwlock_wrlock(struct ct_rwlock *lock)
-    OVS_ACQ_WRLOCK(lock)
-    OVS_NO_THREAD_SAFETY_ANALYSIS
-{
-    ovs_rwlock_wrlock(&lock->lock);
-}
-
-static inline void ct_rwlock_rdlock(struct ct_rwlock *lock)
-    OVS_ACQ_RDLOCK(lock)
-    OVS_NO_THREAD_SAFETY_ANALYSIS
-{
-    ovs_rwlock_rdlock(&lock->lock);
-}
-
-static inline void ct_rwlock_unlock(struct ct_rwlock *lock)
-    OVS_RELEASES(lock)
-    OVS_NO_THREAD_SAFETY_ANALYSIS
-{
-    ovs_rwlock_unlock(&lock->lock);
-}
-
-static inline void ct_rwlock_destroy(struct ct_rwlock *lock)
-{
-    ovs_rwlock_destroy(&lock->lock);
-}
-
 
-/* Timeouts: all the possible timeout states passed to update_expiration()
- * are listed here. The name will be prefix by CT_TM_ and the value is in
- * milliseconds */
-#define CT_TIMEOUTS \
-    CT_TIMEOUT(TCP_FIRST_PACKET, 30 * 1000) \
-    CT_TIMEOUT(TCP_OPENING, 30 * 1000) \
-    CT_TIMEOUT(TCP_ESTABLISHED, 24 * 60 * 60 * 1000) \
-    CT_TIMEOUT(TCP_CLOSING, 15 * 60 * 1000) \
-    CT_TIMEOUT(TCP_FIN_WAIT, 45 * 1000) \
-    CT_TIMEOUT(TCP_CLOSED, 30 * 1000) \
-    CT_TIMEOUT(OTHER_FIRST, 60 * 1000) \
-    CT_TIMEOUT(OTHER_MULTIPLE, 60 * 1000) \
-    CT_TIMEOUT(OTHER_BIDIR, 30 * 1000) \
-    CT_TIMEOUT(ICMP_FIRST, 60 * 1000) \
-    CT_TIMEOUT(ICMP_REPLY, 30 * 1000)
-
-/* The smallest of the above values: it is used as an upper bound for the
- * interval between two rounds of cleanup of expired entries */
-#define CT_TM_MIN (30 * 1000)
-
-#define CT_TIMEOUT(NAME, VAL) BUILD_ASSERT_DECL(VAL >= CT_TM_MIN);
-    CT_TIMEOUTS
-#undef CT_TIMEOUT
-
-enum ct_timeout {
-#define CT_TIMEOUT(NAME, VALUE) CT_TM_##NAME,
-    CT_TIMEOUTS
-#undef CT_TIMEOUT
-    N_CT_TM
-};
-
-/* Locking:
- *
- * The connections are kept in different buckets, which are completely
- * independent. The connection bucket is determined by the hash of its key.
- *
- * Each bucket has two locks. Acquisition order is, from outermost to
- * innermost:
- *
- *    cleanup_mutex
- *    lock
- *
- * */
-struct conntrack_bucket {
-    /* Protects 'connections' and 'exp_lists'.  Used in the fast path */
-    struct ct_lock lock;
-    /* Contains the connections in the bucket, indexed by 'struct conn_key' */
-    struct hmap connections OVS_GUARDED;
-    /* For each possible timeout we have a list of connections. When the
-     * timeout of a connection is updated, we move it to the back of the list.
-     * Since the connection in a list have the same relative timeout, the list
-     * will be ordered, with the oldest connections to the front. */
-    struct ovs_list exp_lists[N_CT_TM] OVS_GUARDED;
-
-    /* Protects 'next_cleanup'. Used to make sure that there's only one thread
-     * performing the cleanup. */
-    struct ovs_mutex cleanup_mutex;
-    long long next_cleanup OVS_GUARDED;
-};
-
-#define CONNTRACK_BUCKETS_SHIFT 8
-#define CONNTRACK_BUCKETS (1 << CONNTRACK_BUCKETS_SHIFT)
-
-struct conntrack {
-    /* Independent buckets containing the connections */
-    struct conntrack_bucket buckets[CONNTRACK_BUCKETS];
-
-    /* Salt for hashing a connection key. */
-    uint32_t hash_basis;
-
-    /* The thread performing periodic cleanup of the connection
-     * tracker */
-    pthread_t clean_thread;
-    /* Latch to destroy the 'clean_thread' */
-    struct latch clean_thread_exit;
-
-    /* Number of connections currently in the connection tracker. */
-    atomic_count n_conn;
-    /* Connections limit. When this limit is reached, no new connection
-     * will be accepted. */
-    atomic_uint n_conn_limit;
-
-    /* The following resources are referenced during nat connection
-     * creation and deletion. */
-    struct hmap nat_conn_keys OVS_GUARDED;
-    /* Hash table for alg expectations. Expectations are created
-     * by control connections to help create data connections. */
-    struct hmap alg_expectations OVS_GUARDED;
-    /* Used to lookup alg expectations from the control context. */
-    struct hindex alg_expectation_refs OVS_GUARDED;
-    /* Expiry list for alg expectations. */
-    struct ovs_list alg_exp_list OVS_GUARDED;
-    /* This lock is used during NAT connection creation and deletion;
-     * it is taken after a bucket lock and given back before that
-     * bucket unlock.
-     * This lock is similarly used to guard alg_expectations and
-     * alg_expectation_refs. If a bucket lock is also held during
-     * the normal code flow, then is must be taken first and released
-     * last.
-     */
-    struct ct_rwlock resources_lock;
-
-};
 
 #endif /* conntrack.h */
-- 
1.9.1



More information about the dev mailing list