[ovs-dev] [PATCH v1 9/9] conntrack: Use an atomic conn expiration value

Gaetan Rivet grive at u256.net
Wed Feb 17 16:34:43 UTC 2021


A lock is taken during conn_lookup() to check whether a connection is
expired before returning it. This lock can have some contention.

Even though this lock ensures a consistent sequence of writes, it does
not imply a specific order. A ct_clean thread taking the lock first
could read a value that would be updated immediately after by a PMD
waiting on the same lock, just as well as the opposite order.

As such, the expiration time can be stale anytime it is read. In this
context, using an atomic will ensure the same write consistency while
keeping the same (lack of) guarantee for reads. Reading the atomic will
however be less costly than taking and releasing the lock.

Signed-off-by: Gaetan Rivet <grive at u256.net>
Reviewed-by: Eli Britstein <elibr at nvidia.com>
---
 lib/conntrack-private.h |  2 +-
 lib/conntrack-tp.c      |  2 +-
 lib/conntrack.c         | 25 +++++++++++++++----------
 3 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
index e8ffb5bf9..7f3f8714a 100644
--- a/lib/conntrack-private.h
+++ b/lib/conntrack-private.h
@@ -148,7 +148,7 @@ struct conn {
     struct ovs_mutex lock; /* Guards all mutable fields. */
     struct conn_expire exp;
     ovs_u128 label;
-    long long expiration;
+    atomic_llong expiration;
     uint32_t mark;
     int seq_skew;
 
diff --git a/lib/conntrack-tp.c b/lib/conntrack-tp.c
index 01ffa30df..a4ea19c1b 100644
--- a/lib/conntrack-tp.c
+++ b/lib/conntrack-tp.c
@@ -276,7 +276,7 @@ conn_schedule_expiration(struct conntrack *ct, struct conn *conn,
     OVS_REQUIRES(conn->lock)
 {
     conn_expire_init(conn, ct);
-    conn->expiration = now + tp_value * 1000;
+    atomic_store_relaxed(&conn->expiration, now + tp_value * 1000);
     conn->exp.tm = tm;
     if (!atomic_flag_test_and_set(&conn->exp.insert_once)) {
         ovsrcu_postpone(conn_expire_insert, conn);
diff --git a/lib/conntrack.c b/lib/conntrack.c
index e042683aa..193d550cb 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -99,6 +99,7 @@ static enum ct_update_res conn_update(struct conntrack *ct, struct conn *conn,
                                       struct dp_packet *pkt,
                                       struct conn_lookup_ctx *ctx,
                                       long long now);
+static long long int conn_expiration(const struct conn *);
 static bool conn_expired(struct conn *, long long now);
 static void set_mark(struct dp_packet *, struct conn *,
                      uint32_t val, uint32_t mask);
@@ -1555,10 +1556,8 @@ ct_sweep(struct conntrack *ct, long long now, size_t limit)
 
     for (unsigned i = 0; i < N_CT_TM; i++) {
         RCULIST_FOR_EACH (conn, exp.node, &ct->exp_lists[i]) {
-            ovs_mutex_lock(&conn->lock);
-            if (now < conn->expiration || count >= limit) {
-                min_expiration = MIN(min_expiration, conn->expiration);
-                ovs_mutex_unlock(&conn->lock);
+            if (!conn_expired(conn, now) || count >= limit) {
+                min_expiration = MIN(min_expiration, conn_expiration(conn));
                 if (count >= limit) {
                     /* Do not check other lists. */
                     COVERAGE_INC(conntrack_long_cleanup);
@@ -1566,7 +1565,6 @@ ct_sweep(struct conntrack *ct, long long now, size_t limit)
                 }
                 break;
             } else {
-                ovs_mutex_unlock(&conn->lock);
                 conn_clean(ct, conn);
             }
             count++;
@@ -2395,14 +2393,21 @@ conn_update(struct conntrack *ct, struct conn *conn, struct dp_packet *pkt,
     return update_res;
 }
 
+static long long int
+conn_expiration(const struct conn *conn)
+{
+    long long int expiration;
+
+    atomic_read_relaxed(&CONST_CAST(struct conn *, conn)->expiration,
+                        &expiration);
+    return expiration;
+}
+
 static bool
 conn_expired(struct conn *conn, long long now)
 {
     if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
-        ovs_mutex_lock(&conn->lock);
-        bool expired = now >= conn->expiration ? true : false;
-        ovs_mutex_unlock(&conn->lock);
-        return expired;
+        return now >= conn_expiration(conn);
     }
     return false;
 }
@@ -2545,7 +2550,7 @@ conn_to_ct_dpif_entry(const struct conn *conn, struct ct_dpif_entry *entry,
     entry->mark = conn->mark;
     memcpy(&entry->labels, &conn->label, sizeof entry->labels);
 
-    long long expiration = conn->expiration - now;
+    long long expiration = conn_expiration(conn) - now;
 
     struct ct_l4_proto *class = l4_protos[conn->key.nw_proto];
     if (class->conn_get_protoinfo) {
-- 
2.30.0



More information about the dev mailing list