[ovs-dev] [PATCHv2] userspace: Add conntrack timeout policy support.

William Tu u9012063 at gmail.com
Tue Apr 7 21:07:09 UTC 2020


Commit 1f1613183733 ("ct-dpif, dpif-netlink: Add conntrack timeout
policy support") adds conntrack timeout policy for kernel datapath.
This patch enables support for the userspace datapath.  I tested
using the 'make check-system-userspace' which checks the timeout
policies for ICMP and UDP cases.

Signed-off-by: William Tu <u9012063 at gmail.com>
---
v2:
  - enable icmp_reply timeout for userspace datapath
  - travis: https://travis-ci.org/github/williamtu/ovs-travis/builds/672248069
---
 Documentation/faq/releases.rst   |   2 +-
 NEWS                             |   2 +
 lib/automake.mk                  |   2 +
 lib/conntrack-icmp.c             |   7 +-
 lib/conntrack-other.c            |   9 +-
 lib/conntrack-private.h          |  27 +++--
 lib/conntrack-tcp.c              |  22 ++--
 lib/conntrack-tp.c               | 247 +++++++++++++++++++++++++++++++++++++++
 lib/conntrack-tp.h               |  24 ++++
 lib/conntrack.c                  | 159 ++++++++++++++++++++++++-
 lib/conntrack.h                  |  14 ++-
 lib/dpif-netdev.c                |  74 ++++++++++--
 tests/system-traffic.at          |   4 +-
 tests/system-userspace-macros.at |   6 +-
 tests/test-conntrack.c           |   6 +-
 15 files changed, 563 insertions(+), 42 deletions(-)
 create mode 100644 lib/conntrack-tp.c
 create mode 100644 lib/conntrack-tp.h

diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst
index b3507bd1c7fa..ffcda37f9985 100644
--- a/Documentation/faq/releases.rst
+++ b/Documentation/faq/releases.rst
@@ -118,7 +118,7 @@ Q: Are all features available with all datapaths?
     ========================== ============== ============== ========= =======
     Connection tracking             4.3            2.5          2.6      YES
     Conntrack Fragment Reass.       4.3            2.6          2.12     YES
-    Conntrack Timeout Policies      5.2            2.12         NO       NO
+    Conntrack Timeout Policies      5.2            2.12         2.13     NO
     Conntrack Zone Limit            4.18           2.10         2.13     YES
     Conntrack NAT                   4.6            2.6          2.8      YES
     Tunnel - LISP                   NO             2.11         NO       NO
diff --git a/NEWS b/NEWS
index 70bd17584594..1e6af8f57bdd 100644
--- a/NEWS
+++ b/NEWS
@@ -10,6 +10,8 @@ Post-v2.13.0
      * Deprecated DPDK ring ports (dpdkr) are no longer supported.
    - Linux datapath:
      * Support for kernel versions up to 5.5.x.
+   - Userspace datapath:
+     * Add support for conntrack zone-based timeout policy.
 
 
 v2.13.0 - 14 Feb 2020
diff --git a/lib/automake.mk b/lib/automake.mk
index 95925b57c351..86940ccd2f9e 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -53,6 +53,8 @@ lib_libopenvswitch_la_SOURCES = \
 	lib/conntrack-icmp.c \
 	lib/conntrack-private.h \
 	lib/conntrack-tcp.c \
+	lib/conntrack-tp.c \
+	lib/conntrack-tp.h \
 	lib/conntrack-other.c \
 	lib/conntrack.c \
 	lib/conntrack.h \
diff --git a/lib/conntrack-icmp.c b/lib/conntrack-icmp.c
index 63246f0124d0..81393a6f1846 100644
--- a/lib/conntrack-icmp.c
+++ b/lib/conntrack-icmp.c
@@ -22,6 +22,7 @@
 #include <netinet/icmp6.h>
 
 #include "conntrack-private.h"
+#include "conntrack-tp.h"
 #include "dp-packet.h"
 
 enum OVS_PACKED_ENUM icmp_state {
@@ -51,7 +52,8 @@ icmp_conn_update(struct conntrack *ct, struct conn *conn_,
 {
     struct conn_icmp *conn = conn_icmp_cast(conn_);
     conn->state = reply ? ICMPS_REPLY : ICMPS_FIRST;
-    conn_update_expiration(ct, &conn->up, icmp_timeouts[conn->state], now);
+    conn_update_expiration_with_tp(ct, &conn->up, icmp_timeouts[conn->state],
+                                   now);
 
     return CT_UPDATE_VALID;
 }
@@ -80,7 +82,8 @@ icmp_new_conn(struct conntrack *ct, struct dp_packet *pkt OVS_UNUSED,
 {
     struct conn_icmp *conn = xzalloc(sizeof *conn);
     conn->state = ICMPS_FIRST;
-    conn_init_expiration(ct, &conn->up, icmp_timeouts[conn->state], now);
+    conn_init_expiration_with_tp(ct, &conn->up, icmp_timeouts[conn->state],
+                                 now);
 
     return &conn->up;
 }
diff --git a/lib/conntrack-other.c b/lib/conntrack-other.c
index de22ef87cc19..cabd51964c08 100644
--- a/lib/conntrack-other.c
+++ b/lib/conntrack-other.c
@@ -17,6 +17,7 @@
 #include <config.h>
 
 #include "conntrack-private.h"
+#include "conntrack-tp.h"
 #include "dp-packet.h"
 
 enum OVS_PACKED_ENUM other_state {
@@ -56,8 +57,9 @@ other_conn_update(struct conntrack *ct, struct conn *conn_,
         ret = CT_UPDATE_VALID_NEW;
     }
 
-    conn_update_expiration(ct, &conn->up, other_timeouts[conn->state], now);
-
+    conn_update_expiration_with_tp(ct, &conn->up,
+                                   other_timeouts[conn->state],
+                                   now);
     return ret;
 }
 
@@ -76,7 +78,8 @@ other_new_conn(struct conntrack *ct, struct dp_packet *pkt OVS_UNUSED,
     conn = xzalloc(sizeof *conn);
     conn->state = OTHERS_FIRST;
 
-    conn_init_expiration(ct, &conn->up, other_timeouts[conn->state], now);
+    conn_init_expiration_with_tp(ct, &conn->up, other_timeouts[conn->state],
+                                 now);
 
     return &conn->up;
 }
diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
index 9a8ca3910157..852482303684 100644
--- a/lib/conntrack-private.h
+++ b/lib/conntrack-private.h
@@ -118,6 +118,8 @@ struct conn {
     /* Immutable data. */
     bool alg_related; /* True if alg data connection. */
     enum ct_conn_type conn_type;
+
+    uint32_t tpid; /* Timeout policy ID. */
 };
 
 enum ct_update_res {
@@ -143,9 +145,9 @@ enum ct_update_res {
     CT_TIMEOUT(ICMP_FIRST, 60 * 1000) \
     CT_TIMEOUT(ICMP_REPLY, 30 * 1000)
 
-/* The smallest of the above values: it is used as an upper bound for the
- * interval between two rounds of cleanup of expired entries */
-#define CT_TM_MIN (30 * 1000)
+/* This is used as an upper bound for the interval between two
+ * rounds of cleanup of expired entries */
+#define CT_TM_MIN (1 * 1000)
 
 #define CT_TIMEOUT(NAME, VAL) BUILD_ASSERT_DECL(VAL >= CT_TM_MIN);
     CT_TIMEOUTS
@@ -163,6 +165,7 @@ struct conntrack {
     struct cmap conns OVS_GUARDED;
     struct ovs_list exp_lists[N_CT_TM] OVS_GUARDED;
     struct hmap zone_limits OVS_GUARDED;
+    struct hmap timeout_policies OVS_GUARDED;
     uint32_t hash_basis; /* Salt for hashing a connection key. */
     pthread_t clean_thread; /* Periodically cleans up connection tracker. */
     struct latch clean_thread_exit; /* To destroy the 'clean_thread'. */
@@ -212,16 +215,22 @@ extern long long ct_timeout_val[];
 /* ct_lock must be held. */
 static inline void
 conn_init_expiration(struct conntrack *ct, struct conn *conn,
-                     enum ct_timeout tm, long long now)
+                     enum ct_timeout tm, long long now,
+                     uint32_t tp_value, bool use_default)
 {
-    conn->expiration = now + ct_timeout_val[tm];
+    if (use_default) {
+        conn->expiration = now + ct_timeout_val[tm];
+    } else {
+        conn->expiration = now + tp_value * 1000;
+    }
     ovs_list_push_back(&ct->exp_lists[tm], &conn->exp_node);
 }
 
 /* The conn entry lock must be held on entry and exit. */
 static inline void
 conn_update_expiration(struct conntrack *ct, struct conn *conn,
-                       enum ct_timeout tm, long long now)
+                       enum ct_timeout tm, long long now,
+                       uint32_t tp_value, bool use_default)
     OVS_NO_THREAD_SAFETY_ANALYSIS
 {
     ovs_mutex_unlock(&conn->lock);
@@ -229,7 +238,11 @@ conn_update_expiration(struct conntrack *ct, struct conn *conn,
     ovs_mutex_lock(&ct->ct_lock);
     ovs_mutex_lock(&conn->lock);
     if (!conn->cleaned) {
-        conn->expiration = now + ct_timeout_val[tm];
+        if (use_default) {
+            conn->expiration = now + ct_timeout_val[tm];
+        } else {
+            conn->expiration = now + tp_value * 1000;
+        }
         ovs_list_remove(&conn->exp_node);
         ovs_list_push_back(&ct->exp_lists[tm], &conn->exp_node);
     }
diff --git a/lib/conntrack-tcp.c b/lib/conntrack-tcp.c
index 47261c7551d1..194c5a1ba410 100644
--- a/lib/conntrack-tcp.c
+++ b/lib/conntrack-tcp.c
@@ -39,6 +39,7 @@
 #include <config.h>
 
 #include "conntrack-private.h"
+#include "conntrack-tp.h"
 #include "coverage.h"
 #include "ct-dpif.h"
 #include "dp-packet.h"
@@ -188,7 +189,8 @@ tcp_conn_update(struct conntrack *ct, struct conn *conn_,
             return CT_UPDATE_NEW;
         } else if (src->state <= CT_DPIF_TCPS_SYN_SENT) {
             src->state = CT_DPIF_TCPS_SYN_SENT;
-            conn_update_expiration(ct, &conn->up, CT_TM_TCP_FIRST_PACKET, now);
+            conn_update_expiration_with_tp(ct, &conn->up,
+                                           CT_TM_TCP_FIRST_PACKET, now);
             return CT_UPDATE_VALID_NEW;
         }
     }
@@ -339,18 +341,23 @@ tcp_conn_update(struct conntrack *ct, struct conn *conn_,
 
         if (src->state >= CT_DPIF_TCPS_FIN_WAIT_2
             && dst->state >= CT_DPIF_TCPS_FIN_WAIT_2) {
-            conn_update_expiration(ct, &conn->up, CT_TM_TCP_CLOSED, now);
+            conn_update_expiration_with_tp(ct, &conn->up, CT_TM_TCP_CLOSED,
+                                           now);
         } else if (src->state >= CT_DPIF_TCPS_CLOSING
                    && dst->state >= CT_DPIF_TCPS_CLOSING) {
-            conn_update_expiration(ct, &conn->up, CT_TM_TCP_FIN_WAIT, now);
+            conn_update_expiration_with_tp(ct, &conn->up, CT_TM_TCP_FIN_WAIT,
+                                           now);
         } else if (src->state < CT_DPIF_TCPS_ESTABLISHED
                    || dst->state < CT_DPIF_TCPS_ESTABLISHED) {
-            conn_update_expiration(ct, &conn->up, CT_TM_TCP_OPENING, now);
+            conn_update_expiration_with_tp(ct, &conn->up, CT_TM_TCP_OPENING,
+                                           now);
         } else if (src->state >= CT_DPIF_TCPS_CLOSING
                    || dst->state >= CT_DPIF_TCPS_CLOSING) {
-            conn_update_expiration(ct, &conn->up, CT_TM_TCP_CLOSING, now);
+            conn_update_expiration_with_tp(ct, &conn->up, CT_TM_TCP_CLOSING,
+                                           now);
         } else {
-            conn_update_expiration(ct, &conn->up, CT_TM_TCP_ESTABLISHED, now);
+            conn_update_expiration_with_tp(ct, &conn->up,
+                                           CT_TM_TCP_ESTABLISHED, now);
         }
     } else if ((dst->state < CT_DPIF_TCPS_SYN_SENT
                 || dst->state >= CT_DPIF_TCPS_FIN_WAIT_2
@@ -471,7 +478,8 @@ tcp_new_conn(struct conntrack *ct, struct dp_packet *pkt, long long now)
     src->state = CT_DPIF_TCPS_SYN_SENT;
     dst->state = CT_DPIF_TCPS_CLOSED;
 
-    conn_init_expiration(ct, &newconn->up, CT_TM_TCP_FIRST_PACKET, now);
+    conn_init_expiration_with_tp(ct, &newconn->up, CT_TM_TCP_FIRST_PACKET,
+                                 now);
 
     return &newconn->up;
 }
diff --git a/lib/conntrack-tp.c b/lib/conntrack-tp.c
new file mode 100644
index 000000000000..6be4b8276340
--- /dev/null
+++ b/lib/conntrack-tp.c
@@ -0,0 +1,247 @@
+/*
+ * Copyright (c) 2020 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include "conntrack-private.h"
+#include "conntrack-tp.h"
+#include "dp-packet.h"
+
+#include "openvswitch/vlog.h"
+VLOG_DEFINE_THIS_MODULE(conntrack_tp);
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+
+static const char *ct_timeout_str[] = {
+#define CT_TIMEOUT(NAME, VALUE) #NAME
+    CT_TIMEOUTS
+#undef CT_TIMEOUT
+};
+
+static inline bool
+check_present_and_set(struct timeout_policy *tp, uint32_t *v,
+                      enum ct_dpif_tp_attr attr)
+{
+    if (tp && (tp->p.present & (1 << attr))) {
+        *v = tp->p.attrs[attr];
+        return true;
+    }
+    return false;
+}
+
+#define TP_HAS_FUNC(ATTR)  \
+static bool \
+tp_has_##ATTR(struct timeout_policy *tp, uint32_t *v)              \
+{                                                                  \
+    return check_present_and_set(tp, v, CT_DPIF_TP_ATTR_##ATTR);   \
+}
+
+/* These functions check whether the timeout value is set from the
+ * present bit.  If true, then set the value to '*v'.  For meaning
+ * of each value, see CT_Timeout_Policy table at ovs-vswitchd.conf.db(5).
+ */
+TP_HAS_FUNC(TCP_SYN_SENT)
+TP_HAS_FUNC(TCP_SYN_RECV)
+TP_HAS_FUNC(TCP_ESTABLISHED)
+TP_HAS_FUNC(TCP_FIN_WAIT)
+TP_HAS_FUNC(TCP_TIME_WAIT)
+TP_HAS_FUNC(TCP_CLOSE)
+TP_HAS_FUNC(UDP_FIRST)
+TP_HAS_FUNC(UDP_SINGLE)
+TP_HAS_FUNC(UDP_MULTIPLE)
+TP_HAS_FUNC(ICMP_FIRST)
+TP_HAS_FUNC(ICMP_REPLY)
+
+static bool
+conn_update_expiration_with_policy(struct conntrack *ct, struct conn *conn,
+                                   enum ct_timeout tm, long long now,
+                                   struct timeout_policy *tp)
+{
+    uint32_t val;
+
+    switch (tm) {
+    case CT_TM_TCP_FIRST_PACKET:
+        if (tp_has_TCP_SYN_SENT(tp, &val)) {
+            goto update_with_val;
+        }
+        break;
+    case CT_TM_TCP_OPENING:
+        if (tp_has_TCP_SYN_RECV(tp, &val)) {
+            goto update_with_val;
+        }
+        break;
+    case CT_TM_TCP_ESTABLISHED:
+        if (tp_has_TCP_ESTABLISHED(tp, &val)) {
+            goto update_with_val;
+        }
+        break;
+    case CT_TM_TCP_CLOSING:
+        if (tp_has_TCP_FIN_WAIT(tp, &val)) {
+            goto update_with_val;
+        }
+        break;
+    case CT_TM_TCP_FIN_WAIT:
+        if (tp_has_TCP_TIME_WAIT(tp, &val)) {
+            goto update_with_val;
+        }
+        break;
+    case CT_TM_TCP_CLOSED:
+        if (tp_has_TCP_CLOSE(tp, &val)) {
+            goto update_with_val;
+        }
+        break;
+    case CT_TM_OTHER_FIRST:
+        if (tp_has_UDP_FIRST(tp, &val)) {
+            goto update_with_val;
+        }
+        break;
+    case CT_TM_OTHER_BIDIR:
+        if (tp_has_UDP_SINGLE(tp, &val)) {
+            goto update_with_val;
+        }
+        break;
+    case CT_TM_OTHER_MULTIPLE:
+        if (tp_has_UDP_MULTIPLE(tp, &val)) {
+            goto update_with_val;
+        }
+        break;
+    case CT_TM_ICMP_FIRST:
+        if (tp_has_ICMP_FIRST(tp, &val)) {
+            goto update_with_val;
+        }
+        break;
+    case CT_TM_ICMP_REPLY:
+        if (tp_has_ICMP_REPLY(tp, &val)) {
+            goto update_with_val;
+        }
+        break;
+    case N_CT_TM:
+    default:
+        OVS_NOT_REACHED();
+        break;
+    }
+    return false;
+
+update_with_val:
+    conn_update_expiration(ct, conn, tm, now, val, false);
+    return true;
+}
+
+static bool
+conn_init_expiration_with_policy(struct conntrack *ct, struct conn *conn,
+                                 enum ct_timeout tm, long long now,
+                                 struct timeout_policy *tp)
+{
+    uint32_t val;
+
+    switch (tm) {
+    case CT_TM_TCP_FIRST_PACKET:
+        if (tp_has_TCP_SYN_SENT(tp, &val)) {
+            goto init_with_val;
+        }
+        break;
+    case CT_TM_TCP_OPENING:
+        if (tp_has_TCP_SYN_RECV(tp, &val)) {
+            goto init_with_val;
+        }
+        break;
+    case CT_TM_TCP_ESTABLISHED:
+        if (tp_has_TCP_ESTABLISHED(tp, &val)) {
+            goto init_with_val;
+        }
+        break;
+    case CT_TM_TCP_CLOSING:
+        if (tp_has_TCP_FIN_WAIT(tp, &val)) {
+            goto init_with_val;
+        }
+        break;
+    case CT_TM_TCP_FIN_WAIT:
+        if (tp_has_TCP_TIME_WAIT(tp, &val)) {
+            goto init_with_val;
+        }
+        break;
+    case CT_TM_TCP_CLOSED:
+        if (tp_has_TCP_CLOSE(tp, &val)) {
+            goto init_with_val;
+        }
+        break;
+    case CT_TM_OTHER_FIRST:
+        if (tp_has_UDP_FIRST(tp, &val)) {
+            goto init_with_val;
+        }
+        break;
+    case CT_TM_OTHER_BIDIR:
+        if (tp_has_UDP_SINGLE(tp, &val)) {
+            goto init_with_val;
+        }
+        break;
+    case CT_TM_OTHER_MULTIPLE:
+        if (tp_has_UDP_MULTIPLE(tp, &val)) {
+            goto init_with_val;
+        }
+        break;
+    case CT_TM_ICMP_FIRST:
+        if (tp_has_ICMP_FIRST(tp, &val)) {
+            goto init_with_val;
+        }
+        break;
+    case CT_TM_ICMP_REPLY:
+        if (tp_has_ICMP_REPLY(tp, &val)) {
+            goto init_with_val;
+        }
+        break;
+    case N_CT_TM:
+    default:
+        OVS_NOT_REACHED();
+        break;
+    }
+    return false;
+
+init_with_val:
+    conn_init_expiration(ct, conn, tm, now, val, false);
+    return true;
+}
+
+void
+conn_init_expiration_with_tp(struct conntrack *ct, struct conn *conn,
+                             enum ct_timeout tm, long long now)
+{
+    struct timeout_policy *tp;
+
+    tp = timeout_policy_lookup(ct, conn->tpid);
+    if (tp && conn_init_expiration_with_policy(ct, conn, tm, now, tp)) {
+        VLOG_DBG_RL(&rl, "Init timeout %s with policy.",
+                    ct_timeout_str[tm]);
+    } else {
+        /* Init with default. */
+        conn_init_expiration(ct, conn, tm, now, 0, true);
+    }
+}
+
+void
+conn_update_expiration_with_tp(struct conntrack *ct, struct conn *conn,
+                               enum ct_timeout tm, long long now)
+{
+    struct timeout_policy *tp;
+
+    tp = timeout_policy_lookup(ct, conn->tpid);
+    if (tp && conn_update_expiration_with_policy(ct, conn, tm, now, tp)) {
+        VLOG_DBG_RL(&rl, "Update timeout %s with policy.",
+                    ct_timeout_str[tm]);
+    } else {
+        /* Update with default. */
+        conn_update_expiration(ct, conn, tm, now, 0, true);
+    }
+}
diff --git a/lib/conntrack-tp.h b/lib/conntrack-tp.h
new file mode 100644
index 000000000000..34048ac2aeea
--- /dev/null
+++ b/lib/conntrack-tp.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2020 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CONNTRACK_TP_H
+#define CONNTRACK_TP_H 1
+
+void conn_init_expiration_with_tp(struct conntrack *ct, struct conn *conn,
+                                  enum ct_timeout tm, long long now);
+void conn_update_expiration_with_tp(struct conntrack *ct, struct conn *conn,
+                                    enum ct_timeout tm, long long now);
+#endif
diff --git a/lib/conntrack.c b/lib/conntrack.c
index 0cbc8f6d2b25..de934bdf7169 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -143,6 +143,12 @@ detect_ftp_ctl_type(const struct conn_lookup_ctx *ctx,
 static void
 expectation_clean(struct conntrack *ct, const struct conn_key *master_key);
 
+static void timeout_policy_init(struct conntrack *ct);
+static int timeout_policy_create(struct conntrack *ct,
+                                 struct timeout_policy *tp);
+static void timeout_policy_clean(struct conntrack *ct,
+                                 struct timeout_policy *tp);
+
 static struct ct_l4_proto *l4_protos[] = {
     [IPPROTO_TCP] = &ct_proto_tcp,
     [IPPROTO_UDP] = &ct_proto_other,
@@ -312,6 +318,7 @@ conntrack_init(void)
     }
     hmap_init(&ct->zone_limits);
     ct->zone_limit_seq = 0;
+    timeout_policy_init(ct);
     ovs_mutex_unlock(&ct->ct_lock);
 
     ct->hash_basis = random_uint32();
@@ -430,6 +437,139 @@ zone_limit_delete(struct conntrack *ct, uint16_t zone)
     return 0;
 }
 
+struct timeout_policy *
+timeout_policy_get(struct conntrack *ct, int32_t tpid)
+{
+    struct timeout_policy *tp;
+
+    ovs_mutex_lock(&ct->ct_lock);
+    tp = timeout_policy_lookup(ct, tpid);
+    if (!tp) {
+        ovs_mutex_unlock(&ct->ct_lock);
+        return NULL;
+    }
+
+    ovs_mutex_unlock(&ct->ct_lock);
+    return tp;
+}
+
+struct timeout_policy *
+timeout_policy_lookup(struct conntrack *ct, int32_t tpid)
+    OVS_REQUIRES(ct->ct_lock)
+{
+    struct timeout_policy *tp;
+    uint32_t hash;
+
+    hash = zone_key_hash(tpid, ct->hash_basis);
+    HMAP_FOR_EACH_IN_BUCKET (tp, node, hash, &ct->timeout_policies) {
+        if (tp->p.id == tpid) {
+            return tp;
+        }
+    }
+    return NULL;
+}
+
+static bool
+is_valid_tpid(uint32_t tpid OVS_UNUSED)
+{
+    return true;
+}
+
+static int
+timeout_policy_create(struct conntrack *ct,
+                      struct timeout_policy *new_tp)
+    OVS_REQUIRES(ct->ct_lock)
+{
+    uint32_t tpid = new_tp->p.id;
+    uint32_t hash;
+
+    if (is_valid_tpid(tpid)) {
+        struct timeout_policy *tp;
+
+        tp = xzalloc(sizeof *tp);
+        memcpy(&tp->p, &new_tp->p, sizeof tp->p);
+
+        hash = zone_key_hash(tpid, ct->hash_basis);
+        hmap_insert(&ct->timeout_policies, &tp->node, hash);
+
+        return 0;
+    } else {
+        return EINVAL;
+    }
+}
+
+static void
+update_existing_tp(struct timeout_policy *tp_dst,
+                   struct timeout_policy *tp_src)
+{
+    struct ct_dpif_timeout_policy *dst, *src;
+    int i;
+
+    dst = &tp_dst->p;
+    src = &tp_src->p;
+
+    /* Set the value to dst if present bit in src is set. */
+    for (i = 0; i < ARRAY_SIZE(dst->attrs); i++) {
+        if (src->present & (1 << i)) {
+            dst->attrs[i] = src->attrs[i];
+            dst->present |= (1 << i);
+        }
+    }
+}
+
+int
+timeout_policy_update(struct conntrack *ct, struct timeout_policy *new_tp)
+{
+    int err = 0;
+    uint32_t tpid = new_tp->p.id;
+
+    ovs_mutex_lock(&ct->ct_lock);
+    struct timeout_policy *tp = timeout_policy_lookup(ct, tpid);
+    if (tp) {
+        VLOG_INFO("Changed timeout policy of existing tpid %d", tpid);
+        update_existing_tp(tp, new_tp);
+    } else {
+        err = timeout_policy_create(ct, new_tp);
+        if (err) {
+            VLOG_WARN("Request to create timeout policy failed");
+        } else {
+            VLOG_INFO("Created timeout policy tpid %d", tpid);
+        }
+    }
+    ovs_mutex_unlock(&ct->ct_lock);
+    return err;
+}
+
+static void
+timeout_policy_clean(struct conntrack *ct, struct timeout_policy *tp)
+    OVS_REQUIRES(ct->ct_lock)
+{
+    hmap_remove(&ct->timeout_policies, &tp->node);
+    free(tp);
+}
+
+int
+timeout_policy_delete(struct conntrack *ct, uint32_t tpid)
+{
+    ovs_mutex_lock(&ct->ct_lock);
+    struct timeout_policy *tp = timeout_policy_lookup(ct, tpid);
+    if (tp) {
+        VLOG_INFO("Deleted timeout policy for id %d", tpid);
+        timeout_policy_clean(ct, tp);
+    } else {
+        VLOG_INFO("Attempted delete of non-existent timeout policy: zone %d",
+                  tpid);
+    }
+    ovs_mutex_unlock(&ct->ct_lock);
+    return 0;
+}
+
+void
+timeout_policy_init(struct conntrack *ct)
+{
+    hmap_init(&ct->timeout_policies);
+}
+
 static void
 conn_clean_cmn(struct conntrack *ct, struct conn *conn)
     OVS_REQUIRES(ct->ct_lock)
@@ -502,6 +642,12 @@ conntrack_destroy(struct conntrack *ct)
     }
     hmap_destroy(&ct->zone_limits);
 
+    struct timeout_policy *tp;
+    HMAP_FOR_EACH_POP (tp, node, &ct->timeout_policies) {
+        free(tp);
+    }
+    hmap_destroy(&ct->timeout_policies);
+
     ovs_mutex_unlock(&ct->ct_lock);
     ovs_mutex_destroy(&ct->ct_lock);
 
@@ -1275,7 +1421,8 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
             bool force, bool commit, long long now, const uint32_t *setmark,
             const struct ovs_key_ct_labels *setlabel,
             const struct nat_action_info_t *nat_action_info,
-            ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper)
+            ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper,
+            uint32_t tpid)
 {
     /* Reset ct_state whenever entering a new zone. */
     if (pkt->md.ct_state && pkt->md.ct_zone != zone) {
@@ -1323,6 +1470,7 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
                                               nat_action_info,
                                               ct_alg_ctl, now,
                                               &create_new_conn))) {
+            conn->tpid = tpid;
             create_new_conn = conn_update_state(ct, pkt, ctx, conn, now);
         }
         if (nat_action_info && !create_new_conn) {
@@ -1395,7 +1543,7 @@ conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
                   const struct ovs_key_ct_labels *setlabel,
                   ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper,
                   const struct nat_action_info_t *nat_action_info,
-                  long long now)
+                  long long now, uint32_t tpid)
 {
     ipf_preprocess_conntrack(ct->ipf, pkt_batch, now, dl_type, zone,
                              ct->hash_basis);
@@ -1417,7 +1565,8 @@ conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
             write_ct_md(packet, zone, NULL, NULL, NULL);
         } else {
             process_one(ct, packet, &ctx, zone, force, commit, now, setmark,
-                        setlabel, nat_action_info, tp_src, tp_dst, helper);
+                        setlabel, nat_action_info, tp_src, tp_dst, helper,
+                        tpid);
         }
     }
 
@@ -1540,14 +1689,14 @@ conntrack_clean(struct conntrack *ct, long long now)
  * - We want to reduce the number of wakeups and batch connection cleanup
  *   when the load is not very high.  CT_CLEAN_INTERVAL ensures that if we
  *   are coping with the current cleanup tasks, then we wait at least
- *   5 seconds to do further cleanup.
+ *   1 seconds to do further cleanup.
  *
  * - We don't want to keep the map locked too long, as we might prevent
  *   traffic from flowing.  CT_CLEAN_MIN_INTERVAL ensures that if cleanup is
  *   behind, there is at least some 200ms blocks of time when the map will be
  *   left alone, so the datapath can operate unhindered.
  */
-#define CT_CLEAN_INTERVAL 5000 /* 5 seconds */
+#define CT_CLEAN_INTERVAL 1000 /* 1 second */
 #define CT_CLEAN_MIN_INTERVAL 200  /* 0.2 seconds */
 
 static void *
diff --git a/lib/conntrack.h b/lib/conntrack.h
index b0d0fc8d9597..a55609bb2907 100644
--- a/lib/conntrack.h
+++ b/lib/conntrack.h
@@ -20,6 +20,7 @@
 #include <stdbool.h>
 
 #include "cmap.h"
+#include "ct-dpif.h"
 #include "latch.h"
 #include "odp-netlink.h"
 #include "openvswitch/hmap.h"
@@ -93,7 +94,7 @@ int conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
                       const struct ovs_key_ct_labels *setlabel,
                       ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper,
                       const struct nat_action_info_t *nat_action_info,
-                      long long now);
+                      long long now, uint32_t tpid);
 void conntrack_clear(struct dp_packet *packet);
 
 struct conntrack_dump {
@@ -111,6 +112,11 @@ struct conntrack_zone_limit {
     uint32_t zone_limit_seq; /* Used to disambiguate zone limit counts. */
 };
 
+struct timeout_policy {
+    struct hmap_node node;
+    struct ct_dpif_timeout_policy p;
+};
+
 enum {
     INVALID_ZONE = -2,
     DEFAULT_ZONE = -1, /* Default zone for zone limit management. */
@@ -139,5 +145,11 @@ struct conntrack_zone_limit zone_limit_get(struct conntrack *ct,
                                            int32_t zone);
 int zone_limit_update(struct conntrack *ct, int32_t zone, uint32_t limit);
 int zone_limit_delete(struct conntrack *ct, uint16_t zone);
+int timeout_policy_update(struct conntrack *ct, struct timeout_policy *tp);
+int timeout_policy_delete(struct conntrack *ct, uint32_t tpid);
+struct timeout_policy *timeout_policy_get(struct conntrack *ct, int32_t tpid);
+struct timeout_policy *timeout_policy_lookup(struct conntrack *ct,
+                                             int32_t tpid);
+
 
 #endif /* conntrack.h */
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index e456cc9befbe..41e805a832c8 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -7337,6 +7337,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
         bool commit = false;
         unsigned int left;
         uint16_t zone = 0;
+        uint32_t tpid = 0;
         const char *helper = NULL;
         const uint32_t *setmark = NULL;
         const struct ovs_key_ct_labels *setlabel = NULL;
@@ -7372,8 +7373,9 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
                  * netlink events. */
                 break;
             case OVS_CT_ATTR_TIMEOUT:
-                /* Userspace datapath does not support customized timeout
-                 * policy yet. */
+                if (!str_to_uint(nl_attr_get_string(b), 10, &tpid)) {
+                    VLOG_WARN("Invalid tpid %s.", nl_attr_get_string(b));
+                }
                 break;
             case OVS_CT_ATTR_NAT: {
                 const struct nlattr *b_nest;
@@ -7459,7 +7461,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
         conntrack_execute(dp->conntrack, packets_, aux->flow->dl_type, force,
                           commit, zone, setmark, setlabel, aux->flow->tp_src,
                           aux->flow->tp_dst, helper, nat_action_info_ref,
-                          pmd->ctx.now / 1000);
+                          pmd->ctx.now / 1000, tpid);
         break;
     }
 
@@ -7693,6 +7695,64 @@ dpif_netdev_ct_del_limits(struct dpif *dpif OVS_UNUSED,
 }
 
 static int
+dpif_netdev_ct_set_timeout_policy(struct dpif *dpif,
+                                  const struct ct_dpif_timeout_policy *dpif_tp)
+{
+    struct timeout_policy tp;
+    struct dp_netdev *dp;
+    int err = 0;
+
+    dp = get_dp_netdev(dpif);
+    memcpy(&tp.p, dpif_tp, sizeof tp.p);
+    err = timeout_policy_update(dp->conntrack, &tp);
+    return err;
+}
+
+static int
+dpif_netdev_ct_get_timeout_policy(struct dpif *dpif, uint32_t tp_id,
+                                  struct ct_dpif_timeout_policy *dpif_tp)
+{
+    struct timeout_policy *tp;
+    struct dp_netdev *dp;
+    int err = 0;
+
+    dp = get_dp_netdev(dpif);
+    tp = timeout_policy_get(dp->conntrack, tp_id);
+    if (!tp) {
+        return EINVAL;
+    }
+    memcpy(dpif_tp, &tp->p, sizeof tp->p);
+    return err;
+}
+
+static int
+dpif_netdev_ct_del_timeout_policy(struct dpif *dpif,
+                                  uint32_t tp_id)
+{
+    struct dp_netdev *dp;
+    int err = 0;
+
+    dp = get_dp_netdev(dpif);
+    err = timeout_policy_delete(dp->conntrack, tp_id);
+    return err;
+}
+
+static int
+dpif_netdev_ct_get_timeout_policy_name(struct dpif *dpif OVS_UNUSED,
+                                       uint32_t tp_id,
+                                       uint16_t dl_type OVS_UNUSED,
+                                       uint8_t nw_proto OVS_UNUSED,
+                                       char **tp_name, bool *is_generic)
+{
+    struct ds ds = DS_EMPTY_INITIALIZER;
+
+    ds_put_format(&ds, "%"PRIu32, tp_id);
+    *tp_name = ds_steal_cstr(&ds);
+    *is_generic = true;
+    return 0;
+}
+
+static int
 dpif_netdev_ipf_set_enabled(struct dpif *dpif, bool v6, bool enable)
 {
     struct dp_netdev *dp = get_dp_netdev(dpif);
@@ -7802,13 +7862,13 @@ const struct dpif_class dpif_netdev_class = {
     dpif_netdev_ct_set_limits,
     dpif_netdev_ct_get_limits,
     dpif_netdev_ct_del_limits,
-    NULL,                       /* ct_set_timeout_policy */
-    NULL,                       /* ct_get_timeout_policy */
-    NULL,                       /* ct_del_timeout_policy */
+    dpif_netdev_ct_set_timeout_policy,
+    dpif_netdev_ct_get_timeout_policy,
+    dpif_netdev_ct_del_timeout_policy,
     NULL,                       /* ct_timeout_policy_dump_start */
     NULL,                       /* ct_timeout_policy_dump_next */
     NULL,                       /* ct_timeout_policy_dump_done */
-    NULL,                       /* ct_get_timeout_policy_name */
+    dpif_netdev_ct_get_timeout_policy_name,
     dpif_netdev_ipf_set_enabled,
     dpif_netdev_ipf_set_min_frag,
     dpif_netdev_ipf_set_max_nfrags,
diff --git a/tests/system-traffic.at b/tests/system-traffic.at
index 4a39c929c207..a61c934aaf2f 100644
--- a/tests/system-traffic.at
+++ b/tests/system-traffic.at
@@ -3301,8 +3301,10 @@ udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=
 AT_CHECK([ovs-appctl dpctl/flush-conntrack])
 
 dnl Shorten the udp_single and icmp_first timeout in zone 5
+dnl Userspace datapath uses udp_first and icmp_reply, and
+dnl kernel datapath uses udp_single and icmp_first
 VSCTL_ADD_DATAPATH_TABLE()
-AT_CHECK([ovs-vsctl add-zone-tp $DP_TYPE zone=5 udp_single=3 icmp_first=3])
+AT_CHECK([ovs-vsctl add-zone-tp $DP_TYPE zone=5 udp_first=3 udp_single=3 icmp_first=3 icmp_reply=3])
 
 dnl Send ICMP and UDP traffic
 NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl
diff --git a/tests/system-userspace-macros.at b/tests/system-userspace-macros.at
index ba7f4102f494..72c84b9c7c82 100644
--- a/tests/system-userspace-macros.at
+++ b/tests/system-userspace-macros.at
@@ -99,12 +99,8 @@ m4_define([CHECK_CONNTRACK_NAT])
 # CHECK_CONNTRACK_TIMEOUT()
 #
 # Perform requirements checks for running conntrack customized timeout tests.
-* The userspace datapath does not support this feature yet.
 #
-m4_define([CHECK_CONNTRACK_TIMEOUT],
-[
-    AT_SKIP_IF([:])
-])
+m4_define([CHECK_CONNTRACK_TIMEOUT])
 
 # CHECK_CT_DPIF_SET_GET_MAXCONNS()
 #
diff --git a/tests/test-conntrack.c b/tests/test-conntrack.c
index f77ee75e38df..e7c73220aef5 100644
--- a/tests/test-conntrack.c
+++ b/tests/test-conntrack.c
@@ -90,7 +90,7 @@ ct_thread_main(void *aux_)
     ovs_barrier_block(&barrier);
     for (i = 0; i < n_pkts; i += batch_size) {
         conntrack_execute(ct, pkt_batch, dl_type, false, true, 0, NULL, NULL,
-                          0, 0, NULL, NULL, now);
+                          0, 0, NULL, NULL, now, 0);
     }
     ovs_barrier_block(&barrier);
     destroy_packets(pkt_batch);
@@ -174,7 +174,7 @@ pcap_batch_execute_conntrack(struct conntrack *ct_,
 
         if (flow.dl_type != dl_type) {
             conntrack_execute(ct_, &new_batch, dl_type, false, true, 0,
-                              NULL, NULL, 0, 0, NULL, NULL, now);
+                              NULL, NULL, 0, 0, NULL, NULL, now, 0);
             dp_packet_batch_init(&new_batch);
         }
         dp_packet_batch_add(&new_batch, packet);
@@ -182,7 +182,7 @@ pcap_batch_execute_conntrack(struct conntrack *ct_,
 
     if (!dp_packet_batch_is_empty(&new_batch)) {
         conntrack_execute(ct_, &new_batch, dl_type, false, true, 0, NULL, NULL,
-                          0, 0, NULL, NULL, now);
+                          0, 0, NULL, NULL, now, 0);
     }
 
 }
-- 
2.7.4



More information about the dev mailing list