[ovs-dev] [patch v2] conntrack: Support zone limits.

Darrell Ball dlu998 at gmail.com
Tue Dec 3 05:06:48 UTC 2019


Signed-off-by: Darrell Ball <dlu998 at gmail.com>
---

v2: Address review comment from Ben; one involves creating an
    admit zone in the connection entry to track the zone used
    for zone limit accounting when the entry was created and use
    that zone at cleanup time accounting.

    Updated dpctl.man.
    Fixed a bug in zone_limit_get() to return default zone by default..
    Fixed a parameter in zone_limit_delete().
    Some small cleanups.

 Documentation/faq/releases.rst   |   2 +-
 NEWS                             |   1 +
 lib/conntrack-private.h          |   2 +
 lib/conntrack.c                  | 134 +++++++++++++++++++++++++++++++++++++++
 lib/conntrack.h                  |  17 +++++
 lib/dpctl.man                    |   8 +--
 lib/dpif-netdev.c                |  88 ++++++++++++++++++++++++-
 tests/system-kmod-macros.at      |   7 --
 tests/system-traffic.at          |   1 -
 tests/system-userspace-macros.at |   9 ---
 10 files changed, 243 insertions(+), 26 deletions(-)

diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst
index e02dda1..4072c99 100644
--- a/Documentation/faq/releases.rst
+++ b/Documentation/faq/releases.rst
@@ -118,7 +118,7 @@ Q: Are all features available with all datapaths?
     Connection tracking             4.3            2.5          2.6      YES
     Conntrack Fragment Reass.       4.3            2.6          2.10     YES
     Conntrack Timeout Policies      5.2            2.12         NO       NO
-    Conntrack Zone Limit            4.18           2.10         NO       YES
+    Conntrack Zone Limit            4.18           2.10         2.13     YES
     Conntrack NAT                   4.6            2.6          2.8      YES
     Tunnel - LISP                   NO             2.11         NO       NO
     Tunnel - STT                    NO             2.4          NO       YES
diff --git a/NEWS b/NEWS
index 222280b..17f92ba 100644
--- a/NEWS
+++ b/NEWS
@@ -7,6 +7,7 @@ Post-v2.12.0
    - Userspace datapath:
      * Add option to enable, disable and query TCP sequence checking in
        conntrack.
+     * Add support for conntrack zone limits.
    - AF_XDP:
      * New option 'use-need-wakeup' for netdev-afxdp to control enabling
        of corresponding 'need_wakeup' flag in AF_XDP rings.  Enabled by default
diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
index 590f139..7d36eac 100644
--- a/lib/conntrack-private.h
+++ b/lib/conntrack-private.h
@@ -105,6 +105,7 @@ struct conn {
     long long expiration;
     uint32_t mark;
     int seq_skew;
+    int32_t admit_zone; /* The zone for managing zone limit counts. */
     bool seq_skew_dir; /* TCP sequence skew direction due to NATTing of FTP
                         * control messages; true if reply direction. */
     bool cleaned; /* True if cleaned from expiry lists. */
@@ -155,6 +156,7 @@ struct conntrack {
     struct ovs_mutex ct_lock; /* Protects 2 following fields. */
     struct cmap conns OVS_GUARDED;
     struct ovs_list exp_lists[N_CT_TM] OVS_GUARDED;
+    struct hmap zone_limits OVS_GUARDED;
     uint32_t hash_basis; /* Salt for hashing a connection key. */
     pthread_t clean_thread; /* Periodically cleans up connection tracker. */
     struct latch clean_thread_exit; /* To destroy the 'clean_thread'. */
diff --git a/lib/conntrack.c b/lib/conntrack.c
index df7b9fa..33d540a 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -76,6 +76,11 @@ enum ct_alg_ctl_type {
     CT_ALG_CTL_SIP,
 };
 
+struct zone_limit {
+    struct hmap_node node;
+    struct conntrack_zone_limit czl;
+};
+
 static bool conn_key_extract(struct conntrack *, struct dp_packet *,
                              ovs_be16 dl_type, struct conn_lookup_ctx *,
                              uint16_t zone);
@@ -305,6 +310,7 @@ conntrack_init(void)
     for (unsigned i = 0; i < ARRAY_SIZE(ct->exp_lists); i++) {
         ovs_list_init(&ct->exp_lists[i]);
     }
+    hmap_init(&ct->zone_limits);
     ovs_mutex_unlock(&ct->ct_lock);
 
     ct->hash_basis = random_uint32();
@@ -318,6 +324,110 @@ conntrack_init(void)
     return ct;
 }
 
+static uint32_t
+zone_key_hash(int32_t zone, uint32_t basis)
+{
+    size_t hash = hash_int((OVS_FORCE uint32_t) zone, basis);
+    return hash;
+}
+
+static struct zone_limit *
+zone_limit_lookup(struct conntrack *ct, int32_t zone)
+    OVS_REQUIRES(ct->ct_lock)
+{
+    uint32_t hash = zone_key_hash(zone, ct->hash_basis);
+    struct zone_limit *zl;
+    HMAP_FOR_EACH_IN_BUCKET (zl, node, hash, &ct->zone_limits) {
+        if (zl->czl.zone == zone) {
+            return zl;
+        }
+    }
+    return NULL;
+}
+
+static struct zone_limit *
+zone_limit_lookup_or_default(struct conntrack *ct, int32_t zone)
+    OVS_REQUIRES(ct->ct_lock)
+{
+    struct zone_limit *zl = zone_limit_lookup(ct, zone);
+    return zl ? zl : zone_limit_lookup(ct, DEFAULT_ZONE);
+}
+
+struct conntrack_zone_limit
+zone_limit_get(struct conntrack *ct, int32_t zone)
+{
+    ovs_mutex_lock(&ct->ct_lock);
+    struct conntrack_zone_limit czl = {DEFAULT_ZONE, 0, 0};
+    struct zone_limit *zl = zone_limit_lookup_or_default(ct, zone);
+    if (zl) {
+        czl = zl->czl;
+    }
+    ovs_mutex_unlock(&ct->ct_lock);
+    return czl;
+}
+
+static int
+zone_limit_create(struct conntrack *ct, int32_t zone, uint32_t limit)
+    OVS_REQUIRES(ct->ct_lock)
+{
+    if (zone >= DEFAULT_ZONE && zone <= MAX_ZONE) {
+        struct zone_limit *zl = xzalloc(sizeof *zl);
+        zl->czl.limit = limit;
+        zl->czl.zone = zone;
+        uint32_t hash = zone_key_hash(zone, ct->hash_basis);
+        hmap_insert(&ct->zone_limits, &zl->node, hash);
+        return 0;
+    } else {
+        return EINVAL;
+    }
+}
+
+int
+zone_limit_update(struct conntrack *ct, int32_t zone, uint32_t limit)
+{
+    int err = 0;
+    ovs_mutex_lock(&ct->ct_lock);
+    struct zone_limit *zl = zone_limit_lookup(ct, zone);
+    if (zl) {
+        zl->czl.limit = limit;
+        VLOG_INFO("Changed zone limit of %u for zone %d", limit, zone);
+    } else {
+        err = zone_limit_create(ct, zone, limit);
+        if (!err) {
+            VLOG_INFO("Created zone limit of %u for zone %d", limit, zone);
+        } else {
+            VLOG_WARN("Request to create zone limit for invalid zone %d",
+                      zone);
+        }
+    }
+    ovs_mutex_unlock(&ct->ct_lock);
+    return err;
+}
+
+static void
+zone_limit_clean(struct conntrack *ct, struct zone_limit *zl)
+    OVS_REQUIRES(ct->ct_lock)
+{
+    hmap_remove(&ct->zone_limits, &zl->node);
+    free(zl);
+}
+
+int
+zone_limit_delete(struct conntrack *ct, uint16_t zone)
+{
+    ovs_mutex_lock(&ct->ct_lock);
+    struct zone_limit *zl = zone_limit_lookup(ct, zone);
+    if (zl) {
+        zone_limit_clean(ct, zl);
+        VLOG_INFO("Deleted zone limit for zone %d", zone);
+    } else {
+        VLOG_INFO("Attempted delete of non-existent zone limit: zone %d",
+                  zone);
+    }
+    ovs_mutex_unlock(&ct->ct_lock);
+    return 0;
+}
+
 static void
 conn_clean_cmn(struct conntrack *ct, struct conn *conn)
     OVS_REQUIRES(ct->ct_lock)
@@ -328,6 +438,11 @@ conn_clean_cmn(struct conntrack *ct, struct conn *conn)
 
     uint32_t hash = conn_key_hash(&conn->key, ct->hash_basis);
     cmap_remove(&ct->conns, &conn->cm_node, hash);
+
+    struct zone_limit *zl = zone_limit_lookup(ct, conn->admit_zone);
+    if (zl) {
+        zl->czl.count--;
+    }
 }
 
 /* Must be called with 'conn' of 'conn_type' CT_CONN_TYPE_DEFAULT.  Also
@@ -378,6 +493,13 @@ conntrack_destroy(struct conntrack *ct)
         conn_clean_one(ct, conn);
     }
     cmap_destroy(&ct->conns);
+
+    struct zone_limit *zl;
+    HMAP_FOR_EACH_POP (zl, node, &ct->zone_limits) {
+        free(zl);
+    }
+    hmap_destroy(&ct->zone_limits);
+
     ovs_mutex_unlock(&ct->ct_lock);
     ovs_mutex_destroy(&ct->ct_lock);
 
@@ -850,6 +972,12 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
     }
 
     if (commit) {
+        struct zone_limit *zl = zone_limit_lookup_or_default(ct,
+                                                             ctx->key.zone);
+        if (zl && zl->czl.count >= zl->czl.limit) {
+            return nc;
+        }
+
         unsigned int n_conn_limit;
         atomic_read_relaxed(&ct->n_conn_limit, &n_conn_limit);
         if (atomic_count_get(&ct->n_conn) >= n_conn_limit) {
@@ -915,6 +1043,12 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
         cmap_insert(&ct->conns, &nc->cm_node, ctx->hash);
         atomic_count_inc(&ct->n_conn);
         ctx->conn = nc; /* For completeness. */
+        if (zl) {
+            nc->admit_zone = zl->czl.zone;
+            zl->czl.count++;
+        } else {
+            nc->admit_zone = INVALID_ZONE;
+        }
     }
 
     return nc;
diff --git a/lib/conntrack.h b/lib/conntrack.h
index 75409ba..20e41ba 100644
--- a/lib/conntrack.h
+++ b/lib/conntrack.h
@@ -104,6 +104,19 @@ struct conntrack_dump {
     uint16_t zone;
 };
 
+struct conntrack_zone_limit {
+    int32_t zone;
+    uint32_t limit;
+    uint32_t count;
+};
+
+enum {
+    INVALID_ZONE = -2,
+    DEFAULT_ZONE = -1, /* Default zone for zone limit management. */
+    MIN_ZONE = 0,
+    MAX_ZONE = 0xFFFF,
+};
+
 struct ct_dpif_entry;
 struct ct_dpif_tuple;
 
@@ -121,5 +134,9 @@ int conntrack_get_nconns(struct conntrack *ct, uint32_t *nconns);
 int conntrack_set_tcp_seq_chk(struct conntrack *ct, bool enabled);
 bool conntrack_get_tcp_seq_chk(struct conntrack *ct);
 struct ipf *conntrack_ipf_ctx(struct conntrack *ct);
+struct conntrack_zone_limit zone_limit_get(struct conntrack *ct,
+                                           int32_t zone);
+int zone_limit_update(struct conntrack *ct, int32_t zone, uint32_t limit);
+int zone_limit_delete(struct conntrack *ct, uint16_t zone);
 
 #endif /* conntrack.h */
diff --git a/lib/dpctl.man b/lib/dpctl.man
index 6529bae..090c3b9 100644
--- a/lib/dpctl.man
+++ b/lib/dpctl.man
@@ -346,18 +346,16 @@ particular zone is not specified in the datapath, it defaults to the
 default per-zone limit.  A default zone may be specified with the
 \fBdefault=\fIdefault_limit\fR argument.   Initially, the default
 per-zone limit is unlimited.  An unlimited number of entries may be set
-with \fB0\fR limit.  Only supported for Linux kernel datapath.
+with \fB0\fR limit.
 .
 .TP
 \*(DX\fBct\-del\-limits\fR [\fIdp\fR] \fBzone=\fIzone[,zone]\fR...
 Deletes the connection tracking limit for \fIzone\fR.  Multiple zones may
-be specified with a comma-separated list.  Only supported for Linux
-kernel datapath.
+be specified with a comma-separated list.
 .
 .TP
 \*(DX\fBct\-get\-limits\fR [\fIdp\fR] [\fBzone=\fIzone\fR[\fB,\fIzone\fR]...]
 Retrieves the maximum allowed number of connections and current
 counts per-zone.  If \fIzone\fR is given, only the specified zone(s) are
 printed.  If no zones are specified, all the zone limits and counts are
-provided.  The command always displays the default zone limit.  Only
-supported for Linux kernel datapath.
+provided.  The command always displays the default zone limit.
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 885a4df..1e54936 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -7471,6 +7471,88 @@ dpif_netdev_ct_get_tcp_seq_chk(struct dpif *dpif, bool *enabled)
 }
 
 static int
+dpif_netdev_ct_set_limits(struct dpif *dpif OVS_UNUSED,
+                           const uint32_t *default_limits,
+                           const struct ovs_list *zone_limits)
+{
+    int err = 0;
+    struct dp_netdev *dp = get_dp_netdev(dpif);
+    if (default_limits) {
+        err = zone_limit_update(dp->conntrack, DEFAULT_ZONE, *default_limits);
+        if (err != 0) {
+            return err;
+        }
+    }
+
+    struct ct_dpif_zone_limit *zone_limit;
+    LIST_FOR_EACH (zone_limit, node, zone_limits) {
+        err = zone_limit_update(dp->conntrack, zone_limit->zone,
+                                zone_limit->limit);
+        if (err != 0) {
+            break;
+        }
+    }
+    return err;
+}
+
+static int
+dpif_netdev_ct_get_limits(struct dpif *dpif OVS_UNUSED,
+                           uint32_t *default_limit,
+                           const struct ovs_list *zone_limits_request,
+                           struct ovs_list *zone_limits_reply)
+{
+    struct dp_netdev *dp = get_dp_netdev(dpif);
+    struct conntrack_zone_limit czl;
+
+    czl = zone_limit_get(dp->conntrack, DEFAULT_ZONE);
+    if (czl.zone == DEFAULT_ZONE) {
+        *default_limit = czl.limit;
+    } else {
+        return EINVAL;
+    }
+
+    if (!ovs_list_is_empty(zone_limits_request)) {
+        struct ct_dpif_zone_limit *zone_limit;
+        LIST_FOR_EACH (zone_limit, node, zone_limits_request) {
+            czl = zone_limit_get(dp->conntrack, zone_limit->zone);
+            if (czl.zone == zone_limit->zone || czl.zone == DEFAULT_ZONE) {
+                ct_dpif_push_zone_limit(zone_limits_reply, zone_limit->zone,
+                                        czl.limit, czl.count);
+            } else {
+                return EINVAL;
+            }
+        }
+    } else {
+        for (int z = MIN_ZONE; z <= MAX_ZONE; z++) {
+            czl = zone_limit_get(dp->conntrack, z);
+            if (czl.zone == z) {
+                ct_dpif_push_zone_limit(zone_limits_reply, z, czl.limit,
+                                        czl.count);
+            }
+        }
+    }
+
+    return 0;
+}
+
+static int
+dpif_netdev_ct_del_limits(struct dpif *dpif OVS_UNUSED,
+                           const struct ovs_list *zone_limits)
+{
+    int err = 0;
+    struct dp_netdev *dp = get_dp_netdev(dpif);
+    struct ct_dpif_zone_limit *zone_limit;
+    LIST_FOR_EACH (zone_limit, node, zone_limits) {
+        err = zone_limit_delete(dp->conntrack, zone_limit->zone);
+        if (err != 0) {
+            break;
+        }
+    }
+
+    return err;
+}
+
+static int
 dpif_netdev_ipf_set_enabled(struct dpif *dpif, bool v6, bool enable)
 {
     struct dp_netdev *dp = get_dp_netdev(dpif);
@@ -7576,9 +7658,9 @@ const struct dpif_class dpif_netdev_class = {
     dpif_netdev_ct_get_nconns,
     dpif_netdev_ct_set_tcp_seq_chk,
     dpif_netdev_ct_get_tcp_seq_chk,
-    NULL,                       /* ct_set_limits */
-    NULL,                       /* ct_get_limits */
-    NULL,                       /* ct_del_limits */
+    dpif_netdev_ct_set_limits,
+    dpif_netdev_ct_get_limits,
+    dpif_netdev_ct_del_limits,
     NULL,                       /* ct_set_timeout_policy */
     NULL,                       /* ct_get_timeout_policy */
     NULL,                       /* ct_del_timeout_policy */
diff --git a/tests/system-kmod-macros.at b/tests/system-kmod-macros.at
index 9e89aec..daf66bd 100644
--- a/tests/system-kmod-macros.at
+++ b/tests/system-kmod-macros.at
@@ -110,13 +110,6 @@ m4_define([CHECK_CONNTRACK_TIMEOUT],
     on_exit 'modprobe -r nfnetlink_cttimeout'
 ])
 
-# CHECK_CT_DPIF_PER_ZONE_LIMIT()
-#
-# Perform requirements checks for running ovs-dpctl ct-[set|get|del]-limits per
-# zone. The kernel datapath does support this feature. Will remove this check
-# after both kernel and userspace datapath support it.
-m4_define([CHECK_CT_DPIF_PER_ZONE_LIMIT])
-
 # CHECK_CT_DPIF_SET_GET_MAXCONNS()
 #
 # Perform requirements checks for running ovs-dpctl ct-set-maxconns or
diff --git a/tests/system-traffic.at b/tests/system-traffic.at
index cde7429..0fb7aac 100644
--- a/tests/system-traffic.at
+++ b/tests/system-traffic.at
@@ -3574,7 +3574,6 @@ AT_CLEANUP
 
 AT_SETUP([conntrack - limit by zone])
 CHECK_CONNTRACK()
-CHECK_CT_DPIF_PER_ZONE_LIMIT()
 OVS_TRAFFIC_VSWITCHD_START()
 
 ADD_NAMESPACES(at_ns0, at_ns1)
diff --git a/tests/system-userspace-macros.at b/tests/system-userspace-macros.at
index a419f30..ba7f410 100644
--- a/tests/system-userspace-macros.at
+++ b/tests/system-userspace-macros.at
@@ -106,15 +106,6 @@ m4_define([CHECK_CONNTRACK_TIMEOUT],
     AT_SKIP_IF([:])
 ])
 
-# CHECK_CT_DPIF_PER_ZONE_LIMIT()
-#
-# Perform requirements checks for running ovs-dpctl ct-[set|get|del]-limits per
-# zone. The userspace datapath does not support this feature yet.
-m4_define([CHECK_CT_DPIF_PER_ZONE_LIMIT],
-[
-    AT_SKIP_IF([:])
-])
-
 # CHECK_CT_DPIF_SET_GET_MAXCONNS()
 #
 # Perform requirements checks for running ovs-dpctl ct-set-maxconns or
-- 
1.9.1



More information about the dev mailing list