[ovs-dev] [PATCHv4 2/2] userspace: Add conntrack timeout policy support.
Yi-Hung Wei
yihung.wei at gmail.com
Tue Apr 28 18:11:00 UTC 2020
On Mon, Apr 27, 2020 at 8:42 AM William Tu <u9012063 at gmail.com> wrote:
>
> Commit 1f1613183733 ("ct-dpif, dpif-netlink: Add conntrack timeout
> policy support") adds conntrack timeout policy for kernel datapath.
> This patch enables support for the userspace datapath. I tested
> using the 'make check-system-userspace' which checks the timeout
> policies for ICMP and UDP cases.
>
> Signed-off-by: William Tu <u9012063 at gmail.com>
> ---
> v4: address feedback from Yi-Hung
> - move default policy value to lib/conntrack-tp.c
> - separate icmp bug patch
> - refactor and fix include issues
> - fix the clang lock analysis annotation
> - keep clean interval to 5 seconds
> - improve tests in system-traffic.at
> - travis: https://travis-ci.org/github/williamtu/ovs-travis/builds/680158645
> ---
Thanks for v4. I only have a few minor comments below.
Thanks,
-Yi-Hung
> +++ b/lib/conntrack-tp.c
> @@ -0,0 +1,301 @@
> +/*
> + * Copyright (c) 2020 VMware, Inc.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + * http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#include <config.h>
> +
> +#include "conntrack-private.h"
> +#include "conntrack-tp.h"
> +#include "ct-dpif.h"
> +#include "openvswitch/vlog.h"
> +
> +VLOG_DEFINE_THIS_MODULE(conntrack_tp);
> +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
> +
> +static const char *ct_timeout_str[] = {
> +#define CT_TIMEOUT(NAME) #NAME,
> + CT_TIMEOUTS
> +#undef CT_TIMEOUT
> +};
> +
> +/* Default timeout policy in seconds. */
> +static unsigned int ct_dpif_netdev_tp_def[] = {
> + [CT_DPIF_TP_ATTR_TCP_SYN_SENT] = 30,
> + [CT_DPIF_TP_ATTR_TCP_SYN_RECV] = 30,
> + [CT_DPIF_TP_ATTR_TCP_ESTABLISHED] = 24 * 60 * 60,
> + [CT_DPIF_TP_ATTR_TCP_FIN_WAIT] = 15 * 60,
> + [CT_DPIF_TP_ATTR_TCP_TIME_WAIT] = 45,
> + [CT_DPIF_TP_ATTR_TCP_CLOSE] = 30,
> + [CT_DPIF_TP_ATTR_UDP_FIRST] = 60,
> + [CT_DPIF_TP_ATTR_UDP_SINGLE] = 60,
> + [CT_DPIF_TP_ATTR_UDP_MULTIPLE] = 30,
> + [CT_DPIF_TP_ATTR_ICMP_FIRST] = 60,
> + [CT_DPIF_TP_ATTR_ICMP_REPLY] = 30,
> +};
> +
> +static struct timeout_policy *
> +timeout_policy_lookup(struct conntrack *ct, int32_t tp_id)
> + OVS_REQUIRES(ct->ct_lock)
> +{
> + struct timeout_policy *tp;
> + uint32_t hash;
> +
> + hash = hash_int(tp_id, ct->hash_basis);
> + HMAP_FOR_EACH_IN_BUCKET (tp, node, hash, &ct->timeout_policies) {
> + if (tp->policy.id == tp_id) {
> + return tp;
> + }
> + }
> + return NULL;
> +}
> +
> +struct timeout_policy *
> +timeout_policy_get(struct conntrack *ct, int32_t tp_id)
> +{
> + struct timeout_policy *tp;
> +
> + ovs_mutex_lock(&ct->ct_lock);
> + tp = timeout_policy_lookup(ct, tp_id);
> + if (!tp) {
> + ovs_mutex_unlock(&ct->ct_lock);
> + return NULL;
> + }
> +
> + ovs_mutex_unlock(&ct->ct_lock);
> + return tp;
> +}
> +
> +static void
> +update_existing_tp(struct timeout_policy *tp_dst,
> + const struct timeout_policy *tp_src)
> +{
> + struct ct_dpif_timeout_policy *dst;
> + const struct ct_dpif_timeout_policy *src;
> + int i;
> +
> + dst = &tp_dst->policy;
> + src = &tp_src->policy;
> +
> + /* Set the value and present bit to dst if present
> + * bit in src is set.
> + */
> + for (i = 0; i < ARRAY_SIZE(dst->attrs); i++) {
> + if (src->present & (1 << i)) {
> + dst->attrs[i] = src->attrs[i];
> + dst->present |= (1 << i);
> + }
> + }
> +}
> +
> +static void
> +init_default_tp(struct timeout_policy *tp, uint32_t tp_id)
> +{
> + tp->policy.id = tp_id;
> + /* Initialize the timeout value to default, but not
> + * setting the present bit.
> + */
> + tp->policy.present = 0;
> + memcpy(tp->policy.attrs, ct_dpif_netdev_tp_def,
> + sizeof tp->policy.attrs);
> +}
> +
> +static void
> +timeout_policy_create(struct conntrack *ct,
> + struct timeout_policy *new_tp)
> + OVS_REQUIRES(ct->ct_lock)
> +{
> + uint32_t tp_id = new_tp->policy.id;
> + struct timeout_policy *tp;
> + uint32_t hash;
> +
> + tp = xzalloc(sizeof *tp);
> + init_default_tp(tp, tp_id);
> + update_existing_tp(tp, new_tp);
> + hash = hash_int(tp_id, ct->hash_basis);
> + hmap_insert(&ct->timeout_policies, &tp->node, hash);
> +}
> +
> +static void
> +timeout_policy_clean(struct conntrack *ct, struct timeout_policy *tp)
> + OVS_REQUIRES(ct->ct_lock)
> +{
> + hmap_remove(&ct->timeout_policies, &tp->node);
> + free(tp);
> +}
> +
> +static void
> +timeout_policy_delete__(struct conntrack *ct, uint32_t tp_id)
> + OVS_REQUIRES(ct->ct_lock)
> +{
> + struct timeout_policy *tp = timeout_policy_lookup(ct, tp_id);
> + if (tp) {
> + timeout_policy_clean(ct, tp);
> + } else {
> + VLOG_WARN_RL(&rl, "Failed to delete a non-existent timeout "
> + "policy: id=%d", tp_id);
Should we return ENOENT and propagate the error code back to
dpif_netdev_ct_del_timeout_policy()?
> + }
> +}
> +
> +int
> +timeout_policy_delete(struct conntrack *ct, uint32_t tp_id)
> +{
> + ovs_mutex_lock(&ct->ct_lock);
> + timeout_policy_delete__(ct, tp_id);
> + ovs_mutex_unlock(&ct->ct_lock);
> + return 0;
> +}
> +
> +void
> +timeout_policy_init(struct conntrack *ct)
> + OVS_REQUIRES(ct->ct_lock)
> +{
> + struct timeout_policy tp;
> +
> + hmap_init(&ct->timeout_policies);
> +
> + /* Create default timeout policy. */
> + memset(&tp, 0, sizeof tp);
> + tp.policy.id = DEFAULT_TP_ID;
> + timeout_policy_create(ct, &tp);
> +}
> +
> +int
> +timeout_policy_update(struct conntrack *ct,
> + struct timeout_policy *new_tp)
> +{
> + int err = 0;
Looks like err is not used in this function ?
> + uint32_t tp_id = new_tp->policy.id;
> +
> + ovs_mutex_lock(&ct->ct_lock);
> + struct timeout_policy *tp = timeout_policy_lookup(ct, tp_id);
> + if (tp) {
> + timeout_policy_delete__(ct, tp_id);
> + }
> + timeout_policy_create(ct, new_tp);
> + ovs_mutex_unlock(&ct->ct_lock);
> + return err;
> +}
> +
> +static enum ct_dpif_tp_attr
> +tm_to_ct_dpif_tp(enum ct_timeout tm)
> +{
> + switch (tm) {
> + case CT_TM_TCP_FIRST_PACKET:
> + return CT_DPIF_TP_ATTR_TCP_SYN_SENT;
> + case CT_TM_TCP_OPENING:
> + return CT_DPIF_TP_ATTR_TCP_SYN_RECV;
> + case CT_TM_TCP_ESTABLISHED:
> + return CT_DPIF_TP_ATTR_TCP_ESTABLISHED;
> + case CT_TM_TCP_CLOSING:
> + return CT_DPIF_TP_ATTR_TCP_FIN_WAIT;
> + case CT_TM_TCP_FIN_WAIT:
> + return CT_DPIF_TP_ATTR_TCP_TIME_WAIT;
> + case CT_TM_TCP_CLOSED:
> + return CT_DPIF_TP_ATTR_TCP_CLOSE;
> + case CT_TM_OTHER_FIRST:
> + return CT_DPIF_TP_ATTR_UDP_FIRST;
> + case CT_TM_OTHER_BIDIR:
> + return CT_DPIF_TP_ATTR_UDP_SINGLE;
> + case CT_TM_OTHER_MULTIPLE:
> + return CT_DPIF_TP_ATTR_UDP_MULTIPLE;
I think we would like to map CT_TM_OTHER_BIDIR to
CT_DPIF_TP_ATTR_UDP_MULTIPLE, and CT_TM_OTHER_MULTIPLE to
CT_DPIF_TP_ATTR_UDP_SINGLE.
Here is the corresponding timeout definitions in ovs-vswitchd.conf.db(5),
timeouts : udp_single: optional integer, in range 0 to 4,294,967,295
The timeout of the connection when conntrack only seen UDP
packet from the
source host, but the destination host has never sent one back.
timeouts : udp_multiple: optional integer, in range 0 to 4,294,967,295
The timeout of the connection when UDP packets have been
seen in both
directions.
> + case CT_TM_ICMP_FIRST:
> + return CT_DPIF_TP_ATTR_ICMP_FIRST;
> + case CT_TM_ICMP_REPLY:
> + return CT_DPIF_TP_ATTR_ICMP_REPLY;
> + case N_CT_TM:
> + default:
> + OVS_NOT_REACHED();
> + break;
> + }
> + OVS_NOT_REACHED();
> + return CT_DPIF_TP_ATTR_MAX;
> +}
> +
> +static void
> +conn_update_expiration__(struct conntrack *ct, struct conn *conn,
> + enum ct_timeout tm, long long now,
> + uint32_t tp_value)
> + OVS_REQUIRES(conn->lock)
> +{
> + ovs_mutex_unlock(&conn->lock);
> +
> + ovs_mutex_lock(&ct->ct_lock);
> + ovs_mutex_lock(&conn->lock);
> + if (!conn->cleaned) {
> + conn->expiration = now + tp_value * 1000;
> + ovs_list_remove(&conn->exp_node);
> + ovs_list_push_back(&ct->exp_lists[tm], &conn->exp_node);
> + }
> + ovs_mutex_unlock(&conn->lock);
> + ovs_mutex_unlock(&ct->ct_lock);
> +
> + ovs_mutex_lock(&conn->lock);
> +}
> +
> +/* The conn entry lock must be held on entry and exit. */
> +void
> +conn_update_expiration(struct conntrack *ct, struct conn *conn,
> + enum ct_timeout tm, long long now)
> + OVS_REQUIRES(conn->lock)
> +{
> + struct timeout_policy *tp;
> + uint32_t val;
> +
> + ovs_mutex_lock(&ct->ct_lock);
> + tp = timeout_policy_lookup(ct, conn->tp_id);
> + if (tp) {
> + val = tp->policy.attrs[tm_to_ct_dpif_tp(tm)];
> + } else {
> + val = ct_dpif_netdev_tp_def[tm_to_ct_dpif_tp(tm)];
> + }
> + ovs_mutex_unlock(&ct->ct_lock);
> +
> + VLOG_DBG_RL(&rl, "Update timeout %s zone=%u with policy id=%d "
> + "val=%u sec.",
> + ct_timeout_str[tm], conn->key.zone, conn->tp_id, val);
> +
> + conn_update_expiration__(ct, conn, tm, now, val);
> +}
> +
> +static void
> +conn_init_expiration__(struct conntrack *ct, struct conn *conn,
> + enum ct_timeout tm, long long now,
> + uint32_t tp_value)
> +{
> + conn->expiration = now + tp_value * 1000;
> + ovs_list_push_back(&ct->exp_lists[tm], &conn->exp_node);
> +}
> +
> +/* ct_lock must be held. */
> +void
> +conn_init_expiration(struct conntrack *ct, struct conn *conn,
> + enum ct_timeout tm, long long now)
> + OVS_REQUIRES(ct->ct_lock)
> +{
> + struct timeout_policy *tp;
> + uint32_t val;
> +
> + tp = timeout_policy_lookup(ct, conn->tp_id);
> + if (tp) {
> + val = tp->policy.attrs[tm_to_ct_dpif_tp(tm)];
> + } else {
> + val = ct_dpif_netdev_tp_def[tm_to_ct_dpif_tp(tm)];
> + }
> +
> + VLOG_DBG_RL(&rl, "Init timeout %s zone=%u with policy id=%d val=%u sec.",
> + ct_timeout_str[tm], conn->key.zone, conn->tp_id, val);
> +
> + conn_init_expiration__(ct, conn, tm, now, val);
> +}
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index ef14e83b5f06..5b3b97ae6beb 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -36,6 +36,7 @@
> #include "bitmap.h"
> #include "cmap.h"
> #include "conntrack.h"
> +#include "conntrack-tp.h"
> #include "coverage.h"
> #include "ct-dpif.h"
> #include "csum.h"
> @@ -7342,6 +7343,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
> bool commit = false;
> unsigned int left;
> uint16_t zone = 0;
> + uint32_t tp_id = 0;
> const char *helper = NULL;
> const uint32_t *setmark = NULL;
> const struct ovs_key_ct_labels *setlabel = NULL;
> @@ -7377,8 +7379,11 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
> * netlink events. */
> break;
> case OVS_CT_ATTR_TIMEOUT:
> - /* Userspace datapath does not support customized timeout
> - * policy yet. */
> + if (!str_to_uint(nl_attr_get_string(b), 10, &tp_id)) {
> + VLOG_WARN("Invalid Timeout Policy ID: %s.",
> + nl_attr_get_string(b));
> + tp_id = DEFAULT_TP_ID;
> + }
> break;
> case OVS_CT_ATTR_NAT: {
> const struct nlattr *b_nest;
> @@ -7464,7 +7469,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
> conntrack_execute(dp->conntrack, packets_, aux->flow->dl_type, force,
> commit, zone, setmark, setlabel, aux->flow->tp_src,
> aux->flow->tp_dst, helper, nat_action_info_ref,
> - pmd->ctx.now / 1000);
> + pmd->ctx.now / 1000, tp_id);
> break;
> }
>
> @@ -7698,6 +7703,62 @@ dpif_netdev_ct_del_limits(struct dpif *dpif OVS_UNUSED,
> }
>
> static int
> +dpif_netdev_ct_set_timeout_policy(struct dpif *dpif,
> + const struct ct_dpif_timeout_policy *dpif_tp)
> +{
> + struct timeout_policy tp;
> + struct dp_netdev *dp;
> +
> + dp = get_dp_netdev(dpif);
> + memcpy(&tp.policy, dpif_tp, sizeof tp.policy);
> + return timeout_policy_update(dp->conntrack, &tp);
> +}
> +
> +static int
> +dpif_netdev_ct_get_timeout_policy(struct dpif *dpif, uint32_t tp_id,
> + struct ct_dpif_timeout_policy *dpif_tp)
> +{
> + struct timeout_policy *tp;
> + struct dp_netdev *dp;
> + int err = 0;
> +
> + dp = get_dp_netdev(dpif);
> + tp = timeout_policy_get(dp->conntrack, tp_id);
> + if (!tp) {
> + return EINVAL;
I would suggest to return ENOENT here.
> + }
> + memcpy(dpif_tp, &tp->policy, sizeof tp->policy);
> + return err;
> +}
> +
> +static int
> +dpif_netdev_ct_del_timeout_policy(struct dpif *dpif,
> + uint32_t tp_id)
> +{
> + struct dp_netdev *dp;
> + int err = 0;
> +
> + dp = get_dp_netdev(dpif);
> + err = timeout_policy_delete(dp->conntrack, tp_id);
> + return err;
> +}
> +
> +static int
> +dpif_netdev_ct_get_timeout_policy_name(struct dpif *dpif OVS_UNUSED,
> + uint32_t tp_id,
> + uint16_t dl_type OVS_UNUSED,
> + uint8_t nw_proto OVS_UNUSED,
> + char **tp_name, bool *is_generic)
> +{
> + struct ds ds = DS_EMPTY_INITIALIZER;
> +
> + ds_put_format(&ds, "%"PRIu32, tp_id);
> + *tp_name = ds_steal_cstr(&ds);
> + *is_generic = true;
> + return 0;
> +}
> +
> +static int
> dpif_netdev_ipf_set_enabled(struct dpif *dpif, bool v6, bool enable)
> {
> struct dp_netdev *dp = get_dp_netdev(dpif);
More information about the dev
mailing list