[ovs-dev] [PATCHv4 2/2] userspace: Add conntrack timeout policy support.

Yi-Hung Wei yihung.wei at gmail.com
Tue Apr 28 18:11:00 UTC 2020


On Mon, Apr 27, 2020 at 8:42 AM William Tu <u9012063 at gmail.com> wrote:
>
> Commit 1f1613183733 ("ct-dpif, dpif-netlink: Add conntrack timeout
> policy support") adds conntrack timeout policy for kernel datapath.
> This patch enables support for the userspace datapath.  I tested
> using the 'make check-system-userspace' which checks the timeout
> policies for ICMP and UDP cases.
>
> Signed-off-by: William Tu <u9012063 at gmail.com>
> ---
> v4: address feedback from Yi-Hung
>   - move default policy value to lib/conntrack-tp.c
>   - separate icmp bug patch
>   - refactor and fix include issues
>   - fix the clang lock analysis annotation
>   - keep clean interval to 5 seconds
>   - improve tests in system-traffic.at
>   - travis: https://travis-ci.org/github/williamtu/ovs-travis/builds/680158645
> ---

Thanks for v4. I only have a few minor comments below.

Thanks,

-Yi-Hung


> +++ b/lib/conntrack-tp.c
> @@ -0,0 +1,301 @@
> +/*
> + * Copyright (c) 2020 VMware, Inc.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#include <config.h>
> +
> +#include "conntrack-private.h"
> +#include "conntrack-tp.h"
> +#include "ct-dpif.h"
> +#include "openvswitch/vlog.h"
> +
> +VLOG_DEFINE_THIS_MODULE(conntrack_tp);
> +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
> +
> +static const char *ct_timeout_str[] = {
> +#define CT_TIMEOUT(NAME) #NAME,
> +    CT_TIMEOUTS
> +#undef CT_TIMEOUT
> +};
> +
> +/* Default timeout policy in seconds. */
> +static unsigned int ct_dpif_netdev_tp_def[] = {
> +    [CT_DPIF_TP_ATTR_TCP_SYN_SENT] = 30,
> +    [CT_DPIF_TP_ATTR_TCP_SYN_RECV] = 30,
> +    [CT_DPIF_TP_ATTR_TCP_ESTABLISHED] = 24 * 60 * 60,
> +    [CT_DPIF_TP_ATTR_TCP_FIN_WAIT] = 15 * 60,
> +    [CT_DPIF_TP_ATTR_TCP_TIME_WAIT] = 45,
> +    [CT_DPIF_TP_ATTR_TCP_CLOSE] = 30,
> +    [CT_DPIF_TP_ATTR_UDP_FIRST] = 60,
> +    [CT_DPIF_TP_ATTR_UDP_SINGLE] = 60,
> +    [CT_DPIF_TP_ATTR_UDP_MULTIPLE] = 30,
> +    [CT_DPIF_TP_ATTR_ICMP_FIRST] = 60,
> +    [CT_DPIF_TP_ATTR_ICMP_REPLY] = 30,
> +};
> +
> +static struct timeout_policy *
> +timeout_policy_lookup(struct conntrack *ct, int32_t tp_id)
> +    OVS_REQUIRES(ct->ct_lock)
> +{
> +    struct timeout_policy *tp;
> +    uint32_t hash;
> +
> +    hash = hash_int(tp_id, ct->hash_basis);
> +    HMAP_FOR_EACH_IN_BUCKET (tp, node, hash, &ct->timeout_policies) {
> +        if (tp->policy.id == tp_id) {
> +            return tp;
> +        }
> +    }
> +    return NULL;
> +}
> +
> +struct timeout_policy *
> +timeout_policy_get(struct conntrack *ct, int32_t tp_id)
> +{
> +    struct timeout_policy *tp;
> +
> +    ovs_mutex_lock(&ct->ct_lock);
> +    tp = timeout_policy_lookup(ct, tp_id);
> +    if (!tp) {
> +        ovs_mutex_unlock(&ct->ct_lock);
> +        return NULL;
> +    }
> +
> +    ovs_mutex_unlock(&ct->ct_lock);
> +    return tp;
> +}
> +
> +static void
> +update_existing_tp(struct timeout_policy *tp_dst,
> +                   const struct timeout_policy *tp_src)
> +{
> +    struct ct_dpif_timeout_policy *dst;
> +    const struct ct_dpif_timeout_policy *src;
> +    int i;
> +
> +    dst = &tp_dst->policy;
> +    src = &tp_src->policy;
> +
> +    /* Set the value and present bit to dst if present
> +     * bit in src is set.
> +     */
> +    for (i = 0; i < ARRAY_SIZE(dst->attrs); i++) {
> +        if (src->present & (1 << i)) {
> +            dst->attrs[i] = src->attrs[i];
> +            dst->present |= (1 << i);
> +        }
> +    }
> +}
> +
> +static void
> +init_default_tp(struct timeout_policy *tp, uint32_t tp_id)
> +{
> +    tp->policy.id = tp_id;
> +    /* Initialize the timeout value to default, but not
> +     * setting the present bit.
> +     */
> +    tp->policy.present = 0;
> +    memcpy(tp->policy.attrs, ct_dpif_netdev_tp_def,
> +           sizeof tp->policy.attrs);
> +}
> +
> +static void
> +timeout_policy_create(struct conntrack *ct,
> +                      struct timeout_policy *new_tp)
> +    OVS_REQUIRES(ct->ct_lock)
> +{
> +    uint32_t tp_id = new_tp->policy.id;
> +    struct timeout_policy *tp;
> +    uint32_t hash;
> +
> +    tp = xzalloc(sizeof *tp);
> +    init_default_tp(tp, tp_id);
> +    update_existing_tp(tp, new_tp);
> +    hash = hash_int(tp_id, ct->hash_basis);
> +    hmap_insert(&ct->timeout_policies, &tp->node, hash);
> +}
> +
> +static void
> +timeout_policy_clean(struct conntrack *ct, struct timeout_policy *tp)
> +    OVS_REQUIRES(ct->ct_lock)
> +{
> +    hmap_remove(&ct->timeout_policies, &tp->node);
> +    free(tp);
> +}
> +
> +static void
> +timeout_policy_delete__(struct conntrack *ct, uint32_t tp_id)
> +    OVS_REQUIRES(ct->ct_lock)
> +{
> +    struct timeout_policy *tp = timeout_policy_lookup(ct, tp_id);
> +    if (tp) {
> +        timeout_policy_clean(ct, tp);
> +    } else {
> +        VLOG_WARN_RL(&rl, "Failed to delete a non-existent timeout "
> +                     "policy: id=%d", tp_id);

Should we return ENOENT and propagate the error code back to
dpif_netdev_ct_del_timeout_policy()?


> +    }
> +}
> +
> +int
> +timeout_policy_delete(struct conntrack *ct, uint32_t tp_id)
> +{
> +    ovs_mutex_lock(&ct->ct_lock);
> +    timeout_policy_delete__(ct, tp_id);
> +    ovs_mutex_unlock(&ct->ct_lock);
> +    return 0;
> +}
> +
> +void
> +timeout_policy_init(struct conntrack *ct)
> +    OVS_REQUIRES(ct->ct_lock)
> +{
> +    struct timeout_policy tp;
> +
> +    hmap_init(&ct->timeout_policies);
> +
> +    /* Create default timeout policy. */
> +    memset(&tp, 0, sizeof tp);
> +    tp.policy.id = DEFAULT_TP_ID;
> +    timeout_policy_create(ct, &tp);
> +}
> +
> +int
> +timeout_policy_update(struct conntrack *ct,
> +                      struct timeout_policy *new_tp)
> +{
> +    int err = 0;

Looks like err is not used in this function ?


> +    uint32_t tp_id = new_tp->policy.id;
> +
> +    ovs_mutex_lock(&ct->ct_lock);
> +    struct timeout_policy *tp = timeout_policy_lookup(ct, tp_id);
> +    if (tp) {
> +        timeout_policy_delete__(ct, tp_id);
> +    }
> +    timeout_policy_create(ct, new_tp);
> +    ovs_mutex_unlock(&ct->ct_lock);
> +    return err;
> +}
> +
> +static enum ct_dpif_tp_attr
> +tm_to_ct_dpif_tp(enum ct_timeout tm)
> +{
> +    switch (tm) {
> +    case CT_TM_TCP_FIRST_PACKET:
> +        return CT_DPIF_TP_ATTR_TCP_SYN_SENT;
> +    case CT_TM_TCP_OPENING:
> +        return CT_DPIF_TP_ATTR_TCP_SYN_RECV;
> +    case CT_TM_TCP_ESTABLISHED:
> +        return CT_DPIF_TP_ATTR_TCP_ESTABLISHED;
> +    case CT_TM_TCP_CLOSING:
> +        return CT_DPIF_TP_ATTR_TCP_FIN_WAIT;
> +    case CT_TM_TCP_FIN_WAIT:
> +        return CT_DPIF_TP_ATTR_TCP_TIME_WAIT;
> +    case CT_TM_TCP_CLOSED:
> +        return CT_DPIF_TP_ATTR_TCP_CLOSE;
> +    case CT_TM_OTHER_FIRST:
> +        return CT_DPIF_TP_ATTR_UDP_FIRST;
> +    case CT_TM_OTHER_BIDIR:
> +        return CT_DPIF_TP_ATTR_UDP_SINGLE;
> +    case CT_TM_OTHER_MULTIPLE:
> +        return CT_DPIF_TP_ATTR_UDP_MULTIPLE;

I think we would like to map CT_TM_OTHER_BIDIR to
CT_DPIF_TP_ATTR_UDP_MULTIPLE, and CT_TM_OTHER_MULTIPLE to
CT_DPIF_TP_ATTR_UDP_SINGLE.

Here is the corresponding timeout definitions in ovs-vswitchd.conf.db(5),

timeouts : udp_single: optional integer, in range 0 to 4,294,967,295
       The timeout of the connection when conntrack only seen UDP
packet from the
       source host, but the destination host has never sent one back.

timeouts : udp_multiple: optional integer, in range 0 to 4,294,967,295
       The  timeout  of  the  connection  when UDP packets have been
seen in both
       directions.


> +    case CT_TM_ICMP_FIRST:
> +        return CT_DPIF_TP_ATTR_ICMP_FIRST;
> +    case CT_TM_ICMP_REPLY:
> +        return CT_DPIF_TP_ATTR_ICMP_REPLY;
> +    case N_CT_TM:
> +    default:
> +        OVS_NOT_REACHED();
> +        break;
> +    }
> +    OVS_NOT_REACHED();
> +    return CT_DPIF_TP_ATTR_MAX;
> +}
> +
> +static void
> +conn_update_expiration__(struct conntrack *ct, struct conn *conn,
> +                         enum ct_timeout tm, long long now,
> +                         uint32_t tp_value)
> +    OVS_REQUIRES(conn->lock)
> +{
> +    ovs_mutex_unlock(&conn->lock);
> +
> +    ovs_mutex_lock(&ct->ct_lock);
> +    ovs_mutex_lock(&conn->lock);
> +    if (!conn->cleaned) {
> +        conn->expiration = now + tp_value * 1000;
> +        ovs_list_remove(&conn->exp_node);
> +        ovs_list_push_back(&ct->exp_lists[tm], &conn->exp_node);
> +    }
> +    ovs_mutex_unlock(&conn->lock);
> +    ovs_mutex_unlock(&ct->ct_lock);
> +
> +    ovs_mutex_lock(&conn->lock);
> +}
> +
> +/* The conn entry lock must be held on entry and exit. */
> +void
> +conn_update_expiration(struct conntrack *ct, struct conn *conn,
> +                       enum ct_timeout tm, long long now)
> +    OVS_REQUIRES(conn->lock)
> +{
> +    struct timeout_policy *tp;
> +    uint32_t val;
> +
> +    ovs_mutex_lock(&ct->ct_lock);
> +    tp = timeout_policy_lookup(ct, conn->tp_id);
> +    if (tp) {
> +        val = tp->policy.attrs[tm_to_ct_dpif_tp(tm)];
> +    } else {
> +        val = ct_dpif_netdev_tp_def[tm_to_ct_dpif_tp(tm)];
> +    }
> +    ovs_mutex_unlock(&ct->ct_lock);
> +
> +    VLOG_DBG_RL(&rl, "Update timeout %s zone=%u with policy id=%d "
> +                "val=%u sec.",
> +                ct_timeout_str[tm], conn->key.zone, conn->tp_id, val);
> +
> +    conn_update_expiration__(ct, conn, tm, now, val);
> +}
> +
> +static void
> +conn_init_expiration__(struct conntrack *ct, struct conn *conn,
> +                       enum ct_timeout tm, long long now,
> +                       uint32_t tp_value)
> +{
> +    conn->expiration = now + tp_value * 1000;
> +    ovs_list_push_back(&ct->exp_lists[tm], &conn->exp_node);
> +}
> +
> +/* ct_lock must be held. */
> +void
> +conn_init_expiration(struct conntrack *ct, struct conn *conn,
> +                     enum ct_timeout tm, long long now)
> +    OVS_REQUIRES(ct->ct_lock)
> +{
> +    struct timeout_policy *tp;
> +    uint32_t val;
> +
> +    tp = timeout_policy_lookup(ct, conn->tp_id);
> +    if (tp) {
> +        val = tp->policy.attrs[tm_to_ct_dpif_tp(tm)];
> +    } else {
> +        val = ct_dpif_netdev_tp_def[tm_to_ct_dpif_tp(tm)];
> +    }
> +
> +    VLOG_DBG_RL(&rl, "Init timeout %s zone=%u with policy id=%d val=%u sec.",
> +                ct_timeout_str[tm], conn->key.zone, conn->tp_id, val);
> +
> +    conn_init_expiration__(ct, conn, tm, now, val);
> +}



> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index ef14e83b5f06..5b3b97ae6beb 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -36,6 +36,7 @@
>  #include "bitmap.h"
>  #include "cmap.h"
>  #include "conntrack.h"
> +#include "conntrack-tp.h"
>  #include "coverage.h"
>  #include "ct-dpif.h"
>  #include "csum.h"
> @@ -7342,6 +7343,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
>          bool commit = false;
>          unsigned int left;
>          uint16_t zone = 0;
> +        uint32_t tp_id = 0;
>          const char *helper = NULL;
>          const uint32_t *setmark = NULL;
>          const struct ovs_key_ct_labels *setlabel = NULL;
> @@ -7377,8 +7379,11 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
>                   * netlink events. */
>                  break;
>              case OVS_CT_ATTR_TIMEOUT:
> -                /* Userspace datapath does not support customized timeout
> -                 * policy yet. */
> +                if (!str_to_uint(nl_attr_get_string(b), 10, &tp_id)) {
> +                    VLOG_WARN("Invalid Timeout Policy ID: %s.",
> +                              nl_attr_get_string(b));
> +                    tp_id = DEFAULT_TP_ID;
> +                }
>                  break;
>              case OVS_CT_ATTR_NAT: {
>                  const struct nlattr *b_nest;
> @@ -7464,7 +7469,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
>          conntrack_execute(dp->conntrack, packets_, aux->flow->dl_type, force,
>                            commit, zone, setmark, setlabel, aux->flow->tp_src,
>                            aux->flow->tp_dst, helper, nat_action_info_ref,
> -                          pmd->ctx.now / 1000);
> +                          pmd->ctx.now / 1000, tp_id);
>          break;
>      }
>
> @@ -7698,6 +7703,62 @@ dpif_netdev_ct_del_limits(struct dpif *dpif OVS_UNUSED,
>  }
>
>  static int
> +dpif_netdev_ct_set_timeout_policy(struct dpif *dpif,
> +                                  const struct ct_dpif_timeout_policy *dpif_tp)
> +{
> +    struct timeout_policy tp;
> +    struct dp_netdev *dp;
> +
> +    dp = get_dp_netdev(dpif);
> +    memcpy(&tp.policy, dpif_tp, sizeof tp.policy);
> +    return timeout_policy_update(dp->conntrack, &tp);
> +}
> +
> +static int
> +dpif_netdev_ct_get_timeout_policy(struct dpif *dpif, uint32_t tp_id,
> +                                  struct ct_dpif_timeout_policy *dpif_tp)
> +{
> +    struct timeout_policy *tp;
> +    struct dp_netdev *dp;
> +    int err = 0;
> +
> +    dp = get_dp_netdev(dpif);
> +    tp = timeout_policy_get(dp->conntrack, tp_id);
> +    if (!tp) {
> +        return EINVAL;

I would suggest to return ENOENT here.


> +    }
> +    memcpy(dpif_tp, &tp->policy, sizeof tp->policy);
> +    return err;
> +}
> +
> +static int
> +dpif_netdev_ct_del_timeout_policy(struct dpif *dpif,
> +                                  uint32_t tp_id)
> +{
> +    struct dp_netdev *dp;
> +    int err = 0;
> +
> +    dp = get_dp_netdev(dpif);
> +    err = timeout_policy_delete(dp->conntrack, tp_id);
> +    return err;
> +}
> +
> +static int
> +dpif_netdev_ct_get_timeout_policy_name(struct dpif *dpif OVS_UNUSED,
> +                                       uint32_t tp_id,
> +                                       uint16_t dl_type OVS_UNUSED,
> +                                       uint8_t nw_proto OVS_UNUSED,
> +                                       char **tp_name, bool *is_generic)
> +{
> +    struct ds ds = DS_EMPTY_INITIALIZER;
> +
> +    ds_put_format(&ds, "%"PRIu32, tp_id);
> +    *tp_name = ds_steal_cstr(&ds);
> +    *is_generic = true;
> +    return 0;
> +}
> +
> +static int
>  dpif_netdev_ipf_set_enabled(struct dpif *dpif, bool v6, bool enable)
>  {
>      struct dp_netdev *dp = get_dp_netdev(dpif);


More information about the dev mailing list