[ovs-dev] [PATCH v4] conntrack: document all-zero IP SNAT behavior and add a test case

Aaron Conole aconole at redhat.com
Wed Jun 2 16:21:43 UTC 2021


Eelco Chaudron <echaudro at redhat.com> writes:

> Currently, conntrack in the kernel has an undocumented feature referred
> to as all-zero IP address SNAT. Basically, when a source port
> collision is detected during the commit, the source port will be
> translated to an ephemeral port. If there is no collision, no SNAT is
> performed.
>
> This patchset documents this behavior and adds a self-test to verify
> it's not changing. In addition, a datapath feature flag is added for
> the all-zero IP SNAT case. This will help applications on top of OVS,
> like OVN, to determine this feature can be used.
>
> Signed-off-by: Eelco Chaudron <echaudro at redhat.com>
> ---
> v4: Added datapath support flag for all-zero SNAT.
> v3: Renamed NULL SNAT to all-zero IP SNAT.
> v2: Fixed NULL SNAT to only work in the -rpl state to be inline with
>     OpenShift-SDN's behavior.
>
>  lib/ct-dpif.c                    |    8 +++++++
>  lib/ct-dpif.h                    |    6 +++++
>  lib/dpif-netdev.c                |    1 +
>  lib/dpif-netlink.c               |   11 +++++++++
>  lib/dpif-provider.h              |    5 ++++
>  lib/ovs-actions.xml              |   10 ++++++++
>  ofproto/ofproto-dpif.c           |   22 +++++++++++++++++-
>  ofproto/ofproto-dpif.h           |    5 +++-
>  tests/system-kmod-macros.at      |    7 ++++++
>  tests/system-traffic.at          |   46 ++++++++++++++++++++++++++++++++++++++
>  tests/system-userspace-macros.at |   10 ++++++++
>  vswitchd/vswitch.xml             |    9 +++++++
>  12 files changed, 138 insertions(+), 2 deletions(-)
>
> diff --git a/lib/ct-dpif.c b/lib/ct-dpif.c
> index 6a5ba052d..cfc2315e3 100644
> --- a/lib/ct-dpif.c
> +++ b/lib/ct-dpif.c
> @@ -889,3 +889,11 @@ ct_dpif_get_timeout_policy_name(struct dpif *dpif, uint32_t tp_id,
>                  dpif, tp_id, dl_type, nw_proto, tp_name, is_generic)
>              : EOPNOTSUPP);
>  }
> +
> +int
> +ct_dpif_get_features(struct dpif *dpif, enum ct_features *features)
> +{

NIT-mode:

Are these features or capabilities?  I ask because we may want to add
support for things like tcp loose mode, etc, and not sure if there would
need to be a corresponding set function (to enable / disable), and how
that should look.  I'm okay with keeping it as-is for here and adding
the corresponding set function later, but it would seem strange to try
and "set" support for all-zero snat, etc.

> +    return (dpif->dpif_class->ct_get_features
> +            ? dpif->dpif_class->ct_get_features(dpif, features)
> +            : EOPNOTSUPP);
> +}
> diff --git a/lib/ct-dpif.h b/lib/ct-dpif.h
> index 88f4c7e28..b59cba962 100644
> --- a/lib/ct-dpif.h
> +++ b/lib/ct-dpif.h
> @@ -271,6 +271,11 @@ struct ct_dpif_timeout_policy {
>                                                   * timeout attribute values */
>  };
>  
> +/* Conntrack Features. */
> +enum ct_features {
> +    CONNTRACK_F_ZERO_SNAT = 1 << 0,  /* All-zero SNAT support. */
> +};
> +
>  int ct_dpif_dump_start(struct dpif *, struct ct_dpif_dump_state **,
>                         const uint16_t *zone, int *);
>  int ct_dpif_dump_next(struct ct_dpif_dump_state *, struct ct_dpif_entry *);
> @@ -325,5 +330,6 @@ int ct_dpif_timeout_policy_dump_done(struct dpif *dpif, void *state);
>  int ct_dpif_get_timeout_policy_name(struct dpif *dpif, uint32_t tp_id,
>                                      uint16_t dl_type, uint8_t nw_proto,
>                                      char **tp_name, bool *is_generic);
> +int ct_dpif_get_features(struct dpif *dpif, enum ct_features *features);
>  
>  #endif /* CT_DPIF_H */
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 816945375..49afdcc3c 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -8502,6 +8502,7 @@ const struct dpif_class dpif_netdev_class = {
>      NULL,                       /* ct_timeout_policy_dump_next */
>      NULL,                       /* ct_timeout_policy_dump_done */
>      dpif_netdev_ct_get_timeout_policy_name,
> +    NULL,                       /* ct_get_features */
>      dpif_netdev_ipf_set_enabled,
>      dpif_netdev_ipf_set_min_frag,
>      dpif_netdev_ipf_set_max_nfrags,
> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
> index ceb56c685..623902b70 100644
> --- a/lib/dpif-netlink.c
> +++ b/lib/dpif-netlink.c
> @@ -3161,6 +3161,16 @@ dpif_netlink_ct_get_timeout_policy_name(struct dpif *dpif OVS_UNUSED,
>      return 0;
>  }
>  
> +static int
> +dpif_netlink_ct_get_features(struct dpif *dpif OVS_UNUSED,
> +                             enum ct_features *features)
> +{
> +    if (features != NULL) {
> +        *features = CONNTRACK_F_ZERO_SNAT;
> +    }
> +    return 0;
> +}
> +
>  #define CT_DPIF_NL_TP_TCP_MAPPINGS                              \
>      CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_SENT, SYN_SENT)         \
>      CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_RECV, SYN_RECV)         \
> @@ -4003,6 +4013,7 @@ const struct dpif_class dpif_netlink_class = {
>      dpif_netlink_ct_timeout_policy_dump_next,
>      dpif_netlink_ct_timeout_policy_dump_done,
>      dpif_netlink_ct_get_timeout_policy_name,
> +    dpif_netlink_ct_get_features,
>      NULL,                       /* ipf_set_enabled */
>      NULL,                       /* ipf_set_min_frag */
>      NULL,                       /* ipf_set_max_nfrags */
> diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h
> index b817fceac..59e0a3a9d 100644
> --- a/lib/dpif-provider.h
> +++ b/lib/dpif-provider.h
> @@ -81,6 +81,7 @@ struct ct_dpif_dump_state;
>  struct ct_dpif_entry;
>  struct ct_dpif_tuple;
>  struct ct_dpif_timeout_policy;
> +enum ct_features;
>  
>  /* 'dpif_ipf_proto_status' and 'dpif_ipf_status' are presently in
>   * sync with 'ipf_proto_status' and 'ipf_status', but more
> @@ -562,6 +563,10 @@ struct dpif_class {
>                                        uint16_t dl_type, uint8_t nw_proto,
>                                        char **tp_name, bool *is_generic);
>  
> +    /* Stores the conntrack features supported by 'dpif' into features.
> +     * The value is a bitmap of CONNTRACK_F_* bits. */
> +    int (*ct_get_features)(struct dpif *, enum ct_features *features);
> +
>      /* IP Fragmentation. */
>  
>      /* Disables or enables conntrack fragment reassembly.  The default
> diff --git a/lib/ovs-actions.xml b/lib/ovs-actions.xml
> index a2778de4b..a0070e6c6 100644
> --- a/lib/ovs-actions.xml
> +++ b/lib/ovs-actions.xml
> @@ -1833,6 +1833,16 @@ for <var>i</var> in [1,<var>n_members</var>]:
>              connection, will behave the same as a bare <code>nat</code>.
>            </p>
>  
> +          <p>
> +            For SNAT, there is a special case when the <code>src</code> IP
> +            address is configured as all 0's, i.e.,
> +            <code>nat(src=0.0.0.0)</code>. In this case, when a source port
> +            collision is detected during the commit, the source port will be
> +            translated to an ephemeral port. If there is no collision, no SNAT
> +            is performed. Note that this is currently only implemented in the
> +            Linux kernel datapath.
> +          </p>
> +
>            <p>
>              Open vSwitch 2.6 introduced <code>nat</code>.  Linux 4.6 was the
>              earliest upstream kernel that implemented <code>ct</code> support for
> diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
> index fd0b2fdea..5ef711a7c 100644
> --- a/ofproto/ofproto-dpif.c
> +++ b/ofproto/ofproto-dpif.c
> @@ -1389,6 +1389,24 @@ check_ct_timeout_policy(struct dpif_backer *backer)
>      return !error;
>  }
>  
> +/* Tests whether 'backer''s datapath supports the all-zero SNAT case. */
> +static bool
> +dpif_supports_ct_zero_snat(struct dpif_backer *backer)
> +{
> +    enum ct_features features;
> +    bool supported = false;
> +
> +    if (!ct_dpif_get_features(backer->dpif, &features)) {
> +        if (features & CONNTRACK_F_ZERO_SNAT) {
> +            supported = true;
> +        }
> +    }
> +    VLOG_INFO("%s: Datapath %s ct_zero_snat",
> +              dpif_name(backer->dpif), (supported) ? "supports"
> +                                                   : "does not support");
> +    return supported;
> +}
> +
>  /* Tests whether 'backer''s datapath supports the
>   * OVS_ACTION_ATTR_CHECK_PKT_LEN action. */
>  static bool
> @@ -1588,8 +1606,9 @@ check_support(struct dpif_backer *backer)
>      backer->rt_support.ct_timeout = check_ct_timeout_policy(backer);
>      backer->rt_support.explicit_drop_action =
>          dpif_supports_explicit_drop_action(backer->dpif);
> -    backer->rt_support.lb_output_action=
> +    backer->rt_support.lb_output_action =
>          dpif_supports_lb_output_action(backer->dpif);
> +    backer->rt_support.ct_zero_snat = dpif_supports_ct_zero_snat(backer);
>  
>      /* Flow fields. */
>      backer->rt_support.odp.ct_state = check_ct_state(backer);
> @@ -5603,6 +5622,7 @@ get_datapath_cap(const char *datapath_type, struct smap *cap)
>      smap_add(cap, "explicit_drop_action",
>               s.explicit_drop_action ? "true" :"false");
>      smap_add(cap, "lb_output_action", s.lb_output_action ? "true" : "false");
> +    smap_add(cap, "ct_zero_snat", s.ct_zero_snat ? "true" : "false");
>  }
>  
>  /* Gets timeout policy name in 'backer' based on 'zone', 'dl_type' and
> diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h
> index b41c3d82a..191cfcb0d 100644
> --- a/ofproto/ofproto-dpif.h
> +++ b/ofproto/ofproto-dpif.h
> @@ -204,7 +204,10 @@ struct group_dpif *group_dpif_lookup(struct ofproto_dpif *,
>      DPIF_SUPPORT_FIELD(bool, explicit_drop_action, "Explicit Drop action")  \
>                                                                              \
>      /* True if the datapath supports balance_tcp optimization */            \
> -    DPIF_SUPPORT_FIELD(bool, lb_output_action, "Optimized Balance TCP mode")
> +    DPIF_SUPPORT_FIELD(bool, lb_output_action, "Optimized Balance TCP mode")\
> +                                                                            \
> +    /* True if the datapath supports all-zero IP SNAT. */                   \
> +    DPIF_SUPPORT_FIELD(bool, ct_zero_snat, "Conntrack all-zero IP SNAT")
>  
>  
>  /* Stores the various features which the corresponding backer supports. */
> diff --git a/tests/system-kmod-macros.at b/tests/system-kmod-macros.at
> index 15628a7c6..812e7d99b 100644
> --- a/tests/system-kmod-macros.at
> +++ b/tests/system-kmod-macros.at
> @@ -99,6 +99,13 @@ m4_define([CHECK_CONNTRACK_FRAG_OVERLAP],
>  #
>  m4_define([CHECK_CONNTRACK_NAT])
>  
> +# CHECK_CONNTRACK_ZEROIP_SNAT()
> +#
> +# Perform requirements checks for running conntrack all-zero IP SNAT tests.
> +# The kernel always supports all-zero IP SNAT, so no check is needed.
> +#
> +m4_define([CHECK_CONNTRACK_ZEROIP_SNAT])
> +
>  # CHECK_CONNTRACK_TIMEOUT()
>  #
>  # Perform requirements checks for running conntrack customized timeout tests.
> diff --git a/tests/system-traffic.at b/tests/system-traffic.at
> index fb5b9a36d..3d7f4751e 100644
> --- a/tests/system-traffic.at
> +++ b/tests/system-traffic.at
> @@ -4433,6 +4433,52 @@ tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=
>  OVS_TRAFFIC_VSWITCHD_STOP
>  AT_CLEANUP
>  
> +
> +AT_SETUP([conntrack - all-zero IP SNAT])
> +AT_SKIP_IF([test $HAVE_NC = no])
> +CHECK_CONNTRACK()
> +CHECK_CONNTRACK_ZEROIP_SNAT()
> +OVS_TRAFFIC_VSWITCHD_START()
> +
> +ADD_NAMESPACES(at_ns0, at_ns1)
> +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
> +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
> +NS_CHECK_EXEC([at_ns0], [ip route add 172.1.1.0/24 via 10.1.1.2])
> +
> +OVS_START_L7([at_ns1], [http])
> +
> +AT_DATA([flows.txt], [dnl
> +table=0,priority=30,ct_state=-trk,ip,action=ct(table=0)
> +table=0,priority=20,ct_state=-rpl,ip,nw_dst=10.1.1.0/24,actions=ct(commit,nat(src=0.0.0.0),table=10)
> +table=0,priority=20,ct_state=+rpl,ip,nw_dst=10.1.1.0/24,actions=resubmit(,10)
> +table=0,priority=20,ip,nw_dst=172.1.1.2,actions=ct(commit,nat(dst=10.1.1.2),table=10)
> +table=0,priority=10,arp,action=normal
> +table=0,priority=1,action=drop
> +table=10,priority=20,ct_state=+rpl,ip,nw_dst=10.1.1.0/24 actions=ct(table=20,nat)
> +table=10,priority=10,ip,nw_dst=10.1.1.0/24 actions=resubmit(,20)
> +table=20,priority=10,ip,nw_dst=10.1.1.1,action=1
> +table=20,priority=10,ip,nw_dst=10.1.1.2,action=2
> +])
> +AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
> +
> +dnl - Test to make sure src nat is NOT done when not needed
> +NS_CHECK_EXEC([at_ns0], [echo "TEST" | nc -p 30000 10.1.1.2 80 > nc-1.log])
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "orig=.src=10\.1\.1\.1,"], [0], [dnl
> +tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=30000,dport=80),reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=30000),protoinfo=(state=TIME_WAIT)
> +])
> +
> +dnl - Test to make sure src nat is done when needed
> +NS_CHECK_EXEC([at_ns0], [echo "TEST2" | nc -p 30001 172.1.1.2 80 > nc-2.log])
> +NS_CHECK_EXEC([at_ns0], [echo "TEST3" | nc -p 30001 10.1.1.2 80 > nc-3.log])
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 30001 | grep "orig=.src=10\.1\.1\.1," | sed -e 's/port=30001/port=<clnt_s_port>/g' -e 's/sport=80,dport=[[0-9]]\+/sport=80,dport=<rnd_port>/g' | sort], [0], [dnl
> +tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<clnt_s_port>,dport=80),reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=<rnd_port>),protoinfo=(state=TIME_WAIT)
> +tcp,orig=(src=10.1.1.1,dst=172.1.1.2,sport=<clnt_s_port>,dport=80),reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=<clnt_s_port>),protoinfo=(state=TIME_WAIT)
> +])
> +
> +OVS_TRAFFIC_VSWITCHD_STOP
> +AT_CLEANUP
> +
> +
>  AT_SETUP([conntrack - simple DNAT])
>  CHECK_CONNTRACK()
>  CHECK_CONNTRACK_NAT()
> diff --git a/tests/system-userspace-macros.at b/tests/system-userspace-macros.at
> index 34f82cee3..9f0d38dfb 100644
> --- a/tests/system-userspace-macros.at
> +++ b/tests/system-userspace-macros.at
> @@ -96,6 +96,16 @@ m4_define([CHECK_CONNTRACK_FRAG_OVERLAP])
>  #
>  m4_define([CHECK_CONNTRACK_NAT])
>  
> +# CHECK_CONNTRACK_ZEROIP_SNAT()
> +#
> +# Perform requirements checks for running conntrack all-zero IP SNAT tests.
> +# The userspace datapath does not support all-zero IP SNAT.
> +#
> +m4_define([CHECK_CONNTRACK_ZEROIP_SNAT],
> +[
> +    AT_SKIP_IF([:])
> +])

I didn't check too close, but maybe it's possible to make this check the
capabilities bits and then it could be extended to everywhere.

> +
>  # CHECK_CONNTRACK_TIMEOUT()
>  #
>  # Perform requirements checks for running conntrack customized timeout tests.
> diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
> index 4597a215d..d8ea287d5 100644
> --- a/vswitchd/vswitch.xml
> +++ b/vswitchd/vswitch.xml
> @@ -6126,6 +6126,15 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \
>          True if the datapath supports OVS_ACTION_ATTR_DROP.  If false,
>          explicit drop action will not be sent to the datapath.
>        </column>
> +      <column name="capabilities" key="ct_zero_snat"
> +              type='{"type": "boolean"}'>
> +        True if the datapath supports all-zero SNAT. This is a special case
> +        if the <code>src</code> IP address is configured as all 0's, i.e.,
> +        <code>nat(src=0.0.0.0)</code>. In this case, when a source port
> +        collision is detected during the commit, the source port will be
> +        translated to an ephemeral port. If there is no collision, no SNAT
> +        is performed.
> +      </column>
>      </group>
>  
>      <group title="Common Columns">



More information about the dev mailing list