[ovs-dev] [patch_v4 2/6] Parse NAT netlink for userspace datapath.
Darrell Ball
dball at vmware.com
Wed Feb 8 02:06:42 UTC 2017
On 1/27/17, 5:57 PM, "ovs-dev-bounces at openvswitch.org on behalf of Daniele Di Proietto" <ovs-dev-bounces at openvswitch.org on behalf of diproiettod at ovn.org> wrote:
2017-01-24 10:50 GMT-08:00 Darrell Ball <dlu998 at gmail.com>:
> Signed-off-by: Darrell Ball <dlu998 at gmail.com>
> ---
> lib/conntrack-private.h | 9 ------
> lib/conntrack.c | 3 +-
> lib/conntrack.h | 31 +++++++++++++++++-
> lib/dpif-netdev.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++---
> tests/test-conntrack.c | 8 +++--
> 5 files changed, 118 insertions(+), 18 deletions(-)
>
> diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
> index 013f19f..493865f 100644
> --- a/lib/conntrack-private.h
> +++ b/lib/conntrack-private.h
> @@ -29,15 +29,6 @@
> #include "packets.h"
> #include "unaligned.h"
>
> -struct ct_addr {
> - union {
> - ovs_16aligned_be32 ipv4;
> - union ovs_16aligned_in6_addr ipv6;
> - ovs_be32 ipv4_aligned;
> - struct in6_addr ipv6_aligned;
> - };
> -};
> -
> struct ct_endpoint {
> struct ct_addr addr;
> union {
> diff --git a/lib/conntrack.c b/lib/conntrack.c
> index 9bea3d9..bae42a3 100644
> --- a/lib/conntrack.c
> +++ b/lib/conntrack.c
> @@ -273,7 +273,8 @@ conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
> ovs_be16 dl_type, bool commit, uint16_t zone,
> const uint32_t *setmark,
> const struct ovs_key_ct_labels *setlabel,
> - const char *helper)
> + const char *helper,
> + const struct nat_action_info_t *nat_action_info OVS_UNUSED)
> {
> struct dp_packet **pkts = pkt_batch->packets;
> size_t cnt = pkt_batch->count;
> diff --git a/lib/conntrack.h b/lib/conntrack.h
> index 254f61c..cbdfb91 100644
> --- a/lib/conntrack.h
> +++ b/lib/conntrack.h
> @@ -26,6 +26,8 @@
> #include "openvswitch/thread.h"
> #include "openvswitch/types.h"
> #include "ovs-atomic.h"
> +#include "ovs-thread.h"
> +#include "packets.h"
>
> /* Userspace connection tracker
> * ============================
> @@ -61,6 +63,32 @@ struct dp_packet_batch;
>
> struct conntrack;
>
> +struct ct_addr {
> + union {
> + ovs_16aligned_be32 ipv4;
> + union ovs_16aligned_in6_addr ipv6;
> + ovs_be32 ipv4_aligned;
> + struct in6_addr ipv6_aligned;
> + };
> +};
> +
> +// Both NAT_ACTION_* and NAT_ACTION_*_PORT can be set
We normally don't use // comments
That’s meant to be a temporary comment in the context of OVS.
> +enum nat_action_e {
> + NAT_ACTION = 1 << 0,
> + NAT_ACTION_SRC = 1 << 1,
> + NAT_ACTION_SRC_PORT = 1 << 2,
> + NAT_ACTION_DST = 1 << 3,
> + NAT_ACTION_DST_PORT = 1 << 4,
> +};
This is indented by tabs, instead of 4 whitespaces.
ack
Is NAT_ACTION really necessary? I think it should always be set when
nat_action_info is != NULL, so we can probably remove it.
There is not much if any semantic benefit to keeping it; I just removed it.
> +
> +struct nat_action_info_t {
> + struct ct_addr min_addr;
> + struct ct_addr max_addr;
> + uint16_t min_port;
> + uint16_t max_port;
Tabs
ack
> + uint16_t nat_action;
> +};
> +
> void conntrack_init(struct conntrack *);
> void conntrack_destroy(struct conntrack *);
>
> @@ -68,7 +96,8 @@ int conntrack_execute(struct conntrack *, struct dp_packet_batch *,
> ovs_be16 dl_type, bool commit,
> uint16_t zone, const uint32_t *setmark,
> const struct ovs_key_ct_labels *setlabel,
> - const char *helper);
> + const char *helper,
> + const struct nat_action_info_t *nat_action_info);
>
> struct conntrack_dump {
> struct conntrack *ct;
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 3901129..a71c766 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -97,7 +97,8 @@ static struct shash dp_netdevs OVS_GUARDED_BY(dp_netdev_mutex)
> static struct vlog_rate_limit upcall_rl = VLOG_RATE_LIMIT_INIT(600, 600);
>
> #define DP_NETDEV_CS_SUPPORTED_MASK (CS_NEW | CS_ESTABLISHED | CS_RELATED \
> - | CS_INVALID | CS_REPLY_DIR | CS_TRACKED)
> + | CS_INVALID | CS_REPLY_DIR | CS_TRACKED \
> + | CS_SRC_NAT | CS_DST_NAT)
> #define DP_NETDEV_CS_UNSUPPORTED_MASK (~(uint32_t)DP_NETDEV_CS_SUPPORTED_MASK)
>
> static struct odp_support dp_netdev_support = {
> @@ -4681,7 +4682,9 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
> const char *helper = NULL;
> const uint32_t *setmark = NULL;
> const struct ovs_key_ct_labels *setlabel = NULL;
> -
> + struct nat_action_info_t nat_action_info;
> + bool nat = false;
> + memset(&nat_action_info, 0, sizeof nat_action_info);
As discussed offline, can this memset be moved inside the OVS_CT_ATTR_NAT case?
There is some performance benefit to the non-nat CT case to moving it and very minor loss
of code clarity, so I moved it. Thanks
> NL_ATTR_FOR_EACH_UNSAFE (b, left, nl_attr_get(a),
> nl_attr_get_size(a)) {
> enum ovs_ct_attr sub_type = nl_attr_type(b);
> @@ -4702,15 +4705,89 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
> case OVS_CT_ATTR_LABELS:
> setlabel = nl_attr_get(b);
> break;
> - case OVS_CT_ATTR_NAT:
> + case OVS_CT_ATTR_NAT: {
> + const struct nlattr *b_nest;
> + unsigned int left_nest;
> + bool ip_min_specified = false;
> + bool proto_num_min_specified = false;
> + bool ip_max_specified = false;
> + bool proto_num_max_specified = false;
> +
> + NL_NESTED_FOR_EACH_UNSAFE (b_nest, left_nest, b) {
> + enum ovs_nat_attr sub_type_nest = nl_attr_type(b_nest);
> +
> + switch(sub_type_nest) {
> + case OVS_NAT_ATTR_SRC:
> + case OVS_NAT_ATTR_DST:
> + nat = true;
> + nat_action_info.nat_action |= NAT_ACTION;
> + nat_action_info.nat_action |=
> + ((sub_type_nest == OVS_NAT_ATTR_SRC)
> + ? NAT_ACTION_SRC : NAT_ACTION_DST);
> + break;
> + case OVS_NAT_ATTR_IP_MIN:
> + memcpy(&nat_action_info.min_addr,
> + (char *) b_nest + NLA_HDRLEN, b_nest->nla_len);
> + ip_min_specified = true;
> + break;
> + case OVS_NAT_ATTR_IP_MAX:
> + memcpy(&nat_action_info.max_addr,
> + (char *) b_nest + NLA_HDRLEN, b_nest->nla_len);
> + ip_max_specified = true;
> + break;
> + case OVS_NAT_ATTR_PROTO_MIN:
> + nat_action_info.min_port = nl_attr_get_u16(b_nest);
> + proto_num_min_specified = true;
> + break;
> + case OVS_NAT_ATTR_PROTO_MAX:
> + nat_action_info.max_port = nl_attr_get_u16(b_nest);
> + proto_num_max_specified = true;
> + break;
> + case OVS_NAT_ATTR_PERSISTENT:
> + case OVS_NAT_ATTR_PROTO_HASH:
> + case OVS_NAT_ATTR_PROTO_RANDOM:
> + break;
> + case OVS_NAT_ATTR_UNSPEC:
> + case __OVS_NAT_ATTR_MAX:
> + OVS_NOT_REACHED();
> + }
> + }
> +
> + if (!nat_action_info.nat_action) {
> + nat_action_info.nat_action = NAT_ACTION;
> + }
> + if (ip_min_specified && !ip_max_specified) {
> + memcpy(&nat_action_info.max_addr,
> + &nat_action_info.min_addr,
> + sizeof(nat_action_info.max_addr));
> + }
> + if (proto_num_min_specified && !proto_num_max_specified) {
> + nat_action_info.max_port = nat_action_info.min_port;
> + }
> + if (proto_num_min_specified || proto_num_max_specified) {
> + if (nat_action_info.nat_action & NAT_ACTION_SRC) {
> + nat_action_info.nat_action |= NAT_ACTION_SRC_PORT;
> + } else if (nat_action_info.nat_action & NAT_ACTION_DST) {
> + nat_action_info.nat_action |= NAT_ACTION_DST_PORT;
> + }
> + }
> +
> + /* Additional sanity checks can be added. */
> + break;
> + }
> +
> case OVS_CT_ATTR_UNSPEC:
> case __OVS_CT_ATTR_MAX:
> OVS_NOT_REACHED();
> }
> }
>
> + if (nat && !commit) {
> + VLOG_WARN("NAT specified without commit.");
> + }
> +
The indentation here looks weird.
ack
Can we rate limit the log message?
As discussed offline, the log message can only be hit via
1) A configuration error
2) The configuration error should only be possible via a backdoor
Access (dpctl)
Hence not much real impact
If it is exercised under such a case, the user probably wants to know
asap.
Hence I kept it as is.
I did find a pre-existing log message in the file, which is more concerning that I will fix
separately.
> conntrack_execute(&dp->conntrack, packets_, aux->flow->dl_type, commit,
> - zone, setmark, setlabel, helper);
> + zone, setmark, setlabel, helper, &nat_action_info);
> break;
> }
>
> diff --git a/tests/test-conntrack.c b/tests/test-conntrack.c
> index 803e2b9..a8b2e48 100644
> --- a/tests/test-conntrack.c
> +++ b/tests/test-conntrack.c
> @@ -91,7 +91,8 @@ ct_thread_main(void *aux_)
> pkt_batch = prepare_packets(batch_size, change_conn, aux->tid, &dl_type);
> ovs_barrier_block(&barrier);
> for (i = 0; i < n_pkts; i += batch_size) {
> - conntrack_execute(&ct, pkt_batch, dl_type, true, 0, NULL, NULL, NULL);
> + conntrack_execute(&ct, pkt_batch, dl_type, true, 0, NULL, NULL,
> + NULL, NULL);
> }
> ovs_barrier_block(&barrier);
> destroy_packets(pkt_batch);
> @@ -176,14 +177,15 @@ pcap_batch_execute_conntrack(struct conntrack *ct,
>
> if (flow.dl_type != dl_type) {
> conntrack_execute(ct, &new_batch, dl_type, true, 0, NULL, NULL,
> - NULL);
> + NULL, NULL);
> dp_packet_batch_init(&new_batch);
> }
> new_batch.packets[new_batch.count++] = pkt;
> }
>
> if (new_batch.count) {
> - conntrack_execute(ct, &new_batch, dl_type, true, 0, NULL, NULL, NULL);
> + conntrack_execute(ct, &new_batch, dl_type, true, 0, NULL, NULL,
> + NULL, NULL);
> }
>
> }
> --
> 1.9.1
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DwICAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=Szp03gLqIqErEYswv2Uc9FKb7t1iD4lKzK3r-vXDMvQ&s=YZ5aCBmfgi5dEWKm4KEIGt9zGDnJa1Kmy4a0xwVkCAY&e=
2017-01-24 20:40 GMT-08:00 Darrell Ball <dlu998 at gmail.com>:
> Signed-off-by: Darrell Ball <dlu998 at gmail.com>
> ---
> lib/conntrack-private.h | 9 ------
> lib/conntrack.c | 3 +-
> lib/conntrack.h | 29 ++++++++++++++++-
> lib/dpif-netdev.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++---
> tests/test-conntrack.c | 8 +++--
> 5 files changed, 116 insertions(+), 18 deletions(-)
>
> diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
> index 013f19f..493865f 100644
> --- a/lib/conntrack-private.h
> +++ b/lib/conntrack-private.h
> @@ -29,15 +29,6 @@
> #include "packets.h"
> #include "unaligned.h"
>
> -struct ct_addr {
> - union {
> - ovs_16aligned_be32 ipv4;
> - union ovs_16aligned_in6_addr ipv6;
> - ovs_be32 ipv4_aligned;
> - struct in6_addr ipv6_aligned;
> - };
> -};
> -
> struct ct_endpoint {
> struct ct_addr addr;
> union {
> diff --git a/lib/conntrack.c b/lib/conntrack.c
> index 9bea3d9..0a611a2 100644
> --- a/lib/conntrack.c
> +++ b/lib/conntrack.c
> @@ -273,7 +273,8 @@ conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
> ovs_be16 dl_type, bool commit, uint16_t zone,
> const uint32_t *setmark,
> const struct ovs_key_ct_labels *setlabel,
> - const char *helper)
> + const char *helper,
> + const struct nat_action_info_t *nat_action_info OVS_UNUSED)
> {
> struct dp_packet **pkts = pkt_batch->packets;
> size_t cnt = pkt_batch->count;
> diff --git a/lib/conntrack.h b/lib/conntrack.h
> index 254f61c..471e529 100644
> --- a/lib/conntrack.h
> +++ b/lib/conntrack.h
> @@ -26,6 +26,7 @@
> #include "openvswitch/thread.h"
> #include "openvswitch/types.h"
> #include "ovs-atomic.h"
> +#include "packets.h"
>
> /* Userspace connection tracker
> * ============================
> @@ -61,6 +62,31 @@ struct dp_packet_batch;
>
> struct conntrack;
>
> +struct ct_addr {
> + union {
> + ovs_16aligned_be32 ipv4;
> + union ovs_16aligned_in6_addr ipv6;
> + ovs_be32 ipv4_aligned;
> + struct in6_addr ipv6_aligned;
> + };
> +};
> +
> +enum nat_action_e {
> + NAT_ACTION = 1 << 0,
> + NAT_ACTION_SRC = 1 << 1,
> + NAT_ACTION_SRC_PORT = 1 << 2,
> + NAT_ACTION_DST = 1 << 3,
> + NAT_ACTION_DST_PORT = 1 << 4,
> +};
> +
> +struct nat_action_info_t {
> + struct ct_addr min_addr;
> + struct ct_addr max_addr;
> + uint16_t min_port;
> + uint16_t max_port;
> + uint16_t nat_action;
> +};
> +
> void conntrack_init(struct conntrack *);
> void conntrack_destroy(struct conntrack *);
>
> @@ -68,7 +94,8 @@ int conntrack_execute(struct conntrack *, struct dp_packet_batch *,
> ovs_be16 dl_type, bool commit,
> uint16_t zone, const uint32_t *setmark,
> const struct ovs_key_ct_labels *setlabel,
> - const char *helper);
> + const char *helper,
> + const struct nat_action_info_t *nat_action_info);
>
> struct conntrack_dump {
> struct conntrack *ct;
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 42631bc..0ca4291 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -97,7 +97,8 @@ static struct shash dp_netdevs OVS_GUARDED_BY(dp_netdev_mutex)
> static struct vlog_rate_limit upcall_rl = VLOG_RATE_LIMIT_INIT(600, 600);
>
> #define DP_NETDEV_CS_SUPPORTED_MASK (CS_NEW | CS_ESTABLISHED | CS_RELATED \
> - | CS_INVALID | CS_REPLY_DIR | CS_TRACKED)
> + | CS_INVALID | CS_REPLY_DIR | CS_TRACKED \
> + | CS_SRC_NAT | CS_DST_NAT)
> #define DP_NETDEV_CS_UNSUPPORTED_MASK (~(uint32_t)DP_NETDEV_CS_SUPPORTED_MASK)
>
> static struct odp_support dp_netdev_support = {
> @@ -4688,7 +4689,9 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
> const char *helper = NULL;
> const uint32_t *setmark = NULL;
> const struct ovs_key_ct_labels *setlabel = NULL;
> -
> + struct nat_action_info_t nat_action_info;
> + bool nat = false;
> + memset(&nat_action_info, 0, sizeof nat_action_info);
> NL_ATTR_FOR_EACH_UNSAFE (b, left, nl_attr_get(a),
> nl_attr_get_size(a)) {
> enum ovs_ct_attr sub_type = nl_attr_type(b);
> @@ -4709,15 +4712,89 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
> case OVS_CT_ATTR_LABELS:
> setlabel = nl_attr_get(b);
> break;
> - case OVS_CT_ATTR_NAT:
> + case OVS_CT_ATTR_NAT: {
> + const struct nlattr *b_nest;
> + unsigned int left_nest;
> + bool ip_min_specified = false;
> + bool proto_num_min_specified = false;
> + bool ip_max_specified = false;
> + bool proto_num_max_specified = false;
> +
> + NL_NESTED_FOR_EACH_UNSAFE (b_nest, left_nest, b) {
> + enum ovs_nat_attr sub_type_nest = nl_attr_type(b_nest);
> +
> + switch(sub_type_nest) {
> + case OVS_NAT_ATTR_SRC:
> + case OVS_NAT_ATTR_DST:
> + nat = true;
> + nat_action_info.nat_action |= NAT_ACTION;
> + nat_action_info.nat_action |=
> + ((sub_type_nest == OVS_NAT_ATTR_SRC)
> + ? NAT_ACTION_SRC : NAT_ACTION_DST);
> + break;
> + case OVS_NAT_ATTR_IP_MIN:
> + memcpy(&nat_action_info.min_addr,
> + (char *) b_nest + NLA_HDRLEN, b_nest->nla_len);
> + ip_min_specified = true;
> + break;
> + case OVS_NAT_ATTR_IP_MAX:
> + memcpy(&nat_action_info.max_addr,
> + (char *) b_nest + NLA_HDRLEN, b_nest->nla_len);
> + ip_max_specified = true;
> + break;
> + case OVS_NAT_ATTR_PROTO_MIN:
> + nat_action_info.min_port = nl_attr_get_u16(b_nest);
> + proto_num_min_specified = true;
> + break;
> + case OVS_NAT_ATTR_PROTO_MAX:
> + nat_action_info.max_port = nl_attr_get_u16(b_nest);
> + proto_num_max_specified = true;
> + break;
> + case OVS_NAT_ATTR_PERSISTENT:
> + case OVS_NAT_ATTR_PROTO_HASH:
> + case OVS_NAT_ATTR_PROTO_RANDOM:
> + break;
> + case OVS_NAT_ATTR_UNSPEC:
> + case __OVS_NAT_ATTR_MAX:
> + OVS_NOT_REACHED();
> + }
> + }
> +
> + if (!nat_action_info.nat_action) {
> + nat_action_info.nat_action = NAT_ACTION;
> + }
> + if (ip_min_specified && !ip_max_specified) {
> + memcpy(&nat_action_info.max_addr,
> + &nat_action_info.min_addr,
> + sizeof(nat_action_info.max_addr));
> + }
> + if (proto_num_min_specified && !proto_num_max_specified) {
> + nat_action_info.max_port = nat_action_info.min_port;
> + }
> + if (proto_num_min_specified || proto_num_max_specified) {
> + if (nat_action_info.nat_action & NAT_ACTION_SRC) {
> + nat_action_info.nat_action |= NAT_ACTION_SRC_PORT;
> + } else if (nat_action_info.nat_action & NAT_ACTION_DST) {
> + nat_action_info.nat_action |= NAT_ACTION_DST_PORT;
> + }
> + }
> +
> + /* Additional sanity checks can be added. */
> + break;
> + }
> +
> case OVS_CT_ATTR_UNSPEC:
> case __OVS_CT_ATTR_MAX:
> OVS_NOT_REACHED();
> }
> }
>
> + if (nat && !commit) {
> + VLOG_WARN("NAT specified without commit.");
> + }
> +
> conntrack_execute(&dp->conntrack, packets_, aux->flow->dl_type, commit,
> - zone, setmark, setlabel, helper);
> + zone, setmark, setlabel, helper, &nat_action_info);
> break;
> }
>
> diff --git a/tests/test-conntrack.c b/tests/test-conntrack.c
> index 803e2b9..a8b2e48 100644
> --- a/tests/test-conntrack.c
> +++ b/tests/test-conntrack.c
> @@ -91,7 +91,8 @@ ct_thread_main(void *aux_)
> pkt_batch = prepare_packets(batch_size, change_conn, aux->tid, &dl_type);
> ovs_barrier_block(&barrier);
> for (i = 0; i < n_pkts; i += batch_size) {
> - conntrack_execute(&ct, pkt_batch, dl_type, true, 0, NULL, NULL, NULL);
> + conntrack_execute(&ct, pkt_batch, dl_type, true, 0, NULL, NULL,
> + NULL, NULL);
> }
> ovs_barrier_block(&barrier);
> destroy_packets(pkt_batch);
> @@ -176,14 +177,15 @@ pcap_batch_execute_conntrack(struct conntrack *ct,
>
> if (flow.dl_type != dl_type) {
> conntrack_execute(ct, &new_batch, dl_type, true, 0, NULL, NULL,
> - NULL);
> + NULL, NULL);
> dp_packet_batch_init(&new_batch);
> }
> new_batch.packets[new_batch.count++] = pkt;
> }
>
> if (new_batch.count) {
> - conntrack_execute(ct, &new_batch, dl_type, true, 0, NULL, NULL, NULL);
> + conntrack_execute(ct, &new_batch, dl_type, true, 0, NULL, NULL,
> + NULL, NULL);
> }
>
> }
> --
> 1.9.1
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DwICAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=Szp03gLqIqErEYswv2Uc9FKb7t1iD4lKzK3r-vXDMvQ&s=YZ5aCBmfgi5dEWKm4KEIGt9zGDnJa1Kmy4a0xwVkCAY&e=
_______________________________________________
dev mailing list
dev at openvswitch.org
https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DwICAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=Szp03gLqIqErEYswv2Uc9FKb7t1iD4lKzK3r-vXDMvQ&s=YZ5aCBmfgi5dEWKm4KEIGt9zGDnJa1Kmy4a0xwVkCAY&e=
More information about the dev
mailing list