[ovs-dev] [PATCH v2 13/22] datapath: Add original direction conntrack tuple to sw_flow_key.
Joe Stringer
joe at ovn.org
Fri Mar 3 01:57:01 UTC 2017
On 28 February 2017 at 17:17, Jarno Rajahalme <jarno at ovn.org> wrote:
> Upstream commit:
>
> commit 9dd7f8907c3705dc7a7a375d1c6e30b06e6daffc
> Author: Jarno Rajahalme <jarno at ovn.org>
> Date: Thu Feb 9 11:21:59 2017 -0800
>
> openvswitch: Add original direction conntrack tuple to sw_flow_key.
>
> Add the fields of the conntrack original direction 5-tuple to struct
> sw_flow_key. The new fields are initially marked as non-existent, and
> are populated whenever a conntrack action is executed and either finds
> or generates a conntrack entry. This means that these fields exist
> for all packets that were not rejected by conntrack as untrackable.
>
> The original tuple fields in the sw_flow_key are filled from the
> original direction tuple of the conntrack entry relating to the
> current packet, or from the original direction tuple of the master
> conntrack entry, if the current conntrack entry has a master.
> Generally, expected connections of connections having an assigned
> helper (e.g., FTP), have a master conntrack entry.
>
> The main purpose of the new conntrack original tuple fields is to
> allow matching on them for policy decision purposes, with the premise
> that the admissibility of tracked connections' reply packets (as well
> as original direction packets), and both direction packets of any
> related connections may be based on ACL rules applying to the master
> connection's original direction 5-tuple. This also makes it easier to
> make policy decisions when the actual packet headers might have been
> transformed by NAT, as the original direction 5-tuple represents the
> packet headers before any such transformation.
>
> When using the original direction 5-tuple, the admissibility of return
> and/or related packets need not be based on the mere existence of a
> conntrack entry, allowing separation of admission policy from the
> established conntrack state. While existence of a conntrack entry is
> required for admission of the return or related packets, policy
> changes can cause connections that were initially admitted to be
> rejected or dropped afterwards. If the admission of the return and
> related packets was based on mere conntrack state (e.g., connection
> being in an established state), a policy change that would make the
> connection rejected or dropped would need to find and delete all
> conntrack entries affected by such a change. When using the original
> direction 5-tuple matching, the affected conntrack entries can be
> allowed to time out instead, as the established state of the
> connection would not need to be the basis for packet admission any
> more.
>
> It should be noted that the directionality of related connections may
> be the same as or different from that of the master connection, and
> neither the original direction 5-tuple nor the conntrack state bits
> carry this information. If needed, the directionality of the master
> connection can be stored in master's conntrack mark or labels, which
> are automatically inherited by the expected related connections.
>
> The fact that neither ARP nor ND packets are trackable by conntrack
> allows mutual exclusion between ARP/ND and the new conntrack original
> tuple fields. Hence, the IP addresses are overlaid in union with ARP
> and ND fields. This allows the sw_flow_key to not grow much due to
> this patch, but it also means that we must be careful to never use the
> new key fields with ARP or ND packets. ARP is easy to distinguish and
> keep mutually exclusive based on the ethernet type, but ND being an
> ICMPv6 protocol requires a bit more attention.
>
> Signed-off-by: Jarno Rajahalme <jarno at ovn.org>
> Acked-by: Joe Stringer <joe at ovn.org>
> Acked-by: Pravin B Shelar <pshelar at ovn.org>
> Signed-off-by: David S. Miller <davem at davemloft.net>
>
> Signed-off-by: Jarno Rajahalme <jarno at ovn.org>
> ---
I had to roll in the following incremental (derived from your later
patch) to fix the build with this commit:
diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index 1f6812a6dd02..50bbafaa0231 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -381,6 +381,8 @@ odp_execute_set_action(struct dp_packet *packet,
const struct nlattr *a)
case OVS_KEY_ATTR_VLAN:
case OVS_KEY_ATTR_TCP_FLAGS:
case OVS_KEY_ATTR_CT_STATE:
+ case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4:
+ case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6:
case OVS_KEY_ATTR_CT_ZONE:
case OVS_KEY_ATTR_CT_MARK:
case OVS_KEY_ATTR_CT_LABELS:
@@ -476,6 +478,8 @@ odp_execute_masked_set_action(struct dp_packet *packet,
case OVS_KEY_ATTR_CT_ZONE:
case OVS_KEY_ATTR_CT_MARK:
case OVS_KEY_ATTR_CT_LABELS:
+ case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4:
+ case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6:
case OVS_KEY_ATTR_ENCAP:
case OVS_KEY_ATTR_ETHERTYPE:
case OVS_KEY_ATTR_IN_PORT:
diff --git a/lib/odp-util.c b/lib/odp-util.c
index 41067385e821..1f1512ae47fd 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -150,6 +150,8 @@ ovs_key_attr_to_string(enum ovs_key_attr attr,
char *namebuf, size_t bufsize)
case OVS_KEY_ATTR_CT_ZONE: return "ct_zone";
case OVS_KEY_ATTR_CT_MARK: return "ct_mark";
case OVS_KEY_ATTR_CT_LABELS: return "ct_label";
+ case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4: return "ct_tuple4";
+ case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6: return "ct_tuple6";
case OVS_KEY_ATTR_TUNNEL: return "tunnel";
case OVS_KEY_ATTR_IN_PORT: return "in_port";
case OVS_KEY_ATTR_ETHERNET: return "eth";
@@ -1874,6 +1876,8 @@ static const struct attr_len_tbl
ovs_flow_key_attr_lens[OVS_KEY_ATTR_MAX + 1] =
[OVS_KEY_ATTR_CT_ZONE] = { .len = 2 },
[OVS_KEY_ATTR_CT_MARK] = { .len = 4 },
[OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) },
+ [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4] = { .len = sizeof(struct
ovs_key_ct_tuple_ipv4) },
+ [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = { .len = sizeof(struct
ovs_key_ct_tuple_ipv6) },
};
/* Returns the correct length of the payload for a flow key attribute of the
@@ -2823,6 +2827,40 @@ format_odp_key_attr(const struct nlattr *a,
const struct nlattr *ma,
break;
}
+ case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4: {
+ const struct ovs_key_ct_tuple_ipv4 *key = nl_attr_get(a);
+ const struct ovs_key_ct_tuple_ipv4 *mask = ma ? nl_attr_get(ma) : NULL;
+
+ format_ipv4(ds, "src", key->ipv4_src, MASK(mask, ipv4_src), verbose);
+ format_ipv4(ds, "dst", key->ipv4_dst, MASK(mask, ipv4_dst), verbose);
+ format_u8u(ds, "proto", key->ipv4_proto, MASK(mask, ipv4_proto),
+ verbose);
+ format_be16(ds, "tp_src", key->src_port, MASK(mask, src_port),
+ verbose);
+ format_be16(ds, "tp_dst", key->dst_port, MASK(mask, dst_port),
+ verbose);
+ ds_chomp(ds, ',');
+ break;
+ }
+
+ case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6: {
+ const struct ovs_key_ct_tuple_ipv6 *key = nl_attr_get(a);
+ const struct ovs_key_ct_tuple_ipv6 *mask = ma ? nl_attr_get(ma) : NULL;
+
+ format_in6_addr(ds, "src", &key->ipv6_src, MASK(mask, ipv6_src),
+ verbose);
+ format_in6_addr(ds, "dst", &key->ipv6_dst, MASK(mask, ipv6_dst),
+ verbose);
+ format_u8u(ds, "proto", key->ipv6_proto, MASK(mask, ipv6_proto),
+ verbose);
+ format_be16(ds, "src_port", key->src_port, MASK(mask, src_port),
+ verbose);
+ format_be16(ds, "dst_port", key->dst_port, MASK(mask, dst_port),
+ verbose);
+ ds_chomp(ds, ',');
+ break;
+ }
+
case OVS_KEY_ATTR_TUNNEL:
format_odp_tun_attr(a, ma, ds, verbose);
break;
diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c
index 520b8dd196bb..69cdf69f4b39 100644
--- a/ofproto/ofproto-dpif-sflow.c
+++ b/ofproto/ofproto-dpif-sflow.c
@@ -1025,6 +1025,8 @@ sflow_read_set_action(const struct nlattr *attr,
case OVS_KEY_ATTR_CT_ZONE:
case OVS_KEY_ATTR_CT_MARK:
case OVS_KEY_ATTR_CT_LABELS:
+ case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4:
+ case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6:
case OVS_KEY_ATTR_UNSPEC:
case __OVS_KEY_ATTR_MAX:
default:
> datapath/actions.c | 2 +
> datapath/conntrack.c | 86 +++++++++++++++++++++--
> datapath/conntrack.h | 10 ++-
> datapath/flow.c | 34 +++++++--
> datapath/flow.h | 49 ++++++++++---
> datapath/flow_netlink.c | 85 ++++++++++++++++------
> datapath/flow_netlink.h | 7 +-
> datapath/linux/compat/include/linux/openvswitch.h | 18 +++++
> 8 files changed, 246 insertions(+), 45 deletions(-)
>
> diff --git a/datapath/actions.c b/datapath/actions.c
> index 82833d0..71ec14c 100644
> --- a/datapath/actions.c
> +++ b/datapath/actions.c
> @@ -1011,6 +1011,8 @@ static int execute_masked_set_action(struct sk_buff *skb,
> case OVS_KEY_ATTR_CT_ZONE:
> case OVS_KEY_ATTR_CT_MARK:
> case OVS_KEY_ATTR_CT_LABELS:
> + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4:
> + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6:
> err = -EINVAL;
> break;
> }
> diff --git a/datapath/conntrack.c b/datapath/conntrack.c
> index 16a7773..d8309c9 100644
> --- a/datapath/conntrack.c
> +++ b/datapath/conntrack.c
> @@ -163,6 +163,20 @@ static void ovs_ct_get_labels(const struct nf_conn *ct,
> memset(labels, 0, OVS_CT_LABELS_LEN);
> }
>
> +static void __ovs_ct_update_key_orig_tp(struct sw_flow_key *key,
> + const struct nf_conntrack_tuple *orig,
> + u8 icmp_proto)
> +{
> + key->ct.orig_proto = orig->dst.protonum;
> + if (orig->dst.protonum == icmp_proto) {
> + key->ct.orig_tp.src = htons(orig->dst.u.icmp.type);
> + key->ct.orig_tp.dst = htons(orig->dst.u.icmp.code);
> + } else {
> + key->ct.orig_tp.src = orig->src.u.all;
> + key->ct.orig_tp.dst = orig->dst.u.all;
> + }
> +}
> +
> static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state,
> const struct nf_conntrack_zone *zone,
> const struct nf_conn *ct)
> @@ -171,6 +185,35 @@ static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state,
> key->ct.zone = zone->id;
> key->ct.mark = ovs_ct_get_mark(ct);
> ovs_ct_get_labels(ct, &key->ct.labels);
> +
> + if (ct) {
> + const struct nf_conntrack_tuple *orig;
> +
> + /* Use the master if we have one. */
> + if (ct->master)
> + ct = ct->master;
> + orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
> +
> + /* IP version must match with the master connection. */
> + if (key->eth.type == htons(ETH_P_IP) &&
> + nf_ct_l3num(ct) == NFPROTO_IPV4) {
> + key->ipv4.ct_orig.src = orig->src.u3.ip;
> + key->ipv4.ct_orig.dst = orig->dst.u3.ip;
> + __ovs_ct_update_key_orig_tp(key, orig, IPPROTO_ICMP);
> + return;
> + } else if (key->eth.type == htons(ETH_P_IPV6) &&
> + !sw_flow_key_is_nd(key) &&
> + nf_ct_l3num(ct) == NFPROTO_IPV6) {
> + key->ipv6.ct_orig.src = orig->src.u3.in6;
> + key->ipv6.ct_orig.dst = orig->dst.u3.in6;
> + __ovs_ct_update_key_orig_tp(key, orig, NEXTHDR_ICMP);
> + return;
> + }
> + }
> + /* Clear 'ct.orig_proto' to mark the non-existence of conntrack
> + * original direction key fields.
> + */
> + key->ct.orig_proto = 0;
> }
>
> /* Update 'key' based on skb->_nfct. If 'post_ct' is true, then OVS has
> @@ -224,24 +267,55 @@ void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key)
> ovs_ct_update_key(skb, NULL, key, false, false);
> }
>
> -int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb)
> +#define IN6_ADDR_INITIALIZER(ADDR) \
> + { (ADDR).s6_addr32[0], (ADDR).s6_addr32[1], \
> + (ADDR).s6_addr32[2], (ADDR).s6_addr32[3] }
> +
> +int ovs_ct_put_key(const struct sw_flow_key *swkey,
> + const struct sw_flow_key *output, struct sk_buff *skb)
> {
> - if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, key->ct.state))
> + if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, output->ct.state))
> return -EMSGSIZE;
>
> if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
> - nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, key->ct.zone))
> + nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, output->ct.zone))
> return -EMSGSIZE;
>
> if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
> - nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, key->ct.mark))
> + nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, output->ct.mark))
> return -EMSGSIZE;
>
> if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
> - nla_put(skb, OVS_KEY_ATTR_CT_LABELS, sizeof(key->ct.labels),
> - &key->ct.labels))
> + nla_put(skb, OVS_KEY_ATTR_CT_LABELS, sizeof(output->ct.labels),
> + &output->ct.labels))
> return -EMSGSIZE;
>
> + if (swkey->ct.orig_proto) {
> + if (swkey->eth.type == htons(ETH_P_IP)) {
> + struct ovs_key_ct_tuple_ipv4 orig = {
> + output->ipv4.ct_orig.src,
> + output->ipv4.ct_orig.dst,
> + output->ct.orig_tp.src,
> + output->ct.orig_tp.dst,
> + output->ct.orig_proto,
> + };
> + if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4,
> + sizeof(orig), &orig))
> + return -EMSGSIZE;
> + } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
> + struct ovs_key_ct_tuple_ipv6 orig = {
> + IN6_ADDR_INITIALIZER(output->ipv6.ct_orig.src),
> + IN6_ADDR_INITIALIZER(output->ipv6.ct_orig.dst),
> + output->ct.orig_tp.src,
> + output->ct.orig_tp.dst,
> + output->ct.orig_proto,
> + };
> + if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6,
> + sizeof(orig), &orig))
> + return -EMSGSIZE;
> + }
> + }
> +
> return 0;
> }
>
> diff --git a/datapath/conntrack.h b/datapath/conntrack.h
> index 15dbf0a..2bd753d 100644
> --- a/datapath/conntrack.h
> +++ b/datapath/conntrack.h
> @@ -33,7 +33,8 @@ int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *,
> const struct ovs_conntrack_info *);
>
> void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key);
> -int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb);
> +int ovs_ct_put_key(const struct sw_flow_key *swkey,
> + const struct sw_flow_key *output, struct sk_buff *skb);
> void ovs_ct_free_action(const struct nlattr *a);
>
> #define CT_SUPPORTED_MASK (OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | \
> @@ -80,9 +81,14 @@ static inline void ovs_ct_fill_key(const struct sk_buff *skb,
> key->ct.zone = 0;
> key->ct.mark = 0;
> memset(&key->ct.labels, 0, sizeof(key->ct.labels));
> + /* Clear 'ct.orig_proto' to mark the non-existence of original
> + * direction key fields.
> + */
> + key->ct.orig_proto = 0;
> }
>
> -static inline int ovs_ct_put_key(const struct sw_flow_key *key,
> +static inline int ovs_ct_put_key(const struct sw_flow_key *swkey,
> + const struct sw_flow_key *output,
> struct sk_buff *skb)
> {
> return 0;
> diff --git a/datapath/flow.c b/datapath/flow.c
> index 390286c..d663960 100644
> --- a/datapath/flow.c
> +++ b/datapath/flow.c
> @@ -696,6 +696,8 @@ int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key)
> int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
> struct sk_buff *skb, struct sw_flow_key *key)
> {
> + int err;
> +
> /* Extract metadata from packet. */
> if (tun_info) {
> key->tun_proto = ip_tunnel_info_af(tun_info);
> @@ -719,25 +721,49 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
> key->phy.priority = skb->priority;
> key->phy.in_port = OVS_CB(skb)->input_vport->port_no;
> key->phy.skb_mark = skb->mark;
> - ovs_ct_fill_key(skb, key);
> key->ovs_flow_hash = 0;
> key->recirc_id = 0;
>
> - return key_extract(skb, key);
> + err = key_extract(skb, key);
> + if (!err)
> + ovs_ct_fill_key(skb, key); /* Must be after key_extract(). */
> + return err;
> }
>
> int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr,
> struct sk_buff *skb,
> struct sw_flow_key *key, bool log)
> {
> + const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
> + u64 attrs = 0;
> int err;
>
> + err = parse_flow_nlattrs(attr, a, &attrs, log);
> + if (err)
> + return -EINVAL;
> +
> memset(key, 0, OVS_SW_FLOW_KEY_METADATA_SIZE);
>
> /* Extract metadata from netlink attributes. */
> - err = ovs_nla_get_flow_metadata(net, attr, key, log);
> + err = ovs_nla_get_flow_metadata(net, a, attrs, key, log);
> if (err)
> return err;
>
> - return key_extract(skb, key);
> + err = key_extract(skb, key);
> + if (err)
> + return err;
> +
> + /* Check that we have conntrack original direction tuple metadata only
> + * for packets for which it makes sense. Otherwise the key may be
> + * corrupted due to overlapping key fields.
> + */
> + if (attrs & (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4) &&
> + key->eth.type != htons(ETH_P_IP))
> + return -EINVAL;
> + if (attrs & (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6) &&
> + (key->eth.type != htons(ETH_P_IPV6) ||
> + sw_flow_key_is_nd(key)))
> + return -EINVAL;
> +
> + return 0;
> }
> diff --git a/datapath/flow.h b/datapath/flow.h
> index 2dd0696..d4124c6 100644
> --- a/datapath/flow.h
> +++ b/datapath/flow.h
> @@ -1,5 +1,5 @@
> /*
> - * Copyright (c) 2007-2015 Nicira, Inc.
> + * Copyright (c) 2007-2017 Nicira, Inc.
> *
> * This program is free software; you can redistribute it and/or
> * modify it under the terms of version 2 of the GNU General Public
> @@ -94,10 +94,16 @@ struct sw_flow_key {
> __be32 src; /* IP source address. */
> __be32 dst; /* IP destination address. */
> } addr;
> - struct {
> - u8 sha[ETH_ALEN]; /* ARP source hardware address. */
> - u8 tha[ETH_ALEN]; /* ARP target hardware address. */
> - } arp;
> + union {
> + struct {
> + __be32 src;
> + __be32 dst;
> + } ct_orig; /* Conntrack original direction fields. */
> + struct {
> + u8 sha[ETH_ALEN]; /* ARP source hardware address. */
> + u8 tha[ETH_ALEN]; /* ARP target hardware address. */
> + } arp;
> + };
> } ipv4;
> struct {
> struct {
> @@ -105,23 +111,44 @@ struct sw_flow_key {
> struct in6_addr dst; /* IPv6 destination address. */
> } addr;
> __be32 label; /* IPv6 flow label. */
> - struct {
> - struct in6_addr target; /* ND target address. */
> - u8 sll[ETH_ALEN]; /* ND source link layer address. */
> - u8 tll[ETH_ALEN]; /* ND target link layer address. */
> - } nd;
> + union {
> + struct {
> + struct in6_addr src;
> + struct in6_addr dst;
> + } ct_orig; /* Conntrack original direction fields. */
> + struct {
> + struct in6_addr target; /* ND target address. */
> + u8 sll[ETH_ALEN]; /* ND source link layer address. */
> + u8 tll[ETH_ALEN]; /* ND target link layer address. */
> + } nd;
> + };
> } ipv6;
> };
> struct {
> /* Connection tracking fields. */
> + u8 state;
> + u8 orig_proto; /* CT orig tuple IP protocol. */
> u16 zone;
> u32 mark;
> - u8 state;
> + struct {
> + __be16 src; /* CT orig tuple tp src port. */
> + __be16 dst; /* CT orig tuple tp dst port. */
> + } orig_tp;
> +
> struct ovs_key_ct_labels labels;
> } ct;
>
> } __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */
>
> +static inline bool sw_flow_key_is_nd(const struct sw_flow_key *key)
> +{
> + return key->eth.type == htons(ETH_P_IPV6) &&
> + key->ip.proto == NEXTHDR_ICMP &&
> + key->tp.dst == 0 &&
> + (key->tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) ||
> + key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT));
> +}
> +
> struct sw_flow_key_range {
> unsigned short int start;
> unsigned short int end;
> diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
> index 0f32664..5fac207 100644
> --- a/datapath/flow_netlink.c
> +++ b/datapath/flow_netlink.c
> @@ -131,7 +131,9 @@ static bool match_validate(const struct sw_flow_match *match,
> * pass the validation tests.
> */
> mask_allowed &= ~((1ULL << OVS_KEY_ATTR_IPV4)
> + | (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)
> | (1ULL << OVS_KEY_ATTR_IPV6)
> + | (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)
> | (1ULL << OVS_KEY_ATTR_TCP)
> | (1ULL << OVS_KEY_ATTR_TCP_FLAGS)
> | (1ULL << OVS_KEY_ATTR_UDP)
> @@ -163,8 +165,10 @@ static bool match_validate(const struct sw_flow_match *match,
>
> if (match->key->eth.type == htons(ETH_P_IP)) {
> key_expected |= 1ULL << OVS_KEY_ATTR_IPV4;
> - if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
> + if (match->mask && match->mask->key.eth.type == htons(0xffff)) {
> mask_allowed |= 1ULL << OVS_KEY_ATTR_IPV4;
> + mask_allowed |= 1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4;
> + }
>
> if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
> if (match->key->ip.proto == IPPROTO_UDP) {
> @@ -198,8 +202,10 @@ static bool match_validate(const struct sw_flow_match *match,
>
> if (match->key->eth.type == htons(ETH_P_IPV6)) {
> key_expected |= 1ULL << OVS_KEY_ATTR_IPV6;
> - if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
> + if (match->mask && match->mask->key.eth.type == htons(0xffff)) {
> mask_allowed |= 1ULL << OVS_KEY_ATTR_IPV6;
> + mask_allowed |= 1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6;
> + }
>
> if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
> if (match->key->ip.proto == IPPROTO_UDP) {
> @@ -232,6 +238,12 @@ static bool match_validate(const struct sw_flow_match *match,
> htons(NDISC_NEIGHBOUR_SOLICITATION) ||
> match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
> key_expected |= 1ULL << OVS_KEY_ATTR_ND;
> + /* Original direction conntrack tuple
> + * uses the same space as the ND fields
> + * in the key, so both are not allowed
> + * at the same time.
> + */
> + mask_allowed &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6);
> if (match->mask && (match->mask->key.tp.src == htons(0xff)))
> mask_allowed |= 1ULL << OVS_KEY_ATTR_ND;
> }
> @@ -284,7 +296,7 @@ size_t ovs_key_attr_size(void)
> /* Whenever adding new OVS_KEY_ FIELDS, we should consider
> * updating this function.
> */
> - BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 26);
> + BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 28);
>
> return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
> + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
> @@ -297,6 +309,7 @@ size_t ovs_key_attr_size(void)
> + nla_total_size(2) /* OVS_KEY_ATTR_CT_ZONE */
> + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */
> + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABELS */
> + + nla_total_size(40) /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */
> + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
> + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
> + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */
> @@ -357,6 +370,10 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
> [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) },
> [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) },
> [OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) },
> + [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4] = {
> + .len = sizeof(struct ovs_key_ct_tuple_ipv4) },
> + [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = {
> + .len = sizeof(struct ovs_key_ct_tuple_ipv6) },
> };
>
> static bool check_attr_len(unsigned int attr_len, unsigned int expected_len)
> @@ -432,9 +449,8 @@ static int parse_flow_mask_nlattrs(const struct nlattr *attr,
> return __parse_flow_nlattrs(attr, a, attrsp, log, true);
> }
>
> -static int parse_flow_nlattrs(const struct nlattr *attr,
> - const struct nlattr *a[], u64 *attrsp,
> - bool log)
> +int parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[],
> + u64 *attrsp, bool log)
> {
> return __parse_flow_nlattrs(attr, a, attrsp, log, false);
> }
> @@ -900,6 +916,34 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
> sizeof(*cl), is_mask);
> *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS);
> }
> + if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)) {
> + const struct ovs_key_ct_tuple_ipv4 *ct;
> +
> + ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4]);
> +
> + SW_FLOW_KEY_PUT(match, ipv4.ct_orig.src, ct->ipv4_src, is_mask);
> + SW_FLOW_KEY_PUT(match, ipv4.ct_orig.dst, ct->ipv4_dst, is_mask);
> + SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask);
> + SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask);
> + SW_FLOW_KEY_PUT(match, ct.orig_proto, ct->ipv4_proto, is_mask);
> + *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4);
> + }
> + if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)) {
> + const struct ovs_key_ct_tuple_ipv6 *ct;
> +
> + ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6]);
> +
> + SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.src, &ct->ipv6_src,
> + sizeof(match->key->ipv6.ct_orig.src),
> + is_mask);
> + SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.dst, &ct->ipv6_dst,
> + sizeof(match->key->ipv6.ct_orig.dst),
> + is_mask);
> + SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask);
> + SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask);
> + SW_FLOW_KEY_PUT(match, ct.orig_proto, ct->ipv6_proto, is_mask);
> + *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6);
> + }
> return 0;
> }
>
> @@ -1377,9 +1421,12 @@ u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
>
> /**
> * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
> - * @key: Receives extracted in_port, priority, tun_key and skb_mark.
> - * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
> - * sequence.
> + * @net: Network namespace.
> + * @key: Receives extracted in_port, priority, tun_key, skb_mark and conntrack
> + * metadata.
> + * @a: Array of netlink attributes holding parsed %OVS_KEY_ATTR_* Netlink
> + * attributes.
> + * @attrs: Bit mask for the netlink attributes included in @a.
> * @log: Boolean to allow kernel error logging. Normally true, but when
> * probing for feature compatibility this should be passed in as false to
> * suppress unnecessary error logging.
> @@ -1388,25 +1435,23 @@ u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
> * take the same form accepted by flow_from_nlattrs(), but only enough of it to
> * get the metadata, that is, the parts of the flow key that cannot be
> * extracted from the packet itself.
> + *
> + * This must be called before the packet key fields are filled in 'key'.
> */
>
> -int ovs_nla_get_flow_metadata(struct net *net, const struct nlattr *attr,
> - struct sw_flow_key *key,
> - bool log)
> +int ovs_nla_get_flow_metadata(struct net *net,
> + const struct nlattr *a[OVS_KEY_ATTR_MAX + 1],
> + u64 attrs, struct sw_flow_key *key, bool log)
> {
> - const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
> struct sw_flow_match match;
> - u64 attrs = 0;
> - int err;
> -
> - err = parse_flow_nlattrs(attr, a, &attrs, log);
> - if (err)
> - return -EINVAL;
>
> memset(&match, 0, sizeof(match));
> match.key = key;
>
> memset(&key->ct, 0, sizeof(key->ct));
> + memset(&key->ipv4.ct_orig, 0, sizeof(key->ipv4.ct_orig));
> + memset(&key->ipv6.ct_orig, 0, sizeof(key->ipv6.ct_orig));
> +
> key->phy.in_port = DP_MAX_PORTS;
>
> return metadata_from_nlattrs(net, &match, &attrs, a, false, log);
> @@ -1455,7 +1500,7 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
> if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
> goto nla_put_failure;
>
> - if (ovs_ct_put_key(output, skb))
> + if (ovs_ct_put_key(swkey, output, skb))
> goto nla_put_failure;
>
> nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
> diff --git a/datapath/flow_netlink.h b/datapath/flow_netlink.h
> index 1c4208b..8d04d07 100644
> --- a/datapath/flow_netlink.h
> +++ b/datapath/flow_netlink.h
> @@ -45,8 +45,11 @@ void ovs_match_init(struct sw_flow_match *match,
>
> int ovs_nla_put_key(const struct sw_flow_key *, const struct sw_flow_key *,
> int attr, bool is_mask, struct sk_buff *);
> -int ovs_nla_get_flow_metadata(struct net *, const struct nlattr *,
> - struct sw_flow_key *, bool log);
> +int parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[],
> + u64 *attrsp, bool log);
> +int ovs_nla_get_flow_metadata(struct net *net,
> + const struct nlattr *a[OVS_KEY_ATTR_MAX + 1],
> + u64 attrs, struct sw_flow_key *key, bool log);
>
> int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb);
> int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb);
> diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h
> index d185860..23f8845 100644
> --- a/datapath/linux/compat/include/linux/openvswitch.h
> +++ b/datapath/linux/compat/include/linux/openvswitch.h
> @@ -356,6 +356,8 @@ enum ovs_key_attr {
> OVS_KEY_ATTR_CT_ZONE, /* u16 connection tracking zone. */
> OVS_KEY_ATTR_CT_MARK, /* u32 connection tracking mark */
> OVS_KEY_ATTR_CT_LABELS, /* 16-octet connection tracking labels */
> + OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4, /* struct ovs_key_ct_tuple_ipv4 */
> + OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6, /* struct ovs_key_ct_tuple_ipv6 */
>
> #ifdef __KERNEL__
> /* Only used within kernel data path. */
> @@ -496,6 +498,22 @@ struct ovs_key_ct_labels {
>
> #define OVS_CS_F_NAT_MASK (OVS_CS_F_SRC_NAT | OVS_CS_F_DST_NAT)
>
> +struct ovs_key_ct_tuple_ipv4 {
> + __be32 ipv4_src;
> + __be32 ipv4_dst;
> + __be16 src_port;
> + __be16 dst_port;
> + __u8 ipv4_proto;
> +};
> +
> +struct ovs_key_ct_tuple_ipv6 {
> + __be32 ipv6_src[4];
> + __be32 ipv6_dst[4];
> + __be16 src_port;
> + __be16 dst_port;
> + __u8 ipv6_proto;
> +};
> +
> /**
> * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands.
> * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow
> --
> 2.1.4
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
More information about the dev
mailing list