[ovs-dev] [PATCH v2 13/22] datapath: Add original direction conntrack tuple to sw_flow_key.
Jarno Rajahalme
jarno at ovn.org
Fri Mar 3 18:49:44 UTC 2017
> On Mar 2, 2017, at 5:57 PM, Joe Stringer <joe at ovn.org> wrote:
>
> On 28 February 2017 at 17:17, Jarno Rajahalme <jarno at ovn.org <mailto:jarno at ovn.org>> wrote:
>> Upstream commit:
>>
>> commit 9dd7f8907c3705dc7a7a375d1c6e30b06e6daffc
>> Author: Jarno Rajahalme <jarno at ovn.org>
>> Date: Thu Feb 9 11:21:59 2017 -0800
>>
>> openvswitch: Add original direction conntrack tuple to sw_flow_key.
>>
>> Add the fields of the conntrack original direction 5-tuple to struct
>> sw_flow_key. The new fields are initially marked as non-existent, and
>> are populated whenever a conntrack action is executed and either finds
>> or generates a conntrack entry. This means that these fields exist
>> for all packets that were not rejected by conntrack as untrackable.
>>
>> The original tuple fields in the sw_flow_key are filled from the
>> original direction tuple of the conntrack entry relating to the
>> current packet, or from the original direction tuple of the master
>> conntrack entry, if the current conntrack entry has a master.
>> Generally, expected connections of connections having an assigned
>> helper (e.g., FTP), have a master conntrack entry.
>>
>> The main purpose of the new conntrack original tuple fields is to
>> allow matching on them for policy decision purposes, with the premise
>> that the admissibility of tracked connections' reply packets (as well
>> as original direction packets), and both direction packets of any
>> related connections may be based on ACL rules applying to the master
>> connection's original direction 5-tuple. This also makes it easier to
>> make policy decisions when the actual packet headers might have been
>> transformed by NAT, as the original direction 5-tuple represents the
>> packet headers before any such transformation.
>>
>> When using the original direction 5-tuple, the admissibility of return
>> and/or related packets need not be based on the mere existence of a
>> conntrack entry, allowing separation of admission policy from the
>> established conntrack state. While existence of a conntrack entry is
>> required for admission of the return or related packets, policy
>> changes can cause connections that were initially admitted to be
>> rejected or dropped afterwards. If the admission of the return and
>> related packets was based on mere conntrack state (e.g., connection
>> being in an established state), a policy change that would make the
>> connection rejected or dropped would need to find and delete all
>> conntrack entries affected by such a change. When using the original
>> direction 5-tuple matching, the affected conntrack entries can be
>> allowed to time out instead, as the established state of the
>> connection would not need to be the basis for packet admission any
>> more.
>>
>> It should be noted that the directionality of related connections may
>> be the same as or different from that of the master connection, and
>> neither the original direction 5-tuple nor the conntrack state bits
>> carry this information. If needed, the directionality of the master
>> connection can be stored in master's conntrack mark or labels, which
>> are automatically inherited by the expected related connections.
>>
>> The fact that neither ARP nor ND packets are trackable by conntrack
>> allows mutual exclusion between ARP/ND and the new conntrack original
>> tuple fields. Hence, the IP addresses are overlaid in union with ARP
>> and ND fields. This allows the sw_flow_key to not grow much due to
>> this patch, but it also means that we must be careful to never use the
>> new key fields with ARP or ND packets. ARP is easy to distinguish and
>> keep mutually exclusive based on the ethernet type, but ND being an
>> ICMPv6 protocol requires a bit more attention.
>>
>> Signed-off-by: Jarno Rajahalme <jarno at ovn.org>
>> Acked-by: Joe Stringer <joe at ovn.org>
>> Acked-by: Pravin B Shelar <pshelar at ovn.org>
>> Signed-off-by: David S. Miller <davem at davemloft.net>
>>
>> Signed-off-by: Jarno Rajahalme <jarno at ovn.org>
>> ---
>
> I had to roll in the following incremental (derived from your later
> patch) to fix the build with this commit:
>
Right, I forgot to mention that I left these patches separate, knowing that they would not compile individually.
> diff --git a/lib/odp-execute.c b/lib/odp-execute.c
> index 1f6812a6dd02..50bbafaa0231 100644
> --- a/lib/odp-execute.c
> +++ b/lib/odp-execute.c
> @@ -381,6 +381,8 @@ odp_execute_set_action(struct dp_packet *packet,
> const struct nlattr *a)
> case OVS_KEY_ATTR_VLAN:
> case OVS_KEY_ATTR_TCP_FLAGS:
> case OVS_KEY_ATTR_CT_STATE:
> + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4:
> + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6:
> case OVS_KEY_ATTR_CT_ZONE:
> case OVS_KEY_ATTR_CT_MARK:
> case OVS_KEY_ATTR_CT_LABELS:
> @@ -476,6 +478,8 @@ odp_execute_masked_set_action(struct dp_packet *packet,
> case OVS_KEY_ATTR_CT_ZONE:
> case OVS_KEY_ATTR_CT_MARK:
> case OVS_KEY_ATTR_CT_LABELS:
> + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4:
> + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6:
> case OVS_KEY_ATTR_ENCAP:
> case OVS_KEY_ATTR_ETHERTYPE:
> case OVS_KEY_ATTR_IN_PORT:
> diff --git a/lib/odp-util.c b/lib/odp-util.c
> index 41067385e821..1f1512ae47fd 100644
> --- a/lib/odp-util.c
> +++ b/lib/odp-util.c
> @@ -150,6 +150,8 @@ ovs_key_attr_to_string(enum ovs_key_attr attr,
> char *namebuf, size_t bufsize)
> case OVS_KEY_ATTR_CT_ZONE: return "ct_zone";
> case OVS_KEY_ATTR_CT_MARK: return "ct_mark";
> case OVS_KEY_ATTR_CT_LABELS: return "ct_label";
> + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4: return "ct_tuple4";
> + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6: return "ct_tuple6";
> case OVS_KEY_ATTR_TUNNEL: return "tunnel";
> case OVS_KEY_ATTR_IN_PORT: return "in_port";
> case OVS_KEY_ATTR_ETHERNET: return "eth";
> @@ -1874,6 +1876,8 @@ static const struct attr_len_tbl
> ovs_flow_key_attr_lens[OVS_KEY_ATTR_MAX + 1] =
> [OVS_KEY_ATTR_CT_ZONE] = { .len = 2 },
> [OVS_KEY_ATTR_CT_MARK] = { .len = 4 },
> [OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) },
> + [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4] = { .len = sizeof(struct
> ovs_key_ct_tuple_ipv4) },
> + [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = { .len = sizeof(struct
> ovs_key_ct_tuple_ipv6) },
> };
>
> /* Returns the correct length of the payload for a flow key attribute of the
> @@ -2823,6 +2827,40 @@ format_odp_key_attr(const struct nlattr *a,
> const struct nlattr *ma,
> break;
> }
>
> + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4: {
> + const struct ovs_key_ct_tuple_ipv4 *key = nl_attr_get(a);
> + const struct ovs_key_ct_tuple_ipv4 *mask = ma ? nl_attr_get(ma) : NULL;
> +
> + format_ipv4(ds, "src", key->ipv4_src, MASK(mask, ipv4_src), verbose);
> + format_ipv4(ds, "dst", key->ipv4_dst, MASK(mask, ipv4_dst), verbose);
> + format_u8u(ds, "proto", key->ipv4_proto, MASK(mask, ipv4_proto),
> + verbose);
> + format_be16(ds, "tp_src", key->src_port, MASK(mask, src_port),
> + verbose);
> + format_be16(ds, "tp_dst", key->dst_port, MASK(mask, dst_port),
> + verbose);
> + ds_chomp(ds, ',');
> + break;
> + }
> +
> + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6: {
> + const struct ovs_key_ct_tuple_ipv6 *key = nl_attr_get(a);
> + const struct ovs_key_ct_tuple_ipv6 *mask = ma ? nl_attr_get(ma) : NULL;
> +
> + format_in6_addr(ds, "src", &key->ipv6_src, MASK(mask, ipv6_src),
> + verbose);
> + format_in6_addr(ds, "dst", &key->ipv6_dst, MASK(mask, ipv6_dst),
> + verbose);
> + format_u8u(ds, "proto", key->ipv6_proto, MASK(mask, ipv6_proto),
> + verbose);
> + format_be16(ds, "src_port", key->src_port, MASK(mask, src_port),
> + verbose);
> + format_be16(ds, "dst_port", key->dst_port, MASK(mask, dst_port),
> + verbose);
> + ds_chomp(ds, ',');
> + break;
> + }
> +
> case OVS_KEY_ATTR_TUNNEL:
> format_odp_tun_attr(a, ma, ds, verbose);
> break;
> diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c
> index 520b8dd196bb..69cdf69f4b39 100644
> --- a/ofproto/ofproto-dpif-sflow.c
> +++ b/ofproto/ofproto-dpif-sflow.c
> @@ -1025,6 +1025,8 @@ sflow_read_set_action(const struct nlattr *attr,
> case OVS_KEY_ATTR_CT_ZONE:
> case OVS_KEY_ATTR_CT_MARK:
> case OVS_KEY_ATTR_CT_LABELS:
> + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4:
> + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6:
> case OVS_KEY_ATTR_UNSPEC:
> case __OVS_KEY_ATTR_MAX:
> default:
>
>> datapath/actions.c | 2 +
>> datapath/conntrack.c | 86 +++++++++++++++++++++--
>> datapath/conntrack.h | 10 ++-
>> datapath/flow.c | 34 +++++++--
>> datapath/flow.h | 49 ++++++++++---
>> datapath/flow_netlink.c | 85 ++++++++++++++++------
>> datapath/flow_netlink.h | 7 +-
>> datapath/linux/compat/include/linux/openvswitch.h | 18 +++++
>> 8 files changed, 246 insertions(+), 45 deletions(-)
>>
>> diff --git a/datapath/actions.c b/datapath/actions.c
>> index 82833d0..71ec14c 100644
>> --- a/datapath/actions.c
>> +++ b/datapath/actions.c
>> @@ -1011,6 +1011,8 @@ static int execute_masked_set_action(struct sk_buff *skb,
>> case OVS_KEY_ATTR_CT_ZONE:
>> case OVS_KEY_ATTR_CT_MARK:
>> case OVS_KEY_ATTR_CT_LABELS:
>> + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4:
>> + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6:
>> err = -EINVAL;
>> break;
>> }
>> diff --git a/datapath/conntrack.c b/datapath/conntrack.c
>> index 16a7773..d8309c9 100644
>> --- a/datapath/conntrack.c
>> +++ b/datapath/conntrack.c
>> @@ -163,6 +163,20 @@ static void ovs_ct_get_labels(const struct nf_conn *ct,
>> memset(labels, 0, OVS_CT_LABELS_LEN);
>> }
>>
>> +static void __ovs_ct_update_key_orig_tp(struct sw_flow_key *key,
>> + const struct nf_conntrack_tuple *orig,
>> + u8 icmp_proto)
>> +{
>> + key->ct.orig_proto = orig->dst.protonum;
>> + if (orig->dst.protonum == icmp_proto) {
>> + key->ct.orig_tp.src = htons(orig->dst.u.icmp.type);
>> + key->ct.orig_tp.dst = htons(orig->dst.u.icmp.code);
>> + } else {
>> + key->ct.orig_tp.src = orig->src.u.all;
>> + key->ct.orig_tp.dst = orig->dst.u.all;
>> + }
>> +}
>> +
>> static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state,
>> const struct nf_conntrack_zone *zone,
>> const struct nf_conn *ct)
>> @@ -171,6 +185,35 @@ static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state,
>> key->ct.zone = zone->id;
>> key->ct.mark = ovs_ct_get_mark(ct);
>> ovs_ct_get_labels(ct, &key->ct.labels);
>> +
>> + if (ct) {
>> + const struct nf_conntrack_tuple *orig;
>> +
>> + /* Use the master if we have one. */
>> + if (ct->master)
>> + ct = ct->master;
>> + orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
>> +
>> + /* IP version must match with the master connection. */
>> + if (key->eth.type == htons(ETH_P_IP) &&
>> + nf_ct_l3num(ct) == NFPROTO_IPV4) {
>> + key->ipv4.ct_orig.src = orig->src.u3.ip;
>> + key->ipv4.ct_orig.dst = orig->dst.u3.ip;
>> + __ovs_ct_update_key_orig_tp(key, orig, IPPROTO_ICMP);
>> + return;
>> + } else if (key->eth.type == htons(ETH_P_IPV6) &&
>> + !sw_flow_key_is_nd(key) &&
>> + nf_ct_l3num(ct) == NFPROTO_IPV6) {
>> + key->ipv6.ct_orig.src = orig->src.u3.in6;
>> + key->ipv6.ct_orig.dst = orig->dst.u3.in6;
>> + __ovs_ct_update_key_orig_tp(key, orig, NEXTHDR_ICMP);
>> + return;
>> + }
>> + }
>> + /* Clear 'ct.orig_proto' to mark the non-existence of conntrack
>> + * original direction key fields.
>> + */
>> + key->ct.orig_proto = 0;
>> }
>>
>> /* Update 'key' based on skb->_nfct. If 'post_ct' is true, then OVS has
>> @@ -224,24 +267,55 @@ void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key)
>> ovs_ct_update_key(skb, NULL, key, false, false);
>> }
>>
>> -int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb)
>> +#define IN6_ADDR_INITIALIZER(ADDR) \
>> + { (ADDR).s6_addr32[0], (ADDR).s6_addr32[1], \
>> + (ADDR).s6_addr32[2], (ADDR).s6_addr32[3] }
>> +
>> +int ovs_ct_put_key(const struct sw_flow_key *swkey,
>> + const struct sw_flow_key *output, struct sk_buff *skb)
>> {
>> - if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, key->ct.state))
>> + if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, output->ct.state))
>> return -EMSGSIZE;
>>
>> if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
>> - nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, key->ct.zone))
>> + nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, output->ct.zone))
>> return -EMSGSIZE;
>>
>> if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
>> - nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, key->ct.mark))
>> + nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, output->ct.mark))
>> return -EMSGSIZE;
>>
>> if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
>> - nla_put(skb, OVS_KEY_ATTR_CT_LABELS, sizeof(key->ct.labels),
>> - &key->ct.labels))
>> + nla_put(skb, OVS_KEY_ATTR_CT_LABELS, sizeof(output->ct.labels),
>> + &output->ct.labels))
>> return -EMSGSIZE;
>>
>> + if (swkey->ct.orig_proto) {
>> + if (swkey->eth.type == htons(ETH_P_IP)) {
>> + struct ovs_key_ct_tuple_ipv4 orig = {
>> + output->ipv4.ct_orig.src,
>> + output->ipv4.ct_orig.dst,
>> + output->ct.orig_tp.src,
>> + output->ct.orig_tp.dst,
>> + output->ct.orig_proto,
>> + };
>> + if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4,
>> + sizeof(orig), &orig))
>> + return -EMSGSIZE;
>> + } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
>> + struct ovs_key_ct_tuple_ipv6 orig = {
>> + IN6_ADDR_INITIALIZER(output->ipv6.ct_orig.src),
>> + IN6_ADDR_INITIALIZER(output->ipv6.ct_orig.dst),
>> + output->ct.orig_tp.src,
>> + output->ct.orig_tp.dst,
>> + output->ct.orig_proto,
>> + };
>> + if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6,
>> + sizeof(orig), &orig))
>> + return -EMSGSIZE;
>> + }
>> + }
>> +
>> return 0;
>> }
>>
>> diff --git a/datapath/conntrack.h b/datapath/conntrack.h
>> index 15dbf0a..2bd753d 100644
>> --- a/datapath/conntrack.h
>> +++ b/datapath/conntrack.h
>> @@ -33,7 +33,8 @@ int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *,
>> const struct ovs_conntrack_info *);
>>
>> void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key);
>> -int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb);
>> +int ovs_ct_put_key(const struct sw_flow_key *swkey,
>> + const struct sw_flow_key *output, struct sk_buff *skb);
>> void ovs_ct_free_action(const struct nlattr *a);
>>
>> #define CT_SUPPORTED_MASK (OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | \
>> @@ -80,9 +81,14 @@ static inline void ovs_ct_fill_key(const struct sk_buff *skb,
>> key->ct.zone = 0;
>> key->ct.mark = 0;
>> memset(&key->ct.labels, 0, sizeof(key->ct.labels));
>> + /* Clear 'ct.orig_proto' to mark the non-existence of original
>> + * direction key fields.
>> + */
>> + key->ct.orig_proto = 0;
>> }
>>
>> -static inline int ovs_ct_put_key(const struct sw_flow_key *key,
>> +static inline int ovs_ct_put_key(const struct sw_flow_key *swkey,
>> + const struct sw_flow_key *output,
>> struct sk_buff *skb)
>> {
>> return 0;
>> diff --git a/datapath/flow.c b/datapath/flow.c
>> index 390286c..d663960 100644
>> --- a/datapath/flow.c
>> +++ b/datapath/flow.c
>> @@ -696,6 +696,8 @@ int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key)
>> int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
>> struct sk_buff *skb, struct sw_flow_key *key)
>> {
>> + int err;
>> +
>> /* Extract metadata from packet. */
>> if (tun_info) {
>> key->tun_proto = ip_tunnel_info_af(tun_info);
>> @@ -719,25 +721,49 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
>> key->phy.priority = skb->priority;
>> key->phy.in_port = OVS_CB(skb)->input_vport->port_no;
>> key->phy.skb_mark = skb->mark;
>> - ovs_ct_fill_key(skb, key);
>> key->ovs_flow_hash = 0;
>> key->recirc_id = 0;
>>
>> - return key_extract(skb, key);
>> + err = key_extract(skb, key);
>> + if (!err)
>> + ovs_ct_fill_key(skb, key); /* Must be after key_extract(). */
>> + return err;
>> }
>>
>> int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr,
>> struct sk_buff *skb,
>> struct sw_flow_key *key, bool log)
>> {
>> + const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
>> + u64 attrs = 0;
>> int err;
>>
>> + err = parse_flow_nlattrs(attr, a, &attrs, log);
>> + if (err)
>> + return -EINVAL;
>> +
>> memset(key, 0, OVS_SW_FLOW_KEY_METADATA_SIZE);
>>
>> /* Extract metadata from netlink attributes. */
>> - err = ovs_nla_get_flow_metadata(net, attr, key, log);
>> + err = ovs_nla_get_flow_metadata(net, a, attrs, key, log);
>> if (err)
>> return err;
>>
>> - return key_extract(skb, key);
>> + err = key_extract(skb, key);
>> + if (err)
>> + return err;
>> +
>> + /* Check that we have conntrack original direction tuple metadata only
>> + * for packets for which it makes sense. Otherwise the key may be
>> + * corrupted due to overlapping key fields.
>> + */
>> + if (attrs & (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4) &&
>> + key->eth.type != htons(ETH_P_IP))
>> + return -EINVAL;
>> + if (attrs & (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6) &&
>> + (key->eth.type != htons(ETH_P_IPV6) ||
>> + sw_flow_key_is_nd(key)))
>> + return -EINVAL;
>> +
>> + return 0;
>> }
>> diff --git a/datapath/flow.h b/datapath/flow.h
>> index 2dd0696..d4124c6 100644
>> --- a/datapath/flow.h
>> +++ b/datapath/flow.h
>> @@ -1,5 +1,5 @@
>> /*
>> - * Copyright (c) 2007-2015 Nicira, Inc.
>> + * Copyright (c) 2007-2017 Nicira, Inc.
>> *
>> * This program is free software; you can redistribute it and/or
>> * modify it under the terms of version 2 of the GNU General Public
>> @@ -94,10 +94,16 @@ struct sw_flow_key {
>> __be32 src; /* IP source address. */
>> __be32 dst; /* IP destination address. */
>> } addr;
>> - struct {
>> - u8 sha[ETH_ALEN]; /* ARP source hardware address. */
>> - u8 tha[ETH_ALEN]; /* ARP target hardware address. */
>> - } arp;
>> + union {
>> + struct {
>> + __be32 src;
>> + __be32 dst;
>> + } ct_orig; /* Conntrack original direction fields. */
>> + struct {
>> + u8 sha[ETH_ALEN]; /* ARP source hardware address. */
>> + u8 tha[ETH_ALEN]; /* ARP target hardware address. */
>> + } arp;
>> + };
>> } ipv4;
>> struct {
>> struct {
>> @@ -105,23 +111,44 @@ struct sw_flow_key {
>> struct in6_addr dst; /* IPv6 destination address. */
>> } addr;
>> __be32 label; /* IPv6 flow label. */
>> - struct {
>> - struct in6_addr target; /* ND target address. */
>> - u8 sll[ETH_ALEN]; /* ND source link layer address. */
>> - u8 tll[ETH_ALEN]; /* ND target link layer address. */
>> - } nd;
>> + union {
>> + struct {
>> + struct in6_addr src;
>> + struct in6_addr dst;
>> + } ct_orig; /* Conntrack original direction fields. */
>> + struct {
>> + struct in6_addr target; /* ND target address. */
>> + u8 sll[ETH_ALEN]; /* ND source link layer address. */
>> + u8 tll[ETH_ALEN]; /* ND target link layer address. */
>> + } nd;
>> + };
>> } ipv6;
>> };
>> struct {
>> /* Connection tracking fields. */
>> + u8 state;
>> + u8 orig_proto; /* CT orig tuple IP protocol. */
>> u16 zone;
>> u32 mark;
>> - u8 state;
>> + struct {
>> + __be16 src; /* CT orig tuple tp src port. */
>> + __be16 dst; /* CT orig tuple tp dst port. */
>> + } orig_tp;
>> +
>> struct ovs_key_ct_labels labels;
>> } ct;
>>
>> } __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */
>>
>> +static inline bool sw_flow_key_is_nd(const struct sw_flow_key *key)
>> +{
>> + return key->eth.type == htons(ETH_P_IPV6) &&
>> + key->ip.proto == NEXTHDR_ICMP &&
>> + key->tp.dst == 0 &&
>> + (key->tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) ||
>> + key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT));
>> +}
>> +
>> struct sw_flow_key_range {
>> unsigned short int start;
>> unsigned short int end;
>> diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
>> index 0f32664..5fac207 100644
>> --- a/datapath/flow_netlink.c
>> +++ b/datapath/flow_netlink.c
>> @@ -131,7 +131,9 @@ static bool match_validate(const struct sw_flow_match *match,
>> * pass the validation tests.
>> */
>> mask_allowed &= ~((1ULL << OVS_KEY_ATTR_IPV4)
>> + | (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)
>> | (1ULL << OVS_KEY_ATTR_IPV6)
>> + | (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)
>> | (1ULL << OVS_KEY_ATTR_TCP)
>> | (1ULL << OVS_KEY_ATTR_TCP_FLAGS)
>> | (1ULL << OVS_KEY_ATTR_UDP)
>> @@ -163,8 +165,10 @@ static bool match_validate(const struct sw_flow_match *match,
>>
>> if (match->key->eth.type == htons(ETH_P_IP)) {
>> key_expected |= 1ULL << OVS_KEY_ATTR_IPV4;
>> - if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
>> + if (match->mask && match->mask->key.eth.type == htons(0xffff)) {
>> mask_allowed |= 1ULL << OVS_KEY_ATTR_IPV4;
>> + mask_allowed |= 1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4;
>> + }
>>
>> if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
>> if (match->key->ip.proto == IPPROTO_UDP) {
>> @@ -198,8 +202,10 @@ static bool match_validate(const struct sw_flow_match *match,
>>
>> if (match->key->eth.type == htons(ETH_P_IPV6)) {
>> key_expected |= 1ULL << OVS_KEY_ATTR_IPV6;
>> - if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
>> + if (match->mask && match->mask->key.eth.type == htons(0xffff)) {
>> mask_allowed |= 1ULL << OVS_KEY_ATTR_IPV6;
>> + mask_allowed |= 1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6;
>> + }
>>
>> if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
>> if (match->key->ip.proto == IPPROTO_UDP) {
>> @@ -232,6 +238,12 @@ static bool match_validate(const struct sw_flow_match *match,
>> htons(NDISC_NEIGHBOUR_SOLICITATION) ||
>> match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
>> key_expected |= 1ULL << OVS_KEY_ATTR_ND;
>> + /* Original direction conntrack tuple
>> + * uses the same space as the ND fields
>> + * in the key, so both are not allowed
>> + * at the same time.
>> + */
>> + mask_allowed &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6);
>> if (match->mask && (match->mask->key.tp.src == htons(0xff)))
>> mask_allowed |= 1ULL << OVS_KEY_ATTR_ND;
>> }
>> @@ -284,7 +296,7 @@ size_t ovs_key_attr_size(void)
>> /* Whenever adding new OVS_KEY_ FIELDS, we should consider
>> * updating this function.
>> */
>> - BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 26);
>> + BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 28);
>>
>> return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
>> + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
>> @@ -297,6 +309,7 @@ size_t ovs_key_attr_size(void)
>> + nla_total_size(2) /* OVS_KEY_ATTR_CT_ZONE */
>> + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */
>> + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABELS */
>> + + nla_total_size(40) /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */
>> + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
>> + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
>> + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */
>> @@ -357,6 +370,10 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
>> [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) },
>> [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) },
>> [OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) },
>> + [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4] = {
>> + .len = sizeof(struct ovs_key_ct_tuple_ipv4) },
>> + [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = {
>> + .len = sizeof(struct ovs_key_ct_tuple_ipv6) },
>> };
>>
>> static bool check_attr_len(unsigned int attr_len, unsigned int expected_len)
>> @@ -432,9 +449,8 @@ static int parse_flow_mask_nlattrs(const struct nlattr *attr,
>> return __parse_flow_nlattrs(attr, a, attrsp, log, true);
>> }
>>
>> -static int parse_flow_nlattrs(const struct nlattr *attr,
>> - const struct nlattr *a[], u64 *attrsp,
>> - bool log)
>> +int parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[],
>> + u64 *attrsp, bool log)
>> {
>> return __parse_flow_nlattrs(attr, a, attrsp, log, false);
>> }
>> @@ -900,6 +916,34 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
>> sizeof(*cl), is_mask);
>> *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS);
>> }
>> + if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)) {
>> + const struct ovs_key_ct_tuple_ipv4 *ct;
>> +
>> + ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4]);
>> +
>> + SW_FLOW_KEY_PUT(match, ipv4.ct_orig.src, ct->ipv4_src, is_mask);
>> + SW_FLOW_KEY_PUT(match, ipv4.ct_orig.dst, ct->ipv4_dst, is_mask);
>> + SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask);
>> + SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask);
>> + SW_FLOW_KEY_PUT(match, ct.orig_proto, ct->ipv4_proto, is_mask);
>> + *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4);
>> + }
>> + if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)) {
>> + const struct ovs_key_ct_tuple_ipv6 *ct;
>> +
>> + ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6]);
>> +
>> + SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.src, &ct->ipv6_src,
>> + sizeof(match->key->ipv6.ct_orig.src),
>> + is_mask);
>> + SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.dst, &ct->ipv6_dst,
>> + sizeof(match->key->ipv6.ct_orig.dst),
>> + is_mask);
>> + SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask);
>> + SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask);
>> + SW_FLOW_KEY_PUT(match, ct.orig_proto, ct->ipv6_proto, is_mask);
>> + *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6);
>> + }
>> return 0;
>> }
>>
>> @@ -1377,9 +1421,12 @@ u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
>>
>> /**
>> * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
>> - * @key: Receives extracted in_port, priority, tun_key and skb_mark.
>> - * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
>> - * sequence.
>> + * @net: Network namespace.
>> + * @key: Receives extracted in_port, priority, tun_key, skb_mark and conntrack
>> + * metadata.
>> + * @a: Array of netlink attributes holding parsed %OVS_KEY_ATTR_* Netlink
>> + * attributes.
>> + * @attrs: Bit mask for the netlink attributes included in @a.
>> * @log: Boolean to allow kernel error logging. Normally true, but when
>> * probing for feature compatibility this should be passed in as false to
>> * suppress unnecessary error logging.
>> @@ -1388,25 +1435,23 @@ u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
>> * take the same form accepted by flow_from_nlattrs(), but only enough of it to
>> * get the metadata, that is, the parts of the flow key that cannot be
>> * extracted from the packet itself.
>> + *
>> + * This must be called before the packet key fields are filled in 'key'.
>> */
>>
>> -int ovs_nla_get_flow_metadata(struct net *net, const struct nlattr *attr,
>> - struct sw_flow_key *key,
>> - bool log)
>> +int ovs_nla_get_flow_metadata(struct net *net,
>> + const struct nlattr *a[OVS_KEY_ATTR_MAX + 1],
>> + u64 attrs, struct sw_flow_key *key, bool log)
>> {
>> - const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
>> struct sw_flow_match match;
>> - u64 attrs = 0;
>> - int err;
>> -
>> - err = parse_flow_nlattrs(attr, a, &attrs, log);
>> - if (err)
>> - return -EINVAL;
>>
>> memset(&match, 0, sizeof(match));
>> match.key = key;
>>
>> memset(&key->ct, 0, sizeof(key->ct));
>> + memset(&key->ipv4.ct_orig, 0, sizeof(key->ipv4.ct_orig));
>> + memset(&key->ipv6.ct_orig, 0, sizeof(key->ipv6.ct_orig));
>> +
>> key->phy.in_port = DP_MAX_PORTS;
>>
>> return metadata_from_nlattrs(net, &match, &attrs, a, false, log);
>> @@ -1455,7 +1500,7 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
>> if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
>> goto nla_put_failure;
>>
>> - if (ovs_ct_put_key(output, skb))
>> + if (ovs_ct_put_key(swkey, output, skb))
>> goto nla_put_failure;
>>
>> nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
>> diff --git a/datapath/flow_netlink.h b/datapath/flow_netlink.h
>> index 1c4208b..8d04d07 100644
>> --- a/datapath/flow_netlink.h
>> +++ b/datapath/flow_netlink.h
>> @@ -45,8 +45,11 @@ void ovs_match_init(struct sw_flow_match *match,
>>
>> int ovs_nla_put_key(const struct sw_flow_key *, const struct sw_flow_key *,
>> int attr, bool is_mask, struct sk_buff *);
>> -int ovs_nla_get_flow_metadata(struct net *, const struct nlattr *,
>> - struct sw_flow_key *, bool log);
>> +int parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[],
>> + u64 *attrsp, bool log);
>> +int ovs_nla_get_flow_metadata(struct net *net,
>> + const struct nlattr *a[OVS_KEY_ATTR_MAX + 1],
>> + u64 attrs, struct sw_flow_key *key, bool log);
>>
>> int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb);
>> int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb);
>> diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h
>> index d185860..23f8845 100644
>> --- a/datapath/linux/compat/include/linux/openvswitch.h
>> +++ b/datapath/linux/compat/include/linux/openvswitch.h
>> @@ -356,6 +356,8 @@ enum ovs_key_attr {
>> OVS_KEY_ATTR_CT_ZONE, /* u16 connection tracking zone. */
>> OVS_KEY_ATTR_CT_MARK, /* u32 connection tracking mark */
>> OVS_KEY_ATTR_CT_LABELS, /* 16-octet connection tracking labels */
>> + OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4, /* struct ovs_key_ct_tuple_ipv4 */
>> + OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6, /* struct ovs_key_ct_tuple_ipv6 */
>>
>> #ifdef __KERNEL__
>> /* Only used within kernel data path. */
>> @@ -496,6 +498,22 @@ struct ovs_key_ct_labels {
>>
>> #define OVS_CS_F_NAT_MASK (OVS_CS_F_SRC_NAT | OVS_CS_F_DST_NAT)
>>
>> +struct ovs_key_ct_tuple_ipv4 {
>> + __be32 ipv4_src;
>> + __be32 ipv4_dst;
>> + __be16 src_port;
>> + __be16 dst_port;
>> + __u8 ipv4_proto;
>> +};
>> +
>> +struct ovs_key_ct_tuple_ipv6 {
>> + __be32 ipv6_src[4];
>> + __be32 ipv6_dst[4];
>> + __be16 src_port;
>> + __be16 dst_port;
>> + __u8 ipv6_proto;
>> +};
>> +
>> /**
>> * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands.
>> * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow
>> --
>> 2.1.4
>>
>> _______________________________________________
>> dev mailing list
>> dev at openvswitch.org <mailto:dev at openvswitch.org>
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev <https://mail.openvswitch.org/mailman/listinfo/ovs-dev>
More information about the dev
mailing list