[ovs-dev] [PATCH] datapath: Hash and compare only the part of sw_flow_key actually used.

Andrew Evans aevans at nicira.com
Sat May 14 03:54:26 UTC 2011


Currently the whole flow key struct is hashed on every packet received from the
network or userspace. The whole struct is also compared byte-for-byte when
doing flow table lookups. This consumes a fair percentage of CPU time, even
though part of the structure is usually unused (e.g. the IPv6 fields when
handling IPv4 traffic; the IPv4 fields when handling non-IP Ethernet frames).

This commit reorders the fields in the flow key struct to put the least
commonly used elements at the end and changes the hash and comparison functions
to look only at the part that contains data.

Signed-off-by: Andrew Evans <aevans at nicira.com>
---
 datapath/actions.c  |    6 +-
 datapath/datapath.c |   31 ++--
 datapath/flow.c     |  502 +++++++++++++++++++++++++++++----------------------
 datapath/flow.h     |   70 +++++---
 datapath/table.c    |   14 +-
 datapath/table.h    |    4 +-
 datapath/tunnel.c   |   21 ++-
 7 files changed, 383 insertions(+), 265 deletions(-)

diff --git a/datapath/actions.c b/datapath/actions.c
index b6b7135..abc06e6 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -114,13 +114,13 @@ static struct sk_buff *modify_vlan_tci(struct sk_buff *skb, __be16 tci)
 
 static bool is_ip(struct sk_buff *skb)
 {
-	return (OVS_CB(skb)->flow->key.dl_type == htons(ETH_P_IP) &&
+	return (OVS_CB(skb)->flow->key.eth.dl_type == htons(ETH_P_IP) &&
 		skb->transport_header > skb->network_header);
 }
 
 static __sum16 *get_l4_checksum(struct sk_buff *skb)
 {
-	u8 nw_proto = OVS_CB(skb)->flow->key.nw_proto;
+	u8 nw_proto = OVS_CB(skb)->flow->key.ip.nw_proto;
 	int transport_len = skb->len - skb_transport_offset(skb);
 	if (nw_proto == IPPROTO_TCP) {
 		if (likely(transport_len >= sizeof(struct tcphdr)))
@@ -230,7 +230,7 @@ static bool is_spoofed_arp(struct sk_buff *skb)
 {
 	struct arp_eth_header *arp;
 
-	if (OVS_CB(skb)->flow->key.dl_type != htons(ETH_P_ARP))
+	if (OVS_CB(skb)->flow->key.eth.dl_type != htons(ETH_P_ARP))
 		return false;
 
 	if (skb_network_offset(skb) + sizeof(struct arp_eth_header) > skb->len)
diff --git a/datapath/datapath.c b/datapath/datapath.c
index fc00d78..826d899 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -273,10 +273,11 @@ void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
 	if (!OVS_CB(skb)->flow) {
 		struct sw_flow_key key;
 		struct tbl_node *flow_node;
+		int key_len;
 		bool is_frag;
 
 		/* Extract flow from 'skb' into 'key'. */
-		error = flow_extract(skb, p->port_no, &key, &is_frag);
+		error = flow_extract(skb, p->port_no, &key, &key_len, &is_frag);
 		if (unlikely(error)) {
 			kfree_skb(skb);
 			return;
@@ -289,8 +290,8 @@ void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
 		}
 
 		/* Look up flow. */
-		flow_node = tbl_lookup(rcu_dereference(dp->table), &key,
-					flow_hash(&key), flow_cmp);
+		flow_node = tbl_lookup(rcu_dereference(dp->table), &key, key_len,
+				       flow_hash(&key, key_len), flow_cmp);
 		if (unlikely(!flow_node)) {
 			struct dp_upcall_info upcall;
 
@@ -651,6 +652,7 @@ static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 	bool is_frag;
 	int len;
 	int err;
+	int key_len;
 
 	err = -EINVAL;
 	if (!a[ODP_PACKET_ATTR_PACKET] || !a[ODP_PACKET_ATTR_ACTIONS] ||
@@ -687,10 +689,10 @@ static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 	if (IS_ERR(flow))
 		goto err_kfree_skb;
 
-	err = flow_extract(packet, -1, &flow->key, &is_frag);
+	err = flow_extract(packet, -1, &flow->key, &key_len, &is_frag);
 	if (err)
 		goto err_flow_put;
-	flow->tbl_node.hash = flow_hash(&flow->key);
+	flow->tbl_node.hash = flow_hash(&flow->key, key_len);
 
 	acts = flow_actions_alloc(a[ODP_PACKET_ATTR_ACTIONS]);
 	err = PTR_ERR(acts);
@@ -935,12 +937,13 @@ static int odp_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 	struct tbl *table;
 	u32 hash;
 	int error;
+	int key_len;
 
 	/* Extract key. */
 	error = -EINVAL;
 	if (!a[ODP_FLOW_ATTR_KEY])
 		goto error;
-	error = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]);
+	error = flow_from_nlattrs(&key, &key_len, a[ODP_FLOW_ATTR_KEY]);
 	if (error)
 		goto error;
 
@@ -959,9 +962,9 @@ static int odp_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 	if (!dp)
 		goto error;
 
-	hash = flow_hash(&key);
+	hash = flow_hash(&key, key_len);
 	table = get_table_protected(dp);
-	flow_node = tbl_lookup(table, &key, hash, flow_cmp);
+	flow_node = tbl_lookup(table, &key, key_len, hash, flow_cmp);
 	if (!flow_node) {
 		struct sw_flow_actions *acts;
 
@@ -1071,10 +1074,11 @@ static int odp_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
 	struct datapath *dp;
 	struct tbl *table;
 	int err;
+	int key_len;
 
 	if (!a[ODP_FLOW_ATTR_KEY])
 		return -EINVAL;
-	err = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]);
+	err = flow_from_nlattrs(&key, &key_len, a[ODP_FLOW_ATTR_KEY]);
 	if (err)
 		return err;
 
@@ -1083,7 +1087,8 @@ static int odp_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
 		return -ENODEV;
 
 	table = get_table_protected(dp);
-	flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp);
+	flow_node = tbl_lookup(table, &key, key_len, flow_hash(&key, key_len),
+			       flow_cmp);
 	if (!flow_node)
 		return -ENOENT;
 
@@ -1106,10 +1111,11 @@ static int odp_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	struct datapath *dp;
 	struct tbl *table;
 	int err;
+	int key_len;
 
 	if (!a[ODP_FLOW_ATTR_KEY])
 		return flush_flows(odp_header->dp_ifindex);
-	err = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]);
+	err = flow_from_nlattrs(&key, &key_len, a[ODP_FLOW_ATTR_KEY]);
 	if (err)
 		return err;
 
@@ -1118,7 +1124,8 @@ static int odp_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
  		return -ENODEV;
 
 	table = get_table_protected(dp);
-	flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp);
+	flow_node = tbl_lookup(table, &key, key_len, flow_hash(&key, key_len),
+			       flow_cmp);
 	if (!flow_node)
 		return -ENOENT;
 	flow = flow_cast(flow_node);
diff --git a/datapath/flow.c b/datapath/flow.c
index d678979..6969a70 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -114,7 +114,11 @@ u64 flow_used_time(unsigned long flow_jiffies)
 	return cur_ms - idle_ms;
 }
 
-static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
+/* Byte offset just past 'field': the key length needed to cover it. */
+#define SW_FLOW_KEY_OFFSET(field) (offsetof(struct sw_flow_key, field) + \
+				   FIELD_SIZEOF(struct sw_flow_key, field))
+
+static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key, int *key_lenp)
 {
 	unsigned int nh_ofs = skb_network_offset(skb);
 	unsigned int nh_len;
@@ -123,6 +127,8 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
 	uint8_t nexthdr;
 	int err;
 
+	*key_lenp = SW_FLOW_KEY_OFFSET(ipv6.addr);
+
 	err = check_header(skb, nh_ofs + sizeof(*nh));
 	if (unlikely(err))
 		return err;
@@ -131,10 +137,10 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
 	nexthdr = nh->nexthdr;
 	payload_ofs = (u8 *)(nh + 1) - skb->data;
 
-	ipv6_addr_copy(&key->ipv6_src, &nh->saddr);
-	ipv6_addr_copy(&key->ipv6_dst, &nh->daddr);
-	key->nw_tos = ipv6_get_dsfield(nh) & ~INET_ECN_MASK;
-	key->nw_proto = NEXTHDR_NONE;
+	key->ip.nw_proto = NEXTHDR_NONE;
+	key->ip.nw_tos = ipv6_get_dsfield(nh) & ~INET_ECN_MASK;
+	ipv6_addr_copy(&key->ipv6.addr.src, &nh->saddr);
+	ipv6_addr_copy(&key->ipv6.addr.dst, &nh->daddr);
 
 	payload_ofs = ipv6_skip_exthdr(skb, payload_ofs, &nexthdr);
 	if (unlikely(payload_ofs < 0))
@@ -142,7 +148,7 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
 
 	nh_len = payload_ofs - nh_ofs;
 	skb_set_transport_header(skb, nh_ofs + nh_len);
-	key->nw_proto = nexthdr;
+	key->ip.nw_proto = nexthdr;
 	return nh_len;
 }
 
@@ -159,8 +165,8 @@ void flow_used(struct sw_flow *flow, struct sk_buff *skb)
 {
 	u8 tcp_flags = 0;
 
-	if (flow->key.dl_type == htons(ETH_P_IP) &&
-	    flow->key.nw_proto == IPPROTO_TCP) {
+	if (flow->key.eth.dl_type == htons(ETH_P_IP) &&
+	    flow->key.ip.nw_proto == IPPROTO_TCP) {
 		u8 *tcp = (u8 *)tcp_hdr(skb);
 		tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK;
 	}
@@ -277,7 +283,7 @@ static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
 		return -ENOMEM;
 
 	qp = (struct qtag_prefix *) skb->data;
-	key->dl_tci = qp->tci | htons(VLAN_TAG_PRESENT);
+	key->eth.dl_tci = qp->tci | htons(VLAN_TAG_PRESENT);
 	__skb_pull(skb, sizeof(struct qtag_prefix));
 
 	return 0;
@@ -318,15 +324,18 @@ static __be16 parse_ethertype(struct sk_buff *skb)
 }
 
 static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
-			int nh_len)
+			int *key_lenp, int nh_len)
 {
 	struct icmp6hdr *icmp = icmp6_hdr(skb);
+	int error = 0;
+	int key_len;
 
 	/* The ICMPv6 type and code fields use the 16-bit transport port
 	 * fields, so we need to store them in 16-bit network byte order.
 	 */
-	key->tp_src = htons(icmp->icmp6_type);
-	key->tp_dst = htons(icmp->icmp6_code);
+	key->ipv6.tp.src = htons(icmp->icmp6_type);
+	key->ipv6.tp.dst = htons(icmp->icmp6_code);
+	key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
 
 	if (icmp->icmp6_code == 0 &&
 	    (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
@@ -335,16 +344,21 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
 		struct nd_msg *nd;
 		int offset;
 
+		key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
+
 		/* In order to process neighbor discovery options, we need the
 		 * entire packet.
 		 */
 		if (unlikely(icmp_len < sizeof(*nd)))
-			return 0;
-		if (unlikely(skb_linearize(skb)))
-			return -ENOMEM;
+			goto out;
+		if (unlikely(skb_linearize(skb))) {
+			error = -ENOMEM;
+			goto out;
+		}
 
 		nd = (struct nd_msg *)skb_transport_header(skb);
-		ipv6_addr_copy(&key->nd_target, &nd->target);
+		ipv6_addr_copy(&key->ipv6.nd.target, &nd->target);
+		key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
 
 		icmp_len -= sizeof(*nd);
 		offset = 0;
@@ -361,15 +375,15 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
 			 */
 			if (nd_opt->nd_opt_type == ND_OPT_SOURCE_LL_ADDR
 			    && opt_len == 8) {
-				if (unlikely(!is_zero_ether_addr(key->arp_sha)))
+				if (unlikely(!is_zero_ether_addr(key->ipv6.nd.sll)))
 					goto invalid;
-				memcpy(key->arp_sha,
+				memcpy(key->ipv6.nd.sll,
 				    &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
 			} else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR
 				   && opt_len == 8) {
-				if (unlikely(!is_zero_ether_addr(key->arp_tha)))
+				if (unlikely(!is_zero_ether_addr(key->ipv6.nd.tll)))
 					goto invalid;
-				memcpy(key->arp_tha,
+				memcpy(key->ipv6.nd.tll,
 				    &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
 			}
 
@@ -378,14 +392,16 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
 		}
 	}
 
-	return 0;
+	goto out;
 
 invalid:
-	memset(&key->nd_target, 0, sizeof(key->nd_target));
-	memset(key->arp_sha, 0, sizeof(key->arp_sha));
-	memset(key->arp_tha, 0, sizeof(key->arp_tha));
+	memset(&key->ipv6.nd.target, 0, sizeof(key->ipv6.nd.target));
+	memset(key->ipv6.nd.sll, 0, sizeof(key->ipv6.nd.sll));
+	memset(key->ipv6.nd.tll, 0, sizeof(key->ipv6.nd.tll));
 
-	return 0;
+out:
+	*key_lenp = key_len;
+	return error;
 }
 
 /**
@@ -394,6 +410,7 @@ invalid:
  * Ethernet header
  * @in_port: port number on which @skb was received.
  * @key: output flow key
+ * @key_lenp: length of output flow key
  * @is_frag: set to 1 if @skb contains an IPv4 fragment, or to 0 if @skb does
  * not contain an IPv4 packet or if it is not a fragment.
  *
@@ -414,13 +431,15 @@ invalid:
  *      For other key->dl_type values it is left untouched.
  */
 int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
-		 bool *is_frag)
+		 int *key_lenp, bool *is_frag)
 {
+	int error = 0;
+	int key_len = SW_FLOW_KEY_OFFSET(eth);
 	struct ethhdr *eth;
 
 	memset(key, 0, sizeof(*key));
-	key->tun_id = OVS_CB(skb)->tun_id;
-	key->in_port = in_port;
+	key->eth.tun_id = OVS_CB(skb)->tun_id;
+	key->eth.in_port = in_port;
 	*is_frag = false;
 
 	skb_reset_mac_header(skb);
@@ -429,17 +448,19 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 	 * header in the linear data area.
 	 */
 	eth = eth_hdr(skb);
-	memcpy(key->dl_src, eth->h_source, ETH_ALEN);
-	memcpy(key->dl_dst, eth->h_dest, ETH_ALEN);
+	memcpy(key->eth.dl_src, eth->h_source, ETH_ALEN);
+	memcpy(key->eth.dl_dst, eth->h_dest, ETH_ALEN);
+
+	/* dl_type, dl_vlan, dl_vlan_pcp. */
 	__skb_pull(skb, 2 * ETH_ALEN);
 
 	if (vlan_tx_tag_present(skb))
-		key->dl_tci = htons(vlan_get_tci(skb));
+		key->eth.dl_tci = htons(vlan_get_tci(skb));
 	else if (eth->h_proto == htons(ETH_P_8021Q))
 		if (unlikely(parse_vlan(skb, key)))
 			return -ENOMEM;
 
-	key->dl_type = parse_ethertype(skb);
-	if (unlikely(key->dl_type == htons(0)))
+	key->eth.dl_type = parse_ethertype(skb);
+	if (unlikely(key->eth.dl_type == htons(0)))
 		return -ENOMEM;
 
@@ -447,54 +468,58 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 	__skb_push(skb, skb->data - skb_mac_header(skb));
 
 	/* Network layer. */
-	if (key->dl_type == htons(ETH_P_IP)) {
+	if (key->eth.dl_type == htons(ETH_P_IP)) {
 		struct iphdr *nh;
-		int error;
+
+		key_len = SW_FLOW_KEY_OFFSET(ipv4.addr);
 
 		error = check_iphdr(skb);
 		if (unlikely(error)) {
 			if (error == -EINVAL) {
 				skb->transport_header = skb->network_header;
-				return 0;
+				error = 0;
 			}
-			return error;
+			goto out;
 		}
 
 		nh = ip_hdr(skb);
-		key->ipv4_src = nh->saddr;
-		key->ipv4_dst = nh->daddr;
-		key->nw_tos = nh->tos & ~INET_ECN_MASK;
-		key->nw_proto = nh->protocol;
+		key->ipv4.addr.src = nh->saddr;
+		key->ipv4.addr.dst = nh->daddr;
+		key->ip.nw_tos = nh->tos & ~INET_ECN_MASK;
+		key->ip.nw_proto = nh->protocol;
 
 		/* Transport layer. */
 		if (!(nh->frag_off & htons(IP_MF | IP_OFFSET)) &&
 		    !(skb_shinfo(skb)->gso_type & SKB_GSO_UDP)) {
-			if (key->nw_proto == IPPROTO_TCP) {
+			if (key->ip.nw_proto == IPPROTO_TCP) {
+				key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
 				if (tcphdr_ok(skb)) {
 					struct tcphdr *tcp = tcp_hdr(skb);
-					key->tp_src = tcp->source;
-					key->tp_dst = tcp->dest;
+					key->ipv4.tp.src = tcp->source;
+					key->ipv4.tp.dst = tcp->dest;
 				}
-			} else if (key->nw_proto == IPPROTO_UDP) {
+			} else if (key->ip.nw_proto == IPPROTO_UDP) {
+				key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
 				if (udphdr_ok(skb)) {
 					struct udphdr *udp = udp_hdr(skb);
-					key->tp_src = udp->source;
-					key->tp_dst = udp->dest;
+					key->ipv4.tp.src = udp->source;
+					key->ipv4.tp.dst = udp->dest;
 				}
-			} else if (key->nw_proto == IPPROTO_ICMP) {
+			} else if (key->ip.nw_proto == IPPROTO_ICMP) {
+				key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
 				if (icmphdr_ok(skb)) {
 					struct icmphdr *icmp = icmp_hdr(skb);
 					/* The ICMP type and code fields use the 16-bit
 					 * transport port fields, so we need to store them
 					 * in 16-bit network byte order. */
-					key->tp_src = htons(icmp->type);
-					key->tp_dst = htons(icmp->code);
+					key->ipv4.tp.src = htons(icmp->type);
+					key->ipv4.tp.dst = htons(icmp->code);
 				}
 			}
 		} else
 			*is_frag = true;
 
-	} else if (key->dl_type == htons(ETH_P_ARP) && arphdr_ok(skb)) {
+	} else if (key->eth.dl_type == htons(ETH_P_ARP) && arphdr_ok(skb)) {
 		struct arp_eth_header *arp;
 
 		arp = (struct arp_eth_header *)skb_network_header(skb);
@@ -505,70 +530,79 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 				&& arp->ar_pln == 4) {
 
 			/* We only match on the lower 8 bits of the opcode. */
-			if (ntohs(arp->ar_op) <= 0xff)
-				key->nw_proto = ntohs(arp->ar_op);
-
-			if (key->nw_proto == ARPOP_REQUEST
-					|| key->nw_proto == ARPOP_REPLY) {
-				memcpy(&key->ipv4_src, arp->ar_sip, sizeof(key->ipv4_src));
-				memcpy(&key->ipv4_dst, arp->ar_tip, sizeof(key->ipv4_dst));
-				memcpy(key->arp_sha, arp->ar_sha, ETH_ALEN);
-				memcpy(key->arp_tha, arp->ar_tha, ETH_ALEN);
+			if (ntohs(arp->ar_op) <= 0xff) {
+				key->ip.nw_proto = ntohs(arp->ar_op);
+			}
+
+			if (key->ip.nw_proto == ARPOP_REQUEST
+					|| key->ip.nw_proto == ARPOP_REPLY) {
+				memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src));
+				memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
+				memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN);
+				memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN);
+				key_len = SW_FLOW_KEY_OFFSET(ipv4.arp);
 			}
 		}
-	} else if (key->dl_type == htons(ETH_P_IPV6)) {
+	} else if (key->eth.dl_type == htons(ETH_P_IPV6)) {
 		int nh_len;             /* IPv6 Header + Extensions */
 
-		nh_len = parse_ipv6hdr(skb, key);
+		nh_len = parse_ipv6hdr(skb, key, &key_len);
 		if (unlikely(nh_len < 0)) {
-			if (nh_len == -EINVAL) {
+			if (nh_len == -EINVAL)
 				skb->transport_header = skb->network_header;
-				return 0;
-			}
-			return nh_len;
+			else
+				error = nh_len;
+			goto out;
 		}
 
 		/* Transport layer. */
-		if (key->nw_proto == NEXTHDR_TCP) {
+		if (key->ip.nw_proto == NEXTHDR_TCP) {
+			key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
 			if (tcphdr_ok(skb)) {
 				struct tcphdr *tcp = tcp_hdr(skb);
-				key->tp_src = tcp->source;
-				key->tp_dst = tcp->dest;
+				key->ipv6.tp.src = tcp->source;
+				key->ipv6.tp.dst = tcp->dest;
 			}
-		} else if (key->nw_proto == NEXTHDR_UDP) {
+		} else if (key->ip.nw_proto == NEXTHDR_UDP) {
+			key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
 			if (udphdr_ok(skb)) {
 				struct udphdr *udp = udp_hdr(skb);
-				key->tp_src = udp->source;
-				key->tp_dst = udp->dest;
+				key->ipv6.tp.src = udp->source;
+				key->ipv6.tp.dst = udp->dest;
 			}
-		} else if (key->nw_proto == NEXTHDR_ICMP) {
+		} else if (key->ip.nw_proto == NEXTHDR_ICMP) {
+			key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
 			if (icmp6hdr_ok(skb)) {
-				int error = parse_icmpv6(skb, key, nh_len);
+				error = parse_icmpv6(skb, key, &key_len, nh_len);
 				if (error < 0)
-					return error;
+					goto out;
 			}
 		}
 	}
-	return 0;
+
+out:
+	*key_lenp = key_len;
+	return error;
 }
 
-u32 flow_hash(const struct sw_flow_key *key)
+u32 flow_hash(const struct sw_flow_key *key, int key_len)
 {
-	return jhash2((u32*)key, sizeof(*key) / sizeof(u32), hash_seed);
+	return jhash2((u32*)key, DIV_ROUND_UP(key_len, sizeof(u32)), hash_seed);
 }
 
-int flow_cmp(const struct tbl_node *node, void *key2_)
+int flow_cmp(const struct tbl_node *node, void *key2_, int len)
 {
 	const struct sw_flow_key *key1 = &flow_cast(node)->key;
 	const struct sw_flow_key *key2 = key2_;
 
-	return !memcmp(key1, key2, sizeof(struct sw_flow_key));
+	return !memcmp(key1, key2, len);
 }
 
 /**
  * flow_from_nlattrs - parses Netlink attributes into a flow key.
  * @swkey: receives the extracted flow key.
- * @key: Netlink attribute holding nested %ODP_KEY_ATTR_* Netlink attribute
+ * @key_lenp: number of bytes used in @swkey.
+ * @attr: Netlink attribute holding nested %ODP_KEY_ATTR_* Netlink attribute
  * sequence.
  *
  * This state machine accepts the following forms, with [] for optional
@@ -577,14 +611,18 @@ int flow_cmp(const struct tbl_node *node, void *key2_)
  * [tun_id] in_port ethernet [8021q] [ethertype \
  *              [IPv4 [TCP|UDP|ICMP] | IPv6 [TCP|UDP|ICMPv6 [ND]] | ARP]]
  */
-int flow_from_nlattrs(struct sw_flow_key *swkey, const struct nlattr *attr)
+int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
+		      const struct nlattr *attr)
 {
+	int error = 0;
 	const struct nlattr *nla;
 	u16 prev_type;
 	int rem;
+	int key_len;
 
 	memset(swkey, 0, sizeof(*swkey));
-	swkey->dl_type = htons(ETH_P_802_2);
+	swkey->eth.dl_type = htons(ETH_P_802_2);
+	key_len = SW_FLOW_KEY_OFFSET(eth);
 
 	prev_type = ODP_KEY_ATTR_UNSPEC;
 	nla_for_each_nested(nla, attr, rem) {
@@ -618,185 +656,215 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, const struct nlattr *attr)
                 int type = nla_type(nla);
 
                 if (type > ODP_KEY_ATTR_MAX || nla_len(nla) != key_lens[type])
-                        return -EINVAL;
+                        goto invalid;
 
 #define TRANSITION(PREV_TYPE, TYPE) (((PREV_TYPE) << 16) | (TYPE))
 		switch (TRANSITION(prev_type, type)) {
 		case TRANSITION(ODP_KEY_ATTR_UNSPEC, ODP_KEY_ATTR_TUN_ID):
-			swkey->tun_id = nla_get_be64(nla);
+			swkey->eth.tun_id = nla_get_be64(nla);
 			break;
 
 		case TRANSITION(ODP_KEY_ATTR_UNSPEC, ODP_KEY_ATTR_IN_PORT):
 		case TRANSITION(ODP_KEY_ATTR_TUN_ID, ODP_KEY_ATTR_IN_PORT):
 			if (nla_get_u32(nla) >= DP_MAX_PORTS)
-				return -EINVAL;
-			swkey->in_port = nla_get_u32(nla);
+				goto invalid;
+			swkey->eth.in_port = nla_get_u32(nla);
 			break;
 
 		case TRANSITION(ODP_KEY_ATTR_IN_PORT, ODP_KEY_ATTR_ETHERNET):
 			eth_key = nla_data(nla);
-			memcpy(swkey->dl_src, eth_key->eth_src, ETH_ALEN);
-			memcpy(swkey->dl_dst, eth_key->eth_dst, ETH_ALEN);
+			memcpy(swkey->eth.dl_src, eth_key->eth_src, ETH_ALEN);
+			memcpy(swkey->eth.dl_dst, eth_key->eth_dst, ETH_ALEN);
 			break;
 
 		case TRANSITION(ODP_KEY_ATTR_ETHERNET, ODP_KEY_ATTR_8021Q):
 			q_key = nla_data(nla);
 			/* Only standard 0x8100 VLANs currently supported. */
 			if (q_key->q_tpid != htons(ETH_P_8021Q))
-				return -EINVAL;
+				goto invalid;
 			if (q_key->q_tci & htons(VLAN_TAG_PRESENT))
-				return -EINVAL;
-			swkey->dl_tci = q_key->q_tci | htons(VLAN_TAG_PRESENT);
+				goto invalid;
+			swkey->eth.dl_tci = q_key->q_tci | htons(VLAN_TAG_PRESENT);
 			break;
 
 		case TRANSITION(ODP_KEY_ATTR_8021Q, ODP_KEY_ATTR_ETHERTYPE):
 		case TRANSITION(ODP_KEY_ATTR_ETHERNET, ODP_KEY_ATTR_ETHERTYPE):
-			swkey->dl_type = nla_get_be16(nla);
-			if (ntohs(swkey->dl_type) < 1536)
-				return -EINVAL;
+			swkey->eth.dl_type = nla_get_be16(nla);
+			if (ntohs(swkey->eth.dl_type) < 1536)
+				goto invalid;
 			break;
 
 		case TRANSITION(ODP_KEY_ATTR_ETHERTYPE, ODP_KEY_ATTR_IPV4):
-			if (swkey->dl_type != htons(ETH_P_IP))
-				return -EINVAL;
+			key_len = SW_FLOW_KEY_OFFSET(ipv4.addr);
+			if (swkey->eth.dl_type != htons(ETH_P_IP))
+				goto invalid;
 			ipv4_key = nla_data(nla);
-			swkey->ipv4_src = ipv4_key->ipv4_src;
-			swkey->ipv4_dst = ipv4_key->ipv4_dst;
-			swkey->nw_proto = ipv4_key->ipv4_proto;
-			swkey->nw_tos = ipv4_key->ipv4_tos;
-			if (swkey->nw_tos & INET_ECN_MASK)
-				return -EINVAL;
+			swkey->ip.nw_proto = ipv4_key->ipv4_proto;
+			swkey->ip.nw_tos = ipv4_key->ipv4_tos;
+			swkey->ipv4.addr.src = ipv4_key->ipv4_src;
+			swkey->ipv4.addr.dst = ipv4_key->ipv4_dst;
+			if (swkey->ip.nw_tos & INET_ECN_MASK)
+				goto invalid;
 			break;
 
 		case TRANSITION(ODP_KEY_ATTR_ETHERTYPE, ODP_KEY_ATTR_IPV6):
-			if (swkey->dl_type != htons(ETH_P_IPV6))
-				return -EINVAL;
+			key_len = SW_FLOW_KEY_OFFSET(ipv6.addr);
+			if (swkey->eth.dl_type != htons(ETH_P_IPV6))
+				goto invalid;
 			ipv6_key = nla_data(nla);
-			memcpy(&swkey->ipv6_src, ipv6_key->ipv6_src,
-					sizeof(swkey->ipv6_src));
-			memcpy(&swkey->ipv6_dst, ipv6_key->ipv6_dst,
-					sizeof(swkey->ipv6_dst));
-			swkey->nw_proto = ipv6_key->ipv6_proto;
-			swkey->nw_tos = ipv6_key->ipv6_tos;
-			if (swkey->nw_tos & INET_ECN_MASK)
-				return -EINVAL;
+			swkey->ip.nw_proto = ipv6_key->ipv6_proto;
+			swkey->ip.nw_tos = ipv6_key->ipv6_tos;
+			memcpy(&swkey->ipv6.addr.src, ipv6_key->ipv6_src,
+					sizeof(swkey->ipv6.addr.src));
+			memcpy(&swkey->ipv6.addr.dst, ipv6_key->ipv6_dst,
+					sizeof(swkey->ipv6.addr.dst));
+			if (swkey->ip.nw_tos & INET_ECN_MASK)
+				goto invalid;
 			break;
 
 		case TRANSITION(ODP_KEY_ATTR_IPV4, ODP_KEY_ATTR_TCP):
+			key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
+			if (swkey->ip.nw_proto != IPPROTO_TCP)
+				goto invalid;
+			tcp_key = nla_data(nla);
+			swkey->ipv4.tp.src = tcp_key->tcp_src;
+			swkey->ipv4.tp.dst = tcp_key->tcp_dst;
+			break;
+
 		case TRANSITION(ODP_KEY_ATTR_IPV6, ODP_KEY_ATTR_TCP):
-			if (swkey->nw_proto != IPPROTO_TCP)
-				return -EINVAL;
+			key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
+			if (swkey->ip.nw_proto != IPPROTO_TCP)
+				goto invalid;
 			tcp_key = nla_data(nla);
-			swkey->tp_src = tcp_key->tcp_src;
-			swkey->tp_dst = tcp_key->tcp_dst;
+			swkey->ipv6.tp.src = tcp_key->tcp_src;
+			swkey->ipv6.tp.dst = tcp_key->tcp_dst;
 			break;
 
 		case TRANSITION(ODP_KEY_ATTR_IPV4, ODP_KEY_ATTR_UDP):
+			key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
+			if (swkey->ip.nw_proto != IPPROTO_UDP)
+				goto invalid;
+			udp_key = nla_data(nla);
+			swkey->ipv4.tp.src = udp_key->udp_src;
+			swkey->ipv4.tp.dst = udp_key->udp_dst;
+			break;
+
 		case TRANSITION(ODP_KEY_ATTR_IPV6, ODP_KEY_ATTR_UDP):
-			if (swkey->nw_proto != IPPROTO_UDP)
-				return -EINVAL;
+			key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
+			if (swkey->ip.nw_proto != IPPROTO_UDP)
+				goto invalid;
 			udp_key = nla_data(nla);
-			swkey->tp_src = udp_key->udp_src;
-			swkey->tp_dst = udp_key->udp_dst;
+			swkey->ipv6.tp.src = udp_key->udp_src;
+			swkey->ipv6.tp.dst = udp_key->udp_dst;
 			break;
 
 		case TRANSITION(ODP_KEY_ATTR_IPV4, ODP_KEY_ATTR_ICMP):
-			if (swkey->nw_proto != IPPROTO_ICMP)
-				return -EINVAL;
+			key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
+			if (swkey->ip.nw_proto != IPPROTO_ICMP)
+				goto invalid;
 			icmp_key = nla_data(nla);
-			swkey->tp_src = htons(icmp_key->icmp_type);
-			swkey->tp_dst = htons(icmp_key->icmp_code);
+			swkey->ipv4.tp.src = htons(icmp_key->icmp_type);
+			swkey->ipv4.tp.dst = htons(icmp_key->icmp_code);
 			break;
 
 		case TRANSITION(ODP_KEY_ATTR_IPV6, ODP_KEY_ATTR_ICMPV6):
-			if (swkey->nw_proto != IPPROTO_ICMPV6)
-				return -EINVAL;
+			key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
+			if (swkey->ip.nw_proto != IPPROTO_ICMPV6)
+				goto invalid;
 			icmpv6_key = nla_data(nla);
-			swkey->tp_src = htons(icmpv6_key->icmpv6_type);
-			swkey->tp_dst = htons(icmpv6_key->icmpv6_code);
+			swkey->ipv6.tp.src = htons(icmpv6_key->icmpv6_type);
+			swkey->ipv6.tp.dst = htons(icmpv6_key->icmpv6_code);
 			break;
 
 		case TRANSITION(ODP_KEY_ATTR_ETHERTYPE, ODP_KEY_ATTR_ARP):
-			if (swkey->dl_type != htons(ETH_P_ARP))
-				return -EINVAL;
+			key_len = SW_FLOW_KEY_OFFSET(ipv4.arp);
+			if (swkey->eth.dl_type != htons(ETH_P_ARP))
+				goto invalid;
 			arp_key = nla_data(nla);
-			swkey->ipv4_src = arp_key->arp_sip;
-			swkey->ipv4_dst = arp_key->arp_tip;
+			swkey->ipv4.addr.src = arp_key->arp_sip;
+			swkey->ipv4.addr.dst = arp_key->arp_tip;
 			if (arp_key->arp_op & htons(0xff00))
-				return -EINVAL;
-			swkey->nw_proto = ntohs(arp_key->arp_op);
-			memcpy(swkey->arp_sha, arp_key->arp_sha, ETH_ALEN);
-			memcpy(swkey->arp_tha, arp_key->arp_tha, ETH_ALEN);
+				goto invalid;
+			swkey->ip.nw_proto = ntohs(arp_key->arp_op);
+			memcpy(swkey->ipv4.arp.sha, arp_key->arp_sha, ETH_ALEN);
+			memcpy(swkey->ipv4.arp.tha, arp_key->arp_tha, ETH_ALEN);
 			break;
 
 		case TRANSITION(ODP_KEY_ATTR_ICMPV6, ODP_KEY_ATTR_ND):
-			if (swkey->tp_src != htons(NDISC_NEIGHBOUR_SOLICITATION)
-			    && swkey->tp_src != htons(NDISC_NEIGHBOUR_ADVERTISEMENT))
-				return -EINVAL;
+			key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
+			if (swkey->ipv6.tp.src != htons(NDISC_NEIGHBOUR_SOLICITATION)
+			    && swkey->ipv6.tp.src != htons(NDISC_NEIGHBOUR_ADVERTISEMENT))
+				goto invalid;
 			nd_key = nla_data(nla);
-			memcpy(&swkey->nd_target, nd_key->nd_target,
-					sizeof(swkey->nd_target));
-			memcpy(swkey->arp_sha, nd_key->nd_sll, ETH_ALEN);
-			memcpy(swkey->arp_tha, nd_key->nd_tll, ETH_ALEN);
+			memcpy(&swkey->ipv6.nd.target, nd_key->nd_target,
+					sizeof(swkey->ipv6.nd.target));
+			memcpy(swkey->ipv6.nd.sll, nd_key->nd_sll, ETH_ALEN);
+			memcpy(swkey->ipv6.nd.tll, nd_key->nd_tll, ETH_ALEN);
 			break;
 
 		default:
-			return -EINVAL;
+			goto invalid;
 		}
 
 		prev_type = type;
 	}
 	if (rem)
-		return -EINVAL;
+		goto invalid;
 
 	switch (prev_type) {
 	case ODP_KEY_ATTR_UNSPEC:
-		return -EINVAL;
+		goto invalid;
 
 	case ODP_KEY_ATTR_TUN_ID:
 	case ODP_KEY_ATTR_IN_PORT:
-		return -EINVAL;
+		goto invalid;
 
 	case ODP_KEY_ATTR_ETHERNET:
 	case ODP_KEY_ATTR_8021Q:
-		return 0;
+		goto ok;
 
 	case ODP_KEY_ATTR_ETHERTYPE:
-		if (swkey->dl_type == htons(ETH_P_IP) ||
-		    swkey->dl_type == htons(ETH_P_ARP))
-			return -EINVAL;
-		return 0;
+		if (swkey->eth.dl_type == htons(ETH_P_IP) ||
+		    swkey->eth.dl_type == htons(ETH_P_ARP))
+			goto invalid;
+		goto ok;
 
 	case ODP_KEY_ATTR_IPV4:
-		if (swkey->nw_proto == IPPROTO_TCP ||
-		    swkey->nw_proto == IPPROTO_UDP ||
-		    swkey->nw_proto == IPPROTO_ICMP)
-			return -EINVAL;
-		return 0;
+		if (swkey->ip.nw_proto == IPPROTO_TCP ||
+		    swkey->ip.nw_proto == IPPROTO_UDP ||
+		    swkey->ip.nw_proto == IPPROTO_ICMP)
+			goto invalid;
+		goto ok;
 
 	case ODP_KEY_ATTR_IPV6:
-		if (swkey->nw_proto == IPPROTO_TCP ||
-		    swkey->nw_proto == IPPROTO_UDP ||
-		    swkey->nw_proto == IPPROTO_ICMPV6)
-			return -EINVAL;
-		return 0;
+		if (swkey->ip.nw_proto == IPPROTO_TCP ||
+		    swkey->ip.nw_proto == IPPROTO_UDP ||
+		    swkey->ip.nw_proto == IPPROTO_ICMPV6)
+			goto invalid;
+		goto ok;
 
 	case ODP_KEY_ATTR_ICMPV6:
-		if (swkey->tp_src == htons(NDISC_NEIGHBOUR_SOLICITATION) ||
-		    swkey->tp_src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT))
-			return -EINVAL;
-		return 0;
+		if (swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) ||
+		    swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT))
+			goto invalid;
+		goto ok;
 
 	case ODP_KEY_ATTR_TCP:
 	case ODP_KEY_ATTR_UDP:
 	case ODP_KEY_ATTR_ICMP:
 	case ODP_KEY_ATTR_ARP:
 	case ODP_KEY_ATTR_ND:
-		return 0;
+		goto ok;
 	}
 
+	/* Only an unhandled prev_type gets here; bad input skips the WARN. */
 	WARN_ON_ONCE(1);
-	return -EINVAL;
+invalid:
+	error = -EINVAL;
+ok:
+	WARN_ON_ONCE(!key_len && !error);
+	*key_lenp = key_len;
+	return error;
 }
 
 int flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
@@ -809,32 +877,32 @@ int flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
 	 * types are added. */
 	BUILD_BUG_ON(__ODP_KEY_ATTR_MAX != 14);
 
-	if (swkey->tun_id != cpu_to_be64(0))
-		NLA_PUT_BE64(skb, ODP_KEY_ATTR_TUN_ID, swkey->tun_id);
+	if (swkey->eth.tun_id != cpu_to_be64(0))
+		NLA_PUT_BE64(skb, ODP_KEY_ATTR_TUN_ID, swkey->eth.tun_id);
 
-	NLA_PUT_U32(skb, ODP_KEY_ATTR_IN_PORT, swkey->in_port);
+	NLA_PUT_U32(skb, ODP_KEY_ATTR_IN_PORT, swkey->eth.in_port);
 
 	nla = nla_reserve(skb, ODP_KEY_ATTR_ETHERNET, sizeof(*eth_key));
 	if (!nla)
 		goto nla_put_failure;
 	eth_key = nla_data(nla);
-	memcpy(eth_key->eth_src, swkey->dl_src, ETH_ALEN);
-	memcpy(eth_key->eth_dst, swkey->dl_dst, ETH_ALEN);
+	memcpy(eth_key->eth_src, swkey->eth.dl_src, ETH_ALEN);
+	memcpy(eth_key->eth_dst, swkey->eth.dl_dst, ETH_ALEN);
 
-	if (swkey->dl_tci != htons(0)) {
+	if (swkey->eth.dl_tci != htons(0)) {
 		struct odp_key_8021q q_key;
 
 		q_key.q_tpid = htons(ETH_P_8021Q);
-		q_key.q_tci = swkey->dl_tci & ~htons(VLAN_TAG_PRESENT);
+		q_key.q_tci = swkey->eth.dl_tci & ~htons(VLAN_TAG_PRESENT);
 		NLA_PUT(skb, ODP_KEY_ATTR_8021Q, sizeof(q_key), &q_key);
 	}
 
-	if (swkey->dl_type == htons(ETH_P_802_2))
+	if (swkey->eth.dl_type == htons(ETH_P_802_2))
 		return 0;
 
-	NLA_PUT_BE16(skb, ODP_KEY_ATTR_ETHERTYPE, swkey->dl_type);
+	NLA_PUT_BE16(skb, ODP_KEY_ATTR_ETHERTYPE, swkey->eth.dl_type);
 
-	if (swkey->dl_type == htons(ETH_P_IP)) {
+	if (swkey->eth.dl_type == htons(ETH_P_IP)) {
 		struct odp_key_ipv4 *ipv4_key;
 
 		nla = nla_reserve(skb, ODP_KEY_ATTR_IPV4, sizeof(*ipv4_key));
@@ -842,11 +910,11 @@ int flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
 			goto nla_put_failure;
 		ipv4_key = nla_data(nla);
 		memset(ipv4_key, 0, sizeof(struct odp_key_ipv4));
-		ipv4_key->ipv4_src = swkey->ipv4_src;
-		ipv4_key->ipv4_dst = swkey->ipv4_dst;
-		ipv4_key->ipv4_proto = swkey->nw_proto;
-		ipv4_key->ipv4_tos = swkey->nw_tos;
-	} else if (swkey->dl_type == htons(ETH_P_IPV6)) {
+		ipv4_key->ipv4_src = swkey->ipv4.addr.src;
+		ipv4_key->ipv4_dst = swkey->ipv4.addr.dst;
+		ipv4_key->ipv4_proto = swkey->ip.nw_proto;
+		ipv4_key->ipv4_tos = swkey->ip.nw_tos;
+	} else if (swkey->eth.dl_type == htons(ETH_P_IPV6)) {
 		struct odp_key_ipv6 *ipv6_key;
 
 		nla = nla_reserve(skb, ODP_KEY_ATTR_IPV6, sizeof(*ipv6_key));
@@ -854,13 +922,13 @@ int flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
 			goto nla_put_failure;
 		ipv6_key = nla_data(nla);
 		memset(ipv6_key, 0, sizeof(struct odp_key_ipv6));
-		memcpy(ipv6_key->ipv6_src, &swkey->ipv6_src,
+		memcpy(ipv6_key->ipv6_src, &swkey->ipv6.addr.src,
 				sizeof(ipv6_key->ipv6_src));
-		memcpy(ipv6_key->ipv6_dst, &swkey->ipv6_dst,
+		memcpy(ipv6_key->ipv6_dst, &swkey->ipv6.addr.dst,
 				sizeof(ipv6_key->ipv6_dst));
-		ipv6_key->ipv6_proto = swkey->nw_proto;
-		ipv6_key->ipv6_tos = swkey->nw_tos;
-	} else if (swkey->dl_type == htons(ETH_P_ARP)) {
+		ipv6_key->ipv6_proto = swkey->ip.nw_proto;
+		ipv6_key->ipv6_tos = swkey->ip.nw_tos;
+	} else if (swkey->eth.dl_type == htons(ETH_P_ARP)) {
 		struct odp_key_arp *arp_key;
 
 		nla = nla_reserve(skb, ODP_KEY_ATTR_ARP, sizeof(*arp_key));
@@ -868,46 +936,56 @@ int flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
 			goto nla_put_failure;
 		arp_key = nla_data(nla);
 		memset(arp_key, 0, sizeof(struct odp_key_arp));
-		arp_key->arp_sip = swkey->ipv4_src;
-		arp_key->arp_tip = swkey->ipv4_dst;
-		arp_key->arp_op = htons(swkey->nw_proto);
-		memcpy(arp_key->arp_sha, swkey->arp_sha, ETH_ALEN);
-		memcpy(arp_key->arp_tha, swkey->arp_tha, ETH_ALEN);
+		arp_key->arp_sip = swkey->ipv4.addr.src;
+		arp_key->arp_tip = swkey->ipv4.addr.dst;
+		arp_key->arp_op = htons(swkey->ip.nw_proto);
+		memcpy(arp_key->arp_sha, swkey->ipv4.arp.sha, ETH_ALEN);
+		memcpy(arp_key->arp_tha, swkey->ipv4.arp.tha, ETH_ALEN);
 	}
 
-	if (swkey->dl_type == htons(ETH_P_IP) ||
-	    swkey->dl_type == htons(ETH_P_IPV6)) {
+	if (swkey->eth.dl_type == htons(ETH_P_IP) ||
+	    swkey->eth.dl_type == htons(ETH_P_IPV6)) {
 
-		if (swkey->nw_proto == IPPROTO_TCP) {
+		if (swkey->ip.nw_proto == IPPROTO_TCP) {
 			struct odp_key_tcp *tcp_key;
 
 			nla = nla_reserve(skb, ODP_KEY_ATTR_TCP, sizeof(*tcp_key));
 			if (!nla)
 				goto nla_put_failure;
 			tcp_key = nla_data(nla);
-			tcp_key->tcp_src = swkey->tp_src;
-			tcp_key->tcp_dst = swkey->tp_dst;
-		} else if (swkey->nw_proto == IPPROTO_UDP) {
+			if (swkey->eth.dl_type == htons(ETH_P_IP)) {
+				tcp_key->tcp_src = swkey->ipv4.tp.src;
+				tcp_key->tcp_dst = swkey->ipv4.tp.dst;
+			} else if (swkey->eth.dl_type == htons(ETH_P_IPV6)) {
+				tcp_key->tcp_src = swkey->ipv6.tp.src;
+				tcp_key->tcp_dst = swkey->ipv6.tp.dst;
+			}
+		} else if (swkey->ip.nw_proto == IPPROTO_UDP) {
 			struct odp_key_udp *udp_key;
 
 			nla = nla_reserve(skb, ODP_KEY_ATTR_UDP, sizeof(*udp_key));
 			if (!nla)
 				goto nla_put_failure;
 			udp_key = nla_data(nla);
-			udp_key->udp_src = swkey->tp_src;
-			udp_key->udp_dst = swkey->tp_dst;
-		} else if (swkey->dl_type == htons(ETH_P_IP) &&
-			   swkey->nw_proto == IPPROTO_ICMP) {
+			if (swkey->eth.dl_type == htons(ETH_P_IP)) {
+				udp_key->udp_src = swkey->ipv4.tp.src;
+				udp_key->udp_dst = swkey->ipv4.tp.dst;
+			} else if (swkey->eth.dl_type == htons(ETH_P_IPV6)) {
+				udp_key->udp_src = swkey->ipv6.tp.src;
+				udp_key->udp_dst = swkey->ipv6.tp.dst;
+			}
+		} else if (swkey->eth.dl_type == htons(ETH_P_IP) &&
+			   swkey->ip.nw_proto == IPPROTO_ICMP) {
 			struct odp_key_icmp *icmp_key;
 
 			nla = nla_reserve(skb, ODP_KEY_ATTR_ICMP, sizeof(*icmp_key));
 			if (!nla)
 				goto nla_put_failure;
 			icmp_key = nla_data(nla);
-			icmp_key->icmp_type = ntohs(swkey->tp_src);
-			icmp_key->icmp_code = ntohs(swkey->tp_dst);
-		} else if (swkey->dl_type == htons(ETH_P_IPV6) &&
-			   swkey->nw_proto == IPPROTO_ICMPV6) {
+			icmp_key->icmp_type = ntohs(swkey->ipv4.tp.src);
+			icmp_key->icmp_code = ntohs(swkey->ipv4.tp.dst);
+		} else if (swkey->eth.dl_type == htons(ETH_P_IPV6) &&
+			   swkey->ip.nw_proto == IPPROTO_ICMPV6) {
 			struct odp_key_icmpv6 *icmpv6_key;
 
 			nla = nla_reserve(skb, ODP_KEY_ATTR_ICMPV6,
@@ -915,8 +993,8 @@ int flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
 			if (!nla)
 				goto nla_put_failure;
 			icmpv6_key = nla_data(nla);
-			icmpv6_key->icmpv6_type = ntohs(swkey->tp_src);
-			icmpv6_key->icmpv6_code = ntohs(swkey->tp_dst);
+			icmpv6_key->icmpv6_type = ntohs(swkey->ipv6.tp.src);
+			icmpv6_key->icmpv6_code = ntohs(swkey->ipv6.tp.dst);
 
 			if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
 			    icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
@@ -926,10 +1004,10 @@ int flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
 				if (!nla)
 					goto nla_put_failure;
 				nd_key = nla_data(nla);
-				memcpy(nd_key->nd_target, &swkey->nd_target,
+				memcpy(nd_key->nd_target, &swkey->ipv6.nd.target,
 							sizeof(nd_key->nd_target));
-				memcpy(nd_key->nd_sll, swkey->arp_sha, ETH_ALEN);
-				memcpy(nd_key->nd_tll, swkey->arp_tha, ETH_ALEN);
+				memcpy(nd_key->nd_sll, swkey->ipv6.nd.sll, ETH_ALEN);
+				memcpy(nd_key->nd_tll, swkey->ipv6.nd.tll, ETH_ALEN);
 			}
 		}
 	}
diff --git a/datapath/flow.h b/datapath/flow.h
index 5c23279..4056a21 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -31,29 +31,51 @@ struct sw_flow_actions {
 };
 
 struct sw_flow_key {
-	__be64	tun_id;     /* Encapsulating tunnel ID. */
+	struct {
+		__be64 tun_id;			/* Encapsulating tunnel ID. */
+		u16    in_port;			/* Input switch port. */
+		u8     dl_src[ETH_ALEN];	/* Ethernet source address. */
+		u8     dl_dst[ETH_ALEN];	/* Ethernet destination address. */
+		__be16 dl_tci;			/* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
+		__be16 dl_type;			/* Ethernet frame type. */
+	} eth;
+	struct {
+		u8     nw_proto;		/* IP protocol or lower 8 bits of ARP opcode. */
+		u8     nw_tos;			/* IP ToS (DSCP field, 6 bits). */
+	} ip;
 	union {
 		struct {
-			__be32	ipv4_src;	 /* IPv4 source address. */
-			__be32	ipv4_dst;	 /* IPv4 destination address. */
-		};
+			struct {
+				__be32 src;	/* IP source address. */
+				__be32 dst;	/* IP destination address. */
+			} addr;
+			union {
+				struct {
+					__be16 src;		/* TCP/UDP source port. */
+					__be16 dst;		/* TCP/UDP destination port. */
+				} tp;
+				struct {
+					u8 sha[ETH_ALEN];	/* ARP source hardware address. */
+					u8 tha[ETH_ALEN];	/* ARP target hardware address. */
+				} arp;
+			};
+		} ipv4;
 		struct {
-			struct in6_addr	ipv6_src; /* IPv6 source address. */
-			struct in6_addr ipv6_dst; /* IPv6 source address. */
-		};
+			struct {
+				struct in6_addr src;	/* IPv6 source address. */
+				struct in6_addr dst;	/* IPv6 destination address. */
+			} addr;
+			struct {
+				__be16 src;		/* TCP/UDP source port. */
+				__be16 dst;		/* TCP/UDP destination port. */
+			} tp;
+			struct {
+				struct in6_addr target;	/* ND target address. */
+				u8 sll[ETH_ALEN];	/* ND source link layer address. */
+				u8 tll[ETH_ALEN];	/* ND target link layer address. */
+			} nd;
+		} ipv6;
 	};
-	struct in6_addr	nd_target; /* IPv6 ND target address. */
-	u16	in_port;    /* Input switch port. */
-	__be16	dl_tci;	    /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
-	__be16	dl_type;    /* Ethernet frame type. */
-	__be16	tp_src;	    /* TCP/UDP source port. */
-	__be16	tp_dst;	    /* TCP/UDP destination port. */
-	u8	dl_src[ETH_ALEN]; /* Ethernet source address. */
-	u8	dl_dst[ETH_ALEN]; /* Ethernet destination address. */
-	u8	nw_proto;   /* IP protocol or lower 8 bits of ARP opcode. */
-	u8	nw_tos;	    /* IP ToS (DSCP field, 6 bits). */
-	u8	arp_sha[ETH_ALEN]; /* ARP/ND source hardware address. */
-	u8	arp_tha[ETH_ALEN]; /* ARP/ND target hardware address. */
 };
 
 struct sw_flow {
@@ -101,12 +123,13 @@ void flow_deferred_free_acts(struct sw_flow_actions *);
 void flow_hold(struct sw_flow *);
 void flow_put(struct sw_flow *);
 
-int flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *, bool *is_frag);
+int flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *,
+		 int *key_lenp, bool *is_frag);
 void flow_used(struct sw_flow *, struct sk_buff *);
 u64 flow_used_time(unsigned long flow_jiffies);
 
-u32 flow_hash(const struct sw_flow_key *);
-int flow_cmp(const struct tbl_node *, void *target);
+u32 flow_hash(const struct sw_flow_key *, int key_len);
+int flow_cmp(const struct tbl_node *, void *target, int len);
 
 /* Upper bound on the length of a nlattr-formatted flow key.  The longest
  * nlattr-formatted flow key would be:
@@ -127,7 +150,8 @@ int flow_cmp(const struct tbl_node *, void *target);
 #define FLOW_BUFSIZE 132
 
 int flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *);
-int flow_from_nlattrs(struct sw_flow_key *swkey, const struct nlattr *);
+int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
+		      const struct nlattr *);
 
 static inline struct sw_flow *flow_cast(const struct tbl_node *node)
 {
diff --git a/datapath/table.c b/datapath/table.c
index 47fa016..725845d 100644
--- a/datapath/table.c
+++ b/datapath/table.c
@@ -178,14 +178,14 @@ static struct tbl_bucket __rcu **find_bucket(struct tbl *table, u32 hash)
 	return &table->buckets[l1][l2];
 }
 
-static int search_bucket(const struct tbl_bucket *bucket, void *target, u32 hash,
-			 int (*cmp)(const struct tbl_node *, void *))
+static int search_bucket(const struct tbl_bucket *bucket, void *target, int len, u32 hash,
+			 int (*cmp)(const struct tbl_node *, void *, int len))
 {
 	int i;
 
 	for (i = 0; i < bucket->n_objs; i++) {
 		struct tbl_node *obj = bucket->objs[i];
-		if (obj->hash == hash && likely(cmp(obj, target)))
+		if (obj->hash == hash && likely(cmp(obj, target, len)))
 			return i;
 	}
 
@@ -197,6 +197,8 @@ static int search_bucket(const struct tbl_bucket *bucket, void *target, u32 hash
  * @table: hash table to search
  * @target: identifier for the object that is being searched for, will be
  * provided as an argument to @cmp when making comparisions
+ * @len: length of @target in bytes, will be provided as an argument to @cmp
+ * when making comparisons
  * @hash: hash of @target
  * @cmp: comparision function to match objects with the given hash, returns
  * nonzero if the objects match, zero otherwise
@@ -204,8 +206,8 @@ static int search_bucket(const struct tbl_bucket *bucket, void *target, u32 hash
  * Searches @table for an object identified by @target.  Returns the tbl_node
  * contained in the object if successful, otherwise %NULL.
  */
-struct tbl_node *tbl_lookup(struct tbl *table, void *target, u32 hash,
-			    int (*cmp)(const struct tbl_node *, void *))
+struct tbl_node *tbl_lookup(struct tbl *table, void *target, int len, u32 hash,
+			    int (*cmp)(const struct tbl_node *, void *, int))
 {
 	struct tbl_bucket __rcu **bucketp = find_bucket(table, hash);
 	struct tbl_bucket *bucket = get_bucket(*bucketp);
@@ -214,7 +216,7 @@ struct tbl_node *tbl_lookup(struct tbl *table, void *target, u32 hash,
 	if (!bucket)
 		return NULL;
 
-	index = search_bucket(bucket, target, hash, cmp);
+	index = search_bucket(bucket, target, len, hash, cmp);
 	if (index < 0)
 		return NULL;
 
diff --git a/datapath/table.h b/datapath/table.h
index 22574be..3a0c2a6 100644
--- a/datapath/table.h
+++ b/datapath/table.h
@@ -55,8 +55,8 @@ struct tbl {
 
 struct tbl *tbl_create(unsigned int n_buckets);
 void tbl_destroy(struct tbl *, void (*destructor)(struct tbl_node *));
-struct tbl_node *tbl_lookup(struct tbl *, void *target, u32 hash,
-			    int (*cmp)(const struct tbl_node *, void *target));
+struct tbl_node *tbl_lookup(struct tbl *, void *target, int len, u32 hash,
+			    int (*cmp)(const struct tbl_node *, void *target, int len));
 int tbl_insert(struct tbl *, struct tbl_node *, u32 hash);
 int tbl_remove(struct tbl *, struct tbl_node *);
 unsigned int tbl_count(struct tbl *);
diff --git a/datapath/tunnel.c b/datapath/tunnel.c
index 70a4cd7..3263223 100644
--- a/datapath/tunnel.c
+++ b/datapath/tunnel.c
@@ -186,7 +186,7 @@ struct port_lookup_key {
  * Modifies 'target' to store the rcu_dereferenced pointer that was used to do
  * the comparision.
  */
-static int port_cmp(const struct tbl_node *node, void *target)
+static int port_cmp(const struct tbl_node *node, void *target, int unused)
 {
 	const struct tnl_vport *tnl_vport = tnl_vport_table_cast(node);
 	struct port_lookup_key *lookup = target;
@@ -337,7 +337,8 @@ struct vport *tnl_find_port(__be32 saddr, __be32 daddr, __be64 key,
 		lookup.tunnel_type = tunnel_type & ~TNL_T_KEY_MATCH;
 
 		if (key_local_remote_ports) {
-			tbl_node = tbl_lookup(table, &lookup, port_hash(&lookup), port_cmp);
+			tbl_node = tbl_lookup(table, &lookup, sizeof(lookup),
+					      port_hash(&lookup), port_cmp);
 			if (tbl_node)
 				goto found;
 		}
@@ -345,7 +346,8 @@ struct vport *tnl_find_port(__be32 saddr, __be32 daddr, __be64 key,
 		if (key_remote_ports) {
 			lookup.saddr = 0;
 
-			tbl_node = tbl_lookup(table, &lookup, port_hash(&lookup), port_cmp);
+			tbl_node = tbl_lookup(table, &lookup, sizeof(lookup),
+					      port_hash(&lookup), port_cmp);
 			if (tbl_node)
 				goto found;
 
@@ -358,7 +360,8 @@ struct vport *tnl_find_port(__be32 saddr, __be32 daddr, __be64 key,
 		lookup.tunnel_type = tunnel_type & ~TNL_T_KEY_EXACT;
 
 		if (local_remote_ports) {
-			tbl_node = tbl_lookup(table, &lookup, port_hash(&lookup), port_cmp);
+			tbl_node = tbl_lookup(table, &lookup, sizeof(lookup),
+					      port_hash(&lookup), port_cmp);
 			if (tbl_node)
 				goto found;
 		}
@@ -366,7 +369,8 @@ struct vport *tnl_find_port(__be32 saddr, __be32 daddr, __be64 key,
 		if (remote_ports) {
 			lookup.saddr = 0;
 
-			tbl_node = tbl_lookup(table, &lookup, port_hash(&lookup), port_cmp);
+			tbl_node = tbl_lookup(table, &lookup, sizeof(lookup),
+					      port_hash(&lookup), port_cmp);
 			if (tbl_node)
 				goto found;
 		}
@@ -940,6 +944,7 @@ static struct tnl_cache *build_cache(struct vport *vport,
 		struct sk_buff *skb;
 		bool is_frag;
 		int err;
+		int flow_key_len;
 
 		dst_vport = internal_dev_get_vport(rt_dst(rt).dev);
 		if (!dst_vport)
@@ -952,14 +957,16 @@ static struct tnl_cache *build_cache(struct vport *vport,
 		__skb_put(skb, cache->len);
 		memcpy(skb->data, get_cached_header(cache), cache->len);
 
-		err = flow_extract(skb, dst_vport->port_no, &flow_key, &is_frag);
+		err = flow_extract(skb, dst_vport->port_no, &flow_key,
+				   &flow_key_len, &is_frag);
 
 		kfree_skb(skb);
 		if (err || is_frag)
 			goto done;
 
 		flow_node = tbl_lookup(rcu_dereference(dst_vport->dp->table),
-				       &flow_key, flow_hash(&flow_key),
+				       &flow_key, flow_key_len,
+				       flow_hash(&flow_key, flow_key_len),
 				       flow_cmp);
 		if (flow_node) {
 			struct sw_flow *flow = flow_cast(flow_node);
-- 
1.7.4.1




More information about the dev mailing list