[ovs-dev] [PATCH 2/2] datapath: Hash only the part of struct sw_flow_key populated by flow_extract().

Andrew Evans aevans at nicira.com
Fri Apr 29 22:04:15 UTC 2011


On Fri, 2011-04-29 at 11:31 -0700, Jesse Gross wrote:
> On Fri, Apr 29, 2011 at 1:22 AM, Andrew Evans <aevans at nicira.com> wrote:
> > On 4/28/11 1:37 PM, Jesse Gross wrote:
> >> On Mon, Apr 25, 2011 at 6:11 PM, Andrew Evans <aevans at nicira.com> wrote:
> > @@ -204,8 +204,8 @@ static int search_bucket(const struct tbl_bucket *bucket, void *target, u32 hash
> >  * Searches @table for an object identified by @target.  Returns the tbl_node
> >  * contained in the object if successful, otherwise %NULL.
> >  */
> > -struct tbl_node *tbl_lookup(struct tbl *table, void *target, u32 hash,
> > -                           int (*cmp)(const struct tbl_node *, void *))
> > +struct tbl_node *tbl_lookup(struct tbl *table, void *target, int len, u32 hash,
> > +                           int (*cmp)(const struct tbl_node *, void *, int))
> 
> Can you update the comment above this function for the new argument?

Yes, thanks for catching that.

--- cut here ---
diff --git a/datapath/table.c b/datapath/table.c
index 4bc91d7..725845d 100644
--- a/datapath/table.c
+++ b/datapath/table.c
@@ -197,6 +197,8 @@ static int search_bucket(const struct tbl_bucket *bucket, void *target, int len,
  * @table: hash table to search
  * @target: identifier for the object that is being searched for, will be
  * provided as an argument to @cmp when making comparisions
+ * @len: length of @target in bytes, will be provided as an argument to @cmp
+ * when making comparisons
  * @hash: hash of @target
  * @cmp: comparision function to match objects with the given hash, returns
  * nonzero if the objects match, zero otherwise
--- cut here ---

> >>>  static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
> >>> -                       int nh_len)
> >>> +                       int *key_len, int nh_len)
> >>>  {
> >>>        struct icmp6hdr *icmp = icmp6_hdr(skb);
> >>> +       int saved_key_len = *key_len;
> >>>
> >>>        /* The ICMPv6 type and code fields use the 16-bit transport port
> >>>         * fields, so we need to store them in 16-bit network byte order.
> >>>         */
> >>>        key->ipv6.tp.src = htons(icmp->icmp6_type);
> >>>        key->ipv6.tp.dst = htons(icmp->icmp6_code);
> >>> +       *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp.dst);
> >>
> >> Don't we need to store the key_len at this point, not before?  If we
> >> jump to the invalid block then we don't get rid of type and code.
> 
> Were you intending to fix this in the diff below?  I don't see how it
> rolls back the length for invalid packets.
> 
Oops. I meant to consolidate all the function return points to one. I think this patch now
ensures that *key_lenp will reflect the portion of the flow key containing valid data when
control returns to the caller.

--- cut here ---
diff --git a/datapath/flow.c b/datapath/flow.c
index 7eb5d79..913a834 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -313,6 +313,7 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
 			int *key_lenp, int nh_len)
 {
 	struct icmp6hdr *icmp = icmp6_hdr(skb);
+	int error = 0;
 	int key_len;
 
 	/* The ICMPv6 type and code fields use the 16-bit transport port
@@ -333,9 +334,11 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
 		 * entire packet.
 		 */
 		if (unlikely(icmp_len < sizeof(*nd)))
-			return 0;
-		if (unlikely(skb_linearize(skb)))
-			return -ENOMEM;
+			goto out;
+		if (unlikely(skb_linearize(skb))) {
+			error = -ENOMEM;
+			goto out;
+		}
 
 		nd = (struct nd_msg *)skb_transport_header(skb);
 		ipv6_addr_copy(&key->ipv6.nd.target, &nd->target);
@@ -386,7 +389,7 @@ invalid:
 
 out:
 	*key_lenp = key_len;
-	return 0;
+	return error;
 }
 
 /**
--- cut here ---


> > (I would have used the max() macro to prevent moving key_len backward,
> > but the embedded pointer type comparison generated warnings that I
> > couldn't easily work around.)
> 
> There's also max_t() which allows you to specify a type and usually
> solves those problems.

Thanks, that's better I think:

--- cut here ---
diff --git a/datapath/flow.c b/datapath/flow.c
index 913a834..1602362 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -363,16 +363,14 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
 					goto invalid;
 				memcpy(key->ipv6.nd.sll,
 				    &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
-				if (key_len < SW_FLOW_KEY_OFFSET(ipv6.nd.sll))
-					key_len = SW_FLOW_KEY_OFFSET(ipv6.nd.sll);
+				key_len = max_t(int, key_len, SW_FLOW_KEY_OFFSET(ipv6.nd.sll));
 			} else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR
 				   && opt_len == 8) {
 				if (unlikely(!is_zero_ether_addr(key->ipv6.nd.tll)))
 					goto invalid;
 				memcpy(key->ipv6.nd.tll,
 				    &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
-				if (key_len < SW_FLOW_KEY_OFFSET(ipv6.nd.tll))
-					key_len = SW_FLOW_KEY_OFFSET(ipv6.nd.tll);
+				key_len = max_t(int, key_len, SW_FLOW_KEY_OFFSET(ipv6.nd.tll));
 			}
 
 			icmp_len -= opt_len;
--- cut here ---

>                        if (nh_len == -EINVAL) {
> >                                skb->transport_header = skb->network_header;
> > -                               return 0;
> > +                       } else {
> > +                               error = nh_len;
> >                        }
> 
> We can drop the braces now.

Ok, done.

>>> @@ -744,6 +777,7 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, const struct nlattr *attr)
> >>>                        swkey->nw_proto = ntohs(arp_key->arp_op);
> >>>                        memcpy(swkey->ipv4.arp.sha, arp_key->arp_sha, ETH_ALEN);
> >>>                        memcpy(swkey->ipv4.arp.tha, arp_key->arp_tha, ETH_ALEN);
> >>> +                       *key_len = SW_FLOW_KEY_OFFSET(ipv4.arp.tha);
> >>
> >> Can't we just use SW_FLOW_KEY_OFFSET(ipv4.arp) rather than relying on
> >> remembering the last element?
> >
> > We could. That does make sense, though I think that it might make it
> > harder to notice the need to change the field name in the future if new
> > fields are added or fields are rearranged. Do you think that's not worth
> > worrying about?
> 
> I would just do in places where all the fields are always going to be
> assigned at the same time, for example here with the ARP SHA and THA.
> For that it seems like it would make it more robust against new fields
> or reordering.

Makes sense.

--- cut here ---
diff --git a/datapath/flow.c b/datapath/flow.c
index 1602362..aa2c740 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -321,7 +321,7 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
 	 */
 	key->ipv6.tp.src = htons(icmp->icmp6_type);
 	key->ipv6.tp.dst = htons(icmp->icmp6_code);
-	key_len = SW_FLOW_KEY_OFFSET(ipv6.tp.dst);
+	key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
 
 	if (icmp->icmp6_code == 0 &&
 	    (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
@@ -500,14 +500,14 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 					struct tcphdr *tcp = tcp_hdr(skb);
 					key->ipv4.tp.src = tcp->source;
 					key->ipv4.tp.dst = tcp->dest;
-					key_len = SW_FLOW_KEY_OFFSET(ipv4.tp.dst);
+					key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
 				}
 			} else if (key->nw_proto == IPPROTO_UDP) {
 				if (udphdr_ok(skb)) {
 					struct udphdr *udp = udp_hdr(skb);
 					key->ipv4.tp.src = udp->source;
 					key->ipv4.tp.dst = udp->dest;
-					key_len = SW_FLOW_KEY_OFFSET(ipv4.tp.dst);
+					key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
 				}
 			} else if (key->nw_proto == IPPROTO_ICMP) {
 				if (icmphdr_ok(skb)) {
@@ -517,7 +517,7 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 					 * in 16-bit network byte order. */
 					key->ipv4.tp.src = htons(icmp->type);
 					key->ipv4.tp.dst = htons(icmp->code);
-					key_len = SW_FLOW_KEY_OFFSET(ipv4.tp.dst);
+					key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
 				}
 			}
 		} else
@@ -545,7 +545,7 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 				memcpy(&key->ipv4.dst, arp->ar_tip, sizeof(key->ipv4.dst));
 				memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN);
 				memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN);
-				key_len = SW_FLOW_KEY_OFFSET(ipv4.arp.tha);
+				key_len = SW_FLOW_KEY_OFFSET(ipv4.arp);
 			}
 		}
 	} else if (key->dl_type == htons(ETH_P_IPV6)) {
@@ -553,11 +553,10 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 
 		nh_len = parse_ipv6hdr(skb, key, &key_len);
 		if (unlikely(nh_len < 0)) {
-			if (nh_len == -EINVAL) {
+			if (nh_len == -EINVAL)
 				skb->transport_header = skb->network_header;
-			} else {
+			else
 				error = nh_len;
-			}
 			goto out;
 		}
 
@@ -567,14 +566,14 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 				struct tcphdr *tcp = tcp_hdr(skb);
 				key->ipv6.tp.src = tcp->source;
 				key->ipv6.tp.dst = tcp->dest;
-				key_len = SW_FLOW_KEY_OFFSET(ipv6.tp.dst);
+				key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
 			}
 		} else if (key->nw_proto == NEXTHDR_UDP) {
 			if (udphdr_ok(skb)) {
 				struct udphdr *udp = udp_hdr(skb);
 				key->ipv6.tp.src = udp->source;
 				key->ipv6.tp.dst = udp->dest;
-				key_len = SW_FLOW_KEY_OFFSET(ipv6.tp.dst);
+				key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
 			}
 		} else if (key->nw_proto == NEXTHDR_ICMP) {
 			if (icmp6hdr_ok(skb)) {
@@ -730,7 +729,7 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
 			tcp_key = nla_data(nla);
 			swkey->ipv4.tp.src = tcp_key->tcp_src;
 			swkey->ipv4.tp.dst = tcp_key->tcp_dst;
-			*key_len = SW_FLOW_KEY_OFFSET(ipv4.tp.dst);
+			*key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
 			break;
 
 		case TRANSITION(ODP_KEY_ATTR_IPV6, ODP_KEY_ATTR_TCP):
@@ -739,7 +738,7 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
 			tcp_key = nla_data(nla);
 			swkey->ipv6.tp.src = tcp_key->tcp_src;
 			swkey->ipv6.tp.dst = tcp_key->tcp_dst;
-			*key_len = SW_FLOW_KEY_OFFSET(ipv6.tp.dst);
+			*key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
 			break;
 
 		case TRANSITION(ODP_KEY_ATTR_IPV4, ODP_KEY_ATTR_UDP):
@@ -748,7 +747,7 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
 			udp_key = nla_data(nla);
 			swkey->ipv4.tp.src = udp_key->udp_src;
 			swkey->ipv4.tp.dst = udp_key->udp_dst;
-			*key_len = SW_FLOW_KEY_OFFSET(ipv4.tp.dst);
+			*key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
 			break;
 
 		case TRANSITION(ODP_KEY_ATTR_IPV6, ODP_KEY_ATTR_UDP):
@@ -757,7 +756,7 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
 			udp_key = nla_data(nla);
 			swkey->ipv6.tp.src = udp_key->udp_src;
 			swkey->ipv6.tp.dst = udp_key->udp_dst;
-			*key_len = SW_FLOW_KEY_OFFSET(ipv6.tp.dst);
+			*key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
 			break;
 
 		case TRANSITION(ODP_KEY_ATTR_IPV4, ODP_KEY_ATTR_ICMP):
@@ -766,7 +765,7 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
 			icmp_key = nla_data(nla);
 			swkey->ipv4.tp.src = htons(icmp_key->icmp_type);
 			swkey->ipv4.tp.dst = htons(icmp_key->icmp_code);
-			*key_len = SW_FLOW_KEY_OFFSET(ipv4.tp.dst);
+			*key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
 			break;
 
 		case TRANSITION(ODP_KEY_ATTR_IPV6, ODP_KEY_ATTR_ICMPV6):
@@ -788,7 +787,7 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
 			swkey->nw_proto = ntohs(arp_key->arp_op);
 			memcpy(swkey->ipv4.arp.sha, arp_key->arp_sha, ETH_ALEN);
 			memcpy(swkey->ipv4.arp.tha, arp_key->arp_tha, ETH_ALEN);
-			*key_len = SW_FLOW_KEY_OFFSET(ipv4.arp.tha);
+			*key_len = SW_FLOW_KEY_OFFSET(ipv4.arp);
 			break;
 
 		case TRANSITION(ODP_KEY_ATTR_ICMPV6, ODP_KEY_ATTR_ND):
@@ -800,7 +799,7 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
 					sizeof(swkey->ipv6.nd.target));
 			memcpy(swkey->ipv6.nd.sll, nd_key->nd_sll, ETH_ALEN);
 			memcpy(swkey->ipv6.nd.tll, nd_key->nd_tll, ETH_ALEN);
-			*key_len = SW_FLOW_KEY_OFFSET(ipv6.nd.tll);
+			*key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
 			break;
 
 		default:
@@ -852,7 +851,7 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
 		if (swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) ||
 		    swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT))
 			return -EINVAL;
-		*key_len = SW_FLOW_KEY_OFFSET(ipv6.tp.dst);
+		*key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
 		return 0;
 
 	case ODP_KEY_ATTR_TCP:
--- cut here ---





More information about the dev mailing list