[ovs-dev] [PATCH] RFC: Pass more packet and flow key info to userspace.

Jarno Rajahalme jarno.rajahalme at nsn.com
Wed Jan 23 05:48:13 UTC 2013


Add OVS_PACKET_ATTR_KEY_INFO to relieve userspace from re-computing
data already computed within the kernel datapath.  In the typical
case of an upcall with perfect key fitness between kernel and
userspace this eliminates flow_extract() and flow_hash() calls in
handle_miss_upcalls().

Additional bookkeeping within the kernel datapath is minimal.
Kernel flow insertion also saves one flow key hash computation.

Removed setting the packet's l7 pointer for ICMP packets, as this was
never used.

Signed-off-by: Jarno Rajahalme <jarno.rajahalme at nsn.com>
---

This likely requires some discussion, but it took a while for me to
understand why each packet miss upcall would require flow_extract()
right after the flow key has been obtained from odp attributes.

The answer turned out to be:

1) Kernel may not know about all the header fields userspace wants to
   examine.  This is signaled with ODP_FIT_TOO_LITTLE.

2) We may have modified the packet during ofproto_receive().  This is
   signaled with ODP_FIT_TOO_MUCH.

3) The upcall may be due to userspace action rather than miss.  In
   this case the packet may have been modified by preceding actions,
   and the kernel provided key may thus be invalid.

In the case of unmodified packets with perfect fitness the only useful
work flow_extract() does is the setting of the l2/3/4/7 pointers for
the packet.  Most of this information has already been parsed by the
kernel, so passing this up would avoid unnecessary work in userspace.

Same applies to the flow key hash use in handle_miss_upcalls().  The 
hash value is only needed for pooling packets together during upcall
processing.  Kernel computed hash value should be OK for this for 
packets with perfect fitness, even though userspace might not know
how to compute the same hash value.

Extensibility would be easier with separate attributes, so maybe that
should be considered if this kind of thing is to be done.

  Jarno

---
 datapath/actions.c          |    5 ++++
 datapath/datapath.c         |   33 ++++++++++++++++++-----
 datapath/datapath.h         |    4 +++
 datapath/flow.c             |   22 ++++++++++++++-
 datapath/flow.h             |    7 ++++-
 include/linux/openvswitch.h |    9 +++++++
 lib/dpif-linux.c            |   10 +++++++
 lib/dpif.h                  |    4 +++
 lib/flow.c                  |   63 +++++++++++++++++++++++++++++++++++--------
 lib/flow.h                  |    7 ++++-
 lib/packets.c               |   51 +++++++++++++++++++----------------
 ofproto/ofproto-dpif.c      |   41 +++++++++++++++++++---------
 12 files changed, 199 insertions(+), 57 deletions(-)

diff --git a/datapath/actions.c b/datapath/actions.c
index f638ffc..1031840 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -381,6 +381,8 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
 	upcall.key = &OVS_CB(skb)->flow->key;
 	upcall.userdata = NULL;
 	upcall.portid = 0;
+	upcall.hash = OVS_CB(skb)->flow->hash;
+	upcall.key_len = OVS_CB(skb)->flow->key_len;
 
 	for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
 		 a = nla_next(a, &rem)) {
@@ -515,16 +517,19 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 			break;
 
 		case OVS_ACTION_ATTR_PUSH_VLAN:
+			OVS_CB(skb)->flow->key_len = 0; /* mark modified */
 			err = push_vlan(skb, nla_data(a));
 			if (unlikely(err)) /* skb already freed. */
 				return err;
 			break;
 
 		case OVS_ACTION_ATTR_POP_VLAN:
+			OVS_CB(skb)->flow->key_len = 0; /* mark modified */
 			err = pop_vlan(skb);
 			break;
 
 		case OVS_ACTION_ATTR_SET:
+			OVS_CB(skb)->flow->key_len = 0; /* mark modified */
 			err = execute_set_action(skb, nla_data(a), tun_key);
 			break;
 
diff --git a/datapath/datapath.c b/datapath/datapath.c
index b731c20..d41be5f 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -210,6 +210,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
 	if (!OVS_CB(skb)->flow) {
 		struct sw_flow_key key;
 		int key_len;
+		u32 hash;
 
 		/* Extract flow from 'skb' into 'key'. */
 		error = ovs_flow_extract(skb, p->port_no, &key, &key_len);
@@ -220,7 +221,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
 
 		/* Look up flow. */
 		flow = ovs_flow_tbl_lookup(rcu_dereference(dp->table),
-					   &key, key_len);
+					   &key, key_len, &hash);
 		if (unlikely(!flow)) {
 			struct dp_upcall_info upcall;
 
@@ -228,6 +229,9 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
 			upcall.key = &key;
 			upcall.userdata = NULL;
 			upcall.portid = p->upcall_portid;
+			upcall.hash = hash;
+			upcall.key_len = key_len;
+
 			ovs_dp_upcall(dp, skb, &upcall);
 			consume_skb(skb);
 			stats_counter = &stats->n_missed;
@@ -327,6 +331,10 @@ static int queue_gso_packets(struct net *net, int dp_ifindex,
 
 			later_info = *upcall_info;
 			later_info.key = &later_key;
+			if (later_info.key_len)
+				later_info.hash = ovs_flow_key_hash(&later_key,
+							 upcall_info->key_len);
+
 			upcall_info = &later_info;
 		}
 	} while ((skb = skb->next));
@@ -358,7 +366,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
 		nskb = skb_clone(skb, GFP_ATOMIC);
 		if (!nskb)
 			return -ENOMEM;
-		
+
 		err = vlan_deaccel_tag(nskb);
 		if (err)
 			return err;
@@ -374,8 +382,9 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
 	len = sizeof(struct ovs_header);
 	len += nla_total_size(skb->len);
 	len += nla_total_size(FLOW_BUFSIZE);
+	len += nla_total_size(sizeof (struct ovs_key_info));
 	if (upcall_info->cmd == OVS_PACKET_CMD_ACTION)
-		len += nla_total_size(8);
+		len += nla_total_size(8); /* possible USERDATA */
 
 	user_skb = genlmsg_new(len, GFP_ATOMIC);
 	if (!user_skb) {
@@ -391,6 +400,15 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
 	ovs_flow_to_nlattrs(upcall_info->key, user_skb);
 	nla_nest_end(user_skb, nla);
 
+	if (upcall_info->key_len) {
+		/* We have valid info on the packet */
+		struct ovs_key_info info;
+		info.hash = upcall_info->hash;
+		info.network_offset = skb_network_offset(skb);
+		info.transport_offset = skb_transport_offset(skb);
+		nla_put(user_skb, OVS_PACKET_ATTR_KEY_INFO, sizeof info, &info);
+	}
+
 	if (upcall_info->userdata)
 		nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA,
 			    nla_get_u64(upcall_info->userdata));
@@ -1183,6 +1201,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 	struct sw_flow_actions *acts = NULL;
 	int error;
 	int key_len;
+	u32 hash;
 
 	/* Extract key. */
 	error = -EINVAL;
@@ -1213,7 +1232,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 		goto err_kfree;
 
 	table = genl_dereference(dp->table);
-	flow = ovs_flow_tbl_lookup(table, &key, key_len);
+	flow = ovs_flow_tbl_lookup(table, &key, key_len, &hash);
 	if (!flow) {
 		/* Bail out if we're not allowed to create a new flow. */
 		error = -ENOENT;
@@ -1243,7 +1262,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 		rcu_assign_pointer(flow->sf_acts, acts);
 
 		/* Put flow in bucket. */
-		ovs_flow_tbl_insert(table, flow, &key, key_len);
+		ovs_flow_tbl_insert_w_hash(table, flow, &key, key_len, hash);
 
 		reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
 						info->snd_seq,
@@ -1318,7 +1337,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
 		return -ENODEV;
 
 	table = genl_dereference(dp->table);
-	flow = ovs_flow_tbl_lookup(table, &key, key_len);
+	flow = ovs_flow_tbl_lookup(table, &key, key_len, NULL);
 	if (!flow)
 		return -ENOENT;
 
@@ -1354,7 +1373,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
 		return err;
 
 	table = genl_dereference(dp->table);
-	flow = ovs_flow_tbl_lookup(table, &key, key_len);
+	flow = ovs_flow_tbl_lookup(table, &key, key_len, NULL);
 	if (!flow)
 		return -ENOENT;
 
diff --git a/datapath/datapath.h b/datapath/datapath.h
index 2b93348..7ef594d 100644
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -125,12 +125,16 @@ struct ovs_skb_cb {
  * @portid: Netlink PID to which packet should be sent.  If @portid is 0 then no
  * packet is sent and the packet is accounted in the datapath's @n_lost
  * counter.
+ * @hash: flow key hash value computed over key, valid if key_len != 0.
+ * @key_len: key length needed for hash recalculation if key is changed.
  */
 struct dp_upcall_info {
 	u8 cmd;
 	const struct sw_flow_key *key;
 	const struct nlattr *userdata;
 	u32 portid;
+	u32 hash;
+	int key_len;
 };
 
 /**
diff --git a/datapath/flow.c b/datapath/flow.c
index fad9e19..f8f2b5e 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -789,8 +789,14 @@ static int flow_key_start(struct sw_flow_key *key)
 		return offsetof(struct sw_flow_key, phy);
 }
 
+u32 ovs_flow_key_hash(struct sw_flow_key *key, int key_len)
+{
+	return ovs_flow_hash(key, flow_key_start(key), key_len);
+}
+
 struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table,
-				struct sw_flow_key *key, int key_len)
+				    struct sw_flow_key *key, int key_len,
+				    u32 *hashp)
 {
 	struct sw_flow *flow;
 	struct hlist_node *n;
@@ -811,13 +817,27 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table,
 			return flow;
 		}
 	}
+
+	if (hashp)
+		*hashp = hash;
+
 	return NULL;
 }
 
+void ovs_flow_tbl_insert_w_hash(struct flow_table *table, struct sw_flow *flow,
+				struct sw_flow_key *key, int key_len, u32 hash)
+{
+	flow->hash = hash;
+	flow->key_len = key_len;
+	memcpy(&flow->key, key, sizeof(flow->key));
+	__flow_tbl_insert(table, flow);
+}
+
 void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
 			 struct sw_flow_key *key, int key_len)
 {
 	flow->hash = ovs_flow_hash(key, flow_key_start(key), key_len);
+	flow->key_len = key_len;
 	memcpy(&flow->key, key, sizeof(flow->key));
 	__flow_tbl_insert(table, flow);
 }
diff --git a/datapath/flow.h b/datapath/flow.h
index dab6980..67433fb 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -114,6 +114,7 @@ struct sw_flow {
 	struct hlist_node hash_node[2];
 	u32 hash;
 
+	int key_len;
 	struct sw_flow_key key;
 	struct sw_flow_actions __rcu *sf_acts;
 
@@ -211,13 +212,17 @@ static inline int ovs_flow_tbl_need_to_expand(struct flow_table *table)
 	return (table->count > table->n_buckets);
 }
 
+u32 ovs_flow_key_hash(struct sw_flow_key *key, int len);
 struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table,
-				    struct sw_flow_key *key, int len);
+				    struct sw_flow_key *key, int len,
+				    u32 *hashp);
 void ovs_flow_tbl_destroy(struct flow_table *table);
 void ovs_flow_tbl_deferred_destroy(struct flow_table *table);
 struct flow_table *ovs_flow_tbl_alloc(int new_size);
 struct flow_table *ovs_flow_tbl_expand(struct flow_table *table);
 struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table);
+void ovs_flow_tbl_insert_w_hash(struct flow_table *table, struct sw_flow *flow,
+				struct sw_flow_key *key, int key_len, u32 hash);
 void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
 			 struct sw_flow_key *key, int key_len);
 void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow);
diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
index f471fbc..94a7469 100644
--- a/include/linux/openvswitch.h
+++ b/include/linux/openvswitch.h
@@ -149,6 +149,8 @@ enum ovs_packet_cmd {
  * @OVS_PACKET_ATTR_USERDATA: Present for an %OVS_PACKET_CMD_ACTION
  * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an
  * %OVS_USERSPACE_ATTR_USERDATA attribute.
+ * @OVS_PACKET_ATTR_KEY_INFO: kernel key hash and header offsets for the
+ * originally received packet.
  *
  * These attributes follow the &struct ovs_header within the Generic Netlink
  * payload for %OVS_PACKET_* commands.
@@ -159,11 +161,18 @@ enum ovs_packet_attr {
 	OVS_PACKET_ATTR_KEY,         /* Nested OVS_KEY_ATTR_* attributes. */
 	OVS_PACKET_ATTR_ACTIONS,     /* Nested OVS_ACTION_ATTR_* attributes. */
 	OVS_PACKET_ATTR_USERDATA,    /* u64 OVS_ACTION_ATTR_USERSPACE arg. */
+	OVS_PACKET_ATTR_KEY_INFO,    /* struct ovs_key_info */
 	__OVS_PACKET_ATTR_MAX
 };
 
 #define OVS_PACKET_ATTR_MAX (__OVS_PACKET_ATTR_MAX - 1)
 
+struct ovs_key_info {
+	__u32 hash;
+	__u16 network_offset;
+	__u16 transport_offset;
+};
+
 /* Virtual ports. */
 
 #define OVS_VPORT_FAMILY  "ovs_vport"
diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c
index 4425f6f..67b73e2 100644
--- a/lib/dpif-linux.c
+++ b/lib/dpif-linux.c
@@ -1174,6 +1174,7 @@ parse_odp_packet(struct ofpbuf *buf, struct dpif_upcall *upcall,
         [OVS_PACKET_ATTR_PACKET] = { .type = NL_A_UNSPEC,
                                      .min_len = ETH_HEADER_LEN },
         [OVS_PACKET_ATTR_KEY] = { .type = NL_A_NESTED },
+        [OVS_PACKET_ATTR_KEY_INFO] = { .type = NL_A_UNSPEC, .optional = true },
 
         /* OVS_PACKET_CMD_ACTION only. */
         [OVS_PACKET_ATTR_USERDATA] = { .type = NL_A_U64, .optional = true },
@@ -1214,6 +1215,15 @@ parse_odp_packet(struct ofpbuf *buf, struct dpif_upcall *upcall,
     upcall->key = CONST_CAST(struct nlattr *,
                              nl_attr_get(a[OVS_PACKET_ATTR_KEY]));
     upcall->key_len = nl_attr_get_size(a[OVS_PACKET_ATTR_KEY]);
+
+    if (a[OVS_PACKET_ATTR_KEY_INFO]) {
+        const struct ovs_key_info * key_info;
+        key_info = nl_attr_get(a[OVS_PACKET_ATTR_KEY_INFO]);
+
+        upcall->kernel_hash = key_info->hash;
+        upcall->network_offset = key_info->network_offset;
+        upcall->transport_offset = key_info->transport_offset;
+    }
     upcall->userdata = (a[OVS_PACKET_ATTR_USERDATA]
                         ? nl_attr_get_u64(a[OVS_PACKET_ATTR_USERDATA])
                         : 0);
diff --git a/lib/dpif.h b/lib/dpif.h
index a478db2..b2647d8 100644
--- a/lib/dpif.h
+++ b/lib/dpif.h
@@ -553,6 +553,10 @@ struct dpif_upcall {
     struct nlattr *key;         /* Flow key. */
     size_t key_len;             /* Length of 'key' in bytes. */
 
+    uint32_t kernel_hash;       /* Valid if network_offset != 0 */
+    uint16_t network_offset;
+    uint16_t transport_offset;
+
     /* DPIF_UC_ACTION only. */
     uint64_t userdata;          /* Argument to OVS_ACTION_ATTR_USERSPACE. */
 };
diff --git a/lib/flow.c b/lib/flow.c
index 861a1f1..770f1ae 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -36,6 +36,9 @@
 #include "packets.h"
 #include "unaligned.h"
 #include "vlog.h"
+#include "dpif.h"
+#include <linux/tcp.h>
+#include <linux/udp.h>
 
 VLOG_DEFINE_THIS_MODULE(flow);
 
@@ -316,6 +319,42 @@ invalid:
 
 }
 
+/* Initializes 'packet' header pointers as follows:
+ *
+ *    - packet->l2 to the start of the Ethernet header.
+ *
+ *    - packet->l3 to just past the Ethernet header, or just past the
+ *      vlan_header if one is present, to the first byte of the payload of the
+ *      Ethernet frame.
+ *
+ *    - packet->l4 to just past the IPv4 header, if one is present and has a
+ *      correct length, and otherwise NULL.
+ *
+ *    - packet->l7 to just past the TCP or UDP header, if one is
+ *      present and has a correct length, and otherwise NULL.
+ */
+void
+flow_init_packet(struct ofpbuf *packet, const struct flow *flow,
+                 const struct dpif_upcall *upcall)
+{
+    packet->l2 = packet->data;
+    packet->l3 = (char *)packet->data + upcall->network_offset;
+    if (upcall->transport_offset == 0) {
+        packet->l4 = NULL;
+        packet->l7 = NULL;
+    } else {
+        packet->l4 = (char *)packet->data + upcall->transport_offset;
+        if (flow->nw_proto == IPPROTO_UDP) {
+            packet->l7 = (char *)packet->l4 + sizeof (struct udphdr);
+        } else if (flow->nw_proto == IPPROTO_TCP) {
+            struct tcphdr *tcp = packet->l4;
+            packet->l7 = (char *)packet->l4 + tcp->doff * 4;
+        } else {
+            packet->l7 = NULL;
+        }
+    }
+}
+
 /* Initializes 'flow' members from 'packet', 'skb_priority', 'tnl', and
  * 'ofp_in_port'.
  *
@@ -330,7 +369,7 @@ invalid:
  *    - packet->l4 to just past the IPv4 header, if one is present and has a
  *      correct length, and otherwise NULL.
  *
- *    - packet->l7 to just past the TCP or UDP or ICMP header, if one is
+ *    - packet->l7 to just past the TCP or UDP header, if one is
  *      present and has a correct length, and otherwise NULL.
  */
 void
@@ -343,11 +382,16 @@ flow_extract(struct ofpbuf *packet, uint32_t skb_priority, uint32_t skb_mark,
 
     COVERAGE_INC(flow_extract);
 
-    memset(flow, 0, sizeof *flow);
-
-    if (tnl) {
-        ovs_assert(tnl != &flow->tunnel);
-        flow->tunnel = *tnl;
+    if (tnl == &flow->tunnel) {
+        /* Keep given tunnel data */
+        BUILD_ASSERT(OBJECT_OFFSETOF(flow, tunnel) == 0);
+        memset((char *)flow + sizeof *tnl, 0, sizeof *flow - sizeof *tnl);
+    }
+    else {
+        memset(flow, 0, sizeof *flow);
+        if (tnl) {
+            flow->tunnel = *tnl;
+        }
     }
     flow->in_port = ofp_in_port;
     flow->skb_priority = skb_priority;
@@ -387,7 +431,7 @@ flow_extract(struct ofpbuf *packet, uint32_t skb_priority, uint32_t skb_mark,
  *    - packet->l4 to just past the IPv4 header, if one is present and has a
  *      correct length, and otherwise NULL.
  *
- *    - packet->l7 to just past the TCP or UDP or ICMP header, if one is
+ *    - packet->l7 to just past the TCP or UDP header, if one is
  *      present and has a correct length, and otherwise NULL.
  */
 void
@@ -428,7 +472,6 @@ flow_extract_l3_onwards(struct ofpbuf *packet, struct flow *flow,
                     if (icmp) {
                         flow->tp_src = htons(icmp->icmp_type);
                         flow->tp_dst = htons(icmp->icmp_code);
-                        packet->l7 = b.data;
                     }
                 }
             }
@@ -444,9 +487,7 @@ flow_extract_l3_onwards(struct ofpbuf *packet, struct flow *flow,
         } else if (flow->nw_proto == IPPROTO_UDP) {
             parse_udp(packet, &b, flow);
         } else if (flow->nw_proto == IPPROTO_ICMPV6) {
-            if (parse_icmpv6(&b, flow)) {
-                packet->l7 = b.data;
-            }
+            parse_icmpv6(&b, flow);
         }
     } else if (dl_type == htons(ETH_TYPE_ARP) ||
                dl_type == htons(ETH_TYPE_RARP)) {
diff --git a/lib/flow.h b/lib/flow.h
index 8e79e62..3d8c51b 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -75,9 +75,10 @@ struct flow_tnl {
 * 16-bit OpenFlow 1.0 port number.  In the software datapath interface (dpif)
 * layer and its implementations (e.g. dpif-linux, dpif-netdev), it is instead
 * a 32-bit datapath port number.
+* Note: flow_extract() depends on 'tunnel' being the first member.
 */
 struct flow {
-    struct flow_tnl tunnel;     /* Encapsulating tunnel parameters. */
+    struct flow_tnl tunnel;     /* Encapsulating tunnel parameters. KEEP 1st */
     ovs_be64 metadata;          /* OpenFlow Metadata. */
     struct in6_addr ipv6_src;   /* IPv6 source address. */
     struct in6_addr ipv6_dst;   /* IPv6 destination address. */
@@ -121,6 +122,10 @@ struct flow_metadata {
     uint16_t in_port;                /* OpenFlow port or zero. */
 };
 
+struct dpif_upcall;
+
+void flow_init_packet(struct ofpbuf *, const struct flow *,
+                      const struct dpif_upcall *);
 void flow_extract(struct ofpbuf *, uint32_t priority, uint32_t mark,
                   const struct flow_tnl *, uint16_t in_port, struct flow *);
 void flow_extract_l3_onwards(struct ofpbuf *, struct flow *,
diff --git a/lib/packets.c b/lib/packets.c
index 73dfcdc..bc6d3d0 100644
--- a/lib/packets.c
+++ b/lib/packets.c
@@ -454,17 +454,19 @@ packet_set_ipv4_addr(struct ofpbuf *packet, ovs_be32 *addr, ovs_be32 new_addr)
 {
     struct ip_header *nh = packet->l3;
 
-    if (nh->ip_proto == IPPROTO_TCP && packet->l7) {
-        struct tcp_header *th = packet->l4;
-
-        th->tcp_csum = recalc_csum32(th->tcp_csum, *addr, new_addr);
-    } else if (nh->ip_proto == IPPROTO_UDP && packet->l7) {
-        struct udp_header *uh = packet->l4;
-
-        if (uh->udp_csum) {
-            uh->udp_csum = recalc_csum32(uh->udp_csum, *addr, new_addr);
-            if (!uh->udp_csum) {
-                uh->udp_csum = htons(0xffff);
+    if (packet->l7) {
+        if (nh->ip_proto == IPPROTO_TCP) {
+            struct tcp_header *th = packet->l4;
+
+            th->tcp_csum = recalc_csum32(th->tcp_csum, *addr, new_addr);
+        } else if (nh->ip_proto == IPPROTO_UDP) {
+            struct udp_header *uh = packet->l4;
+
+            if (uh->udp_csum) {
+                uh->udp_csum = recalc_csum32(uh->udp_csum, *addr, new_addr);
+                if (!uh->udp_csum) {
+                    uh->udp_csum = htons(0xffff);
+                }
             }
         }
     }
@@ -560,17 +562,19 @@ static void
 packet_update_csum128(struct ofpbuf *packet, uint8_t proto,
                      ovs_be32 addr[4], const ovs_be32 new_addr[4])
 {
-    if (proto == IPPROTO_TCP && packet->l7) {
-        struct tcp_header *th = packet->l4;
+    if (packet->l7) {
+        if (proto == IPPROTO_TCP) {
+            struct tcp_header *th = packet->l4;
 
-        th->tcp_csum = recalc_csum128(th->tcp_csum, addr, new_addr);
-    } else if (proto == IPPROTO_UDP && packet->l7) {
-        struct udp_header *uh = packet->l4;
+            th->tcp_csum = recalc_csum128(th->tcp_csum, addr, new_addr);
+        } else if (proto == IPPROTO_UDP) {
+            struct udp_header *uh = packet->l4;
 
-        if (uh->udp_csum) {
-            uh->udp_csum = recalc_csum128(uh->udp_csum, addr, new_addr);
-            if (!uh->udp_csum) {
-                uh->udp_csum = htons(0xffff);
+            if (uh->udp_csum) {
+                uh->udp_csum = recalc_csum128(uh->udp_csum, addr, new_addr);
+                if (!uh->udp_csum) {
+                    uh->udp_csum = htons(0xffff);
+                }
             }
         }
     }
@@ -710,9 +714,10 @@ packet_set_udp_port(struct ofpbuf *packet, ovs_be16 src, ovs_be16 dst)
 uint8_t
 packet_get_tcp_flags(const struct ofpbuf *packet, const struct flow *flow)
 {
-    if ((flow->dl_type == htons(ETH_TYPE_IP) ||
-         flow->dl_type == htons(ETH_TYPE_IPV6)) &&
-        flow->nw_proto == IPPROTO_TCP && packet->l7) {
+    if ((flow->dl_type == htons(ETH_TYPE_IP)
+         || flow->dl_type == htons(ETH_TYPE_IPV6))
+        && flow->nw_proto == IPPROTO_TCP
+        && packet->l7) {
         const struct tcp_header *tcp = packet->l4;
         return TCP_FLAGS(tcp->tcp_ctl);
     } else {
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index b37b482..30ac04a 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -3518,29 +3518,44 @@ handle_miss_upcalls(struct dpif_backer *backer, struct dpif_upcall *upcalls,
         struct flow_miss *existing_miss;
         struct ofproto_dpif *ofproto;
         uint32_t odp_in_port;
-        struct flow flow;
         uint32_t hash;
         int error;
 
         error = ofproto_receive(backer, upcall->packet, upcall->key,
-                                upcall->key_len, &flow, &miss->key_fitness,
+                                upcall->key_len, &miss->flow, &miss->key_fitness,
                                 &ofproto, &odp_in_port, &miss->initial_tci);
-        if (error == ENODEV) {
-            /* Received packet on port for which we couldn't associate
-             * an ofproto.  This can happen if a port is removed while
-             * traffic is being received.  Print a rate-limited message
-             * in case it happens frequently. */
-            VLOG_INFO_RL(&rl, "received packet on unassociated port %"PRIu32,
-                         flow.in_port);
-        }
         if (error) {
+            if (error == ENODEV) {
+                /* Received packet on port for which we couldn't associate
+                 * an ofproto.  This can happen if a port is removed while
+                 * traffic is being received.  Print a rate-limited message
+                 * in case it happens frequently. */
+                VLOG_INFO_RL(&rl, "received packet on unassociated port %"PRIu32,
+                             miss->flow.in_port);
+            }
             continue;
         }
-        flow_extract(upcall->packet, flow.skb_priority, flow.skb_mark,
-                     &flow.tunnel, flow.in_port, &miss->flow);
+
+        if (miss->key_fitness == ODP_FIT_PERFECT
+            && upcall->network_offset != 0) {
+            /* We have a perfect key fitness and non-zero network offset
+             * implies valid offsets and hash from kernel.
+             * Just parse packet layer pointers (l2/3/4/7) */
+            flow_init_packet(upcall->packet, &miss->flow, upcall);
+            /* re-use the hash value from kernel */
+            hash = upcall->kernel_hash;
+        } else {
+            /* The packet may have been modified since the key extraction,
+             * or the kernel provided key may otherwise be insufficient.
+             * Do full flow key extraction and compute the corresponding hash.
+             */
+            flow_extract(upcall->packet, miss->flow.skb_priority,
+                         miss->flow.skb_mark, &miss->flow.tunnel,
+                         miss->flow.in_port, &miss->flow);
+            hash = flow_hash(&miss->flow, 0);
+        }
 
         /* Add other packets to a to-do list. */
-        hash = flow_hash(&miss->flow, 0);
         existing_miss = flow_miss_find(&todo, &miss->flow, hash);
         if (!existing_miss) {
             hmap_insert(&todo, &miss->hmap_node, hash);
-- 
1.7.10.4




More information about the dev mailing list