[ovs-discuss] [PATCH] datapath: Work around Xen Dom0 netback driver bug.

Ben Pfaff blp at nicira.com
Thu Aug 13 21:45:47 UTC 2009


The Xen netback driver does not gracefully support data chunks longer than
2048 bytes.  In particular, if the skb header is longer than 2048 bytes,
netback will make the initial fragment that it passes to the DomU driver 0
bytes long.  The current Linux DomU does not tolerate that and crashes when
eth_type_trans() calls skb_pull().

Properly, this should be fixed in the netback driver.  Ideally the netfront
driver in the DomU should tolerate it too.  But for now we need to work
around the problem.
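
For what it's worth, the idea behind the workaround is simple: never hand
netback a chunk longer than 2048 bytes, copying any excess into additional
page fragments instead.  The userspace sketch below illustrates just that
splitting loop; the names (split_into_chunks, struct chunk) and the
standalone setup are illustrative only and not part of the patch, which
does the equivalent with skb page fragments in chunk_skb()/add_chunk()
below.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_CHUNK 2048	/* largest chunk netback handles gracefully */

struct chunk {
	size_t size;
	unsigned char data[MAX_CHUNK];
};

/* Copies 'len' bytes from 'data' into 'chunks', making each chunk no longer
 * than MAX_CHUNK bytes.  Returns the number of chunks used, or -1 if more
 * than 'max_chunks' would be needed. */
static int split_into_chunks(const unsigned char *data, size_t len,
			     struct chunk *chunks, int max_chunks)
{
	int n = 0;

	while (len > 0) {
		size_t copy = len < MAX_CHUNK ? len : MAX_CHUNK;

		if (n >= max_chunks)
			return -1;
		memcpy(chunks[n].data, data, copy);
		chunks[n].size = copy;
		n++;
		data += copy;
		len -= copy;
	}
	return n;
}

int main(void)
{
	unsigned char payload[5000];
	struct chunk chunks[8];
	int n, i;

	memset(payload, 0xab, sizeof payload);
	n = split_into_chunks(payload, sizeof payload, chunks, 8);
	if (n < 0)
		return EXIT_FAILURE;
	for (i = 0; i < n; i++)
		printf("chunk %d: %zu bytes\n", i, chunks[i].size);	/* 2048, 2048, 904 */
	return 0;
}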

Bug #1801, NIC-23.
---
 datapath/actions.c |  186 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 datapath/actions.h |    1 -
 2 files changed, 180 insertions(+), 7 deletions(-)

diff --git a/datapath/actions.c b/datapath/actions.c
index a037e43..d41cb21 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -274,10 +274,178 @@ static inline unsigned packet_length(const struct sk_buff *skb)
 	return length;
 }
 
-int dp_xmit_skb(struct sk_buff *skb)
+#ifdef CONFIG_XEN
+/* Maximum number of bytes we can safely put into the header or into a single
+ * fragment. */
+#define MAX_CHUNK 2048
+
+/* Maximum number of bytes in an skb that we are able to chunk. */
+#define MAX_CHUNKABLE min_t(int, ((1 + MAX_SKB_FRAGS) * MAX_CHUNK), 65535)
+
+/* Appends a fragment to shinfo->frags[] and initializes its members. */
+static void add_frag(struct skb_shared_info *shinfo, struct page *page,
+		     int page_offset, int size)
+{
+	skb_frag_t *frag;
+
+	BUG_ON(shinfo->nr_frags >= MAX_SKB_FRAGS);
+	frag = &shinfo->frags[shinfo->nr_frags++];
+	frag->page = page;
+	frag->page_offset = page_offset;
+	frag->size = size;
+}
+
+/**
+ * add_chunk - add data to set of fragments
+ * @shinfo: set of fragments
+ * @data: data to add
+ * @len: number of bytes in @data
+ * @gfp: allocation flags
+ *
+ * Copies data into the allocated fragments in @shinfo, which must not contain
+ * any fragments other than those allocated by add_chunk() itself.  Ensures
+ * that none of the fragments are more than %MAX_CHUNK bytes long, adding more
+ * fragments as necessary.  Returns 0 or a negative error.
+ */
+static int add_chunk(struct skb_shared_info *shinfo,
+		     void *data, int len, gfp_t gfp)
+{
+	int copy;
+
+	for (; len; data += copy, len -= copy) {
+		skb_frag_t *frag;
+
+		/* Allocate a new fragment if necessary. */
+		if (!shinfo->nr_frags ||
+		    shinfo->frags[shinfo->nr_frags - 1].size >= MAX_CHUNK)
+		{
+			struct page *page = alloc_page(gfp);
+			if (!page)
+				return -ENOMEM;
+			add_frag(shinfo, page, 0, 0);
+		}
+
+		/* Append to fragment.  We know that frag->page_offset is 0
+		 * (since we allocated the frag ourselves). */
+		frag = &shinfo->frags[shinfo->nr_frags - 1];
+		copy = min(len, MAX_CHUNK - frag->size);
+		BUG_ON(PageHighMem(frag->page));
+		memcpy(page_address(frag->page) + frag->size, data, copy);
+		frag->size += copy;
+	}
+	return 0;
+}
+
+/**
+ * chunk_skb - breaks an sk_buff into fragments suitable for the netback driver
+ * @skb: sk_buff to fragment
+ * @gfp: allocation flags
+ *
+ * If @skb is destined for a netback device (as indicated by skb->dev), this
+ * function ensures that @skb's header and each of its fragments is no longer
+ * than %MAX_CHUNK bytes long, breaking them up into additional fragments if
+ * necessary.
+ *
+ * This function is necessary because the Xen netback driver does not
+ * gracefully support data chunks longer than 2048 bytes.  In particular, if
+ * the skb's header is longer than 2048 bytes, netback will make the initial
+ * fragment that it passes to the DomU driver 0 bytes long.  The current Linux
+ * DomU does not tolerate that and crashes when eth_type_trans() calls
+ * skb_pull().
+ *
+ * @skb must not be shared.
+ *
+ * Returns 0 or a negative error.
+ */
+static int chunk_skb(struct sk_buff *skb, gfp_t gfp)
+{
+	struct skb_shared_info *shinfo;
+	struct skb_shared_info nshinfo;
+	int extra_frags;
+	int err, i;
+
+	/* Fast path the common case where skb is not big enough to need
+	 * chunking or not destined for a netback device. */
+	if (skb->len < MAX_CHUNK ||
+	    skb->dev->dev_addr[0] != 0xfe || skb->dev->dev_addr[1] != 0xff)
+		return 0;
+
+	/* If the packet is bigger than MAX_CHUNKABLE, then there's no way we
+	 * can chunk it properly and we might as well give up right away. */
+	if (skb->len > MAX_CHUNKABLE)
+		return -E2BIG;
+
+	/* Pull data into header. */
+	if (!pskb_may_pull(skb, MAX_CHUNK))
+		return -ENOMEM;
+	shinfo = skb_shinfo(skb); /* Only valid *after* pskb_may_pull(). */
+
+	/* Count the number of fragments that we would generate.  If we
+	 * wouldn't create any extra fragments, then there's nothing to do. */
+	extra_frags = skb_headlen(skb) / MAX_CHUNK;
+	for (i = 0; i < shinfo->nr_frags; i++)
+		extra_frags += shinfo->frags[i].size / MAX_CHUNK;
+	if (!extra_frags)
+		return 0;
+
+	/* Generate new fragments. */
+	nshinfo.nr_frags = 0;
+	err = add_chunk(&nshinfo, skb->data + MAX_CHUNK,
+			skb_headlen(skb) - MAX_CHUNK, gfp);
+	if (err)
+		goto error;
+	if (extra_frags + shinfo->nr_frags <= MAX_SKB_FRAGS) {
+		for (i = 0; i < shinfo->nr_frags; i++) {
+			skb_frag_t *frag = &shinfo->frags[i];
+			int ofs, n;
+
+			for (ofs = 0; ofs < frag->size; ofs += n) {
+				n = min_t(int, frag->size - ofs, MAX_CHUNK);
+				get_page(frag->page);
+				add_frag(&nshinfo, frag->page,
+					 frag->page_offset + ofs, n);
+			}
+		}
+	} else {
+		for (i = 0; i < shinfo->nr_frags; i++) {
+			skb_frag_t *frag = &shinfo->frags[i];
+			void *frag_data = kmap_skb_frag(frag);
+			err = add_chunk(&nshinfo,
+					frag_data + frag->page_offset,
+					frag->size, gfp);
+			kunmap_skb_frag(frag_data);
+			if (err)
+				goto error;
+		}
+	}
+
+	/* Now replace the old fragments by the new ones. */
+	for (i = 0; i < shinfo->nr_frags; i++)
+		put_page(shinfo->frags[i].page);
+	shinfo->nr_frags = nshinfo.nr_frags;
+	memcpy(shinfo->frags, nshinfo.frags,
+	       nshinfo.nr_frags * sizeof(skb_frag_t));
+	skb->data_len = skb->len - MAX_CHUNK;
+
+	return 0;
+
+error:
+	for (i = 0; i < nshinfo.nr_frags; i++)
+		put_page(nshinfo.frags[i].page);
+	return err;
+}
+#else  /* !CONFIG_XEN */
+static int chunk_skb(struct sk_buff *skb, gfp_t gfp)
+{
+	return 0;
+}
+#endif	/* !CONFIG_XEN */
+
+static int dp_xmit_skb(struct sk_buff *skb, gfp_t gfp)
 {
 	struct datapath *dp = skb->dev->br_port->dp;
 	int len = skb->len;
+	int err;
 
 	if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) {
 		printk(KERN_WARNING "%s: dropped over-mtu packet: %d > %d\n",
@@ -286,13 +454,19 @@ int dp_xmit_skb(struct sk_buff *skb)
 		return -E2BIG;
 	}
 
+	err = chunk_skb(skb, gfp);
+	if (err) {
+		kfree_skb(skb);
+		return err;
+	}
+
 	dev_queue_xmit(skb);
 
 	return len;
 }
 
 static void
-do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
+do_output(struct datapath *dp, struct sk_buff *skb, int out_port, gfp_t gfp)
 {
 	struct net_bridge_port *p;
 	struct net_device *dev;
@@ -308,7 +482,7 @@ do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
 	if (is_dp_dev(dev))
 		dp_dev_recv(dev, skb);
         else
-		dp_xmit_skb(skb);
+		dp_xmit_skb(skb, gfp);
 	return;
 
 error:
@@ -334,7 +508,7 @@ static int output_group(struct datapath *dp, __u16 group,
 			struct sk_buff *clone = skb_clone(skb, gfp);
 			if (!clone)
 				return -1;
-			do_output(dp, clone, prev_port);
+			do_output(dp, clone, prev_port, gfp);
 		}
 		prev_port = p->port_no;
 	}
@@ -365,7 +539,7 @@ int execute_actions(struct datapath *dp, struct sk_buff *skb,
 	for (; n_actions > 0; a++, n_actions--) {
 		WARN_ON_ONCE(skb_shared(skb));
 		if (prev_port != -1) {
-			do_output(dp, skb_clone(skb, gfp), prev_port);
+			do_output(dp, skb_clone(skb, gfp), prev_port, gfp);
 			prev_port = -1;
 		}
 
@@ -417,7 +591,7 @@ int execute_actions(struct datapath *dp, struct sk_buff *skb,
 			return -ENOMEM;
 	}
 	if (prev_port != -1)
-		do_output(dp, skb, prev_port);
+		do_output(dp, skb, prev_port, gfp);
 	else
 		kfree_skb(skb);
 	return err;
diff --git a/datapath/actions.h b/datapath/actions.h
index bda6d55..9ec54b1 100644
--- a/datapath/actions.h
+++ b/datapath/actions.h
@@ -17,7 +17,6 @@ struct odp_flow_key;
 union odp_action;
 
 struct sk_buff *make_writable(struct sk_buff *, gfp_t gfp);
-int dp_xmit_skb(struct sk_buff *);
 int execute_actions(struct datapath *dp, struct sk_buff *skb,
 		    struct odp_flow_key *key,
 		    const union odp_action *, int n_actions,
-- 
1.6.3.3