[ovs-dev] [PATCH 5/8] datapath: Add support for tunnel fragmentation.

Jesse Gross jesse at nicira.com
Wed Aug 18 07:08:49 UTC 2010


Up until now it was assumed that encapsulated packets larger than
the MTU would be fragmented by the IP stack.  However, some
tunneling protocols provide their own fragmentation mechanism.  This
patch adds the support that the generic tunnel code needs to let such
protocols perform their own fragmentation.

Signed-off-by: Jesse Gross <jesse at nicira.com>
---
 datapath/tunnel.c    |   54 ++++++++++++++++++++++++++++++++++++-------------
 datapath/tunnel.h    |    8 +++++-
 datapath/vport-gre.c |   14 ++++++++++--
 3 files changed, 56 insertions(+), 20 deletions(-)

diff --git a/datapath/tunnel.c b/datapath/tunnel.c
index cbfb2e2..8e71fc0 100644
--- a/datapath/tunnel.c
+++ b/datapath/tunnel.c
@@ -665,29 +665,46 @@ static int build_packet(struct vport *vport, const struct tnl_mutable_config *mu
 	new_iph->frag_off = frag_off;
 	ip_select_ident(new_iph, &rt->u.dst, NULL);
 
-	tnl_vport->tnl_ops->build_header(skb, vport, mutable);
+	memset(&IPCB(skb)->opt, 0, sizeof(IPCB(skb)->opt));
+	IPCB(skb)->flags = 0;
 
-	/* Allow our local IP stack to fragment the outer packet even if the
-	 * DF bit is set as a last resort. */
-	skb->local_df = 1;
+	skb = tnl_vport->tnl_ops->build_header(skb, vport, mutable, &rt->u.dst);
+	if (unlikely(!skb))
+		goto error;
 
-	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
-	IPCB(skb)->flags = 0;
+	while (skb) {
+		struct sk_buff *next = skb->next;
+		int frag_len = skb->len - mutable->tunnel_hlen;
 
-	err = ip_local_out(skb);
-	if (likely(net_xmit_eval(err) == 0))
-		return orig_len;
-	else {
-		vport_record_error(vport, VPORT_E_TX_ERROR);
-		return 0;
-	}
+		skb->next = NULL;
+
+		err = ip_local_out(skb);
+		if (unlikely(net_xmit_eval(err) != 0)) {
+			orig_len -= frag_len;
+			skb = next;
+			goto free_frags;
+		}
+
+		skb = next;
+	};
+
+	return orig_len;
 
 error_free:
 	kfree_skb(skb);
 error:
-	vport_record_error(vport, VPORT_E_TX_DROPPED);
-
 	return 0;
+free_frags:
+	/* There's no point in continuing to send fragments once one has been
+	 * dropped, so just free the rest.  This may help relieve the congestion
+	 * that caused the first fragment to be dropped. */
+	while (skb) {
+		struct sk_buff *next = skb->next;
+		orig_len -= skb->len - mutable->tunnel_hlen;
+		kfree_skb(skb);
+		skb = next;
+	};
+	return orig_len;
 }
 
 int tnl_send(struct vport *vport, struct sk_buff *skb)
@@ -828,6 +845,9 @@ int tnl_send(struct vport *vport, struct sk_buff *skb)
 		skb = next_skb;
 	} while (skb);
 
+	if (unlikely(orig_len == 0))
+		vport_record_error(vport, VPORT_E_TX_DROPPED);
+
 	return orig_len;
 
 error_free:
@@ -895,6 +915,7 @@ struct vport *tnl_create(const char *name, const void __user *config,
 {
 	struct vport *vport;
 	struct tnl_vport *tnl_vport;
+	int initial_frag_id;
 	int err;
 
 	vport = vport_alloc(sizeof(struct tnl_vport), vport_ops);
@@ -917,6 +938,9 @@ struct vport *tnl_create(const char *name, const void __user *config,
 	vport_gen_rand_ether_addr(tnl_vport->mutable->eth_addr);
 	tnl_vport->mutable->mtu = ETH_DATA_LEN;
 
+	get_random_bytes(&initial_frag_id, sizeof(int));
+	atomic_set(&tnl_vport->frag_id, initial_frag_id);
+
 	err = set_config(config, tnl_ops, NULL, tnl_vport->mutable);
 	if (err)
 		goto error_free_mutable;
diff --git a/datapath/tunnel.h b/datapath/tunnel.h
index b3f863b..615d5ed 100644
--- a/datapath/tunnel.h
+++ b/datapath/tunnel.h
@@ -44,8 +44,10 @@ struct tnl_ops {
 	u8 ipproto;
 
 	int (*hdr_len)(const struct tnl_port_config *);
-	void (*build_header)(struct sk_buff *, const struct vport *,
-			     const struct tnl_mutable_config *);
+	struct sk_buff *(*build_header)(struct sk_buff *,
+					const struct vport *,
+					const struct tnl_mutable_config *,
+					struct dst_entry *);
 };
 
 struct tnl_vport {
@@ -57,6 +59,8 @@ struct tnl_vport {
 
 	/* Protected by RCU. */
 	struct tnl_mutable_config *mutable;
+
+	atomic_t frag_id;
 };
 
 int tnl_init(void);
diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c
index 1590309..1b25a92 100644
--- a/datapath/vport-gre.c
+++ b/datapath/vport-gre.c
@@ -45,9 +45,10 @@ static int gre_hdr_len(const struct tnl_port_config *port_config)
 	return len;
 }
 
-static void gre_build_header(struct sk_buff *skb,
-			     const struct vport *vport,
-			     const struct tnl_mutable_config *mutable)
+static struct sk_buff *gre_build_header(struct sk_buff *skb,
+					const struct vport *vport,
+					const struct tnl_mutable_config *mutable,
+					struct dst_entry *dst)
 {
 	struct gre_base_hdr *greh = (struct gre_base_hdr *)skb_transport_header(skb);
 	__be32 *options = (__be32 *)(skb_network_header(skb) + mutable->tunnel_hlen
@@ -78,6 +79,13 @@ static void gre_build_header(struct sk_buff *skb,
 						skb->len - sizeof(struct iphdr),
 						0));
 	}
+
+	/* Allow our local IP stack to fragment the outer packet even if the
+	 * DF bit is set as a last resort. */
+	skb->local_df = 1;
+
+	skb->next = NULL;
+	return skb;
 }
 
 static int parse_header(struct iphdr *iph, __be16 *flags, __be32 *key)
-- 
1.7.0.4





More information about the dev mailing list