[ovs-dev] [PATCH 2/4] datapath: Use vlan acceleration for vlan operations.

Jesse Gross jesse at nicira.com
Sun Feb 6 00:14:47 UTC 2011


Using the kernel vlan acceleration has a number of benefits:
it enables hardware tagging, allows usage of TSO and checksum
offloading, and is generally easier to manipulate.  This switches
the vlan actions to use the skb->vlan_tci field for any necessary
changes.  On kernels that do not support vlan acceleration in a way
that we can use (in particular, kernels before 2.6.37), we perform
any necessary conversions, such as tagging and GSO, before the
packet leaves Open vSwitch.

Signed-off-by: Jesse Gross <jesse at nicira.com>
---
 datapath/actions.c            |  150 ++++++++---------------------------------
 datapath/datapath.c           |    8 ++
 datapath/flow.c               |    8 ++-
 datapath/tunnel.c             |  112 ++++++++++++++++++-------------
 datapath/vport-internal_dev.c |   11 +++-
 datapath/vport-netdev.c       |   46 ++++++++++++-
 6 files changed, 163 insertions(+), 172 deletions(-)

diff --git a/datapath/actions.c b/datapath/actions.c
index 3223c65..b7f9deb 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -24,6 +24,7 @@
 #include "checksum.h"
 #include "datapath.h"
 #include "openvswitch/datapath-protocol.h"
+#include "vlan.h"
 #include "vport.h"
 
 static int do_execute_actions(struct datapath *, struct sk_buff *,
@@ -52,14 +53,22 @@ static struct sk_buff *make_writable(struct sk_buff *skb, unsigned min_headroom)
 	return NULL;
 }
 
-static struct sk_buff *vlan_pull_tag(struct sk_buff *skb)
+static struct sk_buff *strip_vlan(struct sk_buff *skb)
 {
-	struct vlan_ethhdr *vh = vlan_eth_hdr(skb);
 	struct ethhdr *eh;
 
-	/* Verify we were given a vlan packet */
-	if (vh->h_vlan_proto != htons(ETH_P_8021Q) || skb->len < VLAN_ETH_HLEN)
+	if (vlan_tx_tag_present(skb)) {
+		vlan_set_tci(skb, 0);
 		return skb;
+	}
+
+	if (unlikely(vlan_eth_hdr(skb)->h_vlan_proto != htons(ETH_P_8021Q) ||
+	    skb->len < VLAN_ETH_HLEN))
+		return skb;
+
+	skb = make_writable(skb, 0);
+	if (unlikely(!skb))
+		return NULL;
 
 	if (get_ip_summed(skb) == OVS_CSUM_COMPLETE)
 		skb->csum = csum_sub(skb->csum, csum_partial(skb->data
@@ -80,133 +89,32 @@ static struct sk_buff *modify_vlan_tci(struct datapath *dp, struct sk_buff *skb,
 				       const struct nlattr *a, u32 actions_len)
 {
 	__be16 tci = nla_get_be16(a);
+	struct vlan_ethhdr *vh;
+	__be16 old_tci;
 
-	skb = make_writable(skb, VLAN_HLEN);
-	if (!skb)
-		return ERR_PTR(-ENOMEM);
-
-	if (skb->protocol == htons(ETH_P_8021Q)) {
-		/* Modify vlan id, but maintain other TCI values */
-		struct vlan_ethhdr *vh;
-		__be16 old_tci;
-
-		if (skb->len < VLAN_ETH_HLEN)
-			return skb;
-
-		vh = vlan_eth_hdr(skb);
-		old_tci = vh->h_vlan_TCI;
-
-		vh->h_vlan_TCI = tci;
-
-		if (get_ip_summed(skb) == OVS_CSUM_COMPLETE) {
-			__be16 diff[] = { ~old_tci, vh->h_vlan_TCI };
-
-			skb->csum = ~csum_partial((char *)diff, sizeof(diff),
-						~skb->csum);
-		}
-	} else {
-		int err;
+	if (vlan_tx_tag_present(skb) || skb->protocol != htons(ETH_P_8021Q))
+		return __vlan_hwaccel_put_tag(skb, ntohs(tci));
 
-		/* Add vlan header */
+	skb = make_writable(skb, VLAN_HLEN);
+	if (unlikely(!skb))
+		return NULL;
 
-		/* Set up checksumming pointers for checksum-deferred packets
-		 * on Xen.  Otherwise, dev_queue_xmit() will try to do this
-		 * when we send the packet out on the wire, and it will fail at
-		 * that point because skb_checksum_setup() will not look inside
-		 * an 802.1Q header. */
-		err = vswitch_skb_checksum_setup(skb);
-		if (unlikely(err)) {
-			kfree_skb(skb);
-			return ERR_PTR(err);
-		}
+	if (unlikely(skb->len < VLAN_ETH_HLEN))
+		return skb;
 
-		/* GSO is not implemented for packets with an 802.1Q header, so
-		 * we have to do segmentation before we add that header.
-		 *
-		 * GSO does work with hardware-accelerated VLAN tagging, but we
-		 * can't use hardware-accelerated VLAN tagging since it
-		 * requires the device to have a VLAN group configured (with
-		 * e.g. vconfig(8)) and we don't do that.
-		 *
-		 * Having to do this here may be a performance loss, since we
-		 * can't take advantage of TSO hardware support, although it
-		 * does not make a measurable network performance difference
-		 * for 1G Ethernet.  Fixing that would require patching the
-		 * kernel (either to add GSO support to the VLAN protocol or to
-		 * support hardware-accelerated VLAN tagging without VLAN
-		 * groups configured). */
-		if (skb_is_gso(skb)) {
-			const struct nlattr *actions_left;
-			int actions_len_left;
-			struct sk_buff *segs;
-
-			segs = skb_gso_segment(skb, 0);
-			kfree_skb(skb);
-			if (IS_ERR(segs))
-				return ERR_CAST(segs);
-
-			actions_len_left = actions_len;
-			actions_left = nla_next(a, &actions_len_left);
-
-			do {
-				struct sk_buff *nskb = segs->next;
-
-				segs->next = NULL;
-
-				/* GSO can change the checksum type so update.*/
-				compute_ip_summed(segs, true);
-
-				segs = __vlan_put_tag(segs, ntohs(tci));
-				err = -ENOMEM;
-				if (segs) {
-					err = do_execute_actions(
-						dp, segs, key, actions_left,
-						actions_len_left);
-				}
-
-				if (unlikely(err)) {
-					while ((segs = nskb)) {
-						nskb = segs->next;
-						segs->next = NULL;
-						kfree_skb(segs);
-					}
-					return ERR_PTR(err);
-				}
-
-				segs = nskb;
-			} while (segs->next);
-
-			skb = segs;
-			compute_ip_summed(skb, true);
-		}
+	vh = vlan_eth_hdr(skb);
 
-		/* The hardware-accelerated version of vlan_put_tag() works
-		 * only for a device that has a VLAN group configured (with
-		 * e.g. vconfig(8)), so call the software-only version
-		 * __vlan_put_tag() directly instead.
-		 */
-		skb = __vlan_put_tag(skb, ntohs(tci));
-		if (!skb)
-			return ERR_PTR(-ENOMEM);
+	old_tci = vh->h_vlan_TCI;
+	vh->h_vlan_TCI = tci;
 
-		/* GSO doesn't fix up the hardware computed checksum so this
-		 * will only be hit in the non-GSO case. */
-		if (get_ip_summed(skb) == OVS_CSUM_COMPLETE)
-			skb->csum = csum_add(skb->csum, csum_partial(skb->data
-						+ ETH_HLEN, VLAN_HLEN, 0));
+	if (get_ip_summed(skb) == OVS_CSUM_COMPLETE) {
+		__be16 diff[] = { ~old_tci, vh->h_vlan_TCI };
+		skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum);
 	}
 
 	return skb;
 }
 
-static struct sk_buff *strip_vlan(struct sk_buff *skb)
-{
-	skb = make_writable(skb, 0);
-	if (skb)
-		vlan_pull_tag(skb);
-	return skb;
-}
-
 static bool is_ip(struct sk_buff *skb, const struct sw_flow_key *key)
 {
 	return (key->dl_type == htons(ETH_P_IP) &&
@@ -417,8 +325,6 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 
 		case ODP_ACTION_ATTR_SET_DL_TCI:
 			skb = modify_vlan_tci(dp, skb, key, a, rem);
-			if (IS_ERR(skb))
-				return PTR_ERR(skb);
 			break;
 
 		case ODP_ACTION_ATTR_STRIP_VLAN:
diff --git a/datapath/datapath.c b/datapath/datapath.c
index ba32e37..c48dc9d 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -483,6 +483,14 @@ static int queue_control_packets(struct datapath *dp, struct sk_buff *skb,
 		nskb = skb->next;
 		skb->next = NULL;
 
+		if (vlan_tx_tag_present(skb)) {
+			skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+			if (unlikely(!skb)) {
+				err = -ENOMEM;
+				goto err_kfree_skbs;
+			}
+		}
+
 		len = sizeof(struct odp_header);
 		len += nla_total_size(skb->len);
 		len += nla_total_size(FLOW_BUFSIZE);
diff --git a/datapath/flow.c b/datapath/flow.c
index 735e147..4b0e6cc 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -34,6 +34,8 @@
 #include <net/ipv6.h>
 #include <net/ndisc.h>
 
+#include "vlan.h"
+
 static struct kmem_cache *flow_cache;
 static unsigned int hash_seed __read_mostly;
 
@@ -449,8 +451,12 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 
 	/* dl_type, dl_vlan, dl_vlan_pcp. */
 	__skb_pull(skb, 2 * ETH_ALEN);
-	if (eth->h_proto == htons(ETH_P_8021Q))
+
+	if (vlan_tx_tag_present(skb))
+		key->dl_tci = htons(vlan_get_tci(skb));
+	else if (eth->h_proto == htons(ETH_P_8021Q))
 		parse_vlan(skb, key);
+
 	key->dl_type = parse_ethertype(skb);
 	skb_reset_network_header(skb);
 	__skb_push(skb, skb->data - (unsigned char *)eth);
diff --git a/datapath/tunnel.c b/datapath/tunnel.c
index 40577fb..63023de 100644
--- a/datapath/tunnel.c
+++ b/datapath/tunnel.c
@@ -33,6 +33,7 @@
 #include "datapath.h"
 #include "table.h"
 #include "tunnel.h"
+#include "vlan.h"
 #include "vport.h"
 #include "vport-generic.h"
 #include "vport-internal_dev.h"
@@ -439,6 +440,7 @@ void tnl_rcv(struct vport *vport, struct sk_buff *skb)
 
 	ecn_decapsulate(skb);
 	compute_ip_summed(skb, false);
+	vlan_set_tci(skb, 0);
 
 	vport_receive(vport, skb);
 }
@@ -682,7 +684,8 @@ bool tnl_frag_needed(struct vport *vport, const struct tnl_mutable_config *mutab
 
 		vh->h_vlan_TCI = vlan_eth_hdr(skb)->h_vlan_TCI;
 		vh->h_vlan_encapsulated_proto = skb->protocol;
-	}
+	} else
+		vlan_set_tci(nskb, vlan_get_tci(skb));
 	skb_reset_mac_header(nskb);
 
 	/* Protocol */
@@ -720,17 +723,27 @@ static bool check_mtu(struct sk_buff *skb,
 	int mtu = 0;
 	unsigned int packet_length = skb->len - ETH_HLEN;
 
-	if (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q))
+	/* Allow for one level of tagging in the packet length. */
+	if (!vlan_tx_tag_present(skb) &&
+	    eth_hdr(skb)->h_proto == htons(ETH_P_8021Q))
 		packet_length -= VLAN_HLEN;
 
 	if (pmtud) {
+		int vlan_header = 0;
+
 		frag_off = htons(IP_DF);
 
+		/* The tag needs to go in packet regardless of where it
+		 * currently is, so subtract it from the MTU.
+		 */
+		if (vlan_tx_tag_present(skb) ||
+		    eth_hdr(skb)->h_proto == htons(ETH_P_8021Q))
+			vlan_header = VLAN_HLEN;
+
 		mtu = dst_mtu(&rt_dst(rt))
 			- ETH_HLEN
 			- mutable->tunnel_hlen
-			- (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q) ?
-				VLAN_HLEN : 0);
+			- vlan_header;
 	}
 
 	if (skb->protocol == htons(ETH_P_IP)) {
@@ -1041,27 +1054,31 @@ static struct sk_buff *handle_offloads(struct sk_buff *skb,
 		goto error_free;
 
 	min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
-			+ mutable->tunnel_hlen;
+			+ mutable->tunnel_hlen
+			+ (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
 
-	if (skb_is_gso(skb)) {
-		struct sk_buff *nskb;
+	skb = check_headroom(skb, min_headroom);
+	if (IS_ERR(skb)) {
+		err = PTR_ERR(skb);
+		goto error;
+	}
 
-		/*
-		 * If we are doing GSO on a pskb it is better to make sure that
-		 * the headroom is correct now.  We will only have to copy the
-		 * portion in the linear data area and GSO will preserve
-		 * headroom when it creates the segments.  This is particularly
-		 * beneficial on Xen where we get a lot of GSO pskbs.
-		 * Conversely, we avoid copying if it is just to get our own
-		 * writable clone because GSO will do the copy for us.
-		 */
-		if (skb_headroom(skb) < min_headroom) {
-			skb = check_headroom(skb, min_headroom);
-			if (IS_ERR(skb)) {
-				err = PTR_ERR(skb);
-				goto error;
-			}
+	/* On kernels after 2.6.37 GSO can handle vlan tags, so tag it now
+	 * so we don't have to fix up every segment later.
+	 */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37)
+	if (vlan_tx_tag_present(skb)) {
+		skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+		if (unlikely(!skb)) {
+			err = -ENOMEM;
+			goto error;
 		}
+		vlan_set_tci(skb, 0);
+	}
+#endif
+
+	if (skb_is_gso(skb)) {
+		struct sk_buff *nskb;
 
 		nskb = skb_gso_segment(skb, 0);
 		kfree_skb(skb);
@@ -1071,32 +1088,23 @@ static struct sk_buff *handle_offloads(struct sk_buff *skb,
 		}
 
 		skb = nskb;
-	} else {
-		skb = check_headroom(skb, min_headroom);
-		if (IS_ERR(skb)) {
-			err = PTR_ERR(skb);
-			goto error;
-		}
-
-		if (skb->ip_summed == CHECKSUM_PARTIAL) {
-			/*
-			 * Pages aren't locked and could change at any time.
-			 * If this happens after we compute the checksum, the
-			 * checksum will be wrong.  We linearize now to avoid
-			 * this problem.
-			 */
-			if (unlikely(need_linearize(skb))) {
-				err = __skb_linearize(skb);
-				if (unlikely(err))
-					goto error_free;
-			}
-
-			err = skb_checksum_help(skb);
+	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		/* Pages aren't locked and could change at any time.
+		 * If this happens after we compute the checksum, the
+		 * checksum will be wrong.  We linearize now to avoid
+		 * this problem.
+		 */
+		if (unlikely(need_linearize(skb))) {
+			err = __skb_linearize(skb);
 			if (unlikely(err))
 				goto error_free;
-		} else if (skb->ip_summed == CHECKSUM_COMPLETE)
-			skb->ip_summed = CHECKSUM_NONE;
-	}
+		}
+
+		err = skb_checksum_help(skb);
+		if (unlikely(err))
+			goto error_free;
+	} else if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->ip_summed = CHECKSUM_NONE;
 
 	return skb;
 
@@ -1159,7 +1167,8 @@ int tnl_send(struct vport *vport, struct sk_buff *skb)
 	u8 tos;
 
 	/* Validate the protocol headers before we try to use them. */
-	if (skb->protocol == htons(ETH_P_8021Q)) {
+	if (skb->protocol == htons(ETH_P_8021Q) &&
+	    !vlan_tx_tag_present(skb)) {
 		if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
 			goto error_free;
 
@@ -1250,6 +1259,15 @@ int tnl_send(struct vport *vport, struct sk_buff *skb)
 		struct sk_buff *next_skb = skb->next;
 		skb->next = NULL;
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
+		if (vlan_tx_tag_present(skb)) {
+			skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+			if (unlikely(!skb))
+				goto next;
+			vlan_set_tci(skb, 0);
+		}
+#endif
+
 		if (likely(cache)) {
 			skb_push(skb, cache->len);
 			memcpy(skb->data, get_cached_header(cache), cache->len);
diff --git a/datapath/vport-internal_dev.c b/datapath/vport-internal_dev.c
index be29074..d1feea1 100644
--- a/datapath/vport-internal_dev.c
+++ b/datapath/vport-internal_dev.c
@@ -6,6 +6,7 @@
  * kernel, by Linus Torvalds and others.
  */
 
+#include <linux/if_vlan.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
@@ -230,8 +231,16 @@ static int internal_dev_recv(struct vport *vport, struct sk_buff *skb)
 	struct net_device *netdev = netdev_vport_priv(vport)->dev;
 	int len;
 
-	skb->dev = netdev;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
+	if (vlan_tx_tag_present(skb)) {
+		skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+		if (unlikely(!skb))
+			return 0;
+	}
+#endif
+
 	len = skb->len;
+	skb->dev = netdev;
 	skb->pkt_type = PACKET_HOST;
 	skb->protocol = eth_type_trans(skb, netdev);
 
diff --git a/datapath/vport-netdev.c b/datapath/vport-netdev.c
index 85e0eb9..23ec1a5 100644
--- a/datapath/vport-netdev.c
+++ b/datapath/vport-netdev.c
@@ -261,10 +261,54 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb)
 static int netdev_send(struct vport *vport, struct sk_buff *skb)
 {
 	struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
-	int len = skb->len;
+	int len;
 
 	skb->dev = netdev_vport->dev;
 	forward_ip_summed(skb);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
+	if (vlan_tx_tag_present(skb)) {
+		int err;
+
+		err = vswitch_skb_checksum_setup(skb);
+		if (unlikely(err)) {
+			kfree_skb(skb);
+			return 0;
+		}
+
+		if (skb_is_gso(skb)) {
+			struct sk_buff *nskb;
+
+			nskb = skb_gso_segment(skb, 0);
+			kfree_skb(skb);
+			skb = nskb;
+			if (IS_ERR(skb))
+				return 0;
+
+			len = 0;
+			do {
+				nskb = skb->next;
+				skb->next = NULL;
+
+				skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+				if (likely(skb)) {
+					len += skb->len;
+					dev_queue_xmit(skb);
+				}
+
+				skb = nskb;
+			} while (skb);
+
+			return len;
+		} else {
+			skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+			if (unlikely(!skb))
+				return 0;
+		}
+	}
+#endif
+
+	len = skb->len;
 	dev_queue_xmit(skb);
 
 	return len;
-- 
1.7.1





More information about the dev mailing list