[ovs-dev] [PATCH v2] stt: linearize for CONFIG_SLUB case

Pravin B Shelar pshelar at ovn.org
Mon Apr 25 21:35:20 UTC 2016


In the STT implementation we saw performance improvements from
linearizing the skb when the kernel uses the SLUB allocator.  So the
following patch skips the zero-copy operation in that case.

Performance numbers for a large-packet TCP test using netperf:

OVS branch       TCP      Host0     Host1
version          Gbps     CPU%      CPU%
-----------------------------------------
2.5              9.4       272       315
master + patch   9.4       230       285

Tested-by: Vasmi Abidi <vabidi at vmware.com>
Signed-off-by: Pravin B Shelar <pshelar at ovn.org>
---
v1->v2:
- During reassembly, try to merge the skb into the head before copying it.
- Define a separate coalesce_skb() that copies the skbs in the SLUB case.
---
 datapath/linux/compat/stt.c | 250 +++++++++++++++++++++++++++++++-------------
 1 file changed, 176 insertions(+), 74 deletions(-)

diff --git a/datapath/linux/compat/stt.c b/datapath/linux/compat/stt.c
index eb397e8..a1b309a 100644
--- a/datapath/linux/compat/stt.c
+++ b/datapath/linux/compat/stt.c
@@ -49,6 +49,14 @@
 #define STT_DST_PORT 7471
 
 #ifdef OVS_STT
+#ifdef CONFIG_SLUB
+/*
+ * We saw better performance when zero copy is skipped on kernels
+ * that use SLUB, so skip the zero-copy reassembly in that case.
+ */
+#define SKIP_ZERO_COPY
+#endif
+
 #define STT_VER 0
 
 /* @list: Per-net list of STT ports.
@@ -219,73 +227,6 @@ static int clear_gso(struct sk_buff *skb)
 	return 0;
 }
 
-static struct sk_buff *normalize_frag_list(struct sk_buff *head,
-					   struct sk_buff **skbp)
-{
-	struct sk_buff *skb = *skbp;
-	struct sk_buff *last;
-
-	do {
-		struct sk_buff *frags;
-
-		if (skb_shared(skb)) {
-			struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
-
-			if (unlikely(!nskb))
-				return ERR_PTR(-ENOMEM);
-
-			nskb->next = skb->next;
-			consume_skb(skb);
-			skb = nskb;
-			*skbp = skb;
-		}
-
-		if (head) {
-			head->len -= skb->len;
-			head->data_len -= skb->len;
-			head->truesize -= skb->truesize;
-		}
-
-		frags = skb_shinfo(skb)->frag_list;
-		if (frags) {
-			int err;
-
-			err = skb_unclone(skb, GFP_ATOMIC);
-			if (unlikely(err))
-				return ERR_PTR(err);
-
-			last = normalize_frag_list(skb, &frags);
-			if (IS_ERR(last))
-				return last;
-
-			skb_shinfo(skb)->frag_list = NULL;
-			last->next = skb->next;
-			skb->next = frags;
-		} else {
-			last = skb;
-		}
-
-		skbp = &skb->next;
-	} while ((skb = skb->next));
-
-	return last;
-}
-
-/* Takes a linked list of skbs, which potentially contain frag_list
- * (whose members in turn potentially contain frag_lists, etc.) and
- * converts them into a single linear linked list.
- */
-static int straighten_frag_list(struct sk_buff **skbp)
-{
-	struct sk_buff *err_skb;
-
-	err_skb = normalize_frag_list(NULL, skbp);
-	if (IS_ERR(err_skb))
-		return PTR_ERR(err_skb);
-
-	return 0;
-}
-
 static void copy_skb_metadata(struct sk_buff *to, struct sk_buff *from)
 {
 	to->protocol = from->protocol;
@@ -465,6 +406,74 @@ static int skb_list_segment(struct sk_buff *head, bool ipv4, int l4_offset)
 	return 0;
 }
 
+#ifndef SKIP_ZERO_COPY
+static struct sk_buff *normalize_frag_list(struct sk_buff *head,
+					   struct sk_buff **skbp)
+{
+	struct sk_buff *skb = *skbp;
+	struct sk_buff *last;
+
+	do {
+		struct sk_buff *frags;
+
+		if (skb_shared(skb)) {
+			struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
+
+			if (unlikely(!nskb))
+				return ERR_PTR(-ENOMEM);
+
+			nskb->next = skb->next;
+			consume_skb(skb);
+			skb = nskb;
+			*skbp = skb;
+		}
+
+		if (head) {
+			head->len -= skb->len;
+			head->data_len -= skb->len;
+			head->truesize -= skb->truesize;
+		}
+
+		frags = skb_shinfo(skb)->frag_list;
+		if (frags) {
+			int err;
+
+			err = skb_unclone(skb, GFP_ATOMIC);
+			if (unlikely(err))
+				return ERR_PTR(err);
+
+			last = normalize_frag_list(skb, &frags);
+			if (IS_ERR(last))
+				return last;
+
+			skb_shinfo(skb)->frag_list = NULL;
+			last->next = skb->next;
+			skb->next = frags;
+		} else {
+			last = skb;
+		}
+
+		skbp = &skb->next;
+	} while ((skb = skb->next));
+
+	return last;
+}
+
+/* Takes a linked list of skbs, which potentially contain frag_list
+ * (whose members in turn potentially contain frag_lists, etc.) and
+ * converts them into a single linear linked list.
+ */
+static int straighten_frag_list(struct sk_buff **skbp)
+{
+	struct sk_buff *err_skb;
+
+	err_skb = normalize_frag_list(NULL, skbp);
+	if (IS_ERR(err_skb))
+		return PTR_ERR(err_skb);
+
+	return 0;
+}
+
 static int coalesce_skb(struct sk_buff **headp)
 {
 	struct sk_buff *frag, *head, *prev;
@@ -510,6 +519,32 @@ static int coalesce_skb(struct sk_buff **headp)
 	head->next = NULL;
 	return 0;
 }
+#else
+static int coalesce_skb(struct sk_buff **headp)
+{
+	struct sk_buff *frag, *head = *headp;
+	int tot_len = FRAG_CB(head)->first.tot_len;
+	int err;
+
+	if (!head->next)
+		return 0;
+
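+	/* Reallocate the head skb with enough tailroom to hold the
+	 * entire reassembled packet.
+	 */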
+	err = pskb_expand_head(head, skb_headroom(head), tot_len, GFP_ATOMIC);
+	if (err)
+		return err;
+
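+	/* Pull any paged data in the head skb into its linear area. */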
+	if (unlikely(!__pskb_pull_tail(head, head->data_len)))
+		BUG();
+
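+	/* Append the data of each chained fragment to the linear head. */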
+	for (frag = head->next; frag; frag = frag->next) {
+		skb_copy_bits(frag, 0, skb_put(head, frag->len), frag->len);
+	}
+
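+	/* All fragments now live in the head skb; drop the chain and
+	 * recompute the truesize of the single linear skb.
+	 */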
+	head->next = NULL;
+	head->truesize = SKB_TRUESIZE(head->len);
+	return 0;
+}
+#endif
 
 static int __try_to_segment(struct sk_buff *skb, bool csum_partial,
 			    bool ipv4, bool tcp, int l4_offset)
@@ -522,6 +557,12 @@ static int __try_to_segment(struct sk_buff *skb, bool csum_partial,
 
 static int try_to_segment(struct sk_buff *skb)
 {
+#ifdef SKIP_ZERO_COPY
+	/* Since coalesce_skb() does not generate a frag_list in this
+	 * case, there is no need to linearize the skb here.
+	 */
+	return 0;
+#else
 	struct stthdr *stth = stt_hdr(skb);
 	bool csum_partial = !!(stth->flags & STT_CSUM_PARTIAL);
 	bool ipv4 = !!(stth->flags & STT_PROTO_IPV4);
@@ -529,16 +570,19 @@ static int try_to_segment(struct sk_buff *skb)
 	int l4_offset = stth->l4_offset;
 
 	return __try_to_segment(skb, csum_partial, ipv4, tcp, l4_offset);
+#endif
 }
 
 static int segment_skb(struct sk_buff **headp, bool csum_partial,
 		       bool ipv4, bool tcp, int l4_offset)
 {
+#ifndef SKIP_ZERO_COPY
 	int err;
 
 	err = coalesce_skb(headp);
 	if (err)
 		return err;
+#endif
 
 	if (skb_shinfo(*headp)->frag_list)
 		return __try_to_segment(*headp, csum_partial,
@@ -1054,16 +1098,61 @@ static struct pkt_frag *lookup_frag(struct net *net,
 	return victim_frag;
 }
 
+#ifdef SKIP_ZERO_COPY
+static int __copy_skb(struct sk_buff *to, struct sk_buff *from,
+		      int *delta, bool *headstolen)
+{
+	int err;
+
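+	/* Merge only while the fragment chain is still the single head
+	 * skb holding the first fragment.
+	 */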
+	if (to->next)
+		return -EINVAL;
+
+	if (FRAG_CB(to)->offset)
+		return -EINVAL;
+
+	if (unlikely(skb_unclone(to, GFP_ATOMIC)))
+		return -ENOMEM;
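+
+	/* First try to merge without copying; skb_try_coalesce() reports
+	 * the truesize delta and whether it stole 'from's head.
+	 */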
+	if (skb_try_coalesce(to, from, headstolen, delta))
+		return 0;
+
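+	/* Coalesce failed: linearize 'to' and copy 'from's data into it. */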
+	*headstolen = false;
+	err = pskb_expand_head(to, skb_headroom(to),
+			       to->len + from->len, GFP_ATOMIC);
+	if (err)
+		return err;
+
+	if (unlikely(!__pskb_pull_tail(to, to->data_len)))
+		BUG();
+
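+	/* 'to' must now be fully linear, with room to append 'from'. */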
+	if (unlikely(to->data_len || (from->len > skb_tailroom(to))))
+		return -EINVAL;
+
+	skb_copy_bits(from, 0, skb_put(to, from->len), from->len);
+
+	*delta = from->len;
+	to->truesize += from->len;
+	return 0;
+}
+#else
+static int __copy_skb(struct sk_buff *to, struct sk_buff *from,
+		      int *delta, bool *headstolen)
+{
+	*headstolen = false;
+	return -EINVAL;
+}
+#endif
+
 static struct sk_buff *reassemble(struct sk_buff *skb)
 {
 	struct iphdr *iph = ip_hdr(skb);
 	struct tcphdr *tcph = tcp_hdr(skb);
 	u32 seq = ntohl(tcph->seq);
 	struct stt_percpu *stt_percpu;
-	struct sk_buff *last_skb;
+	struct sk_buff *last_skb, *copied_skb = NULL;
 	struct pkt_frag *frag;
 	struct pkt_key key;
-	int tot_len;
+	int tot_len, delta = skb->truesize;
+	bool headstolen;
 	u32 hash;
 
 	tot_len = seq >> STT_SEQ_LEN_SHIFT;
@@ -1103,7 +1192,6 @@ static struct sk_buff *reassemble(struct sk_buff *skb)
 		FRAG_CB(skb)->first.set_ecn_ce = false;
 		list_add_tail(&frag->lru_node, &stt_percpu->frag_lru);
 		stt_percpu->frag_mem_used += skb->truesize;
-
 		skb = NULL;
 		goto unlock;
 	}
@@ -1114,8 +1202,13 @@ static struct sk_buff *reassemble(struct sk_buff *skb)
 	last_skb = FRAG_CB(frag->skbs)->first.last_skb;
 	if (likely(FRAG_CB(last_skb)->offset + last_skb->len ==
 		   FRAG_CB(skb)->offset)) {
-		last_skb->next = skb;
-		FRAG_CB(frag->skbs)->first.last_skb = skb;
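+		/* In-order fragment: try to merge it into the head skb
+		 * before falling back to chaining it.
+		 */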
+		if (!__copy_skb(frag->skbs, skb, &delta, &headstolen)) {
+			copied_skb = skb;
+		} else {
+			last_skb->next = skb;
+			FRAG_CB(frag->skbs)->first.last_skb = skb;
+		}
 	} else {
 		struct sk_buff *prev = NULL, *next;
 
@@ -1154,8 +1247,15 @@ static struct sk_buff *reassemble(struct sk_buff *skb)
 
 	FRAG_CB(frag->skbs)->first.set_ecn_ce |= INET_ECN_is_ce(iph->tos);
 	FRAG_CB(frag->skbs)->first.rcvd_len += skb->len;
-	FRAG_CB(frag->skbs)->first.mem_used += skb->truesize;
-	stt_percpu->frag_mem_used += skb->truesize;
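+	/* If the skb was merged and its head was stolen, its data now
+	 * belongs to the fragment chain; drop it from the skb's own
+	 * accounting.
+	 */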
+	if (copied_skb && headstolen) {
+		skb->len = 0;
+		skb->data_len = 0;
+		skb->truesize -= delta;
+	}
+	stt_percpu->frag_mem_used += delta;
+	FRAG_CB(frag->skbs)->first.mem_used += delta;
 
 	if (FRAG_CB(frag->skbs)->first.tot_len ==
 	    FRAG_CB(frag->skbs)->first.rcvd_len) {
@@ -1174,6 +1274,8 @@ static struct sk_buff *reassemble(struct sk_buff *skb)
 		skb = NULL;
 	}
 
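+	/* A merged skb's payload lives on in the fragment chain;
+	 * kfree_skb_partial() keeps the head if it was stolen.
+	 */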
+	if (copied_skb)
+		kfree_skb_partial(copied_skb, headstolen);
 	goto unlock;
 
 unlock_free:
-- 
2.5.5