[ovs-dev] [PATCHv3 3/5] datapath: Add SCTP support

Joe Stringer joe at wand.net.nz
Wed Jun 12 05:35:56 UTC 2013


This patch adds support for rewriting SCTP source and destination ports,
similar to the functionality already available for TCP/UDP.

Rewriting SCTP ports is expensive due to double-recalculation of the
SCTP checksums; this is performed to ensure that packets traversing OVS
with invalid checksums will continue to the destination with any
checksum corruption intact.

Reviewed-by: Simon Horman <horms at verge.net.au>
Signed-off-by: Joe Stringer <joe at wand.net.nz>
---
This patch introduces sparse warnings when calling
sctp_end_cksum(), due to a bug that was fixed in v3.10:
https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=eee1d5a14780b9391ec51f3feaf4cffb521ddbb1

I've now tested this through use of sctp_test from lksctp, linking two
VMs via the OVS bridge. I added the following flows:
"sctp,tp_dst=7777 actions=mod_tp_dst:8888,normal"
"sctp,tp_src=8888 actions=mod_tp_src:7777,normal"

Then, with an lksctp server running on one host on port 8888, I ran the
lksctp client with the destination port 7777. The connections establish
and transfer data successfully. I took pcaps from VM interfaces and
verified with wireshark that the checksums were correct.

v3: Rebase
    Refactor sctp checksum computation
    Handle skb fragments for checksum calculation
    Fix sparse errors
v2: Remove SCTP checksum recalculation when changing IP address
    Calculate checksums as delta from incoming checksum
---
 datapath/actions.c                         |   40 +++++++++++++++++++
 datapath/checksum.c                        |    3 ++
 datapath/checksum.h                        |   15 +++++++
 datapath/datapath.c                        |    6 +++
 datapath/flow.c                            |   60 ++++++++++++++++++++++++++++
 datapath/flow.h                            |    8 ++--
 datapath/linux/Modules.mk                  |    1 +
 datapath/linux/compat/include/linux/sctp.h |   17 ++++++++
 8 files changed, 146 insertions(+), 4 deletions(-)
 create mode 100644 datapath/linux/compat/include/linux/sctp.h

diff --git a/datapath/actions.c b/datapath/actions.c
index 0dac658..d4fdd65 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -22,11 +22,13 @@
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <linux/openvswitch.h>
+#include <linux/sctp.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/in6.h>
 #include <linux/if_arp.h>
 #include <linux/if_vlan.h>
+#include <linux/crc32c.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
 #include <net/checksum.h>
@@ -352,6 +354,40 @@ static int set_tcp(struct sk_buff *skb, const struct ovs_key_tcp *tcp_port_key)
 	return 0;
 }
 
+/* Rewrites the SCTP ports of 'skb' to those given in 'sctp_port_key'. */
+static int set_sctp(struct sk_buff *skb,
+		     const struct ovs_key_sctp *sctp_port_key)
+{
+	struct sctphdr *sh;
+	int err;
+
+	err = make_writable(skb, skb_transport_offset(skb) + sizeof(*sh));
+	if (unlikely(err))
+		return err;
+
+	sh = sctp_hdr(skb);
+	if (sctp_port_key->sctp_src != sh->source ||
+	    sctp_port_key->sctp_dst != sh->dest) {
+		__le32 old_correct_csum, new_csum, old_csum;
+
+		old_csum = sh->checksum;
+		old_correct_csum = compute_sctp_csum(skb);
+
+		sh->source = sctp_port_key->sctp_src;
+		sh->dest = sctp_port_key->sctp_dst;
+
+		new_csum = compute_sctp_csum(skb);
+
+		/* Carry any checksum errors through.  ip_summed is left as
+		 * is: the result may be invalid, so don't mark it verified. */
+		sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
+
+		skb_clear_rxhash(skb);
+	}
+
+	return 0;
+}
+
 static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
 {
 	struct vport *vport;
@@ -459,6 +495,10 @@ static int execute_set_action(struct sk_buff *skb,
 	case OVS_KEY_ATTR_UDP:
 		err = set_udp(skb, nla_data(nested_attr));
 		break;
+
+	case OVS_KEY_ATTR_SCTP:
+		err = set_sctp(skb, nla_data(nested_attr));
+		break;
 	}
 
 	return err;
diff --git a/datapath/checksum.c b/datapath/checksum.c
index 5146c65..bfa75a7 100644
--- a/datapath/checksum.c
+++ b/datapath/checksum.c
@@ -59,6 +59,9 @@ static int vswitch_skb_checksum_setup(struct sk_buff *skb)
 	case IPPROTO_UDP:
 		csum_offset = offsetof(struct udphdr, check);
 		break;
+	case IPPROTO_SCTP:
+		csum_offset = offsetof(struct sctphdr, checksum);
+		break;
 	default:
 		if (net_ratelimit())
 			pr_err("Attempting to checksum a non-TCP/UDP packet, "
diff --git a/datapath/checksum.h b/datapath/checksum.h
index a440c59..a97d47b 100644
--- a/datapath/checksum.h
+++ b/datapath/checksum.h
@@ -23,6 +23,7 @@
 #include <linux/version.h>
 
 #include <net/checksum.h>
+#include <net/sctp/checksum.h>
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) || \
 	(defined(CONFIG_XEN) && defined(HAVE_PROTO_DATA_VALID))
@@ -170,4 +171,18 @@ static inline unsigned char *rpl__pskb_pull_tail(struct sk_buff *skb,
 #define __pskb_pull_tail rpl__pskb_pull_tail
 #endif
 
+/* Returns the SCTP checksum of 'skb', computed over its linear data from the
+ * transport header on, then the linear data of each skb_walk_frags() entry. */
+static inline __le32 compute_sctp_csum(const struct sk_buff *skb)
+{
+	__u16 head_len = skb_headlen(skb) - skb_transport_offset(skb);
+	__u32 crc32 = sctp_start_cksum((__u8 *)sctp_hdr(skb), head_len);
+	const struct sk_buff *frag;
+
+	skb_walk_frags(skb, frag)
+		crc32 = sctp_update_cksum((u8 *) frag->data,
+					  skb_headlen(frag), crc32);
+	return sctp_end_cksum(crc32);
+}
+
 #endif /* checksum.h */
diff --git a/datapath/datapath.c b/datapath/datapath.c
index 42af315..8839213 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -723,6 +723,12 @@ static int validate_set(const struct nlattr *a,
 
 		return validate_tp_port(flow_key);
 
+	case OVS_KEY_ATTR_SCTP:
+		if (flow_key->ip.proto != IPPROTO_SCTP)
+			return -EINVAL;
+
+		return validate_tp_port(flow_key);
+
 	default:
 		return -EINVAL;
 	}
diff --git a/datapath/flow.c b/datapath/flow.c
index 7f897bd..40be695 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -34,6 +34,7 @@
 #include <linux/if_arp.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
+#include <linux/sctp.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/icmp.h>
@@ -103,6 +104,12 @@ static bool udphdr_ok(struct sk_buff *skb)
 				  sizeof(struct udphdr));
 }
 
+static bool sctphdr_ok(struct sk_buff *skb)
+{
+	unsigned int len = skb_transport_offset(skb) + sizeof(struct sctphdr);
+	return pskb_may_pull(skb, len);
+}
+
 static bool icmphdr_ok(struct sk_buff *skb)
 {
 	return pskb_may_pull(skb, skb_transport_offset(skb) +
@@ -698,6 +705,13 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 				key->ipv4.tp.src = udp->source;
 				key->ipv4.tp.dst = udp->dest;
 			}
+		} else if (key->ip.proto == IPPROTO_SCTP) {
+			key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
+			if (sctphdr_ok(skb)) {
+				struct sctphdr *sctp = sctp_hdr(skb);
+				key->ipv4.tp.src = sctp->source;
+				key->ipv4.tp.dst = sctp->dest;
+			}
 		} else if (key->ip.proto == IPPROTO_ICMP) {
 			key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
 			if (icmphdr_ok(skb)) {
@@ -762,6 +776,13 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 				key->ipv6.tp.src = udp->source;
 				key->ipv6.tp.dst = udp->dest;
 			}
+		} else if (key->ip.proto == NEXTHDR_SCTP) {
+			key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
+			if (sctphdr_ok(skb)) {
+				struct sctphdr *sctp = sctp_hdr(skb);
+				key->ipv6.tp.src = sctp->source;
+				key->ipv6.tp.dst = sctp->dest;
+			}
 		} else if (key->ip.proto == NEXTHDR_ICMP) {
 			key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
 			if (icmp6hdr_ok(skb)) {
@@ -843,6 +864,7 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
 	[OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6),
 	[OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp),
 	[OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp),
+	[OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp),
 	[OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp),
 	[OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6),
 	[OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
@@ -856,6 +878,7 @@ static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
 	const struct ovs_key_icmp *icmp_key;
 	const struct ovs_key_tcp *tcp_key;
 	const struct ovs_key_udp *udp_key;
+	const struct ovs_key_sctp *sctp_key;
 
 	switch (swkey->ip.proto) {
 	case IPPROTO_TCP:
@@ -880,6 +903,17 @@ static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
 		swkey->ipv4.tp.dst = udp_key->udp_dst;
 		break;
 
+	case IPPROTO_SCTP:
+		if (!(*attrs & (1 << OVS_KEY_ATTR_SCTP)))
+			return -EINVAL;
+		*attrs &= ~(1 << OVS_KEY_ATTR_SCTP);
+
+		*key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
+		sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
+		swkey->ipv4.tp.src = sctp_key->sctp_src;
+		swkey->ipv4.tp.dst = sctp_key->sctp_dst;
+		break;
+
 	case IPPROTO_ICMP:
 		if (!(*attrs & (1 << OVS_KEY_ATTR_ICMP)))
 			return -EINVAL;
@@ -901,6 +935,7 @@ static int ipv6_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
 	const struct ovs_key_icmpv6 *icmpv6_key;
 	const struct ovs_key_tcp *tcp_key;
 	const struct ovs_key_udp *udp_key;
+	const struct ovs_key_sctp *sctp_key;
 
 	switch (swkey->ip.proto) {
 	case IPPROTO_TCP:
@@ -925,6 +960,17 @@ static int ipv6_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
 		swkey->ipv6.tp.dst = udp_key->udp_dst;
 		break;
 
+	case IPPROTO_SCTP:
+		if (!(*attrs & (1 << OVS_KEY_ATTR_SCTP)))
+			return -EINVAL;
+		*attrs &= ~(1 << OVS_KEY_ATTR_SCTP);
+
+		*key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
+		sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
+		swkey->ipv6.tp.src = sctp_key->sctp_src;
+		swkey->ipv6.tp.dst = sctp_key->sctp_dst;
+		break;
+
 	case IPPROTO_ICMPV6:
 		if (!(*attrs & (1 << OVS_KEY_ATTR_ICMPV6)))
 			return -EINVAL;
@@ -1454,6 +1500,20 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
 				udp_key->udp_src = swkey->ipv6.tp.src;
 				udp_key->udp_dst = swkey->ipv6.tp.dst;
 			}
+		} else if (swkey->ip.proto == IPPROTO_SCTP) {
+			struct ovs_key_sctp *sctp_key;
+
+			nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
+			if (!nla)
+				goto nla_put_failure;
+			sctp_key = nla_data(nla);
+			if (swkey->eth.type == htons(ETH_P_IP)) {
+				sctp_key->sctp_src = swkey->ipv4.tp.src;
+				sctp_key->sctp_dst = swkey->ipv4.tp.dst;
+			} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
+				sctp_key->sctp_src = swkey->ipv6.tp.src;
+				sctp_key->sctp_dst = swkey->ipv6.tp.dst;
+			}
 		} else if (swkey->eth.type == htons(ETH_P_IP) &&
 			   swkey->ip.proto == IPPROTO_ICMP) {
 			struct ovs_key_icmp *icmp_key;
diff --git a/datapath/flow.h b/datapath/flow.h
index dba66cf..8fc4899 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -86,8 +86,8 @@ struct sw_flow_key {
 			} addr;
 			union {
 				struct {
-					__be16 src;		/* TCP/UDP source port. */
-					__be16 dst;		/* TCP/UDP destination port. */
+					__be16 src;		/* TCP/UDP/SCTP source port. */
+					__be16 dst;		/* TCP/UDP/SCTP destination port. */
 				} tp;
 				struct {
 					u8 sha[ETH_ALEN];	/* ARP source hardware address. */
@@ -102,8 +102,8 @@ struct sw_flow_key {
 			} addr;
 			__be32 label;			/* IPv6 flow label. */
 			struct {
-				__be16 src;		/* TCP/UDP source port. */
-				__be16 dst;		/* TCP/UDP destination port. */
+				__be16 src;		/* TCP/UDP/SCTP source port. */
+				__be16 dst;		/* TCP/UDP/SCTP destination port. */
 			} tp;
 			struct {
 				struct in6_addr target;	/* ND target address. */
diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk
index 1434a2d..a435d1f 100644
--- a/datapath/linux/Modules.mk
+++ b/datapath/linux/Modules.mk
@@ -49,6 +49,7 @@ openvswitch_headers += \
 	linux/compat/include/linux/rcupdate.h \
 	linux/compat/include/linux/reciprocal_div.h \
 	linux/compat/include/linux/rtnetlink.h \
+	linux/compat/include/linux/sctp.h \
 	linux/compat/include/linux/skbuff.h \
 	linux/compat/include/linux/slab.h \
 	linux/compat/include/linux/stddef.h \
diff --git a/datapath/linux/compat/include/linux/sctp.h b/datapath/linux/compat/include/linux/sctp.h
new file mode 100644
index 0000000..e6b9174
--- /dev/null
+++ b/datapath/linux/compat/include/linux/sctp.h
@@ -0,0 +1,17 @@
+#ifndef __LINUX_SCTP_WRAPPER_H
+#define __LINUX_SCTP_WRAPPER_H 1
+
+#include_next <linux/sctp.h>
+
+#ifndef NEXTHDR_SCTP
+#define NEXTHDR_SCTP    132 /* Stream Control Transmission Protocol */
+#endif
+/* Fallback sctp_hdr(): returns skb's transport header as an SCTP header. */
+#ifndef HAVE_SKBUFF_HEADER_HELPERS
+static inline struct sctphdr *sctp_hdr(const struct sk_buff *skb)
+{
+	return (struct sctphdr *)skb_transport_header(skb);
+}
+#endif /* HAVE_SKBUFF_HEADER_HELPERS */
+
+#endif /* __LINUX_SCTP_WRAPPER_H */
-- 
1.7.10.4




More information about the dev mailing list