[ovs-dev] [PATCH 2/4] Upstream GRE: Allow multiple GREPROTO_CISCO protocol handlers.

Pravin B Shelar pshelar at nicira.com
Fri Oct 5 20:01:20 UTC 2012


Currently only one protocol handler is allowed for the GREPROTO_CISCO
protocol. Soon the OVS tunnel code will register for the same protocol
as the GRE device.
The following patch extends the GRE de-multiplexer so that multiple GRE
modules can register a GRE protocol handler.

Signed-off-by: Pravin B Shelar <pshelar at nicira.com>
---
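
For reviewers: below is a minimal sketch (not part of this patch) of how a
consumer module would be expected to use the new hooks. All of the my_*
names are made up for illustration, and the priority slot an actual consumer
(ip_gre or the OVS vport) claims is decided by later patches in this series.

	#include <linux/module.h>
	#include <linux/rtnetlink.h>
	#include <linux/skbuff.h>
	#include <net/gre.h>

	/* Receive hook: return > 0 to let the next registered handler look
	 * at the skb, <= 0 if the packet was consumed (or freed). */
	static int my_gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
	{
		return 1;
	}

	/* ICMP error hook: same return convention as the receive hook. */
	static int my_gre_err(struct sk_buff *skb, u32 info,
			      const struct tnl_ptk_info *tpi)
	{
		return 1;
	}

	static const struct gre_protocol_v0 my_gre_proto = {
		.handler     = my_gre_rcv,
		.err_handler = my_gre_err,
	};

	static int __init my_module_init(void)
	{
		int err;

		/* gre_add_protocol_v0() asserts that RTNL is held. */
		rtnl_lock();
		err = gre_add_protocol_v0(&my_gre_proto, 1);
		rtnl_unlock();

		return err < 0 ? -EAGAIN : 0;
	}

	static void __exit my_module_exit(void)
	{
		rtnl_lock();
		gre_del_protocol_v0(&my_gre_proto, 1);
		rtnl_unlock();
	}

	module_init(my_module_init);
	module_exit(my_module_exit);
	MODULE_LICENSE("GPL");
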
 include/net/gre.h  |   17 +++
 include/net/ipip.h |    9 ++
 net/ipv4/gre.c     |  276 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 302 insertions(+), 0 deletions(-)
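
Also for reviewers, a rough transmit-side sketch of how a caller might fill
struct tnl_ptk_info and call gre_update_header() after building the outer IP
header. example_build_gre() and its arguments are hypothetical; note that
gre_update_header() expects the GRE header to sit directly behind a plain
20-byte IP header, and a checksum is only computed when GRE_CSUM is set.

	#include <linux/if_ether.h>
	#include <linux/skbuff.h>
	#include <net/gre.h>

	/* Illustrative only: fill in a keyed GRE header for an Ethernet
	 * payload.  The caller has already reserved tpi.hdr_len bytes of
	 * GRE header space right behind the IP header. */
	static void example_build_gre(struct sk_buff *skb, __be32 key)
	{
		struct tnl_ptk_info tpi = {
			.flags   = GRE_KEY,
			.proto   = htons(ETH_P_TEB),
			.key     = key,
			.seq     = 0,
			/* base section + key section */
			.hdr_len = 2 * GRE_HEADER_SECTION,
		};

		gre_update_header(skb, &tpi);
	}
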

diff --git a/include/net/gre.h b/include/net/gre.h
index 8266547..1eb3166 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -2,6 +2,7 @@
 #define __LINUX_GRE_H
 
 #include <linux/skbuff.h>
+#include <net/ipip.h>
 
 #define GREPROTO_CISCO		0
 #define GREPROTO_PPTP		1
@@ -15,4 +16,20 @@ struct gre_protocol {
 int gre_add_protocol(const struct gre_protocol *proto, u8 version);
 int gre_del_protocol(const struct gre_protocol *proto, u8 version);
 
+struct gre_protocol_v0 {
+	int (*handler)(struct sk_buff *skb, const struct tnl_ptk_info *tpi);
+	int (*err_handler)(struct sk_buff *skb, u32 info,
+			   const struct tnl_ptk_info *tpi);
+};
+
+int gre_add_protocol_v0(const struct gre_protocol_v0 *proto, u8 priority);
+int gre_del_protocol_v0(const struct gre_protocol_v0 *proto, u8 priority);
+
+void gre_update_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi);
+struct gre_base_hdr {
+	__be16 flags;
+	__be16 protocol;
+};
+#define GRE_HEADER_SECTION 4
+
 #endif
diff --git a/include/net/ipip.h b/include/net/ipip.h
index ddc077c..7de2500 100644
--- a/include/net/ipip.h
+++ b/include/net/ipip.h
@@ -41,6 +41,15 @@ struct ip_tunnel {
 	struct gro_cells		gro_cells;
 };
 
+struct tnl_ptk_info {
+	__be16 flags;
+	__be16 proto;
+	__be32 key;
+	__be32 seq;
+	int hdr_len;
+	__sum16 csum;
+};
+
 struct ip_tunnel_prl_entry {
 	struct ip_tunnel_prl_entry __rcu *next;
 	__be32				addr;
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
index 5a903dc..f62725e 100644
--- a/net/ipv4/gre.c
+++ b/net/ipv4/gre.c
@@ -16,15 +16,21 @@
 #include <linux/kernel.h>
 #include <linux/kmod.h>
 #include <linux/skbuff.h>
+#include <linux/if.h>
+#include <linux/icmp.h>
 #include <linux/in.h>
 #include <linux/ip.h>
+#include <linux/if_tunnel.h>
 #include <linux/netdevice.h>
 #include <linux/spinlock.h>
 #include <net/protocol.h>
 #include <net/gre.h>
+#include <net/icmp.h>
 
+#define GREPROTO_V0_MAX 2
 
 static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
+static const struct gre_protocol_v0 __rcu *gre_proto_v0[GREPROTO_V0_MAX] __read_mostly;
 
 int gre_add_protocol(const struct gre_protocol *proto, u8 version)
 {
@@ -104,12 +110,269 @@ static void gre_err(struct sk_buff *skb, u32 info)
 	rcu_read_unlock();
 }
 
+int gre_add_protocol_v0(const struct gre_protocol_v0 *proto, u8 priority)
+{
+	ASSERT_RTNL();
+	if (priority >= GREPROTO_V0_MAX)
+		goto err_out;
+
+	if (rtnl_dereference(gre_proto_v0[priority]))
+		goto err_out;
+
+	RCU_INIT_POINTER(gre_proto_v0[priority], proto);
+	return 0;
+
+err_out:
+	return -1;
+}
+EXPORT_SYMBOL_GPL(gre_add_protocol_v0);
+
+int gre_del_protocol_v0(const struct gre_protocol_v0 *proto, u8 priority)
+{
+	ASSERT_RTNL();
+	if (priority >= GREPROTO_V0_MAX)
+		goto err_out;
+
+	if (rtnl_dereference(gre_proto_v0[priority]) != proto)
+		goto err_out;
+	RCU_INIT_POINTER(gre_proto_v0[priority], NULL);
+	synchronize_rcu();
+	return 0;
+
+err_out:
+	return -1;
+}
+EXPORT_SYMBOL_GPL(gre_del_protocol_v0);
+
+void gre_update_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
+{
+	struct iphdr *iph = ip_hdr(skb);
+	struct gre_base_hdr *greh = (struct gre_base_hdr *)&iph[1];
+
+	greh->flags = tpi->flags;
+	greh->protocol = tpi->proto;
+
+	if (tpi->flags & (GRE_KEY | GRE_CSUM | GRE_SEQ)) {
+		__be32 *ptr = (__be32 *)(((u8 *)greh) + tpi->hdr_len - 4);
+
+		if (tpi->flags & GRE_SEQ) {
+			*ptr = tpi->seq;
+			ptr--;
+		}
+		if (tpi->flags & GRE_KEY) {
+			*ptr = tpi->key;
+			ptr--;
+		}
+		if (tpi->flags & GRE_CSUM) {
+			*(__sum16 *)ptr = 0;
+			*(__sum16 *)ptr = csum_fold(skb_checksum(skb,
+						skb_transport_offset(skb),
+						skb->len - skb_transport_offset(skb),
+						0));
+		}
+	}
+}
+EXPORT_SYMBOL(gre_update_header);
+
+
+static __sum16 check_checksum(struct sk_buff *skb)
+{
+	struct iphdr *iph = ip_hdr(skb);
+	struct gre_base_hdr *greh = (struct gre_base_hdr *)(iph + 1);
+	__sum16 csum = 0;
+
+	if (greh->flags & GRE_CSUM) {
+		switch (skb->ip_summed) {
+		case CHECKSUM_COMPLETE:
+			csum = csum_fold(skb->csum);
+
+			if (!csum)
+				break;
+			/* Fall through. */
+
+		case CHECKSUM_NONE:
+			skb->csum = 0;
+			csum = __skb_checksum_complete(skb);
+			skb->ip_summed = CHECKSUM_COMPLETE;
+			break;
+		}
+	}
+
+	return csum;
+}
+
+static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi)
+{
+	struct gre_base_hdr *greh = (struct gre_base_hdr *)skb->data;
+	__be32 *options = (__be32 *)(greh + 1);
+
+	if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
+		return -EINVAL;
+
+	tpi->flags = greh->flags;
+	tpi->proto = greh->protocol;
+
+	tpi->hdr_len = GRE_HEADER_SECTION;
+	tpi->csum = check_checksum(skb);
+
+	if (tpi->csum)
+		return -EINVAL;
+
+	if (greh->flags & GRE_CSUM) {
+		tpi->hdr_len += GRE_HEADER_SECTION;
+		options++;
+	}
+
+	if (greh->flags & GRE_KEY) {
+		if ((void *)(options + 1) > (void *)skb_tail_pointer(skb))
+			return -EINVAL;
+		tpi->hdr_len += GRE_HEADER_SECTION;
+		tpi->key = *options;
+		options++;
+	} else
+		tpi->key = 0;
+
+	if (unlikely(greh->flags & GRE_SEQ)) {
+		if ((void *)(options + 1) > (void *)skb_tail_pointer(skb))
+			return -EINVAL;
+
+		tpi->seq = *options;
+		tpi->hdr_len += GRE_HEADER_SECTION;
+		options++;
+	} else
+		tpi->seq = 0;
+
+	/* WCCP version 1 and 2 protocol decoding.
+	 * - Change protocol to IP
+	 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
+	 */
+	if (tpi->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
+		tpi->proto = htons(ETH_P_IP);
+		if ((*(u8 *)options & 0xF0) != 0x40)
+			tpi->hdr_len += 4;
+	}
+
+	return 0;
+}
+
+static int ipgre_rcv_v0(struct sk_buff *skb)
+{
+	struct tnl_ptk_info tpi;
+	int i;
+
+	if (!pskb_may_pull(skb, 16))
+		goto drop;
+
+	if (parse_gre_header(skb, &tpi) < 0)
+		goto drop;
+
+	for (i = 0; i < GREPROTO_V0_MAX; i++) {
+		const struct gre_protocol_v0 *proto = rcu_dereference(gre_proto_v0[i]);
+
+		if (proto) {
+			int ret;
+
+			ret = proto->handler(skb, &tpi);
+			if (ret <= 0)
+				return ret;
+		}
+
+	}
+	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+
+drop:
+	kfree_skb(skb);
+	return 0;
+}
+
+static void ipgre_err_v0(struct sk_buff *skb, u32 info)
+{
+
+	/* All the routers (except for Linux) return only
+	 * 8 bytes of packet payload, which means that precise relaying
+	 * of ICMP in the real Internet is absolutely infeasible.
+	 *
+	 * Moreover, Cisco "wise men" put the GRE key in the third word
+	 * of the GRE header.  That makes it impossible to maintain even
+	 * soft state for keyed GRE tunnels with checksums enabled.
+	 * Tell them "thank you".
+	 *
+	 * Well, I wonder: RFC 1812 was written by a Cisco employee,
+	 * so why the hell do these idiots break standards established
+	 * by themselves?
+	 */
+
+	const int type = icmp_hdr(skb)->type;
+	const int code = icmp_hdr(skb)->code;
+	struct tnl_ptk_info tpi;
+	int i;
+
+	if (!pskb_may_pull(skb, sizeof(struct gre_base_hdr) + ETH_HLEN))
+		return;
+
+	if (parse_gre_header(skb, &tpi) < 0)
+		return;
+	if (tpi.csum)
+		return;
+
+	/* If only 8 bytes returned, keyed message will be dropped here */
+	if (tpi.flags & GRE_KEY) {
+		if ((tpi.flags & GRE_CSUM) && (tpi.hdr_len < 12))
+			return;
+		if (tpi.hdr_len < 8)
+			return;
+	}
+
+	switch (type) {
+	default:
+	case ICMP_PARAMETERPROB:
+		return;
+
+	case ICMP_DEST_UNREACH:
+		switch (code) {
+		case ICMP_SR_FAILED:
+		case ICMP_PORT_UNREACH:
+			/* Impossible event. */
+			return;
+		default:
+			/* All others are translated to HOST_UNREACH.
+			 * rfc2003 contains "deep thoughts" about NET_UNREACH,
+			 * I believe they are just ether pollution. --ANK
+			 */
+			break;
+		}
+		break;
+	case ICMP_TIME_EXCEEDED:
+		if (code != ICMP_EXC_TTL)
+			return;
+		break;
+
+	case ICMP_REDIRECT:
+		break;
+	}
+
+	for (i = 0; i < GREPROTO_V0_MAX; i++) {
+		const struct gre_protocol_v0 *proto = rcu_dereference(gre_proto_v0[i]);
+
+		if (proto) {
+			if (proto->err_handler(skb, info, &tpi) <= 0)
+				return;
+		}
+
+	}
+}
+
 static const struct net_protocol net_gre_protocol = {
 	.handler     = gre_rcv,
 	.err_handler = gre_err,
 	.netns_ok    = 1,
 };
 
+static const struct gre_protocol ipgre_protocol = {
+	.handler     = ipgre_rcv_v0,
+	.err_handler = ipgre_err_v0,
+};
+
 static int __init gre_init(void)
 {
 	pr_info("GRE over IPv4 demultiplexor driver\n");
@@ -118,12 +381,25 @@ static int __init gre_init(void)
 		pr_err("can't add protocol\n");
 		return -EAGAIN;
 	}
+	rtnl_lock();
+	if (gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) {
+		pr_info("%s: can't add ipgre handler\n", __func__);
+		rtnl_unlock();
+		inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
+		return -EAGAIN;
+	}
+	rtnl_unlock();
 
 	return 0;
 }
 
 static void __exit gre_exit(void)
 {
+	rtnl_lock();
+	if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
+		pr_info("%s: can't remove protocol\n", __func__);
+	rtnl_unlock();
+
 	inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
 }
 
-- 
1.7.1



