[ovs-dev] [RFC upstream PATCH 1/4] GRE: Allow multiple GREPROTO_CISCO protocol handlers.
Pravin B Shelar
pshelar at nicira.com
Wed Sep 26 23:34:35 UTC 2012
Currently only one protocol handler is allowed for the GREPROTO_CISCO
protocol. Soon the OVS tunnel code will register for the same protocol
as the GRE device.
The following patch extends the GRE de-multiplexer so that multiple GRE
modules can register a GRE protocol handler.
Signed-off-by: Pravin B Shelar <pshelar at nicira.com>
---
include/net/gre.h | 12 +++
include/net/ipip.h | 9 ++
net/ipv4/gre.c | 281 ++++++++++++++++++++++++++++++++++++++++++++++++++++
net/ipv4/ip_gre.c | 139 ++++----------------------
4 files changed, 323 insertions(+), 118 deletions(-)
diff --git a/include/net/gre.h b/include/net/gre.h
index 8266547..82e9276 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -2,6 +2,7 @@
#define __LINUX_GRE_H
#include <linux/skbuff.h>
+#include <net/ipip.h>
#define GREPROTO_CISCO 0
#define GREPROTO_PPTP 1
@@ -15,4 +16,15 @@ struct gre_protocol {
int gre_add_protocol(const struct gre_protocol *proto, u8 version);
int gre_del_protocol(const struct gre_protocol *proto, u8 version);
+/*
+ * Callback pair for one consumer of version-0 GRE packets.
+ *
+ * The demultiplexer in net/ipv4/gre.c parses the GRE header into a
+ * struct tnl_ptk_info and offers the packet to the registered entries
+ * in slot order.  For both callbacks a return value <= 0 ends the walk;
+ * a positive return value lets the next registered entry see the packet.
+ */
+struct gre_protocol_v0 {
+ int (*handler)(struct sk_buff *skb, struct tnl_ptk_info *tpi);
+ int (*err_handler)(struct sk_buff *skb, u32 info,
+ struct tnl_ptk_info *tpi);
+};
+
+/*
+ * Register/unregister a version-0 handler in slot @priority.  Both return 0
+ * on success and a negative value on failure (slot out of range, slot
+ * already taken on add, or slot not owned by @proto on delete).
+ */
+int gre_add_protocol_v0(const struct gre_protocol_v0 *proto, u8 priority);
+int gre_del_protocol_v0(const struct gre_protocol_v0 *proto, u8 priority);
+/*
+ * Write the GRE base header and the optional checksum/key/sequence words
+ * directly behind the IP header of @skb.  @hlen is the offset from the start
+ * of the IP header to the end of the GRE header; @seqno is in host byte order.
+ */
+void build_gre_header(struct sk_buff *skb, int hlen, __be16 flags,
+ __be16 proto, __be32 key, u32 seqno);
+
#endif
diff --git a/include/net/ipip.h b/include/net/ipip.h
index a93cf6d..4b77671 100644
--- a/include/net/ipip.h
+++ b/include/net/ipip.h
@@ -38,6 +38,15 @@ struct ip_tunnel {
unsigned int prl_count; /* # of entries in PRL */
};
+/*
+ * Result of parsing a GRE header, handed to the per-protocol handlers so
+ * they do not have to parse the header again.
+ */
+struct tnl_ptk_info {
+ __be16 flags; /* flags word copied from the GRE base header */
+ __be16 proto; /* GRE protocol type; ETH_P_WCCP is rewritten to ETH_P_IP */
+ __be32 key; /* GRE key, 0 when GRE_KEY is not set */
+ __be32 seq; /* sequence number as read from the packet (network byte order), 0 when GRE_SEQ is not set */
+ int hdr_len; /* bytes of GRE header (plus WCCPv2 redirect header, if any) to pull */
+ __be16 csum; /* checksum verification result; non-zero is treated as a checksum error */
+};
+
struct ip_tunnel_prl_entry {
struct ip_tunnel_prl_entry __rcu *next;
__be32 addr;
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
index 42a4910..2c384c7 100644
--- a/net/ipv4/gre.c
+++ b/net/ipv4/gre.c
@@ -16,15 +16,27 @@
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/skbuff.h>
+#include <linux/if.h>
+#include <linux/icmp.h>
#include <linux/in.h>
#include <linux/ip.h>
+#include <linux/if_tunnel.h>
#include <linux/netdevice.h>
#include <linux/spinlock.h>
#include <net/protocol.h>
#include <net/gre.h>
+#include <net/icmp.h>
+/* Number of handler slots in gre_proto_v0[]; valid priorities are 0..MAX-1. */
+#define GREPROTO_V0_MAX 2
+/* Size in bytes of the base header and of each optional header field. */
+#define GRE_HEADER_SECTION 4
+
+/* The mandatory first four bytes of every GRE header. */
+struct gre_base_hdr {
+ __be16 flags;
+ __be16 protocol;
+};
static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
+static const struct gre_protocol_v0 __rcu *gre_proto_v0[GREPROTO_V0_MAX] __read_mostly;
static DEFINE_SPINLOCK(gre_proto_lock);
int gre_add_protocol(const struct gre_protocol *proto, u8 version)
@@ -112,12 +124,273 @@ static void gre_err(struct sk_buff *skb, u32 info)
rcu_read_unlock();
}
+/*
+ * gre_add_protocol_v0 - register a handler for version-0 GRE packets.
+ * @proto:    handler callbacks; must stay valid until it is unregistered.
+ * @priority: slot index, lower slots are offered packets first.
+ *
+ * Returns 0 on success, or -1 when @priority is out of range or the slot
+ * is already occupied.
+ */
+int gre_add_protocol_v0(const struct gre_protocol_v0 *proto, u8 priority)
+{
+	int err = -1;
+
+	if (priority >= GREPROTO_V0_MAX)
+		return err;
+
+	spin_lock(&gre_proto_lock);
+	if (!rcu_dereference_protected(gre_proto_v0[priority],
+				       lockdep_is_held(&gre_proto_lock))) {
+		/*
+		 * Readers walk the table under rcu_read_lock() only, so the
+		 * pointer must be published with the ordering guarantee of
+		 * rcu_assign_pointer(); RCU_INIT_POINTER() gives none.
+		 */
+		rcu_assign_pointer(gre_proto_v0[priority], proto);
+		err = 0;
+	}
+	spin_unlock(&gre_proto_lock);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(gre_add_protocol_v0);
+
+/*
+ * gre_del_protocol_v0 - unregister a version-0 GRE handler.
+ * @proto:    the handler that was registered.
+ * @priority: the slot it was registered in.
+ *
+ * Returns 0 after the slot has been cleared and an RCU grace period has
+ * elapsed, or -1 when @priority is out of range or the slot does not
+ * currently hold @proto.
+ */
+int gre_del_protocol_v0(const struct gre_protocol_v0 *proto, u8 priority)
+{
+	const struct gre_protocol_v0 *registered;
+	int err = -1;
+
+	if (priority >= GREPROTO_V0_MAX)
+		return err;
+
+	spin_lock(&gre_proto_lock);
+	registered = rcu_dereference_protected(gre_proto_v0[priority],
+					       lockdep_is_held(&gre_proto_lock));
+	if (registered == proto) {
+		RCU_INIT_POINTER(gre_proto_v0[priority], NULL);
+		err = 0;
+	}
+	spin_unlock(&gre_proto_lock);
+
+	/* Wait for in-flight readers only when something was removed. */
+	if (!err)
+		synchronize_rcu();
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(gre_del_protocol_v0);
+
+/*
+ * build_gre_header - fill in the GRE header that follows the IP header.
+ * @skb:   packet whose network header is the outer IP header.
+ * @hlen:  offset from the start of the IP header to the end of the GRE header.
+ * @flags: GRE flags word, written as given.
+ * @proto: GRE protocol type, written as given.
+ * @key:   key value, used only when GRE_KEY is set.
+ * @seqno: host-order sequence number, used only when GRE_SEQ is set.
+ */
+void build_gre_header(struct sk_buff *skb, int hlen, __be16 flags,
+		      __be16 proto, __be32 key, u32 seqno)
+{
+	struct iphdr *iph = ip_hdr(skb);
+	struct gre_base_hdr *greh = (struct gre_base_hdr *)(iph + 1);
+	__be32 *opt;
+
+	greh->flags = flags;
+	greh->protocol = proto;
+
+	if (!(flags & (GRE_KEY | GRE_CSUM | GRE_SEQ)))
+		return;
+
+	/* Optional words are filled back to front, starting at the last one. */
+	opt = (__be32 *)((u8 *)iph + hlen - 4);
+
+	if (flags & GRE_SEQ)
+		*opt-- = htonl(seqno);
+
+	if (flags & GRE_KEY)
+		*opt-- = key;
+
+	if (flags & GRE_CSUM) {
+		/* The checksum word must be zero while the sum is computed. */
+		*opt = 0;
+		*(__sum16 *)opt = ip_compute_csum((void *)greh,
+						  skb->len - sizeof(struct iphdr));
+	}
+}
+EXPORT_SYMBOL(build_gre_header);
+
+/*
+ * Verify the GRE checksum of @skb when the header asks for one.
+ *
+ * The GRE header is taken from skb->data, the same location
+ * parse_gre_header() reads it from; deriving it from ip_hdr(skb) + 1 would
+ * point into the IP options whenever the IP header is longer than 20 bytes.
+ *
+ * Returns 0 when GRE_CSUM is not set or the checksum verifies, otherwise
+ * the non-zero folded checksum.
+ */
+static __sum16 check_checksum(struct sk_buff *skb)
+{
+	const struct gre_base_hdr *greh = (const struct gre_base_hdr *)skb->data;
+	__sum16 csum = 0;
+
+	if (!(greh->flags & GRE_CSUM))
+		return 0;
+
+	switch (skb->ip_summed) {
+	case CHECKSUM_COMPLETE:
+		csum = csum_fold(skb->csum);
+		if (!csum)
+			break;
+		/* Fall through. */
+
+	case CHECKSUM_NONE:
+		skb->csum = 0;
+		csum = __skb_checksum_complete(skb);
+		skb->ip_summed = CHECKSUM_COMPLETE;
+		break;
+
+	default:
+		/* Other ip_summed states are accepted without recomputing. */
+		break;
+	}
+
+	return csum;
+}
+
+/*
+ * parse_gre_header - decode the GRE header found at skb->data.
+ * @skb: packet with skb->data pointing at the GRE base header.
+ * @tpi: filled with flags, protocol, key, sequence number, checksum result
+ *       and the number of header bytes a handler has to pull.
+ *
+ * Returns 0 on success.  Returns -EINVAL when the header uses a non-zero
+ * version or routing, or when an announced field lies beyond the linear
+ * data of @skb; @tpi must not be used in that case.
+ */
+static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi)
+{
+	/* IP and ICMP protocol handlers check that the IHL is valid. */
+	const struct gre_base_hdr *greh = (const struct gre_base_hdr *)skb->data;
+	const __be32 *options = (const __be32 *)(greh + 1);
+	const u8 *tail = skb_tail_pointer(skb);
+
+	if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
+		return -EINVAL;
+
+	tpi->flags = greh->flags;
+	tpi->proto = greh->protocol;
+	tpi->hdr_len = GRE_HEADER_SECTION;
+	tpi->csum = check_checksum(skb);
+	tpi->key = 0;
+	tpi->seq = 0;
+
+	if (tpi->flags & GRE_CSUM) {
+		if ((const u8 *)(options + 1) > tail)
+			return -EINVAL;
+		tpi->hdr_len += GRE_HEADER_SECTION;
+		options++;
+	}
+
+	if (tpi->flags & GRE_KEY) {
+		if ((const u8 *)(options + 1) > tail)
+			return -EINVAL;
+		tpi->hdr_len += GRE_HEADER_SECTION;
+		tpi->key = *options;
+		options++;
+	}
+
+	if (unlikely(tpi->flags & GRE_SEQ)) {
+		if ((const u8 *)(options + 1) > tail)
+			return -EINVAL;
+		/* Kept in network byte order, as the field type says. */
+		tpi->seq = *options;
+		tpi->hdr_len += GRE_HEADER_SECTION;
+		options++;
+	}
+
+	/* WCCP version 1 and 2 protocol decoding.
+	 * - Change protocol to IP
+	 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
+	 */
+	if (tpi->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
+		/* The version nibble of the next byte tells v1 from v2. */
+		if ((const u8 *)options + 1 > tail)
+			return -EINVAL;
+		tpi->proto = htons(ETH_P_IP);
+		if ((*(const u8 *)options & 0xF0) != 0x40)
+			tpi->hdr_len += 4;
+	}
+
+	return 0;
+}
+
+/*
+ * Receive path for version-0 GRE: parse the header once, then offer the
+ * packet to every registered v0 handler in slot order.
+ *
+ * A handler returning <= 0 ends the walk and its value is returned.  When
+ * every handler declines (returns > 0), or none is registered, an ICMP
+ * port-unreachable is sent and the packet is freed.
+ */
+static int ipgre_rcv_v0(struct sk_buff *skb)
+{
+	struct tnl_ptk_info tpi;
+	int i;
+
+	/* Base header plus checksum, key and sequence words. */
+	if (!pskb_may_pull(skb, 4 * GRE_HEADER_SECTION))
+		goto drop;
+
+	if (parse_gre_header(skb, &tpi) < 0)
+		goto drop;
+
+	rcu_read_lock();
+	for (i = 0; i < GREPROTO_V0_MAX; i++) {
+		const struct gre_protocol_v0 *proto;
+		int ret;
+
+		/* Slots may be empty: nothing registered yet or already removed. */
+		proto = rcu_dereference(gre_proto_v0[i]);
+		if (!proto || !proto->handler)
+			continue;
+
+		ret = proto->handler(skb, &tpi);
+		if (ret <= 0) {
+			rcu_read_unlock();
+			return ret;
+		}
+	}
+	rcu_read_unlock();
+
+	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+
+drop:
+	kfree_skb(skb);
+	return 0;
+}
+
+/*
+ * ICMP error path for version-0 GRE: filter the ICMP type/code, parse the
+ * GRE header quoted inside the ICMP message and offer the error to every
+ * registered v0 err_handler in slot order until one returns <= 0.
+ *
+ * Background (from the original ip_gre.c comment): most routers quote only
+ * 8 bytes of the offending packet's payload, so precise relaying of ICMP
+ * errors is infeasible, and because the GRE key is the third word when a
+ * checksum is present, keyed tunnels with checksums cannot be matched from
+ * such a short quote.
+ */
+static void ipgre_err_v0(struct sk_buff *skb, u32 info)
+{
+	const int type = icmp_hdr(skb)->type;
+	const int code = icmp_hdr(skb)->code;
+	struct tnl_ptk_info tpi;
+	int ip_hlen;
+	int err;
+	int i;
+
+	/*
+	 * skb->data points at the quoted IP header (ipgre_err reads it that
+	 * way), so the GRE header starts ihl * 4 bytes further in.
+	 */
+	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+		return;
+
+	ip_hlen = ((const struct iphdr *)skb->data)->ihl << 2;
+	if (ip_hlen < (int)sizeof(struct iphdr))
+		return;
+
+	/* NOTE(review): assumes the ICMP layer guarantees 8 quoted bytes. */
+	if (!pskb_may_pull(skb, ip_hlen + 2 * GRE_HEADER_SECTION))
+		return;
+
+	/*
+	 * parse_gre_header() expects the GRE header at skb->data; step over
+	 * the quoted IP header for the call and restore it afterwards, since
+	 * the err_handlers read the IP header from skb->data.
+	 */
+	__skb_pull(skb, ip_hlen);
+	err = parse_gre_header(skb, &tpi);
+	__skb_push(skb, ip_hlen);
+
+	/*
+	 * A failed parse leaves tpi unusable.  This is also where a keyed
+	 * message is dropped when the quote is too short to hold the key.
+	 */
+	if (err < 0)
+		return;
+
+	switch (type) {
+	default:
+	case ICMP_PARAMETERPROB:
+		return;
+
+	case ICMP_DEST_UNREACH:
+		switch (code) {
+		case ICMP_SR_FAILED:
+		case ICMP_PORT_UNREACH:
+			/* Impossible event. */
+			return;
+		default:
+			/* All others are translated to HOST_UNREACH.
+			   rfc2003 contains "deep thoughts" about NET_UNREACH,
+			   I believe they are just ether pollution. --ANK
+			 */
+			break;
+		}
+		break;
+	case ICMP_TIME_EXCEEDED:
+		if (code != ICMP_EXC_TTL)
+			return;
+		break;
+
+	case ICMP_REDIRECT:
+		break;
+	}
+
+	rcu_read_lock();
+	for (i = 0; i < GREPROTO_V0_MAX; i++) {
+		const struct gre_protocol_v0 *proto;
+
+		/* Slots may be empty: nothing registered yet or already removed. */
+		proto = rcu_dereference(gre_proto_v0[i]);
+		if (!proto || !proto->err_handler)
+			continue;
+
+		if (proto->err_handler(skb, info, &tpi) <= 0)
+			break;
+	}
+	rcu_read_unlock();
+}
+
static const struct net_protocol net_gre_protocol = {
.handler = gre_rcv,
.err_handler = gre_err,
.netns_ok = 1,
};
+/*
+ * Hooks the version-0 demultiplexer (ipgre_rcv_v0/ipgre_err_v0) into the
+ * per-version GRE protocol table; registered for GREPROTO_CISCO in gre_init().
+ */
+static const struct gre_protocol ipgre_protocol = {
+ .handler = ipgre_rcv_v0,
+ .err_handler = ipgre_err_v0,
+};
+
static int __init gre_init(void)
{
pr_info("GRE over IPv4 demultiplexor driver\n");
@@ -126,12 +399,20 @@ static int __init gre_init(void)
pr_err("can't add protocol\n");
return -EAGAIN;
}
+ if (gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) {
+ pr_info("%s: can't add ipgre handler\n", __func__);
+ inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
+ return -EAGAIN;
+ }
return 0;
}
+/* Module unload: undo the two registrations made by gre_init(). */
static void __exit gre_exit(void)
{
+ /* Remove the version-0 demultiplexer first; a failure is only logged. */
+ if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
+ pr_info("%s: can't remove protocol\n", __func__);
+
inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
}
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index f233c1d..3bda6e2 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -466,7 +466,7 @@ static void ipgre_tunnel_uninit(struct net_device *dev)
}
-static void ipgre_err(struct sk_buff *skb, u32 info)
+static int ipgre_err(struct sk_buff *skb, u32 info, struct tnl_ptk_info *tnl_ptk_info)
{
/* All the routers (except for Linux) return only
@@ -483,63 +483,16 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
*/
const struct iphdr *iph = (const struct iphdr *)skb->data;
- __be16 *p = (__be16 *)(skb->data+(iph->ihl<<2));
- int grehlen = (iph->ihl<<2) + 4;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
struct ip_tunnel *t;
- __be16 flags;
-
- flags = p[0];
- if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
- if (flags&(GRE_VERSION|GRE_ROUTING))
- return;
- if (flags&GRE_KEY) {
- grehlen += 4;
- if (flags&GRE_CSUM)
- grehlen += 4;
- }
- }
-
- /* If only 8 bytes returned, keyed message will be dropped here */
- if (skb_headlen(skb) < grehlen)
- return;
-
- switch (type) {
- default:
- case ICMP_PARAMETERPROB:
- return;
-
- case ICMP_DEST_UNREACH:
- switch (code) {
- case ICMP_SR_FAILED:
- case ICMP_PORT_UNREACH:
- /* Impossible event. */
- return;
- default:
- /* All others are translated to HOST_UNREACH.
- rfc2003 contains "deep thoughts" about NET_UNREACH,
- I believe they are just ether pollution. --ANK
- */
- break;
- }
- break;
- case ICMP_TIME_EXCEEDED:
- if (code != ICMP_EXC_TTL)
- return;
- break;
-
- case ICMP_REDIRECT:
- break;
- }
rcu_read_lock();
t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
- flags & GRE_KEY ?
- *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
- p[1]);
- if (t == NULL)
- goto out;
+ tnl_ptk_info->key, tnl_ptk_info->proto);
+ if (t == NULL) {
+ /* Not our tunnel: drop the RCU read lock taken above, then
+ * return > 0 so the next registered handler is tried.
+ */
+ rcu_read_unlock();
+ return 1;
+ }
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
ipv4_update_pmtu(skb, dev_net(skb->dev), info,
@@ -565,6 +518,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
t->err_time = jiffies;
out:
rcu_read_unlock();
+ return 0;
}
static inline void ipgre_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb)
@@ -589,80 +543,29 @@ ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb)
return INET_ECN_encapsulate(tos, inner);
}
-static int ipgre_rcv(struct sk_buff *skb)
+static int ipgre_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi)
{
const struct iphdr *iph;
- u8 *h;
- __be16 flags;
- __sum16 csum = 0;
- __be32 key = 0;
- u32 seqno = 0;
struct ip_tunnel *tunnel;
- int offset = 4;
- __be16 gre_proto;
if (!pskb_may_pull(skb, 16))
goto drop_nolock;
iph = ip_hdr(skb);
- h = skb->data;
- flags = *(__be16 *)h;
-
- if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
- /* - Version must be 0.
- - We do not support routing headers.
- */
- if (flags&(GRE_VERSION|GRE_ROUTING))
- goto drop_nolock;
-
- if (flags&GRE_CSUM) {
- switch (skb->ip_summed) {
- case CHECKSUM_COMPLETE:
- csum = csum_fold(skb->csum);
- if (!csum)
- break;
- /* fall through */
- case CHECKSUM_NONE:
- skb->csum = 0;
- csum = __skb_checksum_complete(skb);
- skb->ip_summed = CHECKSUM_COMPLETE;
- }
- offset += 4;
- }
- if (flags&GRE_KEY) {
- key = *(__be32 *)(h + offset);
- offset += 4;
- }
- if (flags&GRE_SEQ) {
- seqno = ntohl(*(__be32 *)(h + offset));
- offset += 4;
- }
- }
-
- gre_proto = *(__be16 *)(h + 2);
rcu_read_lock();
if ((tunnel = ipgre_tunnel_lookup(skb->dev,
- iph->saddr, iph->daddr, key,
- gre_proto))) {
+ iph->saddr, iph->daddr, tpi->key,
+ tpi->proto))) {
struct pcpu_tstats *tstats;
secpath_reset(skb);
- skb->protocol = gre_proto;
- /* WCCP version 1 and 2 protocol decoding.
- * - Change protocol to IP
- * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
- */
- if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
- skb->protocol = htons(ETH_P_IP);
- if ((*(h + offset) & 0xF0) != 0x40)
- offset += 4;
- }
+ skb->protocol = tpi->proto;
skb->mac_header = skb->network_header;
- __pskb_pull(skb, offset);
- skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
+ __pskb_pull(skb, tpi->hdr_len);
+ skb_postpull_rcsum(skb, skb_transport_header(skb), tpi->hdr_len);
skb->pkt_type = PACKET_HOST;
#ifdef CONFIG_NET_IPGRE_BROADCAST
if (ipv4_is_multicast(iph->daddr)) {
@@ -674,20 +577,20 @@ static int ipgre_rcv(struct sk_buff *skb)
}
#endif
- if (((flags&GRE_CSUM) && csum) ||
- (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
+ if (((tpi->flags&GRE_CSUM) && tpi->csum) ||
+ (!(tpi->flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
tunnel->dev->stats.rx_crc_errors++;
tunnel->dev->stats.rx_errors++;
goto drop;
}
if (tunnel->parms.i_flags&GRE_SEQ) {
- if (!(flags&GRE_SEQ) ||
- (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
+ /* tpi->seq is in network byte order; i_seqno is kept in host order. */
+ if (!(tpi->flags&GRE_SEQ) ||
+ (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
tunnel->dev->stats.rx_fifo_errors++;
tunnel->dev->stats.rx_errors++;
goto drop;
}
- tunnel->i_seqno = seqno + 1;
+ tunnel->i_seqno = ntohl(tpi->seq) + 1;
}
/* Warning: All skb pointers will be invalidated! */
@@ -1373,7 +1276,7 @@ static void ipgre_fb_tunnel_init(struct net_device *dev)
}
-static const struct gre_protocol ipgre_protocol = {
+static const struct gre_protocol_v0 ipgre_protocol = {
.handler = ipgre_rcv,
.err_handler = ipgre_err,
};
@@ -1771,7 +1674,7 @@ static int __init ipgre_init(void)
if (err < 0)
return err;
- err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
+ err = gre_add_protocol_v0(&ipgre_protocol, 0);
if (err < 0) {
pr_info("%s: can't add protocol\n", __func__);
goto add_proto_failed;
@@ -1791,7 +1694,7 @@ out:
tap_ops_failed:
rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
- gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
+ gre_del_protocol_v0(&ipgre_protocol, 0);
add_proto_failed:
unregister_pernet_device(&ipgre_net_ops);
goto out;
@@ -1801,7 +1704,7 @@ static void __exit ipgre_fini(void)
{
rtnl_link_unregister(&ipgre_tap_ops);
rtnl_link_unregister(&ipgre_link_ops);
- if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
+ if (gre_del_protocol_v0(&ipgre_protocol, 0) < 0)
pr_info("%s: can't remove protocol\n", __func__);
unregister_pernet_device(&ipgre_net_ops);
}
--
1.7.10
More information about the dev
mailing list