[ovs-dev] [PATCH 13/14] datapath: Backport upstream Geneve implementation.

Jesse Gross jesse at nicira.com
Thu Feb 19 03:02:23 UTC 2015


This resolves the differences between the OVS Geneve implementation
and the upstream kernel, particularly in the area of the split
between vport-geneve.c and the generic Geneve data plane.

Signed-off-by: Jesse Gross <jesse at nicira.com>
---
 datapath/linux/.gitignore                  |   1 +
 datapath/linux/Modules.mk                  |   1 +
 datapath/linux/compat/geneve.c             | 241 ++++++++++++++++++++
 datapath/linux/compat/include/net/geneve.h |  76 ++++++-
 datapath/vport-geneve.c                    | 349 +++++------------------------
 5 files changed, 377 insertions(+), 291 deletions(-)
 create mode 100644 datapath/linux/compat/geneve.c

diff --git a/datapath/linux/.gitignore b/datapath/linux/.gitignore
index 2fb1198..69d6658 100644
--- a/datapath/linux/.gitignore
+++ b/datapath/linux/.gitignore
@@ -18,6 +18,7 @@
 /flow_netlink.c
 /flow_table.c
 /genetlink-openvswitch.c
+/geneve.c
 /genl_exec.c
 /gre.c
 /gso.c
diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk
index 53865da..7d9710d 100644
--- a/datapath/linux/Modules.mk
+++ b/datapath/linux/Modules.mk
@@ -3,6 +3,7 @@ openvswitch_sources += \
 	linux/compat/exthdrs_core.c \
 	linux/compat/flex_array.c \
 	linux/compat/flow_dissector.c \
+	linux/compat/geneve.c \
 	linux/compat/gre.c \
 	linux/compat/gso.c \
 	linux/compat/genetlink-openvswitch.c \
diff --git a/datapath/linux/compat/geneve.c b/datapath/linux/compat/geneve.c
new file mode 100644
index 0000000..35c01bb
--- /dev/null
+++ b/datapath/linux/compat/geneve.c
@@ -0,0 +1,241 @@
+/*
+ * Geneve: Generic Network Virtualization Encapsulation
+ *
+ * Copyright (c) 2014 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/skbuff.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/igmp.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/ethtool.h>
+#include <linux/mutex.h>
+#include <net/arp.h>
+#include <net/ndisc.h>
+#include <net/ip.h>
+#include <net/ip_tunnels.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+#include <net/rtnetlink.h>
+#include <net/route.h>
+#include <net/dsfield.h>
+#include <net/inet_ecn.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/geneve.h>
+#include <net/protocol.h>
+#include <net/udp_tunnel.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ipv6.h>
+#include <net/addrconf.h>
+#include <net/ip6_tunnel.h>
+#include <net/ip6_checksum.h>
+#endif
+
+#include "compat.h"
+#include "gso.h"
+
+static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
+{
+	return (struct genevehdr *)(udp_hdr(skb) + 1);
+}
+
+static void geneve_build_header(struct genevehdr *geneveh,
+				__be16 tun_flags, u8 vni[3],
+				u8 options_len, u8 *options)
+{
+	geneveh->ver = GENEVE_VER;
+	geneveh->opt_len = options_len / 4;
+	geneveh->oam = !!(tun_flags & TUNNEL_OAM);
+	geneveh->critical = !!(tun_flags & TUNNEL_CRIT_OPT);
+	geneveh->rsvd1 = 0;
+	memcpy(geneveh->vni, vni, 3);
+	geneveh->proto_type = htons(ETH_P_TEB);
+	geneveh->rsvd2 = 0;
+
+	memcpy(geneveh->options, options, options_len);
+}
+
+/* Encapsulate a frame in Geneve and transmit it.
+ *
+ * When calling this function, skb->data should point to the inner
+ * frame; the Geneve header is pushed and filled in here.
+ *
+ * The remaining UDP tunnel headers are added by udp_tunnel_xmit_skb().
+ */
+int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt,
+		    struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos,
+		    __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port,
+		    __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt,
+		    bool csum, bool xnet)
+{
+	struct genevehdr *gnvh;
+	int min_headroom;
+	int err;
+
+	skb = udp_tunnel_handle_offloads(skb, csum, (opt_len == 0));
+	if (IS_ERR(skb))
+		return PTR_ERR(skb);
+
+	min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
+			+ GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr)
+			+ (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
+
+	err = skb_cow_head(skb, min_headroom);
+	if (unlikely(err)) {
+		kfree_skb(skb);
+		return err;
+	}
+
+	skb = vlan_hwaccel_push_inside(skb);
+	if (unlikely(!skb))
+		return -ENOMEM;
+
+	gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len);
+	geneve_build_header(gnvh, tun_flags, vni, opt_len, opt);
+
+	ovs_skb_set_inner_protocol(skb, htons(ETH_P_TEB));
+
+	return udp_tunnel_xmit_skb(rt, skb, src, dst,
+				   tos, ttl, df, src_port, dst_port, xnet,
+				   !csum);
+}
+
+/* Callback from net/ipv4/udp.c to receive packets */
+static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
+{
+	struct genevehdr *geneveh;
+	struct geneve_sock *gs;
+	int opts_len;
+
+	/* Need the UDP and Geneve base headers to be present */
+	if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN)))
+		goto error;
+
+	/* Hand packets with an unexpected version or protocol back to UDP */
+	geneveh = geneve_hdr(skb);
+
+	if (unlikely(geneveh->ver != GENEVE_VER))
+		goto error;
+
+	if (unlikely(geneveh->proto_type != htons(ETH_P_TEB)))
+		goto error;
+
+	opts_len = geneveh->opt_len * 4;
+	if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len,
+				 htons(ETH_P_TEB)))
+		goto drop;
+
+	gs = rcu_dereference_sk_user_data(sk);
+	if (!gs)
+		goto drop;
+
+	gs->rcv(gs, skb);
+	return 0;
+
+drop:
+	/* Consume bad packet */
+	kfree_skb(skb);
+	return 0;
+
+error:
+	/* Let the UDP layer deal with the skb */
+	return 1;
+}
+
+static struct socket *geneve_create_sock(struct net *net, bool ipv6,
+					 __be16 port)
+{
+	struct socket *sock;
+	struct udp_port_cfg udp_conf;
+	int err;
+
+	memset(&udp_conf, 0, sizeof(udp_conf));
+
+	if (ipv6) {
+		udp_conf.family = AF_INET6;
+	} else {
+		udp_conf.family = AF_INET;
+		udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
+	}
+
+	udp_conf.local_udp_port = port;
+
+	/* Open UDP socket */
+	err = udp_sock_create(net, &udp_conf, &sock);
+	if (err < 0)
+		return ERR_PTR(err);
+
+	return sock;
+}
+
+/* Create a new Geneve listen socket */
+static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
+						geneve_rcv_t *rcv, void *data,
+						bool ipv6)
+{
+	struct geneve_sock *gs;
+	struct socket *sock;
+	struct udp_tunnel_sock_cfg tunnel_cfg;
+
+	gs = kzalloc(sizeof(*gs), GFP_KERNEL);
+	if (!gs)
+		return ERR_PTR(-ENOMEM);
+
+	sock = geneve_create_sock(net, ipv6, port);
+	if (IS_ERR(sock)) {
+		kfree(gs);
+		return ERR_CAST(sock);
+	}
+
+	gs->sock = sock;
+	gs->rcv = rcv;
+	gs->rcv_data = data;
+
+	/* Mark socket as an encapsulation socket */
+	tunnel_cfg.sk_user_data = gs;
+	tunnel_cfg.encap_type = 1;
+	tunnel_cfg.encap_rcv = geneve_udp_encap_recv;
+	tunnel_cfg.encap_destroy = NULL;
+	setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
+
+	return gs;
+}
+
+struct geneve_sock *geneve_sock_add(struct net *net, __be16 port,
+				    geneve_rcv_t *rcv, void *data,
+				    bool no_share, bool ipv6)
+{
+	return geneve_socket_create(net, port, rcv, data, ipv6);
+}
+
+static void rcu_free_gs(struct rcu_head *rcu)
+{
+	struct geneve_sock *gs = container_of(rcu, struct geneve_sock, rcu);
+
+	kfree(gs);
+}
+
+void geneve_sock_release(struct geneve_sock *gs)
+{
+	udp_tunnel_sock_release(gs->sock);
+	call_rcu(&gs->rcu, rcu_free_gs);
+}
diff --git a/datapath/linux/compat/include/net/geneve.h b/datapath/linux/compat/include/net/geneve.h
index 2cb294f..402ef38 100644
--- a/datapath/linux/compat/include/net/geneve.h
+++ b/datapath/linux/compat/include/net/geneve.h
@@ -1,8 +1,28 @@
 #ifndef __NET_GENEVE_WRAPPER_H
 #define __NET_GENEVE_WRAPPER_H  1
 
-/* Not yet upstream. */
-#define GENEVE_CRIT_OPT_TYPE (1 << 7)
+#ifdef CONFIG_INET
+#include <net/udp_tunnel.h>
+#endif
+
+
+/* Geneve Header:
+ *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *  |Ver|  Opt Len  |O|C|    Rsvd.  |          Protocol Type        |
+ *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *  |        Virtual Network Identifier (VNI)       |    Reserved   |
+ *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *  |                    Variable Length Options                    |
+ *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Option Header:
+ *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *  |          Option Class         |      Type     |R|R|R| Length  |
+ *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *  |                      Variable Option Data                     |
+ *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+
 struct geneve_opt {
 	__be16	opt_class;
 	u8	type;
@@ -17,7 +37,57 @@ struct geneve_opt {
 	u8	r3:1;
 	u8	length:5;
 #endif
-        u8	opt_data[];
+	u8	opt_data[];
 };
 
+#define GENEVE_CRIT_OPT_TYPE (1 << 7)
+
+struct genevehdr {
+#ifdef __LITTLE_ENDIAN_BITFIELD
+	u8 opt_len:6;
+	u8 ver:2;
+	u8 rsvd1:6;
+	u8 critical:1;
+	u8 oam:1;
+#else
+	u8 ver:2;
+	u8 opt_len:6;
+	u8 oam:1;
+	u8 critical:1;
+	u8 rsvd1:6;
 #endif
+	__be16 proto_type;
+	u8 vni[3];
+	u8 rsvd2;
+	struct geneve_opt options[];
+};
+
+#ifdef CONFIG_INET
+struct geneve_sock;
+
+typedef void (geneve_rcv_t)(struct geneve_sock *gs, struct sk_buff *skb);
+
+struct geneve_sock {
+	geneve_rcv_t		*rcv;
+	void			*rcv_data;
+	struct socket		*sock;
+	struct rcu_head		rcu;
+};
+
+#define GENEVE_VER 0
+#define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr))
+
+struct geneve_sock *geneve_sock_add(struct net *net, __be16 port,
+				    geneve_rcv_t *rcv, void *data,
+				    bool no_share, bool ipv6);
+
+void geneve_sock_release(struct geneve_sock *vs);
+
+int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt,
+		    struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos,
+		    __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port,
+		    __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt,
+		    bool csum, bool xnet);
+#endif /*ifdef CONFIG_INET */
+
+#endif /*ifdef__NET_GENEVE_WRAPPER_H */
diff --git a/datapath/vport-geneve.c b/datapath/vport-geneve.c
index 56306bb..624a6a0 100644
--- a/datapath/vport-geneve.c
+++ b/datapath/vport-geneve.c
@@ -2,29 +2,20 @@
  * Copyright (c) 2014 Nicira, Inc.
  *
  * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
-#include <linux/version.h>
-
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <linux/net.h>
 #include <linux/rculist.h>
 #include <linux/udp.h>
+#include <linux/if_vlan.h>
+#include <linux/module.h>
 
 #include <net/geneve.h>
 #include <net/icmp.h>
@@ -34,58 +25,15 @@
 #include <net/xfrm.h>
 
 #include "datapath.h"
-#include "gso.h"
 #include "vport.h"
 
-/*
- * Geneve Header:
- *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- *  |Ver|  Opt Len  |O|C|    Rsvd.  |          Protocol Type        |
- *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- *  |        Virtual Network Identifier (VNI)       |    Reserved   |
- *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- *  |                    Variable Length Options                    |
- *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- *
- * Option Header:
- *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- *  |          Option Class         |      Type     |R|R|R| Length  |
- *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- *  |                      Variable Option Data                     |
- *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- */
-
-struct genevehdr {
-#ifdef __LITTLE_ENDIAN_BITFIELD
-	u8 opt_len:6;
-	u8 ver:2;
-	u8 rsvd1:6;
-	u8 critical:1;
-	u8 oam:1;
-#else
-	u8 ver:2;
-	u8 opt_len:6;
-	u8 oam:1;
-	u8 critical:1;
-	u8 rsvd1:6;
-#endif
-	__be16 proto_type;
-	u8 vni[3];
-	u8 rsvd2;
-	struct geneve_opt options[];
-};
-
-#define GENEVE_VER 0
-
-#define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr))
-
 /**
  * struct geneve_port - Keeps track of open UDP ports
- * @sock: The socket created for this port number.
+ * @gs: The socket created for this port number.
  * @name: vport name.
  */
 struct geneve_port {
-	struct socket *sock;
+	struct geneve_sock *gs;
 	char name[IFNAMSIZ];
 };
 
@@ -127,67 +75,16 @@ static __be64 vni_to_tunnel_id(const __u8 *vni)
 #endif
 }
 
-static void geneve_build_header(const struct vport *vport,
-			      struct sk_buff *skb)
-{
-	struct geneve_port *geneve_port = geneve_vport(vport);
-	struct net *net = ovs_dp_get_net(vport->dp);
-	struct udphdr *udph = udp_hdr(skb);
-	struct genevehdr *geneveh = (struct genevehdr *)(udph + 1);
-	const struct ovs_tunnel_info *tun_info = OVS_CB(skb)->egress_tun_info;
-
-	udph->dest = inet_sport(geneve_port->sock->sk);
-	udph->source = udp_flow_src_port(net, skb, 0, 0, true);
-	udph->check = 0;
-	udph->len = htons(skb->len - skb_transport_offset(skb));
-
-	geneveh->ver = GENEVE_VER;
-	geneveh->opt_len = tun_info->options_len / 4;
-	geneveh->oam = !!(tun_info->tunnel.tun_flags & TUNNEL_OAM);
-	geneveh->critical = !!(tun_info->tunnel.tun_flags & TUNNEL_CRIT_OPT);
-	geneveh->rsvd1 = 0;
-	geneveh->proto_type = htons(ETH_P_TEB);
-	tunnel_id_to_vni(tun_info->tunnel.tun_id, geneveh->vni);
-	geneveh->rsvd2 = 0;
-
-	memcpy(geneveh->options, tun_info->options, tun_info->options_len);
-}
-
-static int geneve_rcv(struct sock *sk, struct sk_buff *skb)
+static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb)
 {
-	struct geneve_port *geneve_port;
-	struct genevehdr *geneveh;
+	struct vport *vport = gs->rcv_data;
+	struct genevehdr *geneveh = geneve_hdr(skb);
 	int opts_len;
 	struct ovs_tunnel_info tun_info;
 	__be64 key;
 	__be16 flags;
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0)
-	if (unlikely(udp_lib_checksum_complete(skb)))
-		goto error;
-#endif
-
-	if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN)))
-		goto error;
-
-	geneveh = geneve_hdr(skb);
-
-	if (unlikely(geneveh->ver != GENEVE_VER))
-		goto error;
-
-	if (unlikely(geneveh->proto_type != htons(ETH_P_TEB)))
-		goto error;
-
-	geneve_port = rcu_dereference_sk_user_data(sk);
-	if (unlikely(!geneve_port))
-		goto error;
-
 	opts_len = geneveh->opt_len * 4;
-	if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len,
-				 htons(ETH_P_TEB)))
-		goto error;
-
-	geneveh = geneve_hdr(skb);
 
 	flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT |
 		(udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) |
@@ -195,67 +92,22 @@ static int geneve_rcv(struct sock *sk, struct sk_buff *skb)
 		(geneveh->critical ? TUNNEL_CRIT_OPT : 0);
 
 	key = vni_to_tunnel_id(geneveh->vni);
-	ovs_flow_tun_info_init(&tun_info, ip_hdr(skb),
-				udp_hdr(skb)->source, udp_hdr(skb)->dest,
-				key, flags,
-				geneveh->options, opts_len);
-
-	ovs_vport_receive(vport_from_priv(geneve_port), skb, &tun_info);
-	goto out;
-
-error:
-	kfree_skb(skb);
-out:
-	return 0;
-}
-
-/* Arbitrary value.  Irrelevant as long as it's not 0 since we set the handler. */
-#define UDP_ENCAP_GENEVE 1
-static int geneve_socket_init(struct geneve_port *geneve_port, struct net *net,
-			      __be16 dst_port)
-{
-	struct sockaddr_in sin;
-	int err;
-
-	err = sock_create_kern(AF_INET, SOCK_DGRAM, 0,
-			       &geneve_port->sock);
-	if (err)
-		goto error;
-
-	/* release net ref. */
-	sk_change_net(geneve_port->sock->sk, net);
-
-	sin.sin_family = AF_INET;
-	sin.sin_addr.s_addr = htonl(INADDR_ANY);
-	sin.sin_port = dst_port;
-
-	err = kernel_bind(geneve_port->sock,
-			  (struct sockaddr *)&sin, sizeof(struct sockaddr_in));
-	if (err)
-		goto error_sock;
 
-	rcu_assign_sk_user_data(geneve_port->sock->sk, geneve_port);
-	udp_sk(geneve_port->sock->sk)->encap_type = UDP_ENCAP_GENEVE;
-	udp_sk(geneve_port->sock->sk)->encap_rcv = geneve_rcv;
-
-	udp_encap_enable();
-
-	return 0;
+	ovs_flow_tun_info_init(&tun_info, ip_hdr(skb),
+			       udp_hdr(skb)->source, udp_hdr(skb)->dest,
+			       key, flags,
+			       geneveh->options, opts_len);
 
-error_sock:
-	sk_release_kernel(geneve_port->sock->sk);
-error:
-	pr_warn("cannot register geneve protocol handler: %d\n", err);
-	return err;
+	ovs_vport_receive(vport, skb, &tun_info);
 }
 
 static int geneve_get_options(const struct vport *vport,
 			      struct sk_buff *skb)
 {
 	struct geneve_port *geneve_port = geneve_vport(vport);
+	__be16 dst_port = inet_sport(geneve_port->gs->sock->sk);
 
-	if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT,
-			ntohs(inet_sport(geneve_port->sock->sk))))
+	if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port)))
 		return -EMSGSIZE;
 	return 0;
 }
@@ -264,9 +116,7 @@ static void geneve_tnl_destroy(struct vport *vport)
 {
 	struct geneve_port *geneve_port = geneve_vport(vport);
 
-	/* Release socket */
-	rcu_assign_sk_user_data(geneve_port->sock->sk, NULL);
-	sk_release_kernel(geneve_port->sock->sk);
+	geneve_sock_release(geneve_port->gs);
 
 	ovs_vport_deferred_free(vport);
 }
@@ -276,6 +126,7 @@ static struct vport *geneve_tnl_create(const struct vport_parms *parms)
 	struct net *net = ovs_dp_get_net(parms->dp);
 	struct nlattr *options = parms->options;
 	struct geneve_port *geneve_port;
+	struct geneve_sock *gs;
 	struct vport *vport;
 	struct nlattr *a;
 	int err;
@@ -303,79 +154,40 @@ static struct vport *geneve_tnl_create(const struct vport_parms *parms)
 	geneve_port = geneve_vport(vport);
 	strncpy(geneve_port->name, parms->name, IFNAMSIZ);
 
-	err = geneve_socket_init(geneve_port, net, htons(dst_port));
-	if (err)
-		goto error_free;
+	gs = geneve_sock_add(net, htons(dst_port), geneve_rcv, vport, true, 0);
+	if (IS_ERR(gs)) {
+		ovs_vport_free(vport);
+		return (void *)gs;
+	}
+	geneve_port->gs = gs;
 
 	return vport;
-
-error_free:
-	ovs_vport_free(vport);
 error:
 	return ERR_PTR(err);
 }
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3,12,0)
-
-static void geneve_fix_segment(struct sk_buff *skb)
-{
-	struct udphdr *udph = udp_hdr(skb);
-
-	udph->len = htons(skb->len - skb_transport_offset(skb));
-}
-
-static struct sk_buff *handle_offloads(struct sk_buff *skb)
+static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb)
 {
-	return ovs_iptunnel_handle_offloads(skb, false, geneve_fix_segment);
-}
-#else
-
-static struct sk_buff *handle_offloads(struct sk_buff *skb)
-{
-	int err = 0;
-
-	if (skb_is_gso(skb)) {
-
-		if (skb_is_encapsulated(skb)) {
-			err = -ENOSYS;
-			goto error;
-		}
-
-		err = skb_unclone(skb, GFP_ATOMIC);
-		if (unlikely(err))
-			goto error;
-
-		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
-	} else if (skb->ip_summed != CHECKSUM_PARTIAL)
-		skb->ip_summed = CHECKSUM_NONE;
-
-	skb->encapsulation = 1;
-	return skb;
-error:
-	kfree_skb(skb);
-	return ERR_PTR(err);
-}
-#endif
-
-static int geneve_send(struct vport *vport, struct sk_buff *skb)
-{
-	struct ovs_key_ipv4_tunnel *tun_key;
-	int network_offset = skb_network_offset(skb);
-	struct rtable *rt;
-	int min_headroom;
+	const struct ovs_key_ipv4_tunnel *tun_key;
+	struct ovs_tunnel_info *tun_info;
+	struct net *net = ovs_dp_get_net(vport->dp);
+	struct geneve_port *geneve_port = geneve_vport(vport);
+	__be16 dport = inet_sport(geneve_port->gs->sock->sk);
+	__be16 sport;
 	__be32 saddr;
+	struct rtable *rt;
+	u8 vni[3], opts_len, *opts;
 	__be16 df;
-	int sent_len;
 	int err;
 
-	if (unlikely(!OVS_CB(skb)->egress_tun_info)) {
+	tun_info = OVS_CB(skb)->egress_tun_info;
+	if (unlikely(!tun_info)) {
 		err = -EINVAL;
 		goto error;
 	}
 
-	tun_key = &OVS_CB(skb)->egress_tun_info->tunnel;
+	tun_key = &tun_info->tunnel;
 
-	/* Route lookup */
 	saddr = tun_key->ipv4_src;
 	rt = find_route(ovs_dp_get_net(vport->dp),
 			&saddr, tun_key->ipv4_dst,
@@ -386,66 +198,28 @@ static int geneve_send(struct vport *vport, struct sk_buff *skb)
 		goto error;
 	}
 
-	min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
-			+ GENEVE_BASE_HLEN
-			+ OVS_CB(skb)->egress_tun_info->options_len
-			+ sizeof(struct iphdr)
-			+ (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
-
-	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
-		int head_delta = SKB_DATA_ALIGN(min_headroom -
-						skb_headroom(skb) +
-						16);
-
-		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
-					0, GFP_ATOMIC);
-		if (unlikely(err))
-			goto err_free_rt;
-	}
-
-	if (skb_vlan_tag_present(skb)) {
-		if (unlikely(!vlan_insert_tag_set_proto(skb,
-							skb->vlan_proto,
-							skb_vlan_tag_get(skb)))) {
-			err = -ENOMEM;
-			skb = NULL;
-			goto err_free_rt;
-		}
-		vlan_set_tci(skb, 0);
-	}
-
-	skb_reset_inner_headers(skb);
-
-	__skb_push(skb, GENEVE_BASE_HLEN +
-			OVS_CB(skb)->egress_tun_info->options_len);
-	skb_reset_transport_header(skb);
-
-	geneve_build_header(vport, skb);
-
-	/* Offloading */
-	skb = handle_offloads(skb);
-	if (IS_ERR(skb)) {
-		err = PTR_ERR(skb);
-		skb = NULL;
-		goto err_free_rt;
-	}
-
 	df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
+	sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
+	tunnel_id_to_vni(tun_key->tun_id, vni);
+	skb->ignore_df = 1;
 
-	/* NOTE: If geneve_xmit_skb() is backported, opts may only be passed
-	 * in if TUNNEL_GENEVE_OPT is set, see upstream.
-	 */
-
-	sent_len = iptunnel_xmit(skb->sk, rt, skb,
-			     saddr, tun_key->ipv4_dst,
-			     IPPROTO_UDP, tun_key->ipv4_tos,
-			     tun_key->ipv4_ttl,
-			     df, false);
+	if (tun_key->tun_flags & TUNNEL_GENEVE_OPT) {
+		opts = (u8 *)tun_info->options;
+		opts_len = tun_info->options_len;
+	} else {
+		opts = NULL;
+		opts_len = 0;
+	}
 
-	return sent_len > 0 ? sent_len + network_offset : sent_len;
+	err = geneve_xmit_skb(geneve_port->gs, rt, skb, saddr,
+			      tun_key->ipv4_dst, tun_key->ipv4_tos,
+			      tun_key->ipv4_ttl, df, sport, dport,
+			      tun_key->tun_flags, vni, opts_len, opts,
+			      !!(tun_key->tun_flags & TUNNEL_CSUM), false);
+	if (err < 0)
+		ip_rt_put(rt);
+	return err;
 
-err_free_rt:
-	ip_rt_put(rt);
 error:
 	kfree_skb(skb);
 	return err;
@@ -454,6 +228,7 @@ error:
 static const char *geneve_get_name(const struct vport *vport)
 {
 	struct geneve_port *geneve_port = geneve_vport(vport);
+
 	return geneve_port->name;
 }
 
@@ -462,17 +237,15 @@ static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
 {
 	struct geneve_port *geneve_port = geneve_vport(vport);
 	struct net *net = ovs_dp_get_net(vport->dp);
+	__be16 dport = inet_sport(geneve_port->gs->sock->sk);
+	__be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
 
-	/*
-	 * Get tp_src and tp_dst, refert to geneve_build_header().
+	/* Get tp_src and tp_dst, refer to geneve_tnl_send().
 	 */
 	return ovs_tunnel_get_egress_info(egress_tun_info,
 					  ovs_dp_get_net(vport->dp),
 					  OVS_CB(skb)->egress_tun_info,
-					  IPPROTO_UDP, skb->mark,
-					  udp_flow_src_port(net, skb, 0, 0, true),
-					  inet_sport(geneve_port->sock->sk));
-
+					  IPPROTO_UDP, skb->mark, sport, dport);
 }
 
 const struct vport_ops ovs_geneve_vport_ops = {
@@ -481,6 +254,6 @@ const struct vport_ops ovs_geneve_vport_ops = {
 	.destroy		= geneve_tnl_destroy,
 	.get_name		= geneve_get_name,
 	.get_options		= geneve_get_options,
-	.send			= geneve_send,
+	.send			= geneve_tnl_send,
 	.get_egress_tun_info	= geneve_get_egress_tun_info,
 };
-- 
1.9.1




More information about the dev mailing list