[ovs-dev] [PATCH 1/8] nsh: datapath support for network service headers

pritesh pritesh.kothari at cisco.com
Fri Sep 20 08:04:18 UTC 2013


This patch adds support for Network Service Headers (NSH) over VXLAN,
as described in [1]. The datapath is changed to add NSH headers to
packets sent on a VXLAN port whose destination port is 9030.
IANA has not yet allocated a port for NSH over VXLAN.

[1] http://tools.ietf.org/html/draft-quinn-nsh-01

Signed-off-by: pritesh <pritesh.kothari at cisco.com>

 create mode 100644 datapath/linux/compat/include/net/nsh.h

diff --git a/datapath/datapath.c b/datapath/datapath.c
index 4defcdb..285b571 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -369,6 +369,7 @@ static size_t key_attr_size(void)
 		  + nla_total_size(1)   /* OVS_TUNNEL_KEY_ATTR_TTL */
 		  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
 		  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_CSUM */
+		  + nla_total_size(4)   /* OVS_TUNNEL_KEY_ATTR_NSP */
 		+ nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */
 		+ nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */
 		+ nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
diff --git a/datapath/flow.c b/datapath/flow.c
index 29122af..4f47a48 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -1235,6 +1235,7 @@ int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
 	int rem;
 	bool ttl = false;
 	__be16 tun_flags = 0;
+	__be32 nsp = 0;
 
 	nla_for_each_nested(a, attr, rem) {
 		int type = nla_type(a);
@@ -1246,6 +1247,7 @@ int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
 			[OVS_TUNNEL_KEY_ATTR_TTL] = 1,
 			[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
 			[OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
+			[OVS_TUNNEL_KEY_ATTR_NSP] = sizeof(u32),
 		};
 
 		if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
@@ -1290,11 +1292,16 @@ int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
 		case OVS_TUNNEL_KEY_ATTR_CSUM:
 			tun_flags |= TUNNEL_CSUM;
 			break;
+		case OVS_TUNNEL_KEY_ATTR_NSP:
+			nsp |= htonl(be32_to_cpu(nla_get_be32(a)) << 8);
+			tun_flags |= TUNNEL_NSP;
+			break;
 		default:
 			return -EINVAL;
 		}
 	}
 
+	SW_FLOW_KEY_PUT(match, tun_key.nsp, nsp, is_mask);
 	SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
 
 	if (rem > 0) {
@@ -1322,6 +1329,7 @@ int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb,
 			   const struct ovs_key_ipv4_tunnel *output)
 {
 	struct nlattr *nla;
+	__be32 nsp = cpu_to_be32(ntohl(output->nsp) >> 8);
 
 	nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
 	if (!nla)
@@ -1347,6 +1355,9 @@ int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb,
 	if ((output->tun_flags & TUNNEL_CSUM) &&
 		nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
 		return -EMSGSIZE;
+	if (output->tun_flags & TUNNEL_NSP &&
+	    nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_NSP, nsp))
+		return -EMSGSIZE;
 
 	nla_nest_end(skb, nla);
 	return 0;
diff --git a/datapath/flow.h b/datapath/flow.h
index 03eae03..b316e0a 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -51,6 +51,7 @@ struct sw_flow_actions {
 
 struct ovs_key_ipv4_tunnel {
 	__be64 tun_id;
+	__be32 nsp;
 	__be32 ipv4_src;
 	__be32 ipv4_dst;
 	__be16 tun_flags;
@@ -60,9 +61,10 @@ struct ovs_key_ipv4_tunnel {
 
 static inline void ovs_flow_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key,
 					 const struct iphdr *iph, __be64 tun_id,
-					 __be16 tun_flags)
+					 __be32 nsp, __be16 tun_flags)
 {
 	tun_key->tun_id = tun_id;
+	tun_key->nsp = nsp;
 	tun_key->ipv4_src = iph->saddr;
 	tun_key->ipv4_dst = iph->daddr;
 	tun_key->ipv4_tos = iph->tos;
diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk
index 057e1d5..7e0acce 100644
--- a/datapath/linux/Modules.mk
+++ b/datapath/linux/Modules.mk
@@ -65,4 +65,5 @@ openvswitch_headers += \
 	linux/compat/include/net/net_namespace.h \
 	linux/compat/include/net/netlink.h \
 	linux/compat/include/net/vxlan.h \
+	linux/compat/include/net/nsh.h \
 	linux/compat/include/net/sctp/checksum.h
diff --git a/datapath/linux/compat/include/net/ip_tunnels.h b/datapath/linux/compat/include/net/ip_tunnels.h
index a786aa9..a4aec4d 100644
--- a/datapath/linux/compat/include/net/ip_tunnels.h
+++ b/datapath/linux/compat/include/net/ip_tunnels.h
@@ -20,6 +20,7 @@
 #define TUNNEL_VERSION	__cpu_to_be16(0x40)
 #define TUNNEL_NO_KEY	__cpu_to_be16(0x80)
 #define TUNNEL_DONT_FRAGMENT	__cpu_to_be16(0x0100)
+#define TUNNEL_NSP	__cpu_to_be16(0x0200)
 
 struct tnl_ptk_info {
 	__be16 flags;
diff --git a/datapath/linux/compat/include/net/nsh.h b/datapath/linux/compat/include/net/nsh.h
new file mode 100644
index 0000000..bd4d7fb
--- /dev/null
+++ b/datapath/linux/compat/include/net/nsh.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2013 Cisco Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef NSH_H
+#define NSH_H 1
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+
+/**
+ * struct nsh_base - Network Service Base Header.
+ * @o: Operations and Management packet indicator bit.
+ * @c: If this bit is set then one or more contexts are in use.
+ * @proto: IEEE EtherType of the encapsulated frame.
+ * @svc_idx: TTL functionality and location within the service path.
+ * @svc_path: Uniquely identifies the service path.
+ */
+struct nsh_base {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	__u8	res:6,
+		c:1,
+		o:1;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+	__u8	o:1,
+		c:1,
+		res:6;
+#else
+#error "Bitfield Endianess not defined."
+#endif
+	__be16	proto;
+	__u8	svc_idx;
+	__be32	svc_path;
+}__attribute__((packed));
+
+/**
+ * struct nsh_ctx - Keeps track of NSH context data
+ * @npc: NSH network platform context
+ * @nsc: NSH network shared context
+ * @spc: NSH service platform context
+ * @ssc: NSH service shared context
+ */
+struct nsh_ctx {
+	__be32 npc;
+	__be32 nsc;
+	__be32 spc;
+	__be32 ssc;
+};
+
+/**
+ * struct nshhdr - Network Service Header
+ * @b: Network Service Base Header.
+ * @c: Network Service Context Header.
+ */
+struct nshhdr {
+	struct nsh_base b;
+	struct nsh_ctx c;
+};
+
+
+/* NSH Header Length */
+#define NSH_HLEN (sizeof(struct udphdr) + \
+		  sizeof(struct vxlanhdr) + \
+		  sizeof(struct nshhdr))
+#define NSH_DST_PORT	9030   /* UDP Port for NSH on VXLAN */
+#define NSH_P_TEB	0x6558 /* Transparent Ethernet Bridging */
+#define NSH_M_NSP	0xFFFFFF00
+#define NSH_M_NSI	0x000000FF
+
+
+#endif /* nsh.h */
diff --git a/datapath/linux/compat/include/net/vxlan.h b/datapath/linux/compat/include/net/vxlan.h
index 3ac816b..1c15dfb 100644
--- a/datapath/linux/compat/include/net/vxlan.h
+++ b/datapath/linux/compat/include/net/vxlan.h
@@ -4,9 +4,11 @@
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 #include <linux/udp.h>
+#include <net/nsh.h>
 
 struct vxlan_sock;
-typedef void (vxlan_rcv_t)(struct vxlan_sock *vs, struct sk_buff *skb, __be32 key);
+typedef void (vxlan_rcv_t)(struct vxlan_sock *vs, struct sk_buff *skb,
+			   __be32 key, __be32 nsp);
 
 /* per UDP socket information */
 struct vxlan_sock {
@@ -27,7 +29,7 @@ void vxlan_sock_release(struct vxlan_sock *vs);
 int vxlan_xmit_skb(struct vxlan_sock *vs,
 		   struct rtable *rt, struct sk_buff *skb,
 		   __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
-		   __be16 src_port, __be16 dst_port, __be32 vni);
+		   __be16 src_port, __be16 dst_port, __be32 vni, __be32 nsp);
 
 __be16 vxlan_src_port(__u16 port_min, __u16 port_max, struct sk_buff *skb);
 
diff --git a/datapath/linux/compat/vxlan.c b/datapath/linux/compat/vxlan.c
index 4f7671b..8a6d864 100644
--- a/datapath/linux/compat/vxlan.c
+++ b/datapath/linux/compat/vxlan.c
@@ -50,6 +50,7 @@
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 #include <net/vxlan.h>
+#include <net/nsh.h>
 
 #include "compat.h"
 #include "gso.h"
@@ -89,6 +90,16 @@ static inline struct hlist_head *vs_head(struct net *net, __be16 port)
 	return &vn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)];
 }
 
+static inline struct vxlanhdr *vxlan_hdr(const struct sk_buff *skb)
+{
+	return (struct vxlanhdr *)(udp_hdr(skb) + 1);
+}
+
+static inline struct nshhdr *nsh_hdr(const struct sk_buff *skb)
+{
+	return (struct nshhdr *)(vxlan_hdr(skb) + 1);
+}
+
 /* Find VXLAN socket based on network namespace and UDP port */
 
 static struct vxlan_sock *vxlan_find_sock(struct net *net, __be16 port)
@@ -107,13 +118,20 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 {
 	struct vxlan_sock *vs;
 	struct vxlanhdr *vxh;
+	struct udphdr *udp;
+	bool isnsh = false;
+	__be32 nsp = 0;
+
+	udp = (struct udphdr *)udp_hdr(skb);
+	if (udp->dest == htons(NSH_DST_PORT))
+		isnsh = true;
 
 	/* Need Vxlan and inner Ethernet header to be present */
-	if (!pskb_may_pull(skb, VXLAN_HLEN))
+	if (!pskb_may_pull(skb, isnsh ? NSH_HLEN : VXLAN_HLEN))
 		goto error;
 
 	/* Return packets with reserved bits set */
-	vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1);
+	vxh = vxlan_hdr(skb);
 	if (vxh->vx_flags != htonl(VXLAN_FLAGS) ||
 	    (vxh->vx_vni & htonl(0xff))) {
 		pr_warn("invalid vxlan flags=%#x vni=%#x\n",
@@ -121,14 +139,32 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 		goto error;
 	}
 
-	if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB)))
+	if (isnsh) {
+		struct nshhdr *nsh = nsh_hdr(skb);
+
+		if (unlikely(nsh->b.svc_idx == 0)) {
+			pr_warn("NSH service index reached zero\n");
+			goto drop;
+		}
+
+		if (unlikely(nsh->b.svc_path & htonl(NSH_M_NSI))) {
+			pr_warn("invalid NSH service path=%#x\n",
+					ntohl(nsh->b.svc_path));
+			goto drop;
+		}
+
+		nsp = nsh->b.svc_path | htonl(nsh->b.svc_idx);
+	}
+
+	if (iptunnel_pull_header(skb, isnsh ? NSH_HLEN : VXLAN_HLEN,
+				 htons(ETH_P_TEB)))
 		goto drop;
 
 	vs = vxlan_find_sock(sock_net(sk), inet_sport(sk));
 	if (!vs)
 		goto drop;
 
-	vs->rcv(vs, skb, vxh->vx_vni);
+	vs->rcv(vs, skb, vxh->vx_vni, nsp);
 	return 0;
 
 drop:
@@ -212,8 +248,9 @@ static int handle_offloads(struct sk_buff *skb)
 int vxlan_xmit_skb(struct vxlan_sock *vs,
 		   struct rtable *rt, struct sk_buff *skb,
 		   __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
-		   __be16 src_port, __be16 dst_port, __be32 vni)
+		   __be16 src_port, __be16 dst_port, __be32 vni, __be32 nsp)
 {
+	bool isnsh = (dst_port == htons(NSH_DST_PORT));
 	struct vxlanhdr *vxh;
 	struct udphdr *uh;
 	int min_headroom;
@@ -222,7 +259,7 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
 	skb_reset_inner_headers(skb);
 
 	min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
-			+ VXLAN_HLEN + sizeof(struct iphdr)
+			+ (isnsh ? NSH_HLEN : VXLAN_HLEN) + sizeof(struct iphdr)
 			+ (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
 
 	/* Need space for new headers (invalidates iph ptr) */
@@ -239,6 +276,20 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
 		vlan_set_tci(skb, 0);
 	}
 
+	if (isnsh) {
+		struct nshhdr *nsh;
+		uint8_t nsi = ntohl(nsp) & NSH_M_NSI;
+
+		nsh = (struct nshhdr *) __skb_push(skb, sizeof(*nsh));
+		nsh->b.o = 0;
+		nsh->b.res = 0;
+		nsh->b.svc_idx = nsi ? nsi : 0x01;
+		nsh->b.proto = htons(NSH_P_TEB);
+		nsh->b.svc_path = nsp & htonl(NSH_M_NSP);
+		nsh->b.c = 0;
+		memset(&nsh->c, 0x00, sizeof nsh->c);
+	}
+
 	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
 	vxh->vx_flags = htonl(VXLAN_FLAGS);
 	vxh->vx_vni = vni;
diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c
index b6c1d6f..139fe17 100644
--- a/datapath/vport-gre.c
+++ b/datapath/vport-gre.c
@@ -112,7 +112,8 @@ static int gre_rcv(struct sk_buff *skb,
 		return PACKET_REJECT;
 
 	key = key_to_tunnel_id(tpi->key, tpi->seq);
-	ovs_flow_tun_key_init(&tun_key, ip_hdr(skb), key, filter_tnl_flags(tpi->flags));
+	ovs_flow_tun_key_init(&tun_key, ip_hdr(skb), key, 0,
+			      filter_tnl_flags(tpi->flags));
 
 	ovs_vport_receive(vport, skb, &tun_key);
 	return PACKET_RCVD;
diff --git a/datapath/vport-lisp.c b/datapath/vport-lisp.c
index e4e603f..77df084 100644
--- a/datapath/vport-lisp.c
+++ b/datapath/vport-lisp.c
@@ -232,7 +232,7 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb)
 
 	/* Save outer tunnel values */
 	iph = ip_hdr(skb);
-	ovs_flow_tun_key_init(&tun_key, iph, key, TUNNEL_KEY);
+	ovs_flow_tun_key_init(&tun_key, iph, key, 0, TUNNEL_KEY);
 
 	/* Drop non-IP inner packets */
 	inner_iph = (struct iphdr *)(lisph + 1);
diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c
index 3401dfd..c5d1b5a 100644
--- a/datapath/vport-vxlan.c
+++ b/datapath/vport-vxlan.c
@@ -59,7 +59,8 @@ static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
 }
 
 /* Called with rcu_read_lock and BH disabled. */
-static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
+static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
+		      __be32 vx_vni, __be32 nsp)
 {
 	struct ovs_key_ipv4_tunnel tun_key;
 	struct vport *vport = vs->data;
@@ -69,7 +70,8 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
 	/* Save outer tunnel values */
 	iph = ip_hdr(skb);
 	key = cpu_to_be64(ntohl(vx_vni) >> 8);
-	ovs_flow_tun_key_init(&tun_key, iph, key, TUNNEL_KEY);
+	ovs_flow_tun_key_init(&tun_key, iph, key, nsp,
+			      TUNNEL_KEY | TUNNEL_NSP);
 
 	ovs_vport_receive(vport, skb, &tun_key);
 }
@@ -181,7 +183,8 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
 			     OVS_CB(skb)->tun_key->ipv4_tos,
 			     OVS_CB(skb)->tun_key->ipv4_ttl, df,
 			     src_port, dst_port,
-			     htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8));
+			     htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8),
+			     OVS_CB(skb)->tun_key->nsp);
 	if (err < 0)
 		ip_rt_put(rt);
 error:
diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
index 09c26b5..6239400 100644
--- a/include/linux/openvswitch.h
+++ b/include/linux/openvswitch.h
@@ -304,6 +304,7 @@ enum ovs_tunnel_key_attr {
 	OVS_TUNNEL_KEY_ATTR_TTL,		/* u8 Tunnel IP TTL. */
 	OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT,	/* No argument, set DF. */
 	OVS_TUNNEL_KEY_ATTR_CSUM,		/* No argument. CSUM packet. */
+	OVS_TUNNEL_KEY_ATTR_NSP,		/* be32 NSH service path */
 	__OVS_TUNNEL_KEY_ATTR_MAX
 };
 
-- 
1.7.9.5




More information about the dev mailing list