[ovs-dev] [PATCH 1/8] nsh: datapath support for network service headers

Jarno Rajahalme jrajahalme at nicira.com
Tue Oct 1 21:44:52 UTC 2013


On Sep 20, 2013, at 1:04 AM, pritesh <pritesh.kothari at cisco.com> wrote:

> This patch adds support for Network Service Headers (nsh) over VXLAN
> as mentioned in [1]. Here changes are made to datapath to add nsh
> headers whenever a vxlan port with destination port as 9030 is created.
> IANA port allocation for nsh over vxlan is yet to be done.
> 
> [1] http://tools.ietf.org/html/draft-quinn-nsh-01
> 
> Signed-off-by: pritesh <pritesh.kothari at cisco.com>
> 
> 
...
> diff --git a/datapath/flow.c b/datapath/flow.c
> index 29122af..4f47a48 100644
> --- a/datapath/flow.c
> +++ b/datapath/flow.c
> @@ -1235,6 +1235,7 @@ int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
> 	int rem;
> 	bool ttl = false;
> 	__be16 tun_flags = 0;
> +	__be32 nsp = 0;
> 
> 	nla_for_each_nested(a, attr, rem) {
> 		int type = nla_type(a);
> @@ -1246,6 +1247,7 @@ int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
> 			[OVS_TUNNEL_KEY_ATTR_TTL] = 1,
> 			[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
> 			[OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
> +			[OVS_TUNNEL_KEY_ATTR_NSP] = sizeof(u32),
> 		};
> 
> 		if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
> @@ -1290,11 +1292,16 @@ int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
> 		case OVS_TUNNEL_KEY_ATTR_CSUM:
> 			tun_flags |= TUNNEL_CSUM;
> 			break;
> +		case OVS_TUNNEL_KEY_ATTR_NSP:
> +			nsp |= htonl(be32_to_cpu(nla_get_be32(a)) << 8);

Is the "|=" operator intentional here? If there were two of these attributes present,
is the intention to OR them together?

> +			tun_flags |= TUNNEL_NSP;
> +			break;
> 		default:
> 			return -EINVAL;
> 		}
> 	}
> 
...
> diff --git a/datapath/flow.h b/datapath/flow.h
> index 03eae03..b316e0a 100644
> --- a/datapath/flow.h
> +++ b/datapath/flow.h
> @@ -51,6 +51,7 @@ struct sw_flow_actions {
> 
> struct ovs_key_ipv4_tunnel {
> 	__be64 tun_id;
> +	__be32 nsp;

I'd like a comment here mentioning the storage if nsi in place of the reserved
bits.

> 	__be32 ipv4_src;
> 	__be32 ipv4_dst;
> 	__be16 tun_flags;
> @@ -60,9 +61,10 @@ struct ovs_key_ipv4_tunnel {
> 
> 
...
> --- /dev/null
> +++ b/datapath/linux/compat/include/net/nsh.h
> @@ -0,0 +1,86 @@
> +/*
> + * Copyright (c) 2013 Cisco Systems, Inc.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of version 2 of the GNU General Public
> + * License as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> + * 02110-1301, USA
> + */
> +
> +#ifndef NSH_H
> +#define NSH_H 1
> +
> +#include <linux/types.h>
> +#include <asm/byteorder.h>
> +
> +
> +/**
> + * struct nsh_bhdr - Network Service Base Header.
> + * @o: Operations and Management Packet indicator bit
> + * @c: If this bit is set then one or more contexts are in use.
> + * @proto: IEEE Ethertypes to indicate the frame within.
> + * @svc_idx: TTL functionality and location within service path.
> + * @svc_path: To uniquely identify service path.
> + */
> +struct nsh_base {
> +#if defined(__LITTLE_ENDIAN_BITFIELD)
> +	__u8	res:6,
> +		c:1,
> +		o:1;
> +#elif defined(__BIG_ENDIAN_BITFIELD)
> +	__u8	o:1,
> +		c:1,
> +		res:6;
> +#else
> +#error "Bitfield Endianess not defined."
> +#endif
> +	__be16	proto;

I know this is not your call, but I find it weird that they chose to place 
the proto field literally in the middle of a 32-bit field. Taking this set
of fields it would be trivial to come up with a more logical order of fields.

> +	__u8	svc_idx;
> +	__be32	svc_path;

Since the path is only the highest 24 bits, this would benefit from a comment.

> +}__attribute__((packed));
> +
> +/**
> + * struct nsh_ctx - Keeps track of NSH context data
> + * @npc: NSH network platform context
> + * @nsc: NSH network shared context
> + * @spc: NSH service platform context
> + * @ssc: NSH service shared context
> + */
> +struct nsh_ctx {
> +	__be32 npc;
> +	__be32 nsc;
> +	__be32 spc;
> +	__be32 ssc;
> +};
> +
> +/**
> + * struct nshdr - Network Service header
> + * @nsh_base: Network Service Base Header.
> + * @nsh_ctx: Network Service Context Header.
> + */
> +struct nshhdr {
> +	struct nsh_base b;
> +	struct nsh_ctx c;
> +};
> +
> +
> +/* NSH Header Length */
> +#define NSH_HLEN (sizeof(struct udphdr) + \
> +		  sizeof(struct vxlanhdr) + \
> +		  sizeof(struct nshhdr))

Is this named correctly and in the right place? I guess this header should
not be vxlan dependent.

> +#define NSH_DST_PORT	9030   /* UDP Port for NSH on VXLAN */
> +#define NSH_P_TEB	0x6558 /* Transparent Ethernet Bridging */
> +#define NSH_M_NSP	0xFFFFFF00
> +#define NSH_M_NSI	0x000000FF

The masks are a bit confusing, as reading this header alone does not tell
that they are actually about the way nsp and nsi are stored within the tunnel
key.
I think these should be in flow.h right after the tunnel key struct rather than
here.

> +
> +
> +#endif /* nsh.h */
> diff --git a/datapath/linux/compat/include/net/vxlan.h b/datapath/linux/compat/include/net/vxlan.h
> index 3ac816b..1c15dfb 100644
> --- a/datapath/linux/compat/include/net/vxlan.h
> +++ b/datapath/linux/compat/include/net/vxlan.h
> @@ -4,9 +4,11 @@
> #include <linux/skbuff.h>
> #include <linux/netdevice.h>
> #include <linux/udp.h>
> +#include <net/nsh.h>
> 
> struct vxlan_sock;
> -typedef void (vxlan_rcv_t)(struct vxlan_sock *vs, struct sk_buff *skb, __be32 key);
> +typedef void (vxlan_rcv_t)(struct vxlan_sock *vs, struct sk_buff *skb,
> +			   __be32 key, __be32 nsp);
> 
> /* per UDP socket information */
> struct vxlan_sock {
> @@ -27,7 +29,7 @@ void vxlan_sock_release(struct vxlan_sock *vs);
> int vxlan_xmit_skb(struct vxlan_sock *vs,
> 		   struct rtable *rt, struct sk_buff *skb,
> 		   __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
> -		   __be16 src_port, __be16 dst_port, __be32 vni);
> +		   __be16 src_port, __be16 dst_port, __be32 vni, __be32 nsp);

I'm not exactly sure on the policy about the compat files, but I think they
should be about compatibility with older kernels. Hence I'm not sure if
this is the right place to introduce new functionality. Could someone
comment on this?

Also, I'd like to see NSH implemented in a bit more general fashion, so
that it could be also used on top of GRE, like the examples in the RFC show.

> 
> __be16 vxlan_src_port(__u16 port_min, __u16 port_max, struct sk_buff *skb);
> 
> diff --git a/datapath/linux/compat/vxlan.c b/datapath/linux/compat/vxlan.c
> index 4f7671b..8a6d864 100644
> --- a/datapath/linux/compat/vxlan.c
> +++ b/datapath/linux/compat/vxlan.c
> @@ -50,6 +50,7 @@
> #include <net/net_namespace.h>
> #include <net/netns/generic.h>
> #include <net/vxlan.h>
> +#include <net/nsh.h>
> 
> #include "compat.h"
> #include "gso.h"
> @@ -89,6 +90,16 @@ static inline struct hlist_head *vs_head(struct net *net, __be16 port)
> 	return &vn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)];
> }
> 
> +static inline struct vxlanhdr *vxlan_hdr(const struct sk_buff *skb)
> +{
> +	return (struct vxlanhdr *)(udp_hdr(skb) + 1);
> +}
> +
> +static inline struct nshhdr *nsh_hdr(const struct sk_buff *skb)
> +{
> +	return (struct nshhdr *)(vxlan_hdr(skb) + 1);
> +}
> +
> /* Find VXLAN socket based on network namespace and UDP port */
> 
> static struct vxlan_sock *vxlan_find_sock(struct net *net, __be16 port)
> @@ -107,13 +118,20 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
> {
> 	struct vxlan_sock *vs;
> 	struct vxlanhdr *vxh;
> +	struct udphdr *udp;
> +	bool isnsh = false;
> +	__be32 nsp = 0;
> +
> +	udp = (struct udphdr *)udp_hdr(skb);
> +	if (udp->dest == htons(NSH_DST_PORT))
> +		isnsh = true;
> 
> 	/* Need Vxlan and inner Ethernet header to be present */
> -	if (!pskb_may_pull(skb, VXLAN_HLEN))
> +	if (!pskb_may_pull(skb, isnsh ? NSH_HLEN : VXLAN_HLEN))
> 		goto error;
> 
> 	/* Return packets with reserved bits set */
> -	vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1);
> +	vxh = vxlan_hdr(skb);
> 	if (vxh->vx_flags != htonl(VXLAN_FLAGS) ||
> 	    (vxh->vx_vni & htonl(0xff))) {
> 		pr_warn("invalid vxlan flags=%#x vni=%#x\n",
> @@ -121,14 +139,32 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
> 		goto error;
> 	}
> 
> -	if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB)))
> +	if (isnsh) {
> +		struct nshhdr *nsh = nsh_hdr(skb);
> +
> +		if (unlikely(nsh->b.svc_idx == 0)) {
> +			pr_warn("NSH service index reached zero\n");
> +			goto drop;
> +		}
> +
> +		if (unlikely(nsh->b.svc_path & htonl(NSH_M_NSI))) {

I guess you should use ~NSH_M_NSP here instead? In the end it is the same,
but it would be less confusing, as the b.svc_path field does not contain an NSI.
Also, the usual treatment of reserved bits is ignore on reception, rather than
dropping.

> +			pr_warn("invalid NSH service path=%#x\n",
> +					ntohl(nsh->b.svc_path));
> +			goto drop;
> +		}
> +
> +		nsp = nsh->b.svc_path | htonl(nsh->b.svc_idx);
> +	}
> +
...
> 
> --- a/include/linux/openvswitch.h
> +++ b/include/linux/openvswitch.h
> @@ -304,6 +304,7 @@ enum ovs_tunnel_key_attr {
> 	OVS_TUNNEL_KEY_ATTR_TTL,		/* u8 Tunnel IP TTL. */
> 	OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT,	/* No argument, set DF. */
> 	OVS_TUNNEL_KEY_ATTR_CSUM,		/* No argument. CSUM packet. */
> +	OVS_TUNNEL_KEY_ATTR_NSP,		/* be32 NSH service path */

Should state here that only lowest 24 bits are used.

> 	__OVS_TUNNEL_KEY_ATTR_MAX
> };
> 
> -- 
> 1.7.9.5
> 
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev




More information about the dev mailing list