[ovs-dev] [PATCH v2 1/7] nsh: datapath support for network service headers
Pritesh Kothari
pritesh.kothari at cisco.com
Tue Feb 25 23:44:14 UTC 2014
This patch adds support for Network Service Headers (NSH) over VXLAN,
as described in [1]. The datapath is changed to add NSH headers
whenever a VXLAN port is created with destination port 6633 (which is
the IANA-allocated port for NSH over VXLAN).
[1] http://tools.ietf.org/html/draft-quinn-sfc-nsh-02
Signed-off-by: Pritesh Kothari <pritesh.kothari at cisco.com>
create mode 100644 datapath/linux/compat/include/net/nsh.h
diff --git a/datapath/datapath.c b/datapath/datapath.c
index f7c3391..b96ad1e 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -361,6 +361,7 @@ static size_t key_attr_size(void)
+ nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */
+ + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_NSP */
+ nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */
+ nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */
+ nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
diff --git a/datapath/flow.h b/datapath/flow.h
index 270a324..6a342be 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -41,9 +41,13 @@ struct sk_buff;
#define OVS_TUNNEL_KEY_SIZE \
(offsetof(struct ovs_key_ipv4_tunnel, ipv4_ttl) + \
FIELD_SIZEOF(struct ovs_key_ipv4_tunnel, ipv4_ttl))
+/* Used for masking nsp and nsi values in field nsp below */
+#define NSH_M_NSP 0xFFFFFF00
+#define NSH_M_NSI 0x000000FF
struct ovs_key_ipv4_tunnel {
__be64 tun_id;
+ __be32 nsp; /* it contains (nsp - 24 bits | nsi - 8 bits) here */
__be32 ipv4_src;
__be32 ipv4_dst;
__be16 tun_flags;
@@ -53,9 +57,10 @@ struct ovs_key_ipv4_tunnel {
static inline void ovs_flow_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key,
const struct iphdr *iph, __be64 tun_id,
- __be16 tun_flags)
+ __be32 nsp, __be16 tun_flags)
{
tun_key->tun_id = tun_id;
+ tun_key->nsp = nsp;
tun_key->ipv4_src = iph->saddr;
tun_key->ipv4_dst = iph->daddr;
tun_key->ipv4_tos = iph->tos;
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
index 40751cb..a2bc1e9 100644
--- a/datapath/flow_netlink.c
+++ b/datapath/flow_netlink.c
@@ -333,6 +333,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
int rem;
bool ttl = false;
__be16 tun_flags = 0;
+ __be32 nsp = 0;
nla_for_each_nested(a, attr, rem) {
int type = nla_type(a);
@@ -344,6 +345,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
[OVS_TUNNEL_KEY_ATTR_TTL] = 1,
[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
[OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
+ [OVS_TUNNEL_KEY_ATTR_NSP] = sizeof(u32),
};
if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
@@ -388,11 +390,16 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
case OVS_TUNNEL_KEY_ATTR_CSUM:
tun_flags |= TUNNEL_CSUM;
break;
+ case OVS_TUNNEL_KEY_ATTR_NSP:
+ nsp = htonl(be32_to_cpu(nla_get_be32(a)) << 8);
+ tun_flags |= TUNNEL_NSP;
+ break;
default:
return -EINVAL;
}
}
+ SW_FLOW_KEY_PUT(match, tun_key.nsp, nsp, is_mask);
SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
if (rem > 0) {
@@ -420,6 +427,7 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb,
const struct ovs_key_ipv4_tunnel *output)
{
struct nlattr *nla;
+ __be32 nsp = cpu_to_be32(ntohl(output->nsp) >> 8);
nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
if (!nla)
@@ -445,6 +453,9 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb,
if ((output->tun_flags & TUNNEL_CSUM) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
return -EMSGSIZE;
+ if (output->tun_flags & TUNNEL_NSP &&
+ nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_NSP, nsp))
+ return -EMSGSIZE;
nla_nest_end(skb, nla);
return 0;
diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk
index cedb8c9..94910b9 100644
--- a/datapath/linux/Modules.mk
+++ b/datapath/linux/Modules.mk
@@ -71,4 +71,5 @@ openvswitch_headers += \
linux/compat/include/net/netlink.h \
linux/compat/include/net/sock.h \
linux/compat/include/net/vxlan.h \
+ linux/compat/include/net/nsh.h \
linux/compat/include/net/sctp/checksum.h
diff --git a/datapath/linux/compat/include/net/ip_tunnels.h b/datapath/linux/compat/include/net/ip_tunnels.h
index a786aa9..a4aec4d 100644
--- a/datapath/linux/compat/include/net/ip_tunnels.h
+++ b/datapath/linux/compat/include/net/ip_tunnels.h
@@ -20,6 +20,7 @@
#define TUNNEL_VERSION __cpu_to_be16(0x40)
#define TUNNEL_NO_KEY __cpu_to_be16(0x80)
#define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100)
+#define TUNNEL_NSP __cpu_to_be16(0x0200)
struct tnl_ptk_info {
__be16 flags;
diff --git a/datapath/linux/compat/include/net/nsh.h b/datapath/linux/compat/include/net/nsh.h
new file mode 100644
index 0000000..bdc81b4
--- /dev/null
+++ b/datapath/linux/compat/include/net/nsh.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2013, 2014 Cisco Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef NSH_H
+#define NSH_H 1
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+
+/**
+ * struct nsh_base - Network Service Base Header.
+ * @o: Operations and Management Packet indicator bit
+ * @c: If this bit is set then one or more contexts are in use.
+ * @proto: IEEE Ethertype indicating the type of the frame within.
+ * @svc_idx: TTL functionality and location within service path.
+ * @svc_path: Three bytes that uniquely identify the service path.
+ */
+struct nsh_base {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u8 res:6,
+ c:1,
+ o:1;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ __u8 o:1,
+ c:1,
+ res:6;
+#else
+#error "Bitfield Endianess not defined."
+#endif
+ __u8 res1;
+ __be16 proto;
+ union {
+ struct {
+ __u8 svc_path[3];
+ __u8 svc_idx;
+ };
+ __be32 b2; /* svc_path and svc_idx viewed as one 32-bit word */
+ };
+};
+
+/**
+ * struct nsh_ctx - NSH context data, four 32-bit big-endian words
+ * @npc: NSH network platform context
+ * @nsc: NSH network shared context
+ * @spc: NSH service platform context
+ * @ssc: NSH service shared context
+ */
+struct nsh_ctx {
+ __be32 npc;
+ __be32 nsc;
+ __be32 spc;
+ __be32 ssc;
+};
+
+/**
+ * struct nshhdr - Network Service header
+ * @b: Network Service Base Header.
+ * @c: Network Service Context Header.
+ */
+struct nshhdr {
+ struct nsh_base b;
+ struct nsh_ctx c;
+};
+
+
+#define ETH_P_NSH 0x894F /* Ethertype for NSH */
+#define NSH_P_TEB 0x6558 /* Transparent Ethernet Bridging */
+#define NSH_DST_PORT 6633 /* UDP Port for NSH on VXLAN */
+
+
+#endif /* nsh.h */
diff --git a/datapath/linux/compat/include/net/vxlan.h b/datapath/linux/compat/include/net/vxlan.h
index 3ac816b..1c15dfb 100644
--- a/datapath/linux/compat/include/net/vxlan.h
+++ b/datapath/linux/compat/include/net/vxlan.h
@@ -4,9 +4,11 @@
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/udp.h>
+#include <net/nsh.h>
struct vxlan_sock;
-typedef void (vxlan_rcv_t)(struct vxlan_sock *vs, struct sk_buff *skb, __be32 key);
+typedef void (vxlan_rcv_t)(struct vxlan_sock *vs, struct sk_buff *skb,
+ __be32 key, __be32 nsp);
/* per UDP socket information */
struct vxlan_sock {
@@ -27,7 +29,7 @@ void vxlan_sock_release(struct vxlan_sock *vs);
int vxlan_xmit_skb(struct vxlan_sock *vs,
struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
- __be16 src_port, __be16 dst_port, __be32 vni);
+ __be16 src_port, __be16 dst_port, __be32 vni, __be32 nsp);
__be16 vxlan_src_port(__u16 port_min, __u16 port_max, struct sk_buff *skb);
diff --git a/datapath/linux/compat/vxlan.c b/datapath/linux/compat/vxlan.c
index 64877e0..0c3b9f9 100644
--- a/datapath/linux/compat/vxlan.c
+++ b/datapath/linux/compat/vxlan.c
@@ -50,6 +50,7 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/vxlan.h>
+#include <net/nsh.h>
#include "compat.h"
#include "datapath.h"
@@ -57,6 +58,9 @@
#include "vlan.h"
#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
+#define NSH_HLEN (sizeof(struct udphdr) + \
+ sizeof(struct vxlanhdr) + \
+ sizeof(struct nshhdr))
#define VXLAN_FLAGS 0x08000000 /* struct vxlanhdr.vx_flags required value. */
@@ -66,18 +70,35 @@ struct vxlanhdr {
__be32 vx_vni;
};
+static inline struct vxlanhdr *vxlan_hdr(const struct sk_buff *skb)
+{
+ return (struct vxlanhdr *)(udp_hdr(skb) + 1);
+}
+
+static inline struct nshhdr *nsh_hdr(const struct sk_buff *skb)
+{
+ return (struct nshhdr *)(vxlan_hdr(skb) + 1);
+}
+
/* Callback from net/ipv4/udp.c to receive packets */
static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
struct vxlan_sock *vs;
struct vxlanhdr *vxh;
+ struct udphdr *udp;
+ bool isnsh = false;
+ __be32 nsp = 0;
+
+ udp = (struct udphdr *)udp_hdr(skb);
+ if (udp->dest == htons(NSH_DST_PORT))
+ isnsh = true;
/* Need Vxlan and inner Ethernet header to be present */
- if (!pskb_may_pull(skb, VXLAN_HLEN))
+ if (!pskb_may_pull(skb, isnsh ? NSH_HLEN : VXLAN_HLEN))
goto error;
/* Return packets with reserved bits set */
- vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1);
+ vxh = vxlan_hdr(skb);
if (vxh->vx_flags != htonl(VXLAN_FLAGS) ||
(vxh->vx_vni & htonl(0xff))) {
pr_warn("invalid vxlan flags=%#x vni=%#x\n",
@@ -85,14 +106,26 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
goto error;
}
- if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB)))
+ if (isnsh) {
+ struct nshhdr *nsh = nsh_hdr(skb);
+
+ if (unlikely(nsh->b.svc_idx == 0)) {
+ pr_warn("NSH service index reached zero\n");
+ goto drop;
+ }
+
+ nsp = nsh->b.b2; /* same as svc_path | htonl(svc_idx) */
+ }
+
+ if (iptunnel_pull_header(skb, isnsh ? NSH_HLEN : VXLAN_HLEN,
+ htons(ETH_P_TEB)))
goto drop;
vs = rcu_dereference_sk_user_data(sk);
if (!vs)
goto drop;
- vs->rcv(vs, skb, vxh->vx_vni);
+ vs->rcv(vs, skb, vxh->vx_vni, nsp);
return 0;
drop:
@@ -176,15 +209,16 @@ static int handle_offloads(struct sk_buff *skb)
int vxlan_xmit_skb(struct vxlan_sock *vs,
struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
- __be16 src_port, __be16 dst_port, __be32 vni)
+ __be16 src_port, __be16 dst_port, __be32 vni, __be32 nsp)
{
+ bool isnsh = (dst_port == htons(NSH_DST_PORT));
struct vxlanhdr *vxh;
struct udphdr *uh;
int min_headroom;
int err;
min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
- + VXLAN_HLEN + sizeof(struct iphdr)
+ + (isnsh ? NSH_HLEN : VXLAN_HLEN) + sizeof(struct iphdr)
+ (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
/* Need space for new headers (invalidates iph ptr) */
@@ -203,6 +237,21 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
skb_reset_inner_headers(skb);
+ if (isnsh) {
+ struct nshhdr *nsh;
+ uint8_t nsi = ntohl(nsp) & NSH_M_NSI;
+
+ nsh = (struct nshhdr *) __skb_push(skb, sizeof(*nsh));
+ nsh->b.o = 0;
+ nsh->b.res = 0;
+ nsh->b.proto = htons(NSH_P_TEB);
+ /* set b2 first: it overlays svc_idx and would zero it */
+ nsh->b.b2 = nsp & htonl(NSH_M_NSP);
+ nsh->b.svc_idx = nsi ? nsi : 0x01;
+ nsh->b.c = 0;
+ memset(&nsh->c, 0x00, sizeof nsh->c);
+ }
+
vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
vxh->vx_flags = htonl(VXLAN_FLAGS);
vxh->vx_vni = vni;
diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c
index 8737b63..ca1dc3a 100644
--- a/datapath/vport-gre.c
+++ b/datapath/vport-gre.c
@@ -110,7 +110,8 @@ static int gre_rcv(struct sk_buff *skb,
return PACKET_REJECT;
key = key_to_tunnel_id(tpi->key, tpi->seq);
- ovs_flow_tun_key_init(&tun_key, ip_hdr(skb), key, filter_tnl_flags(tpi->flags));
+ ovs_flow_tun_key_init(&tun_key, ip_hdr(skb), key, 0,
+ filter_tnl_flags(tpi->flags));
ovs_vport_receive(vport, skb, &tun_key);
return PACKET_RCVD;
diff --git a/datapath/vport-lisp.c b/datapath/vport-lisp.c
index c2698ae..6edb920 100644
--- a/datapath/vport-lisp.c
+++ b/datapath/vport-lisp.c
@@ -237,7 +237,7 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb)
/* Save outer tunnel values */
iph = ip_hdr(skb);
- ovs_flow_tun_key_init(&tun_key, iph, key, TUNNEL_KEY);
+ ovs_flow_tun_key_init(&tun_key, iph, key, 0, TUNNEL_KEY);
/* Drop non-IP inner packets */
inner_iph = (struct iphdr *)(lisph + 1);
diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c
index ab2b6f7..999fdff 100644
--- a/datapath/vport-vxlan.c
+++ b/datapath/vport-vxlan.c
@@ -58,7 +58,8 @@ static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
}
/* Called with rcu_read_lock and BH disabled. */
-static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
+static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
+ __be32 vx_vni, __be32 nsp)
{
struct ovs_key_ipv4_tunnel tun_key;
struct vport *vport = vs->data;
@@ -68,7 +69,8 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
/* Save outer tunnel values */
iph = ip_hdr(skb);
key = cpu_to_be64(ntohl(vx_vni) >> 8);
- ovs_flow_tun_key_init(&tun_key, iph, key, TUNNEL_KEY);
+ ovs_flow_tun_key_init(&tun_key, iph, key, nsp,
+ TUNNEL_KEY | TUNNEL_NSP);
ovs_vport_receive(vport, skb, &tun_key);
}
@@ -180,7 +182,8 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
OVS_CB(skb)->tun_key->ipv4_tos,
OVS_CB(skb)->tun_key->ipv4_ttl, df,
src_port, dst_port,
- htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8));
+ htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8),
+ OVS_CB(skb)->tun_key->nsp);
if (err < 0)
ip_rt_put(rt);
error:
diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
index d1ff5ec..194acfe 100644
--- a/include/linux/openvswitch.h
+++ b/include/linux/openvswitch.h
@@ -328,6 +328,7 @@ enum ovs_tunnel_key_attr {
OVS_TUNNEL_KEY_ATTR_TTL, /* u8 Tunnel IP TTL. */
OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT, /* No argument, set DF. */
OVS_TUNNEL_KEY_ATTR_CSUM, /* No argument. CSUM packet. */
+ OVS_TUNNEL_KEY_ATTR_NSP, /* be32 NSH svc path (lower 24 bits) */
__OVS_TUNNEL_KEY_ATTR_MAX
};
--
1.7.9.5
More information about the dev
mailing list