[ovs-dev] [PATCH 2/2] datapath: Add support for kernel 4.19.x and 4.20.x

Yifeng Sun pkusunyifeng at gmail.com
Sat Mar 16 02:20:08 UTC 2019


This patch introduces changes needed by OVS to support latest
Linux kernels (4.19.x and 4.20.x). Recent kernels changed many
APIs used by OVS. One major change is that struct nf_conntrack_l3proto
became unvisible outside of kernel, so the needed get_l4proto
function is added in file compact/nf_conntrack_core.c to
accommodate this issue.

This patch doesn't introduce new failed tests when running
'make check-kmod' for kernels listed below:
    3.10.0-957.5.1.el7.x86_64
    4.4.0-142-generic
    4.17.14
    4.18.0-16-generic
    4.19.26
    4.20.13

Travis passed at
https://travis-ci.org/yifsun/ovs-travis/builds/507034016

Signed-off-by: Yifeng Sun <pkusunyifeng at gmail.com>
---
 .travis.yml                                   |  2 +
 NEWS                                          |  2 +
 acinclude.m4                                  | 20 ++++-
 datapath/conntrack.c                          | 72 ++++++++++++++++-
 .../include/net/netfilter/nf_conntrack_core.h |  6 ++
 .../net/netfilter/nf_conntrack_count.h        |  2 +
 datapath/linux/compat/nf_conncount.c          |  6 +-
 datapath/linux/compat/nf_conntrack_core.c     | 80 +++++++++++++++++++
 datapath/linux/compat/nf_conntrack_proto.c    |  3 +
 9 files changed, 186 insertions(+), 7 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 32d5f1918..dc7a20b6e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -35,6 +35,8 @@ env:
   - KERNEL=3.16.54 TESTSUITE=1 DPDK=1
   - KERNEL=3.16.54 DPDK_SHARED=1
   - KERNEL=3.16.54 DPDK_SHARED=1 OPTS="--enable-shared"
+  - KERNEL=4.20.16
+  - KERNEL=4.19.29
   - KERNEL=4.18.20
   - KERNEL=4.17.19
   - KERNEL=4.16.18
diff --git a/NEWS b/NEWS
index 1e4744dbd..af5b5222f 100644
--- a/NEWS
+++ b/NEWS
@@ -25,6 +25,8 @@ Post-v2.11.0
    - OVN:
      * Select IPAM mac_prefix in a random manner if not provided by the user
    - New QoS type "linux-netem" on Linux.
+   - Linux datapath:
+     * Support for the kernel versions 4.19.x, 4.20.x.
 
 v2.11.0 - 19 Feb 2019
 ---------------------
diff --git a/acinclude.m4 b/acinclude.m4
index 3cd6ea730..7b11b7740 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -151,10 +151,10 @@ AC_DEFUN([OVS_CHECK_LINUX], [
     AC_MSG_RESULT([$kversion])
 
     if test "$version" -ge 4; then
-       if test "$version" = 4 && test "$patchlevel" -le 18; then
+       if test "$version" = 4 && test "$patchlevel" -le 20; then
           : # Linux 4.x
        else
-          AC_ERROR([Linux kernel in $KBUILD is version $kversion, but version newer than 4.18.x is not supported (please refer to the FAQ for advice)])
+          AC_ERROR([Linux kernel in $KBUILD is version $kversion, but version newer than 4.20.x is not supported (please refer to the FAQ for advice)])
        fi
     elif test "$version" = 3 && test "$patchlevel" -ge 10; then
        : # Linux 3.x
@@ -583,6 +583,22 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
                   [OVS_DEFINE([HAVE_VOID_INET_FRAGS_INIT])])
   OVS_GREP_IFELSE([$KSRC/include/net/inetpeer.h], [vif],
                   [OVS_DEFINE([HAVE_INETPEER_VIF_SUPPORT])])
+  OVS_FIND_PARAM_IFELSE([$KSRC/include/net/netfilter/nf_conntrack_helper.h],
+                        [nf_ct_helper_ext_add], [nf_conntrack_helper],
+                        [OVS_DEFINE([HAVE_NF_CT_HELPER_EXT_ADD_TAKES_HELPER])])
+  OVS_FIND_PARAM_IFELSE([$KSRC/include/net/netfilter/nf_conntrack_core.h],
+                        [nf_ct_invert_tuple], [l3proto],
+                        [OVS_DEFINE([HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO])])
+  OVS_GREP_IFELSE([$KSRC/include/net/netfilter/nf_conntrack_core.h], [nf_ct_get_tuple],
+                  [OVS_DEFINE([HAVE_NF_CT_GET_TUPLE])])
+  OVS_FIND_PARAM_IFELSE([$KSRC/include/net/netfilter/nf_conntrack_core.h],
+                        [nf_conntrack_in], [net],
+                        [OVS_DEFINE([HAVE_NF_CONNTRACK_IN_TAKES_NET])])
+  OVS_GREP_IFELSE([$KSRC/include/net/ipv6_frag.h], [IP6_DEFRAG_CONNTRACK_IN],
+                  [OVS_DEFINE([HAVE_IPV6_FRAG_H])])
+  OVS_FIND_PARAM_IFELSE([$KSRC/include/net/netfilter/nf_conntrack_l4proto.h],
+                        [__nf_ct_l4proto_find], [l3proto],
+                        [OVS_DEFINE([HAVE_NF_CT_L4PROTO_FIND_TAKES_L3PROTO])])
 
   dnl Check for dst_cache and ipv6 lable to use backported tunnel infrastructure.
   dnl OVS does not really need ipv6 label field, but its presence signifies that
diff --git a/datapath/conntrack.c b/datapath/conntrack.c
index a7dc9e0c3..b9b79e2cc 100644
--- a/datapath/conntrack.c
+++ b/datapath/conntrack.c
@@ -38,6 +38,10 @@
 #include <net/netfilter/nf_nat_l3proto.h>
 #endif
 
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) && defined(HAVE_IPV6_FRAG_H)
+#include <net/ipv6_frag.h>
+#endif
+
 #include "datapath.h"
 #include "conntrack.h"
 #include "flow.h"
@@ -645,32 +649,62 @@ static struct nf_conn *
 ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
 		     u8 l3num, struct sk_buff *skb, bool natted)
 {
-	const struct nf_conntrack_l3proto *l3proto;
 	const struct nf_conntrack_l4proto *l4proto;
 	struct nf_conntrack_tuple tuple;
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
-	unsigned int dataoff;
 	u8 protonum;
 
+#ifdef HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO
+	const struct nf_conntrack_l3proto *l3proto;
+	unsigned int dataoff;
+
 	l3proto = __nf_ct_l3proto_find(l3num);
 	if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff,
 				 &protonum) <= 0) {
 		pr_debug("ovs_ct_find_existing: Can't get protonum\n");
 		return NULL;
 	}
+#else
+	int protooff;
+
+	protooff = get_l4proto(skb, skb_network_offset(skb),
+			       l3num, &protonum);
+	if (protooff <= 0) {
+		pr_warn("ovs_ct_find_existing: Can't get protonum\n");
+		return NULL;
+	}
+#endif
+
+#ifdef HAVE_NF_CT_L4PROTO_FIND_TAKES_L3PROTO
 	l4proto = __nf_ct_l4proto_find(l3num, protonum);
-	if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
-			     protonum, net, &tuple, l3proto, l4proto)) {
+#else
+	l4proto = __nf_ct_l4proto_find(protonum);
+#endif
+
+#ifdef HAVE_NF_CT_GET_TUPLE
+	if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
+				       l3num, net, &tuple)) {
+		pr_debug("ovs_ct_find_existing: Can't get tuple\n");
+		return NULL;
+	}
+#else
+	if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
+				       l3num, net, &tuple)) {
 		pr_debug("ovs_ct_find_existing: Can't get tuple\n");
 		return NULL;
 	}
+#endif
 
 	/* Must invert the tuple if skb has been transformed by NAT. */
 	if (natted) {
 		struct nf_conntrack_tuple inverse;
 
+#ifdef HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO
 		if (!nf_ct_invert_tuple(&inverse, &tuple, l3proto, l4proto)) {
+#else
+		if (!nf_ct_invert_tuple(&inverse, &tuple, l4proto)) {
+#endif
 			pr_debug("ovs_ct_find_existing: Inversion failed!\n");
 			return NULL;
 		}
@@ -989,6 +1023,9 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
 	if (!cached) {
 		struct nf_conn *tmpl = info->ct;
 		int err;
+#ifndef HAVE_NF_CONNTRACK_IN_TAKES_NET
+		struct nf_hook_state state = {};
+#endif
 
 		/* Associate skb with specified zone. */
 		if (tmpl) {
@@ -998,8 +1035,15 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
 			nf_ct_set(skb, tmpl, IP_CT_NEW);
 		}
 
+#ifdef HAVE_NF_CONNTRACK_IN_TAKES_NET
 		err = nf_conntrack_in(net, info->family,
 				      NF_INET_PRE_ROUTING, skb);
+#else
+		state.hook = NF_INET_PRE_ROUTING,
+		state.pf = info->family,
+		state.net = net,
+		err = nf_conntrack_in(skb, &state);
+#endif
 		if (err != NF_ACCEPT)
 			return -ENOENT;
 
@@ -1307,9 +1351,17 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
 {
 	int nh_ofs;
 	int err;
+	/* From kernel 4.19.0+, Function handle_fragments may shrink skb's
+	 * headroom, which will result in loss of ethernet header data.
+	 * This buf is used to backup the header data before calling
+	 * handle_fragments. */
+	char buf[32];
 
 	/* The conntrack module expects to be working at L3. */
 	nh_ofs = skb_network_offset(skb);
+	if (nh_ofs > sizeof(buf))
+		return -EINVAL;
+	memcpy(buf, skb->data, nh_ofs);
 	skb_pull_rcsum(skb, nh_ofs);
 
 	err = ovs_skb_network_trim(skb);
@@ -1326,8 +1378,16 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
 		err = ovs_ct_commit(net, key, info, skb);
 	else
 		err = ovs_ct_lookup(net, key, info, skb);
+	if (err)
+		return err;
 
+	if (skb_headroom(skb) < nh_ofs) {
+		err = pskb_expand_head(skb, nh_ofs, 0, GFP_ATOMIC);
+		if (err)
+			return err;
+	}
 	skb_push(skb, nh_ofs);
+	memcpy(skb->data, buf, nh_ofs);
 	skb_postpush_rcsum(skb, skb->data, nh_ofs);
 	if (err)
 		kfree_skb(skb);
@@ -1362,7 +1422,11 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
 		return -EINVAL;
 	}
 
+#ifdef HAVE_NF_CT_HELPER_EXT_ADD_TAKES_HELPER
 	help = nf_ct_helper_ext_add(info->ct, helper, GFP_KERNEL);
+#else
+	help = nf_ct_helper_ext_add(info->ct, GFP_KERNEL);
+#endif
 	if (!help) {
 		nf_conntrack_helper_put(helper);
 		return -ENOMEM;
diff --git a/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h b/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h
index 7834c8c25..7fca7dc55 100644
--- a/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h
+++ b/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h
@@ -104,4 +104,10 @@ static inline bool rpl_nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
 #define nf_ct_delete rpl_nf_ct_delete
 #endif /* HAVE_NF_CONN_TIMER */
 
+#ifndef HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO
+int rpl_get_l4proto(const struct sk_buff *skb,
+		    unsigned int nhoff, u8 pf, u8 *l4num);
+#define get_l4proto rpl_get_l4proto
+#endif
+
 #endif /* _NF_CONNTRACK_CORE_WRAPPER_H */
diff --git a/datapath/linux/compat/include/net/netfilter/nf_conntrack_count.h b/datapath/linux/compat/include/net/netfilter/nf_conntrack_count.h
index fd536f3e1..a26eb9f87 100644
--- a/datapath/linux/compat/include/net/netfilter/nf_conntrack_count.h
+++ b/datapath/linux/compat/include/net/netfilter/nf_conntrack_count.h
@@ -4,6 +4,8 @@
 #include <linux/list.h>
 
 #ifdef HAVE_UPSTREAM_NF_CONNCOUNT
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_zones.h>
 #include_next <net/netfilter/nf_conntrack_count.h>
 
 static inline int rpl_nf_conncount_modinit(void)
diff --git a/datapath/linux/compat/nf_conncount.c b/datapath/linux/compat/nf_conncount.c
index 0bee96274..eeae440f8 100644
--- a/datapath/linux/compat/nf_conncount.c
+++ b/datapath/linux/compat/nf_conncount.c
@@ -13,6 +13,8 @@
  *		only ignore TIME_WAIT or gone connections
  *   (C) CC Computer Consultants GmbH, 2007
  */
+#ifndef HAVE_UPSTREAM_NF_CONNCOUNT
+
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/in.h>
 #include <linux/in6.h>
@@ -138,7 +140,7 @@ static bool conn_free(struct nf_conncount_list *list,
 
 	if (list->count == 0) {
 		spin_unlock(&list->list_lock);
-                return free_entry;
+		return free_entry;
 	}
 
 	list->count--;
@@ -635,3 +637,5 @@ void rpl_nf_conncount_modexit(void)
 	kmem_cache_destroy(conncount_conn_cachep);
 	kmem_cache_destroy(conncount_rb_cachep);
 }
+
+#endif /* HAVE_UPSTREAM_NF_CONNCOUNT */
diff --git a/datapath/linux/compat/nf_conntrack_core.c b/datapath/linux/compat/nf_conntrack_core.c
index a7d3d4331..ecfeae1a0 100644
--- a/datapath/linux/compat/nf_conntrack_core.c
+++ b/datapath/linux/compat/nf_conntrack_core.c
@@ -1,4 +1,7 @@
+#include <linux/types.h>
 #include <linux/version.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
 
 #ifndef HAVE_NF_CT_ZONE_INIT
 
@@ -11,3 +14,80 @@ const struct nf_conntrack_zone nf_ct_zone_dflt = {
 };
 
 #endif /* HAVE_NF_CT_ZONE_INIT */
+
+#ifndef HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO
+static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
+			    u_int8_t *protonum)
+{
+	int dataoff = -1;
+	const struct iphdr *iph;
+	struct iphdr _iph;
+
+	iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
+	if (!iph)
+		return -1;
+
+	/* Conntrack defragments packets, we might still see fragments
+	 * inside ICMP packets though.
+	 */
+	if (iph->frag_off & htons(IP_OFFSET))
+		return -1;
+
+	dataoff = nhoff + (iph->ihl << 2);
+	*protonum = iph->protocol;
+
+	/* Check bogus IP headers */
+	if (dataoff > skb->len) {
+		pr_debug("bogus IPv4 packet: nhoff %u, ihl %u, skblen %u\n",
+			 nhoff, iph->ihl << 2, skb->len);
+		return -1;
+	}
+	return dataoff;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
+			    u8 *protonum)
+{
+	int protoff = -1;
+	unsigned int extoff = nhoff + sizeof(struct ipv6hdr);
+	__be16 frag_off;
+	u8 nexthdr;
+
+	if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr),
+			  &nexthdr, sizeof(nexthdr)) != 0) {
+		pr_debug("can't get nexthdr\n");
+		return -1;
+	}
+	protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off);
+	/*
+	 * (protoff == skb->len) means the packet has not data, just
+	 * IPv6 and possibly extensions headers, but it is tracked anyway
+	 */
+	if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
+		pr_debug("can't find proto in pkt\n");
+		return -1;
+	}
+
+	*protonum = nexthdr;
+	return protoff;
+}
+#endif
+
+int rpl_get_l4proto(const struct sk_buff *skb,
+		    unsigned int nhoff, u8 pf, u8 *l4num)
+{
+	switch (pf) {
+	case NFPROTO_IPV4:
+		return ipv4_get_l4proto(skb, nhoff, l4num);
+#if IS_ENABLED(CONFIG_IPV6)
+	case NFPROTO_IPV6:
+		return ipv6_get_l4proto(skb, nhoff, l4num);
+#endif
+	default:
+		*l4num = 0;
+		break;
+	}
+	return -1;
+}
+#endif /* HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO */
diff --git a/datapath/linux/compat/nf_conntrack_proto.c b/datapath/linux/compat/nf_conntrack_proto.c
index 4ac66f61c..89c2f5422 100644
--- a/datapath/linux/compat/nf_conntrack_proto.c
+++ b/datapath/linux/compat/nf_conntrack_proto.c
@@ -1,7 +1,10 @@
 #include <linux/types.h>
 
 #include <net/netfilter/nf_conntrack.h>
+
+#ifdef HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO
 #include <net/netfilter/nf_conntrack_l3proto.h>
+#endif
 
 /*
  * Upstream net-next commmit 7e35ec0e8044
-- 
2.19.1



More information about the dev mailing list