[ovs-dev] [PATCH v2] datapath: compat: Backports bugfixes for nf_conncount

Yifeng Sun pkusunyifeng at gmail.com
Wed Aug 7 00:05:48 UTC 2019


This patch backports several critical bug fixes related to
locking and data consistency in nf_conncount code.

This backport is based on the following upstream net-next upstream commits.
4cd273b ("netfilter: nf_conncount: don't skip eviction when age is negative")
d4e7df1 ("netfilter: nf_conncount: use rb_link_node_rcu() instead of rb_link_node()")
53ca0f2 ("netfilter: nf_conncount: remove wrong condition check routine")
3c5cdb1 ("netfilter: nf_conncount: fix unexpected permanent node of list.")
31568ec ("netfilter: nf_conncount: fix list_del corruption in conn_free")
fd3e71a ("netfilter: nf_conncount: use spin_lock_bh instead of spin_lock")

This patch also added additional compat code so that it can build on
all supported kernel versions.

Travis tests are at
https://travis-ci.org/yifsun/ovs-travis/builds/568603796

VMware-BZ: #2396471

CC: Taehee Yoo <ap420073 at gmail.com>
Signed-off-by: Yifeng Sun <pkusunyifeng at gmail.com>
---
v1->v2: Add fixes to support old kernel versions. Thanks YiHung for reviewing.

 acinclude.m4                                 |  2 ++
 datapath/linux/Modules.mk                    |  3 +-
 datapath/linux/compat/include/linux/rbtree.h | 19 ++++++++++++
 datapath/linux/compat/nf_conncount.c         | 46 ++++++++++++++++++----------
 4 files changed, 52 insertions(+), 18 deletions(-)
 create mode 100644 datapath/linux/compat/include/linux/rbtree.h

diff --git a/acinclude.m4 b/acinclude.m4
index 116ffcf9096d..f8e856d3303f 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -1012,6 +1012,8 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
                   [OVS_DEFINE([HAVE_GRE_CALC_HLEN])])
   OVS_GREP_IFELSE([$KSRC/include/net/gre.h], [ip_gre_calc_hlen],
                   [OVS_DEFINE([HAVE_IP_GRE_CALC_HLEN])])
+  OVS_GREP_IFELSE([$KSRC/include/linux/rbtree.h], [rb_link_node_rcu],
+                  [OVS_DEFINE([HAVE_RBTREE_RB_LINK_NODE_RCU])])
 
   if cmp -s datapath/linux/kcompat.h.new \
             datapath/linux/kcompat.h >/dev/null 2>&1; then
diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk
index cbb29f1c69d0..69d7faeac414 100644
--- a/datapath/linux/Modules.mk
+++ b/datapath/linux/Modules.mk
@@ -116,5 +116,6 @@ openvswitch_headers += \
 	linux/compat/include/uapi/linux/netfilter.h \
 	linux/compat/include/linux/mm.h \
 	linux/compat/include/linux/netfilter.h \
-	linux/compat/include/linux/overflow.h
+	linux/compat/include/linux/overflow.h \
+	linux/compat/include/linux/rbtree.h
 EXTRA_DIST += linux/compat/build-aux/export-check-whitelist
diff --git a/datapath/linux/compat/include/linux/rbtree.h b/datapath/linux/compat/include/linux/rbtree.h
new file mode 100644
index 000000000000..dbf20ff0e0b8
--- /dev/null
+++ b/datapath/linux/compat/include/linux/rbtree.h
@@ -0,0 +1,19 @@
+#ifndef __LINUX_RBTREE_WRAPPER_H
+#define __LINUX_RBTREE_WRAPPER_H 1
+
+#include_next <linux/rbtree.h>
+
+#ifndef HAVE_RBTREE_RB_LINK_NODE_RCU
+#include <linux/rcupdate.h>
+
+static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent,
+				    struct rb_node **rb_link)
+{
+	node->__rb_parent_color = (unsigned long)parent;
+	node->rb_left = node->rb_right = NULL;
+
+	rcu_assign_pointer(*rb_link, node);
+}
+#endif
+
+#endif /* __LINUX_RBTREE_WRAPPER_H */
diff --git a/datapath/linux/compat/nf_conncount.c b/datapath/linux/compat/nf_conncount.c
index eeae440f872d..6a4d058e7fac 100644
--- a/datapath/linux/compat/nf_conncount.c
+++ b/datapath/linux/compat/nf_conncount.c
@@ -54,6 +54,7 @@ struct nf_conncount_tuple {
 	struct nf_conntrack_zone	zone;
 	int				cpu;
 	u32				jiffies32;
+	bool				dead;
 	struct rcu_head			rcu_head;
 };
 
@@ -111,15 +112,16 @@ nf_conncount_add(struct nf_conncount_list *list,
 	conn->zone = *zone;
 	conn->cpu = raw_smp_processor_id();
 	conn->jiffies32 = (u32)jiffies;
-	spin_lock(&list->list_lock);
+	conn->dead = false;
+	spin_lock_bh(&list->list_lock);
 	if (list->dead == true) {
 		kmem_cache_free(conncount_conn_cachep, conn);
-		spin_unlock(&list->list_lock);
+		spin_unlock_bh(&list->list_lock);
 		return NF_CONNCOUNT_SKIP;
 	}
 	list_add_tail(&conn->node, &list->head);
 	list->count++;
-	spin_unlock(&list->list_lock);
+	spin_unlock_bh(&list->list_lock);
 	return NF_CONNCOUNT_ADDED;
 }
 
@@ -136,19 +138,22 @@ static bool conn_free(struct nf_conncount_list *list,
 {
 	bool free_entry = false;
 
-	spin_lock(&list->list_lock);
+	spin_lock_bh(&list->list_lock);
 
-	if (list->count == 0) {
-		spin_unlock(&list->list_lock);
+	if (conn->dead) {
+		spin_unlock_bh(&list->list_lock);
 		return free_entry;
 	}
 
 	list->count--;
+	conn->dead = true;
 	list_del_rcu(&conn->node);
-	if (list->count == 0)
+	if (list->count == 0) {
+		list->dead = true;
 		free_entry = true;
+	}
 
-	spin_unlock(&list->list_lock);
+	spin_unlock_bh(&list->list_lock);
 	call_rcu(&conn->rcu_head, __conn_free);
 	return free_entry;
 }
@@ -160,7 +165,7 @@ find_or_evict(struct net *net, struct nf_conncount_list *list,
 	const struct nf_conntrack_tuple_hash *found;
 	unsigned long a, b;
 	int cpu = raw_smp_processor_id();
-	__s32 age;
+	u32 age;
 
 	found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple);
 	if (found)
@@ -248,7 +253,7 @@ static void nf_conncount_list_init(struct nf_conncount_list *list)
 {
 	spin_lock_init(&list->list_lock);
 	INIT_LIST_HEAD(&list->head);
-	list->count = 1;
+	list->count = 0;
 	list->dead = false;
 }
 
@@ -261,6 +266,7 @@ static bool nf_conncount_gc_list(struct net *net,
 	struct nf_conn *found_ct;
 	unsigned int collected = 0;
 	bool free_entry = false;
+	bool ret = false;
 
 	list_for_each_entry_safe(conn, conn_n, &list->head, node) {
 		found = find_or_evict(net, list, conn, &free_entry);
@@ -290,7 +296,15 @@ static bool nf_conncount_gc_list(struct net *net,
 		if (collected > CONNCOUNT_GC_MAX_NODES)
 			return false;
 	}
-	return false;
+
+	spin_lock_bh(&list->list_lock);
+	if (!list->count) {
+		list->dead = true;
+		ret = true;
+	}
+	spin_unlock_bh(&list->list_lock);
+
+	return ret;
 }
 
 static void __tree_nodes_free(struct rcu_head *h)
@@ -310,11 +324,8 @@ static void tree_nodes_free(struct rb_root *root,
 	while (gc_count) {
 		rbconn = gc_nodes[--gc_count];
 		spin_lock(&rbconn->list.list_lock);
-		if (rbconn->list.count == 0 && rbconn->list.dead == false) {
-			rbconn->list.dead = true;
-			rb_erase(&rbconn->node, root);
-			call_rcu(&rbconn->rcu_head, __tree_nodes_free);
-		}
+		rb_erase(&rbconn->node, root);
+		call_rcu(&rbconn->rcu_head, __tree_nodes_free);
 		spin_unlock(&rbconn->list.list_lock);
 	}
 }
@@ -415,8 +426,9 @@ insert_tree(struct net *net,
 	nf_conncount_list_init(&rbconn->list);
 	list_add(&conn->node, &rbconn->list.head);
 	count = 1;
+	rbconn->list.count = count;
 
-	rb_link_node(&rbconn->node, parent, rbnode);
+	rb_link_node_rcu(&rbconn->node, parent, rbnode);
 	rb_insert_color(&rbconn->node, root);
 out_unlock:
 	spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
-- 
2.7.4



More information about the dev mailing list