[ovs-discuss] [NIC-20 06/11] datapath: Fix OOPS when dp_sysfs_add_if() fails.

Ben Pfaff blp at nicira.com
Wed Aug 5 22:37:09 UTC 2009


Until now, when dp_sysfs_add_if() failed, the caller ignored the failure.
This is a minor problem, because everything else should continue working,
without sysfs entries for the interface, in theory anyhow.  In actual
practice, the error exit path of dp_sysfs_add_if() does a kobject_put(),
and that kobject_put() calls release_nbp(), so that the new port gets
freed.  The next reference to the new port (usually in an ovs-vswitchd call
to the ODP_PORT_LIST ioctl) will then use the freed data and probably OOPS.

The fix is to make the datapath code, as opposed to the sysfs code,
responsible for creating and destroying the net_bridge_port kobject.  The
dp_sysfs_{add,del}_if() functions then just attach and detach the kobject
to sysfs and their cleanup routines no longer need to destroy the kobject
and indeed we don't care whether dp_sysfs_add_if() really succeeds.

This commit also makes the same transformation to the datapath's ifobj,
for consistency.

It is easy to trigger the OOPS fixed by this commit by adding a network
device A to a datapath, then renaming network device A to B, then renaming
network device C to A, then adding A to the datapath.  The last attempt to
add A will fail because a file named /sys/class/net/<datapath>/brif/A
already exists from the time that C was added to the datapath under the
name A.

This commit also adds some compatibility infrastructure, because it moves
code out of #ifdef SUPPORT_SYSFS and it otherwise wouldn't build.
---
 datapath/datapath.c                                |   50 ++++++++++++++++---
 datapath/datapath.h                                |    4 --
 datapath/dp_sysfs.h                                |    4 ++
 datapath/dp_sysfs_dp.c                             |    6 --
 datapath/dp_sysfs_if.c                             |   36 +++-----------
 datapath/linux-2.6/Modules.mk                      |    1 +
 .../linux-2.6/compat-2.6/include/linux/kobject.h   |   16 ++++++
 .../linux-2.6/compat-2.6/include/linux/netdevice.h |   12 ++++-
 8 files changed, 82 insertions(+), 47 deletions(-)
 create mode 100644 datapath/linux-2.6/compat-2.6/include/linux/kobject.h

diff --git a/datapath/datapath.c b/datapath/datapath.c
index d8bd245..f4284ed 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -171,6 +171,16 @@ errout:
 		rtnl_set_sk_err(net, RTNLGRP_LINK, err);
 }
 
+static void release_dp(struct kobject *kobj)
+{
+	struct datapath *dp = container_of(kobj, struct datapath, ifobj);
+	kfree(dp);
+}
+
+struct kobj_type dp_ktype = {
+	.release = release_dp
+};
+
 static int create_dp(int dp_idx, const char __user *devnamep)
 {
 	struct net_device *dp_dev;
@@ -212,6 +222,13 @@ static int create_dp(int dp_idx, const char __user *devnamep)
 		skb_queue_head_init(&dp->queues[i]);
 	init_waitqueue_head(&dp->waitqueue);
 
+	/* Initialize kobject for bridge.  This will be added as
+	 * /sys/class/net/<devname>/bridge later, if sysfs is enabled. */
+	kobject_set_name(&dp->ifobj, SYSFS_BRIDGE_PORT_SUBDIR); /* "bridge" */
+	dp->ifobj.kset = NULL;
+	dp->ifobj.parent = NULL;
+	kobject_init(&dp->ifobj, &dp_ktype);
+
 	/* Allocate table. */
 	err = -ENOMEM;
 	rcu_assign_pointer(dp->table, dp_table_create(DP_L1_SIZE));
@@ -284,7 +301,7 @@ static void do_destroy_dp(struct datapath *dp)
 	for (i = 0; i < DP_MAX_GROUPS; i++)
 		kfree(dp->groups[i]);
 	free_percpu(dp->stats_percpu);
-	kfree(dp);
+	kobject_put(&dp->ifobj);
 	module_put(THIS_MODULE);
 }
 
@@ -309,6 +326,19 @@ err_unlock:
 	return err;
 }
 
+static void release_nbp(struct kobject *kobj)
+{
+	struct net_bridge_port *p = container_of(kobj, struct net_bridge_port, kobj);
+	kfree(p);
+}
+
+struct kobj_type brport_ktype = {
+#ifdef SUPPORT_SYSFS
+	.sysfs_ops = &brport_sysfs_ops,
+#endif
+	.release = release_nbp
+};
+
 /* Called with RTNL lock and dp_mutex. */
 static int new_nbp(struct datapath *dp, struct net_device *dev, int port_no)
 {
@@ -338,6 +368,13 @@ static int new_nbp(struct datapath *dp, struct net_device *dev, int port_no)
 	list_add_rcu(&p->node, &dp->port_list);
 	dp->n_ports++;
 
+	/* Initialize kobject for bridge.  This will be added as
+	 * /sys/class/net/<devname>/brport later, if sysfs is enabled. */
+	kobject_set_name(&p->kobj, SYSFS_BRIDGE_PORT_ATTR); /* "brport" */
+	p->kobj.kset = NULL;
+	p->kobj.parent = &p->dev->NETDEV_DEV_MEMBER.kobj;
+	kobject_init(&p->kobj, &brport_ktype);
+
 	dp_ifinfo_notify(RTM_NEWLINK, p);
 
 	return 0;
@@ -435,15 +472,12 @@ int dp_del_port(struct net_bridge_port *p)
 	/* Then wait until no one is still using it, and destroy it. */
 	synchronize_rcu();
 
-	if (is_dp_dev(p->dev)) {
+	if (is_dp_dev(p->dev))
 		dp_dev_destroy(p->dev);
-	}
-	if (p->port_no != ODPP_LOCAL) {
+	if (p->port_no != ODPP_LOCAL)
 		dp_sysfs_del_if(p);
-	} else {
-		dev_put(p->dev);
-		kfree(p);
-	}
+	dev_put(p->dev);
+	kobject_put(&p->kobj);
 
 	return 0;
 }
diff --git a/datapath/datapath.h b/datapath/datapath.h
index e778a70..63d92cb 100644
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -64,9 +64,7 @@ struct datapath {
 	struct mutex mutex;
 	int dp_idx;
 
-#ifdef SUPPORT_SYSFS
 	struct kobject ifobj;
-#endif
 
 	int drop_frags;
 
@@ -94,9 +92,7 @@ struct net_bridge_port {
 	u16 port_no;
 	struct datapath	*dp;
 	struct net_device *dev;
-#ifdef SUPPORT_SYSFS
 	struct kobject kobj;
-#endif
 	struct list_head node;   /* Element in datapath.ports. */
 };
 
diff --git a/datapath/dp_sysfs.h b/datapath/dp_sysfs.h
index d98fdf3..c0ac01b 100644
--- a/datapath/dp_sysfs.h
+++ b/datapath/dp_sysfs.h
@@ -29,5 +29,9 @@ int dp_sysfs_del_if(struct net_bridge_port *p);
  * multiple versions. */
 #endif
 
+#ifdef SUPPORT_SYSFS
+extern struct sysfs_ops brport_sysfs_ops;
+#endif
+
 #endif /* dp_sysfs.h */
 
diff --git a/datapath/dp_sysfs_dp.c b/datapath/dp_sysfs_dp.c
index 5764a3a..9699a07 100644
--- a/datapath/dp_sysfs_dp.c
+++ b/datapath/dp_sysfs_dp.c
@@ -487,12 +487,7 @@ int dp_sysfs_add_dp(struct datapath *dp)
 	}
 
 	/* Create /sys/class/net/<devname>/bridge directory. */
-	kobject_set_name(&dp->ifobj, SYSFS_BRIDGE_PORT_SUBDIR); /* "bridge" */
-	dp->ifobj.ktype = NULL;
-	dp->ifobj.kset = NULL;
 	dp->ifobj.parent = kobj;
-	kboject_init(&dp->ifobj);
-
 	err = kobject_add(&dp->ifobj);
 	if (err) {
 		pr_info("%s: can't add kobject (directory) %s/%s\n",
@@ -513,7 +508,6 @@ int dp_sysfs_del_dp(struct datapath *dp)
 	struct kobject *kobj = to_kobj(dp->ports[ODPP_LOCAL]->dev);
 
 	kobject_del(&dp->ifobj);
-	kobject_put(&dp->ifobj);
 	sysfs_remove_group(kobj, &bridge_group);
 
 	return 0;
diff --git a/datapath/dp_sysfs_if.c b/datapath/dp_sysfs_if.c
index 2771d65..178afbd 100644
--- a/datapath/dp_sysfs_if.c
+++ b/datapath/dp_sysfs_if.c
@@ -266,18 +266,6 @@ struct sysfs_ops brport_sysfs_ops = {
 	.store = brport_store,
 };
 
-static void release_nbp(struct kobject *kobj)
-{
-	struct net_bridge_port *p
-		= container_of(kobj, struct net_bridge_port, kobj);
-	kfree(p);
-}
-
-struct kobj_type brport_ktype = {
-	.sysfs_ops = &brport_sysfs_ops,
-	.release = release_nbp
-};
-
 /*
  * Add sysfs entries to ethernet device added to a bridge.
  * Creates a brport subdirectory with bridge attributes.
@@ -290,12 +278,6 @@ int dp_sysfs_add_if(struct net_bridge_port *p)
 	int err;
 
 	/* Create /sys/class/net/<devname>/brport directory. */
-	kobject_set_name(&p->kobj, SYSFS_BRIDGE_PORT_ATTR); /* "brport" */
-	p->kobj.ktype = &brport_ktype;
-	p->kobj.kset = NULL;
-	p->kobj.parent = &(p->dev->class_dev.kobj);
-	kobject_init(&p->kobj);
-
 	err = kobject_add(&p->kobj);
 	if (err)
 		goto err_put;
@@ -303,7 +285,7 @@ int dp_sysfs_add_if(struct net_bridge_port *p)
 	/* Create symlink from /sys/class/net/<devname>/brport/bridge to
 	 * /sys/class/net/<bridgename>. */
 	err = sysfs_create_link(&p->kobj,
-				&dp->ports[ODPP_LOCAL]->dev->class_dev.kobj,
+				&dp->ports[ODPP_LOCAL]->dev->NETDEV_DEV_MEMBER.kobj,
 				SYSFS_BRIDGE_PORT_LINK); /* "bridge" */
 	if (err)
 		goto err_del;
@@ -329,20 +311,18 @@ err_del:
 	kobject_del(&p->kobj);
 err_put:
 	kobject_put(&p->kobj);
+
+	/* Ensure that dp_sysfs_del_if becomes a no-op. */
+	p->kobj.dentry = NULL;
 	return err;
 }
 
 int dp_sysfs_del_if(struct net_bridge_port *p)
 {
-	struct net_device *dev = p->dev;
-
-	kobject_uevent(&p->kobj, KOBJ_REMOVE);
-	kobject_del(&p->kobj);
-
-	dev_put(dev);
-
-	kobject_put(&p->kobj);
-
+	if (p->kobj.dentry) {
+		kobject_uevent(&p->kobj, KOBJ_REMOVE);
+		kobject_del(&p->kobj);
+	}
 	return 0;
 }
 #endif /* SUPPORT_SYSFS */
diff --git a/datapath/linux-2.6/Modules.mk b/datapath/linux-2.6/Modules.mk
index 5c3540d..e5aa51d 100644
--- a/datapath/linux-2.6/Modules.mk
+++ b/datapath/linux-2.6/Modules.mk
@@ -12,6 +12,7 @@ openvswitch_headers += \
 	linux-2.6/compat-2.6/include/linux/ipv6.h \
 	linux-2.6/compat-2.6/include/linux/jiffies.h \
 	linux-2.6/compat-2.6/include/linux/kernel.h \
+	linux-2.6/compat-2.6/include/linux/kobject.h \
 	linux-2.6/compat-2.6/include/linux/log2.h \
 	linux-2.6/compat-2.6/include/linux/lockdep.h \
 	linux-2.6/compat-2.6/include/linux/mutex.h \
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/kobject.h b/datapath/linux-2.6/compat-2.6/include/linux/kobject.h
new file mode 100644
index 0000000..c0de3d2
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/linux/kobject.h
@@ -0,0 +1,16 @@
+#ifndef __LINUX_KOBJECT_WRAPPER_H
+#define __LINUX_KOBJECT_WRAPPER_H 1
+
+#include_next <linux/kobject.h>
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
+#define kobject_init(kobj, ktype) rpl_kobject_init(kobj, ktype)
+static inline void rpl_kobject_init(struct kobject *kobj, struct kobj_type *ktype)
+{
+	kobj->ktype = ktype;
+	(kobject_init)(kobj);
+}
+#endif
+
+#endif /* linux/kobject.h wrapper */
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/netdevice.h b/datapath/linux-2.6/compat-2.6/include/linux/netdevice.h
index 32e1735..b718283 100644
--- a/datapath/linux-2.6/compat-2.6/include/linux/netdevice.h
+++ b/datapath/linux-2.6/compat-2.6/include/linux/netdevice.h
@@ -5,8 +5,18 @@
 
 struct net;
 
+/* Before 2.6.21, struct net_device has a "struct class_device" member named
+ * class_dev.  Beginning with 2.6.21, struct net_device instead has a "struct
+ * device" member named dev.  Otherwise the usage of these members is pretty
+ * much the same. */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21)
+#define NETDEV_DEV_MEMBER class_dev
+#else
+#define NETDEV_DEV_MEMBER dev
+#endif
+
 #ifndef to_net_dev
-#define to_net_dev(class) container_of(class, struct net_device, class_dev)
+#define to_net_dev(class) container_of(class, struct net_device, NETDEV_DEV_MEMBER)
 #endif
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)
-- 
1.6.3.3





More information about the discuss mailing list