[ovs-dev] [PATCH 2/3] datapath: Fix OVS compat workqueue support.

Pravin B Shelar pshelar at nicira.com
Thu Dec 22 04:28:25 UTC 2011


Resending this patch, as the net-namespace patches depend on it.
--8<--------------------------cut here-------------------------->8-

Signed-off-by: Pravin B Shelar <pshelar at nicira.com>
---
 datapath/datapath.c                             |    9 +-
 datapath/linux/Modules.mk                       |    3 +-
 datapath/linux/compat/include/linux/workqueue.h |   74 ++++++---
 datapath/linux/compat/workqueue.c               |  213 +++++++++++++++++++++++
 4 files changed, 276 insertions(+), 23 deletions(-)
 create mode 100644 datapath/linux/compat/workqueue.c

diff --git a/datapath/datapath.c b/datapath/datapath.c
index 025d932..72cf9f6 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -2053,10 +2053,14 @@ static int __init dp_init(void)
 	if (err)
 		goto error;
 
-	err = ovs_tnl_init();
+	err = ovs_workqueues_init();
 	if (err)
 		goto error_genl_exec;
 
+	err = ovs_tnl_init();
+	if (err)
+		goto error_wq;
+
 	err = ovs_flow_init();
 	if (err)
 		goto error_tnl_exit;
@@ -2083,6 +2087,8 @@ error_flow_exit:
 	ovs_flow_exit();
 error_tnl_exit:
 	ovs_tnl_exit();
+error_wq:
+	ovs_workqueues_exit();
 error_genl_exec:
 	genl_exec_exit();
 error:
@@ -2097,6 +2103,7 @@ static void dp_cleanup(void)
 	ovs_vport_exit();
 	ovs_flow_exit();
 	ovs_tnl_exit();
+	ovs_workqueues_exit();
 	genl_exec_exit();
 }
 
diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk
index f6cb88e..40c3927 100644
--- a/datapath/linux/Modules.mk
+++ b/datapath/linux/Modules.mk
@@ -9,7 +9,8 @@ openvswitch_sources += \
 	linux/compat/netdevice.c \
 	linux/compat/reciprocal_div.c \
 	linux/compat/skbuff-openvswitch.c \
-	linux/compat/time.c
+	linux/compat/time.c	\
+	linux/compat/workqueue.c
 openvswitch_headers += \
 	linux/compat/include/linux/compiler.h \
 	linux/compat/include/linux/compiler-gcc.h \
diff --git a/datapath/linux/compat/include/linux/workqueue.h b/datapath/linux/compat/include/linux/workqueue.h
index 01c6345..919afe3 100644
--- a/datapath/linux/compat/include/linux/workqueue.h
+++ b/datapath/linux/compat/include/linux/workqueue.h
@@ -1,41 +1,73 @@
 #ifndef __LINUX_WORKQUEUE_WRAPPER_H
 #define __LINUX_WORKQUEUE_WRAPPER_H 1
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
 #include_next <linux/workqueue.h>
+static inline int __init ovs_workqueues_init(void) { return 0; }
+static inline void ovs_workqueues_exit(void) {}
+
+#else
+#include <linux/timer.h>
+
+int __init ovs_workqueues_init(void);
+void ovs_workqueues_exit(void);
 
-#include <linux/version.h>
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
 
 /* Older kernels have an implementation of work queues with some very bad
  * characteristics when trying to cancel work (potential deadlocks, use after
- * free, etc.  Here we directly use timers instead for delayed work.  It's not
- * optimal but it is better than the alternative.  Note that work queues
- * normally run in process context but this will cause them to operate in
- * softirq context.
+ * free, etc.).  Here we implement a simple OVS-specific work queue using a
+ * single worker thread.  The workqueue API is kept compatible.
  */
+struct work_struct;
 
-#include <linux/timer.h>
+typedef void (*work_func_t)(struct work_struct *work);
 
-#undef DECLARE_DELAYED_WORK
-#define DECLARE_DELAYED_WORK(n, f) \
-	struct timer_list n = TIMER_INITIALIZER((void (*)(unsigned long))f, 0, 0)
+#define work_data_bits(work) ((unsigned long *)(&(work)->data))
 
-#define schedule_delayed_work rpl_schedule_delayed_work
-static inline int schedule_delayed_work(struct timer_list *timer, unsigned long delay)
-{
-	if (timer_pending(timer))
-		return 0;
+struct work_struct {
+#define WORK_STRUCT_PENDING 0           /* T if work item pending execution */
+	atomic_long_t data;
+	struct list_head entry;
+	work_func_t func;
+};
+
+#define WORK_DATA_INIT()        ATOMIC_LONG_INIT(0)
+
+#define work_clear_pending(work)				\
+	clear_bit(WORK_STRUCT_PENDING, work_data_bits(work))
+
+struct delayed_work {
+	struct work_struct work;
+	struct timer_list timer;
+};
 
-	mod_timer(timer, jiffies + delay);
-	return 1;
+#define __WORK_INITIALIZER(n, f) {				\
+	.data = WORK_DATA_INIT(),				\
+	.entry  = { &(n).entry, &(n).entry },			\
+	.func = (f),						\
 }
 
-#define cancel_delayed_work_sync rpl_cancel_delayed_work_sync
-static inline int cancel_delayed_work_sync(struct timer_list *timer)
-{
-	return del_timer_sync(timer);
+#define __DELAYED_WORK_INITIALIZER(n, f) {			\
+	.work = __WORK_INITIALIZER((n).work, (f)),		\
+	.timer = TIMER_INITIALIZER(NULL, 0, 0),			\
 }
 
+#define DECLARE_DELAYED_WORK(n, f)				\
+	struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f)
+
+#define schedule_delayed_work rpl_schedule_delayed_work
+int schedule_delayed_work(struct delayed_work *dwork, unsigned long delay);
+
+#define cancel_delayed_work_sync rpl_cancel_delayed_work_sync
+int cancel_delayed_work_sync(struct delayed_work *dwork);
+
+#define INIT_WORK(_work, _func)					\
+	do {							\
+		(_work)->data = (atomic_long_t) WORK_DATA_INIT();	\
+		INIT_LIST_HEAD(&(_work)->entry);		\
+		(_work)->func = (_func);			\
+	} while (0)
+
 #endif /* kernel version < 2.6.23 */
 
 #endif
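
For reference, a minimal sketch of how datapath code would use this
compat API; the work item and handler names here are hypothetical,
not part of the patch:

	static void example_fn(struct work_struct *work)
	{
		/* Runs in process context: on the single "ovs_workq"
		 * thread for kernels older than 2.6.23, on the stock
		 * kernel workqueue otherwise. */
	}

	static DECLARE_DELAYED_WORK(example_work, example_fn);

	static void example_arm(void)
	{
		/* No-op (returns 0) if the item is already pending. */
		schedule_delayed_work(&example_work, HZ);
	}

	static void example_disarm(void)
	{
		/* Kill the timer and wait out any in-flight run. */
		cancel_delayed_work_sync(&example_work);
	}
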
diff --git a/datapath/linux/compat/workqueue.c b/datapath/linux/compat/workqueue.c
new file mode 100644
index 0000000..883665b
--- /dev/null
+++ b/datapath/linux/compat/workqueue.c
@@ -0,0 +1,213 @@
+/*
+ * Derived from kernel/workqueue.c.
+ *
+ * This is a generic async execution mechanism.  Work items are
+ * executed in process context.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/signal.h>
+#include <linux/completion.h>
+#include <linux/workqueue.h>
+#include <linux/slab.h>
+#include <linux/cpu.h>
+#include <linux/notifier.h>
+#include <linux/kthread.h>
+#include <linux/hardirq.h>
+#include <linux/mempolicy.h>
+#include <linux/kallsyms.h>
+#include <linux/debug_locks.h>
+#include <linux/lockdep.h>
+#include <linux/idr.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
+
+static spinlock_t wq_lock;
+static struct list_head workq;
+static wait_queue_head_t more_work;
+static struct task_struct *workq_thread;
+static struct work_struct *current_work;
+
+static void queue_work(struct work_struct *work)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&wq_lock, flags);
+	list_add_tail(&work->entry, &workq);
+	wake_up(&more_work);
+	spin_unlock_irqrestore(&wq_lock, flags);
+}
+
+static void _delayed_work_timer_fn(unsigned long __data)
+{
+	struct delayed_work *dwork = (struct delayed_work *)__data;
+	queue_work(&dwork->work);
+}
+
+static void __queue_delayed_work(struct delayed_work *dwork,
+		unsigned long delay)
+{
+	struct timer_list *timer = &dwork->timer;
+	struct work_struct *work = &dwork->work;
+
+	BUG_ON(timer_pending(timer));
+	BUG_ON(!list_empty(&work->entry));
+
+	timer->expires = jiffies + delay;
+	timer->data = (unsigned long)dwork;
+	timer->function = _delayed_work_timer_fn;
+
+	add_timer(timer);
+}
+
+int schedule_delayed_work(struct delayed_work *dwork, unsigned long delay)
+{
+	if (test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(&dwork->work)))
+		return 0;
+
+	if (delay == 0)
+		queue_work(&dwork->work);
+	else
+		__queue_delayed_work(dwork, delay);
+
+	return 1;
+}
+
+struct wq_barrier {
+	struct work_struct      work;
+	struct completion       done;
+};
+
+static void wq_barrier_func(struct work_struct *work)
+{
+	struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
+	complete(&barr->done);
+}
+
+static void workqueue_barrier(struct work_struct *work)
+{
+	bool need_barrier;
+	struct wq_barrier barr;
+
+	spin_lock_irq(&wq_lock);
+	if (current_work != work)
+		need_barrier = false;
+	else {
+		INIT_WORK(&barr.work, wq_barrier_func);
+		init_completion(&barr.done);
+		list_add(&barr.work.entry, &workq);
+		wake_up(&more_work);
+		need_barrier = true;
+	}
+	spin_unlock_irq(&wq_lock);
+
+	if (need_barrier)
+		wait_for_completion(&barr.done);
+}
+
+static int try_to_grab_pending(struct work_struct *work)
+{
+	int ret;
+
+	BUG_ON(in_interrupt());
+
+	if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work)))
+		return 0;
+
+	spin_lock_irq(&wq_lock);
+	if (!list_empty(&work->entry)) {
+		list_del_init(&work->entry);
+		ret = 0;
+	} else
+		/* Queueing in progress; retry. */
+		ret = -1;
+	spin_unlock_irq(&wq_lock);
+
+	return ret;
+}
+
+static int __cancel_work_timer(struct work_struct *work,
+			       struct timer_list *timer)
+{
+	int ret;
+
+	for (;;) {
+		ret = (timer && likely(del_timer(timer)));
+		if (ret) /* Was active timer, return true. */
+			break;
+
+		/* Inactive timer case */
+		ret = try_to_grab_pending(work);
+		if (!ret)
+			break;
+	}
+	workqueue_barrier(work);
+	work_clear_pending(work);
+	return ret;
+}
+
+int cancel_delayed_work_sync(struct delayed_work *dwork)
+{
+	return __cancel_work_timer(&dwork->work, &dwork->timer);
+}
+
+static void run_workqueue(void)
+{
+	spin_lock_irq(&wq_lock);
+	while (!list_empty(&workq)) {
+		struct work_struct *work = list_entry(workq.next,
+				struct work_struct, entry);
+
+		work_func_t f = work->func;
+		list_del_init(workq.next);
+		current_work = work;
+		spin_unlock_irq(&wq_lock);
+
+		work_clear_pending(work);
+		f(work);
+
+		BUG_ON(in_interrupt());
+		spin_lock_irq(&wq_lock);
+		current_work = NULL;
+	}
+	spin_unlock_irq(&wq_lock);
+}
+
+static int worker_thread(void *dummy)
+{
+	for (;;) {
+		wait_event_interruptible(more_work,
+				(kthread_should_stop() || !list_empty(&workq)));
+
+		if (kthread_should_stop())
+			break;
+
+		run_workqueue();
+	}
+
+	return 0;
+}
+
+int __init ovs_workqueues_init(void)
+{
+	spin_lock_init(&wq_lock);
+	INIT_LIST_HEAD(&workq);
+	init_waitqueue_head(&more_work);
+
+	workq_thread = kthread_create(worker_thread, NULL, "ovs_workq");
+	if (IS_ERR(workq_thread))
+		return PTR_ERR(workq_thread);
+
+	wake_up_process(workq_thread);
+	return 0;
+}
+
+void ovs_workqueues_exit(void)
+{
+	BUG_ON(!list_empty(&workq));
+	kthread_stop(workq_thread);
+}
+#endif
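
A usage constraint implied by the single-thread design above (my
reading of the code, not something the patch spells out):
cancel_delayed_work_sync() on a currently-running item queues a
wq_barrier and blocks in wait_for_completion() until the worker runs
it, so a handler must never sync-cancel its own work item.  A
hypothetical sketch of the pattern to avoid:

	static void self_cancel_fn(struct work_struct *work)
	{
		struct delayed_work *dwork =
			container_of(work, struct delayed_work, work);

		/* Deadlocks on pre-2.6.23 kernels: workqueue_barrier()
		 * sees current_work == work, queues a barrier behind
		 * this handler, and then waits for the one "ovs_workq"
		 * thread (this one) to execute it. */
		cancel_delayed_work_sync(dwork);
	}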
-- 
1.7.1



