[ovs-dev] [rwlock 3/5] ofproto-dpif-xlate: Avoid recursively taking read side of ofgroup rwlock.

Ben Pfaff blp at nicira.com
Fri Feb 21 18:46:51 UTC 2014


On Wed, Feb 19, 2014 at 02:53:11PM -0800, Joe Stringer wrote:
> This looks good to me, although aren't we meant to report something at the
> OpenFlow layer?
> 
> Is this equivalent to the OpenFlow "chaining" description? So if we don't
> support chaining groups together, we should return an error message with
> reason OFPGMFC_CHAINING_UNSUPPORTED, rather than accepting the rules then
> misbehaving?

You're right.  Here's a version that does that, and adds a test.
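
For anyone who wants to see the failure mode concretely, here is a minimal
standalone program (illustration only, not part of the patch; the thread
structure and names are my own) showing the interleaving that POSIX permits
to deadlock.  glibc's default reader-preferring rwlocks let the second read
lock succeed, but a libc that queues readers behind a waiting writer, as
NetBSD's apparently does, hangs on the second pthread_rwlock_rdlock():

    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    static pthread_rwlock_t lk = PTHREAD_RWLOCK_INITIALIZER;

    static void *
    writer(void *arg)
    {
        (void) arg;
        pthread_rwlock_wrlock(&lk);   /* Blocks behind main's read lock. */
        pthread_rwlock_unlock(&lk);
        return NULL;
    }

    int
    main(void)
    {
        pthread_t t;

        pthread_rwlock_rdlock(&lk);   /* First read lock: always succeeds. */
        pthread_create(&t, NULL, writer, NULL);
        sleep(1);                     /* Give the writer time to queue up
                                       * on 'lk'. */

        /* Recursive read lock: POSIX allows this to block behind the
         * queued writer, which deadlocks this thread against the writer. */
        pthread_rwlock_rdlock(&lk);

        printf("no deadlock on this libc\n");
        pthread_rwlock_unlock(&lk);
        pthread_rwlock_unlock(&lk);
        pthread_join(t, NULL);
        return 0;
    }

That second rdlock is exactly what xlate would do if translating a group's
buckets caused another group (or the same group) to be translated while the
read side of the ofgroup rwlock was already held.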

--8<--------------------------cut here-------------------------->8--

From: Ben Pfaff <blp at nicira.com>
Date: Fri, 21 Feb 2014 10:45:00 -0800
Subject: [PATCH] ofproto-dpif-xlate: Avoid recursively taking read side of
 ofgroup rwlock.

With glibc, rwlocks by default allow recursive read-locking even if a
thread is blocked waiting for the write-lock.  POSIX allows such attempts
to deadlock, and it appears that the libc used in NetBSD, at least, does
deadlock.  ofproto-dpif-xlate could take the ofgroup rwlock recursively if
an ofgroup's actions caused the ofgroup to be executed again.  This commit
avoids that issue by preventing recursive translation of groups (the same
group or another group).  This is not the most user-friendly solution,
but OpenFlow allows this restriction, and we can always remove the
restriction later (probably requiring more complicated code) if it
proves to be a real problem for real users.

Found by inspection.

Signed-off-by: Ben Pfaff <blp at nicira.com>
---
 ofproto/ofproto-dpif-xlate.c |   32 +++++++++++++++++++++++++++++++-
 ofproto/ofproto-dpif.c       |   14 ++++++++++++++
 tests/ofproto-dpif.at        |   11 +++++++++++
 3 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index ccf0b75..89d92af 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -178,6 +178,7 @@ struct xlate_ctx {
     /* Resubmit statistics, via xlate_table_action(). */
     int recurse;                /* Current resubmit nesting depth. */
     int resubmits;              /* Total number of resubmits. */
+    bool in_group;              /* Currently translating ofgroup, if true. */
 
     uint32_t orig_skb_priority; /* Priority when packet arrived. */
     uint8_t table_id;           /* OpenFlow table ID where flow was found. */
@@ -1980,6 +1981,8 @@ xlate_select_group(struct xlate_ctx *ctx, struct group_dpif *group)
 static void
 xlate_group_action__(struct xlate_ctx *ctx, struct group_dpif *group)
 {
+    ctx->in_group = true;
+
     switch (group_dpif_get_type(group)) {
     case OFPGT11_ALL:
     case OFPGT11_INDIRECT:
@@ -1995,12 +1998,38 @@ xlate_group_action__(struct xlate_ctx *ctx, struct group_dpif *group)
         OVS_NOT_REACHED();
     }
     group_dpif_release(group);
+
+    ctx->in_group = false;
+}
+
+static bool
+xlate_group_resource_check(struct xlate_ctx *ctx)
+{
+    if (!xlate_resubmit_resource_check(ctx)) {
+        return false;
+    } else if (ctx->in_group) {
+        /* Prevent nested translation of OpenFlow groups.
+         *
+         * OpenFlow allows this restriction.  We enforce this restriction only
+         * because, with the current architecture, we would otherwise have to
+         * take a possibly recursive read lock on the ofgroup rwlock, which is
+         * unsafe given that POSIX allows taking a read lock to block if there
+         * is a thread blocked on taking the write lock.  Other solutions
+         * without this restriction are also possible, but seem unwarranted
+         * given the current limited use of groups. */
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+
+        VLOG_ERR_RL(&rl, "cannot recursively translate OpenFlow group");
+        return false;
+    } else {
+        return true;
+    }
 }
 
 static bool
 xlate_group_action(struct xlate_ctx *ctx, uint32_t group_id)
 {
-    if (xlate_resubmit_resource_check(ctx)) {
+    if (xlate_group_resource_check(ctx)) {
         struct group_dpif *group;
         bool got_group;
 
@@ -2974,6 +3003,7 @@ xlate_actions__(struct xlate_in *xin, struct xlate_out *xout)
 
     ctx.recurse = 0;
     ctx.resubmits = 0;
+    ctx.in_group = false;
     ctx.orig_skb_priority = flow->skb_priority;
     ctx.table_id = 0;
     ctx.exit = false;
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 328b215..7c5b941 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -3291,6 +3291,20 @@ static enum ofperr
 group_construct(struct ofgroup *group_)
 {
     struct group_dpif *group = group_dpif_cast(group_);
+    const struct ofputil_bucket *bucket;
+
+    /* Prevent group chaining, which our locking structure makes hard to
+     * implement deadlock-free.  (See xlate_group_resource_check().) */
+    LIST_FOR_EACH (bucket, list_node, &group->up.buckets) {
+        const struct ofpact *a;
+
+        OFPACT_FOR_EACH (a, bucket->ofpacts, bucket->ofpacts_len) {
+            if (a->type == OFPACT_GROUP) {
+                return OFPERR_OFPGMFC_CHAINING_UNSUPPORTED;
+            }
+        }
+    }
+
     ovs_mutex_init_adaptive(&group->stats_mutex);
     ovs_mutex_lock(&group->stats_mutex);
     group_construct_stats(group);
diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
index 06c4046..6d48e5a 100644
--- a/tests/ofproto-dpif.at
+++ b/tests/ofproto-dpif.at
@@ -112,6 +112,17 @@ AT_CHECK([tail -1 stdout], [0],
 OVS_VSWITCHD_STOP
 AT_CLEANUP
 
+AT_SETUP([ofproto-dpif - group chaining not supported])
+OVS_VSWITCHD_START
+ADD_OF_PORTS([br0], [1], [10], [11])
+AT_CHECK([ovs-ofctl -O OpenFlow12 add-group br0 'group_id=1234,type=all,bucket=output:10,set_field:192.168.3.90->ip_src,group:123,bucket=output:11'],
+  [1], [], [stderr])
+AT_CHECK([STRIP_XIDS stderr | sed 1q], [0],
+  [OFPT_ERROR (OF1.2): OFPGMFC_CHAINING_UNSUPPORTED
+])
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
 AT_SETUP([ofproto-dpif - all group in action list])
 OVS_VSWITCHD_START
 ADD_OF_PORTS([br0], [1], [10], [11])
-- 
1.7.10.4
