[ovs-dev] [PATCH v7] ofproto: Add reference count for Openflow groups.

Ryan Wilson wryan at nicira.com
Thu May 22 00:12:02 UTC 2014


When adding support for OpenFlow group and bucket stats, a group entry is added
to the xlate_cache. If the main thread removes the group from an ofproto, we
need to guarantee that the group remains accessible to users of
xlate_group_actions and the xlate_cache, as the xlate_cache will not be cleaned
up until the corresponding datapath flows are revalidated.

Before, modify_group could change the bucket list for a group. With group
entries in the xlate_cache, this could leave already-freed bucket pointers in
the cache. Modify_group now recreates the group and replaces the old group in
ofproto's ofgroup hash map. Thus, any subsequent group lookup will find the new
group while the old group and buckets will still exist until the xlate_cache
entries unref the group.

Since ofgroup's properties were only written in-place by modify_group and this
is no longer done, this eliminates the need for ofgroup's lock.

Signed-off-by: Ryan Wilson <wryan at nicira.com>

---
v2: Fixed bug with group stats all buckets, cleaned up ofgroup unref code,
    added Andy Zhou's test for more complete test coverage
v3: Split group reference count, support for group and bucket stats into 2
patches. Commit Andy Zhou's test patch separately.
v4: Addressed Joe's comments making it more clear why group refcount is needed
for the xlate cache.
v5: Added Joe's comments in git commit for more accuracy regarding xlate cache,
edited modify_group in ofproto.c to be compatible with group ref counting
v6: Fix lock annotations in ofproto-provider.h
v7: Add more clear git commit message, addressed Andy's comments.
---
 ofproto/ofproto-dpif-xlate.c |    4 +-
 ofproto/ofproto-dpif.c       |   22 ++---
 ofproto/ofproto-dpif.h       |   18 ++++-
 ofproto/ofproto-provider.h   |   36 +++++----
 ofproto/ofproto.c            |  182 ++++++++++++++++++++++--------------------
 5 files changed, 137 insertions(+), 125 deletions(-)

diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index a87db54..2095f37 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -854,7 +854,7 @@ group_is_alive(const struct xlate_ctx *ctx, uint32_t group_id, int depth)
 
     hit = group_first_live_bucket(ctx, group, depth) != NULL;
 
-    group_dpif_release(group);
+    group_dpif_unref(group);
     return hit;
 }
 
@@ -2202,7 +2202,7 @@ xlate_group_action__(struct xlate_ctx *ctx, struct group_dpif *group)
     default:
         OVS_NOT_REACHED();
     }
-    group_dpif_release(group);
+    group_dpif_unref(group);
 
     ctx->in_group = false;
 }
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 7fdd71f..c79602c 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -3596,18 +3596,9 @@ group_destruct(struct ofgroup *group_)
 }
 
 static enum ofperr
-group_modify(struct ofgroup *group_, struct ofgroup *victim_)
+group_modify(struct ofgroup *group_)
 {
     struct ofproto_dpif *ofproto = ofproto_dpif_cast(group_->ofproto);
-    struct group_dpif *group = group_dpif_cast(group_);
-    struct group_dpif *victim = group_dpif_cast(victim_);
-
-    ovs_mutex_lock(&group->stats_mutex);
-    if (victim->up.n_buckets < group->up.n_buckets) {
-        group_destruct__(group);
-    }
-    group_construct_stats(group);
-    ovs_mutex_unlock(&group->stats_mutex);
 
     ofproto->backer->need_revalidate = REV_FLOW_TABLE;
 
@@ -3629,6 +3620,10 @@ group_get_stats(const struct ofgroup *group_, struct ofputil_group_stats *ogs)
     return 0;
 }
 
+/* If the group exists, this function increments the groups's reference count.
+ *
+ * Make sure to call group_dpif_unref() after no longer needing to maintain
+ * a reference to the group. */
 bool
 group_dpif_lookup(struct ofproto_dpif *ofproto, uint32_t group_id,
                   struct group_dpif **group)
@@ -3645,13 +3640,6 @@ group_dpif_lookup(struct ofproto_dpif *ofproto, uint32_t group_id,
 }
 
 void
-group_dpif_release(struct group_dpif *group)
-    OVS_RELEASES(group->up.rwlock)
-{
-    ofproto_group_release(&group->up);
-}
-
-void
 group_dpif_get_buckets(const struct group_dpif *group,
                        const struct list **buckets)
 {
diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h
index e49616e..9542c5a 100644
--- a/ofproto/ofproto-dpif.h
+++ b/ofproto/ofproto-dpif.h
@@ -128,8 +128,6 @@ void choose_miss_rule(enum ofputil_port_config,
 bool group_dpif_lookup(struct ofproto_dpif *ofproto, uint32_t group_id,
                        struct group_dpif **group);
 
-void group_dpif_release(struct group_dpif *group);
-
 void group_dpif_get_buckets(const struct group_dpif *group,
                             const struct list **buckets);
 enum ofp11_group_type group_dpif_get_type(const struct group_dpif *group);
@@ -232,6 +230,22 @@ BUILD_ASSERT_DECL(N_TABLES >= 2 && N_TABLES <= 255);
 
 /* struct rule_dpif has struct rule as it's first member. */
 #define RULE_CAST(RULE) ((struct rule *)RULE)
+#define GROUP_CAST(GROUP) ((struct ofgroup *)GROUP)
+
+static inline struct group_dpif* group_dpif_ref(struct group_dpif *group)
+{
+    if (group) {
+        ofproto_group_ref(GROUP_CAST(group));
+    }
+    return group;
+}
+
+static inline void group_dpif_unref(struct group_dpif *group)
+{
+    if (group) {
+        ofproto_group_unref(GROUP_CAST(group));
+    }
+}
 
 static inline void rule_dpif_ref(struct rule_dpif *rule)
 {
diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index 141adec..e5f71c7 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -474,17 +474,22 @@ bool ofoperation_has_out_port(const struct ofoperation *, ofp_port_t out_port)
  * With few exceptions, ofproto implementations may look at these fields but
  * should not modify them. */
 struct ofgroup {
-    /* The rwlock is used to prevent groups from being deleted while child
-     * threads are using them to xlate flows.  A read lock means the
-     * group is currently being used.  A write lock means the group is
-     * in the process of being deleted or updated.  Note that since
-     * a read lock on the groups container is held while searching, and
-     * a group is ever write locked only while holding a write lock
-     * on the container, the user's of groups will never face a group
-     * in the write locked state. */
-    struct ovs_rwlock rwlock OVS_ACQ_AFTER(ofproto_mutex);
-    struct hmap_node hmap_node; /* In struct ofproto's "groups" hmap. */
-    struct ofproto *ofproto;    /* The ofproto that contains this group. */
+    struct hmap_node hmap_node OVS_GUARDED; /* In ofproto's "groups" hmap. */
+
+    /* Number of references.
+     *
+     * This is needed to keep track of references to the group in the xlate
+     * module.
+     *
+     * If the main thread removes the group from an ofproto, we need to
+     * guarantee that the group remains accessible to users of
+     * xlate_group_actions and the xlate_cache, as the xlate_cache will not be
+     * cleaned up until the corresponding datapath flows are revalidated. */
+    struct ovs_refcount ref_count;
+
+    /* No lock is needed to protect the fields below since they are not
+     * modified after construction. */
+    struct ofproto *ofproto;   /* The ofproto that contains this group. */
     uint32_t group_id;
     enum ofp11_group_type type; /* One of OFPGT_*. */
 
@@ -496,11 +501,10 @@ struct ofgroup {
 };
 
 bool ofproto_group_lookup(const struct ofproto *ofproto, uint32_t group_id,
-                          struct ofgroup **group)
-    OVS_TRY_RDLOCK(true, (*group)->rwlock);
+                          struct ofgroup **group);
 
-void ofproto_group_release(struct ofgroup *group)
-    OVS_RELEASES(group->rwlock);
+void ofproto_group_ref(struct ofgroup *);
+void ofproto_group_unref(struct ofgroup *);
 
 /* ofproto class structure, to be defined by each ofproto implementation.
  *
@@ -1684,7 +1688,7 @@ struct ofproto_class {
     void (*group_destruct)(struct ofgroup *);
     void (*group_dealloc)(struct ofgroup *);
 
-    enum ofperr (*group_modify)(struct ofgroup *, struct ofgroup *victim);
+    enum ofperr (*group_modify)(struct ofgroup *);
 
     enum ofperr (*group_get_stats)(const struct ofgroup *,
                                    struct ofputil_group_stats *);
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index e728921..ad69d4c 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -2647,6 +2647,24 @@ ofproto_rule_unref(struct rule *rule)
     }
 }
 
+void
+ofproto_group_ref(struct ofgroup *group)
+{
+    if (group) {
+        ovs_refcount_ref(&group->ref_count);
+    }
+}
+
+void
+ofproto_group_unref(struct ofgroup *group)
+{
+    if (group && ovs_refcount_unref(&group->ref_count) == 1) {
+        group->ofproto->ofproto_class->group_destruct(group);
+        ofputil_bucket_list_destroy(&group->buckets);
+        group->ofproto->ofproto_class->group_dealloc(group);
+    }
+}
+
 static uint32_t get_provider_meter_id(const struct ofproto *,
                                       uint32_t of_meter_id);
 
@@ -5334,16 +5352,19 @@ handle_meter_request(struct ofconn *ofconn, const struct ofp_header *request,
     return 0;
 }
 
+/* If the group exists, this function increments the groups's reference count.
+ *
+ * Make sure to call ofproto_group_unref() after no longer needing to maintain
+ * a reference to the group. */
 bool
 ofproto_group_lookup(const struct ofproto *ofproto, uint32_t group_id,
                      struct ofgroup **group)
-    OVS_TRY_RDLOCK(true, (*group)->rwlock)
 {
     ovs_rwlock_rdlock(&ofproto->groups_rwlock);
     HMAP_FOR_EACH_IN_BUCKET (*group, hmap_node,
                              hash_int(group_id, 0), &ofproto->groups) {
         if ((*group)->group_id == group_id) {
-            ovs_rwlock_rdlock(&(*group)->rwlock);
+            ofproto_group_ref(*group);
             ovs_rwlock_unlock(&ofproto->groups_rwlock);
             return true;
         }
@@ -5352,28 +5373,18 @@ ofproto_group_lookup(const struct ofproto *ofproto, uint32_t group_id,
     return false;
 }
 
-void
-ofproto_group_release(struct ofgroup *group)
-    OVS_RELEASES(group->rwlock)
-{
-    ovs_rwlock_unlock(&group->rwlock);
-}
-
 static bool
 ofproto_group_write_lookup(const struct ofproto *ofproto, uint32_t group_id,
                            struct ofgroup **group)
-    OVS_TRY_WRLOCK(true, ofproto->groups_rwlock)
-    OVS_TRY_WRLOCK(true, (*group)->rwlock)
+    OVS_ACQUIRES(ofproto->groups_rwlock)
 {
     ovs_rwlock_wrlock(&ofproto->groups_rwlock);
     HMAP_FOR_EACH_IN_BUCKET (*group, hmap_node,
                              hash_int(group_id, 0), &ofproto->groups) {
         if ((*group)->group_id == group_id) {
-            ovs_rwlock_wrlock(&(*group)->rwlock);
             return true;
         }
     }
-    ovs_rwlock_unlock(&ofproto->groups_rwlock);
     return false;
 }
 
@@ -5432,7 +5443,6 @@ group_get_ref_count(struct ofgroup *group)
 
 static void
 append_group_stats(struct ofgroup *group, struct list *replies)
-    OVS_REQ_RDLOCK(group->rwlock)
 {
     struct ofputil_group_stats ogs;
     struct ofproto *ofproto = group->ofproto;
@@ -5476,15 +5486,13 @@ handle_group_request(struct ofconn *ofconn,
     if (group_id == OFPG_ALL) {
         ovs_rwlock_rdlock(&ofproto->groups_rwlock);
         HMAP_FOR_EACH (group, hmap_node, &ofproto->groups) {
-            ovs_rwlock_rdlock(&group->rwlock);
             cb(group, &replies);
-            ovs_rwlock_unlock(&group->rwlock);
         }
         ovs_rwlock_unlock(&ofproto->groups_rwlock);
     } else {
         if (ofproto_group_lookup(ofproto, group_id, &group)) {
             cb(group, &replies);
-            ofproto_group_release(group);
+            ofproto_group_unref(group);
         }
     }
     ofconn_send_replies(ofconn, &replies);
@@ -5584,6 +5592,43 @@ handle_queue_get_config_request(struct ofconn *ofconn,
    return 0;
 }
 
+static enum ofperr
+init_group(struct ofproto *ofproto, struct ofputil_group_mod *gm,
+           struct ofgroup **ofgroup)
+{
+    enum ofperr error;
+
+    if (gm->group_id > OFPG_MAX) {
+        return OFPERR_OFPGMFC_INVALID_GROUP;
+    }
+    if (gm->type > OFPGT11_FF) {
+        return OFPERR_OFPGMFC_BAD_TYPE;
+    }
+
+    *ofgroup = ofproto->ofproto_class->group_alloc();
+    if (!*ofgroup) {
+        VLOG_WARN_RL(&rl, "%s: failed to allocate group", ofproto->name);
+        return OFPERR_OFPGMFC_OUT_OF_GROUPS;
+    }
+
+    (*ofgroup)->ofproto  = ofproto;
+    (*ofgroup)->group_id = gm->group_id;
+    (*ofgroup)->type     = gm->type;
+    (*ofgroup)->created = (*ofgroup)->modified = time_msec();
+    ovs_refcount_init(&(*ofgroup)->ref_count);
+
+    list_move(&(*ofgroup)->buckets, &gm->buckets);
+    (*ofgroup)->n_buckets = list_size(&(*ofgroup)->buckets);
+
+    /* Construct called BEFORE any locks are held. */
+    error = ofproto->ofproto_class->group_construct(*ofgroup);
+    if (error) {
+        ofputil_bucket_list_destroy(&(*ofgroup)->buckets);
+        ofproto->ofproto_class->group_dealloc(*ofgroup);
+    }
+    return error;
+}
+
 /* Implements OFPGC11_ADD
  * in which no matching flow already exists in the flow table.
  *
@@ -5603,33 +5648,10 @@ add_group(struct ofproto *ofproto, struct ofputil_group_mod *gm)
     struct ofgroup *ofgroup;
     enum ofperr error;
 
-    if (gm->group_id > OFPG_MAX) {
-        return OFPERR_OFPGMFC_INVALID_GROUP;
-    }
-    if (gm->type > OFPGT11_FF) {
-        return OFPERR_OFPGMFC_BAD_TYPE;
-    }
-
     /* Allocate new group and initialize it. */
-    ofgroup = ofproto->ofproto_class->group_alloc();
-    if (!ofgroup) {
-        VLOG_WARN_RL(&rl, "%s: failed to create group", ofproto->name);
-        return OFPERR_OFPGMFC_OUT_OF_GROUPS;
-    }
-
-    ovs_rwlock_init(&ofgroup->rwlock);
-    ofgroup->ofproto  = ofproto;
-    ofgroup->group_id = gm->group_id;
-    ofgroup->type     = gm->type;
-    ofgroup->created = ofgroup->modified = time_msec();
-
-    list_move(&ofgroup->buckets, &gm->buckets);
-    ofgroup->n_buckets = list_size(&ofgroup->buckets);
-
-    /* Construct called BEFORE any locks are held. */
-    error = ofproto->ofproto_class->group_construct(ofgroup);
+    error = init_group(ofproto, gm, &ofgroup);
     if (error) {
-        goto free_out;
+        return error;
     }
 
     /* We wrlock as late as possible to minimize the time we jam any other
@@ -5659,7 +5681,6 @@ add_group(struct ofproto *ofproto, struct ofputil_group_mod *gm)
  unlock_out:
     ovs_rwlock_unlock(&ofproto->groups_rwlock);
     ofproto->ofproto_class->group_destruct(ofgroup);
- free_out:
     ofputil_bucket_list_destroy(&ofgroup->buckets);
     ofproto->ofproto_class->group_dealloc(ofgroup);
 
@@ -5669,66 +5690,59 @@ add_group(struct ofproto *ofproto, struct ofputil_group_mod *gm)
 /* Implements OFPFC_MODIFY.  Returns 0 on success or an OpenFlow error code on
  * failure.
  *
+ * Note that the group is re-created and then replaces the old group in
+ * ofproto's ofgroup hash map. Thus, the group is never altered while users of
+ * the xlate module hold a pointer to the group.
+ *
  * 'ofconn' is used to retrieve the packet buffer specified in fm->buffer_id,
  * if any. */
 static enum ofperr
 modify_group(struct ofproto *ofproto, struct ofputil_group_mod *gm)
 {
-    struct ofgroup *ofgroup;
-    struct ofgroup *victim;
+    struct ofgroup *ofgroup, *new_ofgroup;
     enum ofperr error;
 
-    if (gm->group_id > OFPG_MAX) {
-        return OFPERR_OFPGMFC_INVALID_GROUP;
-    }
-
-    if (gm->type > OFPGT11_FF) {
-        return OFPERR_OFPGMFC_BAD_TYPE;
-    }
-
-    victim = ofproto->ofproto_class->group_alloc();
-    if (!victim) {
-        VLOG_WARN_RL(&rl, "%s: failed to allocate group", ofproto->name);
-        return OFPERR_OFPGMFC_OUT_OF_GROUPS;
+    error = init_group(ofproto, gm, &new_ofgroup);
+    if (error) {
+        return error;
     }
 
     if (!ofproto_group_write_lookup(ofproto, gm->group_id, &ofgroup)) {
         error = OFPERR_OFPGMFC_UNKNOWN_GROUP;
         goto free_out;
     }
-    /* Both group's and its container's write locks held now.
-     * Also, n_groups[] is protected by ofproto->groups_rwlock. */
+
+    /* Ofproto's group write lock is held now. */
     if (ofgroup->type != gm->type
         && ofproto->n_groups[gm->type] >= ofproto->ogf.max_groups[gm->type]) {
         error = OFPERR_OFPGMFC_OUT_OF_GROUPS;
-        goto unlock_out;
+        goto free_out;
     }
 
-    *victim = *ofgroup;
-    list_move(&victim->buckets, &ofgroup->buckets);
-
-    ofgroup->type = gm->type;
-    list_move(&ofgroup->buckets, &gm->buckets);
-    ofgroup->n_buckets = list_size(&ofgroup->buckets);
+    /* The group creation time does not change during modification. */
+    new_ofgroup->created = ofgroup->created;
+    new_ofgroup->modified = time_msec();
 
-    error = ofproto->ofproto_class->group_modify(ofgroup, victim);
-    if (!error) {
-        ofputil_bucket_list_destroy(&victim->buckets);
-        ofproto->n_groups[victim->type]--;
-        ofproto->n_groups[ofgroup->type]++;
-        ofgroup->modified = time_msec();
-    } else {
-        ofputil_bucket_list_destroy(&ofgroup->buckets);
+    error = ofproto->ofproto_class->group_modify(new_ofgroup);
+    if (error) {
+        goto free_out;
+    }
 
-        *ofgroup = *victim;
-        list_move(&ofgroup->buckets, &victim->buckets);
+    /* Replace ofgroup in ofproto's groups hash map with new_ofgroup. */
+    hmap_remove(&ofproto->groups, &ofgroup->hmap_node);
+    hmap_insert(&ofproto->groups, &new_ofgroup->hmap_node,
+                hash_int(new_ofgroup->group_id, 0));
+    if (ofgroup->type != new_ofgroup->type) {
+        ofproto->n_groups[ofgroup->type]--;
+        ofproto->n_groups[new_ofgroup->type]++;
     }
+    ofproto_group_unref(ofgroup);
+    goto unlock_out;
 
+ free_out:
+    ofproto_group_unref(new_ofgroup);
  unlock_out:
-    ovs_rwlock_unlock(&ofgroup->rwlock);
     ovs_rwlock_unlock(&ofproto->groups_rwlock);
- free_out:
-    ofproto->ofproto_class->group_dealloc(victim);
     return error;
 }
 
@@ -5745,19 +5759,11 @@ delete_group__(struct ofproto *ofproto, struct ofgroup *ofgroup)
     fm.out_group = ofgroup->group_id;
     handle_flow_mod__(ofproto, NULL, &fm, NULL);
 
-    /* Must wait until existing readers are done,
-     * while holding the container's write lock at the same time. */
-    ovs_rwlock_wrlock(&ofgroup->rwlock);
     hmap_remove(&ofproto->groups, &ofgroup->hmap_node);
     /* No-one can find this group any more. */
     ofproto->n_groups[ofgroup->type]--;
     ovs_rwlock_unlock(&ofproto->groups_rwlock);
-
-    ofproto->ofproto_class->group_destruct(ofgroup);
-    ofputil_bucket_list_destroy(&ofgroup->buckets);
-    ovs_rwlock_unlock(&ofgroup->rwlock);
-    ovs_rwlock_destroy(&ofgroup->rwlock);
-    ofproto->ofproto_class->group_dealloc(ofgroup);
+    ofproto_group_unref(ofgroup);
 }
 
 /* Implements OFPGC_DELETE. */
-- 
1.7.9.5




More information about the dev mailing list