[ovs-dev] [PATCH v6] ofproto: Add reference count for Openflow groups.

Ryan Wilson wryan at nicira.com
Wed May 21 21:14:47 UTC 2014


When adding support for OpenFlow group and bucket stats, a group entry is added
to the xlate_cache. If the main thread removes the group from an ofproto, we
need to guarantee that the group remains accessible to users of
xlate_group_actions and the xlate_cache, as the xlate_cache will not be cleaned
up until the corresponding datapath flows are revalidated.

To make modify_group compatible with group reference counting, the group is
re-created and then replaces the old group in ofproto's ofgroup hash map. Thus,
the group is never altered while users of the xlate module hold a pointer to the
group. This also eliminates the need for ofgroup's lock as all properties of
ofgroup are read-only.

Signed-off-by: Ryan Wilson <wryan at nicira.com>

---
v2: Fixed bug with group stats all buckets, cleaned up ofgroup unref code,
    added Andy Zhou's test for more complete test coverage
v3: Split group reference count, support for group and bucket stats into 2
patches. Commit Andy Zhou's test patch separately.
v4: Addressed Joe's comments making it more clear why group refcount is needed
for the xlate cache.
v5: Added Joe's comments in git commit for more accuracy regarding xlate cache,
edited modify_group in ofproto.c to be compatible with group ref counting
v6: Fix lock annotations in ofproto-provider.h
---
 ofproto/ofproto-dpif-xlate.c |    4 +-
 ofproto/ofproto-dpif.c       |   22 ++---
 ofproto/ofproto-dpif.h       |   16 ++++
 ofproto/ofproto-provider.h   |   29 ++++---
 ofproto/ofproto.c            |  195 +++++++++++++++++++++++-------------------
 5 files changed, 143 insertions(+), 123 deletions(-)

diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index a87db54..2095f37 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -854,7 +854,7 @@ group_is_alive(const struct xlate_ctx *ctx, uint32_t group_id, int depth)
 
     hit = group_first_live_bucket(ctx, group, depth) != NULL;
 
-    group_dpif_release(group);
+    group_dpif_unref(group);
     return hit;
 }
 
@@ -2202,7 +2202,7 @@ xlate_group_action__(struct xlate_ctx *ctx, struct group_dpif *group)
     default:
         OVS_NOT_REACHED();
     }
-    group_dpif_release(group);
+    group_dpif_unref(group);
 
     ctx->in_group = false;
 }
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 7fdd71f..c79602c 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -3596,18 +3596,9 @@ group_destruct(struct ofgroup *group_)
 }
 
 static enum ofperr
-group_modify(struct ofgroup *group_, struct ofgroup *victim_)
+group_modify(struct ofgroup *group_)
 {
     struct ofproto_dpif *ofproto = ofproto_dpif_cast(group_->ofproto);
-    struct group_dpif *group = group_dpif_cast(group_);
-    struct group_dpif *victim = group_dpif_cast(victim_);
-
-    ovs_mutex_lock(&group->stats_mutex);
-    if (victim->up.n_buckets < group->up.n_buckets) {
-        group_destruct__(group);
-    }
-    group_construct_stats(group);
-    ovs_mutex_unlock(&group->stats_mutex);
 
     ofproto->backer->need_revalidate = REV_FLOW_TABLE;
 
@@ -3629,6 +3620,10 @@ group_get_stats(const struct ofgroup *group_, struct ofputil_group_stats *ogs)
     return 0;
 }
 
+/* If the group exists, this function increments the groups's reference count.
+ *
+ * Make sure to call group_dpif_unref() after no longer needing to maintain
+ * a reference to the group. */
 bool
 group_dpif_lookup(struct ofproto_dpif *ofproto, uint32_t group_id,
                   struct group_dpif **group)
@@ -3645,13 +3640,6 @@ group_dpif_lookup(struct ofproto_dpif *ofproto, uint32_t group_id,
 }
 
 void
-group_dpif_release(struct group_dpif *group)
-    OVS_RELEASES(group->up.rwlock)
-{
-    ofproto_group_release(&group->up);
-}
-
-void
 group_dpif_get_buckets(const struct group_dpif *group,
                        const struct list **buckets)
 {
diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h
index e49616e..345117a 100644
--- a/ofproto/ofproto-dpif.h
+++ b/ofproto/ofproto-dpif.h
@@ -232,6 +232,22 @@ BUILD_ASSERT_DECL(N_TABLES >= 2 && N_TABLES <= 255);
 
 /* struct rule_dpif has struct rule as it's first member. */
 #define RULE_CAST(RULE) ((struct rule *)RULE)
+#define GROUP_CAST(GROUP) ((struct ofgroup *)GROUP)
+
+static inline struct group_dpif* group_dpif_ref(struct group_dpif *group)
+{
+    if (group) {
+        ofproto_group_ref(GROUP_CAST(group));
+    }
+    return group;
+}
+
+static inline void group_dpif_unref(struct group_dpif *group)
+{
+    if (group) {
+        ofproto_group_unref(GROUP_CAST(group));
+    }
+}
 
 static inline void rule_dpif_ref(struct rule_dpif *rule)
 {
diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index 141adec..e2af522 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -474,20 +474,22 @@ bool ofoperation_has_out_port(const struct ofoperation *, ofp_port_t out_port)
  * With few exceptions, ofproto implementations may look at these fields but
  * should not modify them. */
 struct ofgroup {
-    /* The rwlock is used to prevent groups from being deleted while child
-     * threads are using them to xlate flows.  A read lock means the
-     * group is currently being used.  A write lock means the group is
-     * in the process of being deleted or updated.  Note that since
-     * a read lock on the groups container is held while searching, and
-     * a group is ever write locked only while holding a write lock
-     * on the container, the user's of groups will never face a group
-     * in the write locked state. */
-    struct ovs_rwlock rwlock OVS_ACQ_AFTER(ofproto_mutex);
     struct hmap_node hmap_node; /* In struct ofproto's "groups" hmap. */
     struct ofproto *ofproto;    /* The ofproto that contains this group. */
     uint32_t group_id;
     enum ofp11_group_type type; /* One of OFPGT_*. */
 
+    /* Number of references.
+     *
+     * This is needed to keep track of references to the group in the xlate
+     * module.
+     *
+     * If the main thread removes the group from an ofproto, we need to
+     * guarantee that the group remains accessible to users of
+     * xlate_group_actions and the xlate_cache, as the xlate_cache will not be
+     * cleaned up until the corresponding datapath flows are revalidated. */
+    struct ovs_refcount ref_count;
+
     long long int created;      /* Creation time. */
     long long int modified;     /* Time of last modification. */
 
@@ -496,11 +498,10 @@ struct ofgroup {
 };
 
 bool ofproto_group_lookup(const struct ofproto *ofproto, uint32_t group_id,
-                          struct ofgroup **group)
-    OVS_TRY_RDLOCK(true, (*group)->rwlock);
+                          struct ofgroup **group);
 
-void ofproto_group_release(struct ofgroup *group)
-    OVS_RELEASES(group->rwlock);
+void ofproto_group_ref(struct ofgroup *);
+void ofproto_group_unref(struct ofgroup *);
 
 /* ofproto class structure, to be defined by each ofproto implementation.
  *
@@ -1684,7 +1685,7 @@ struct ofproto_class {
     void (*group_destruct)(struct ofgroup *);
     void (*group_dealloc)(struct ofgroup *);
 
-    enum ofperr (*group_modify)(struct ofgroup *, struct ofgroup *victim);
+    enum ofperr (*group_modify)(struct ofgroup *);
 
     enum ofperr (*group_get_stats)(const struct ofgroup *,
                                    struct ofputil_group_stats *);
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index e728921..fb0244a 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -2647,6 +2647,30 @@ ofproto_rule_unref(struct rule *rule)
     }
 }
 
+static void
+group_destroy_cb(struct ofgroup *group)
+{
+    group->ofproto->ofproto_class->group_destruct(group);
+    ofputil_bucket_list_destroy(&group->buckets);
+    group->ofproto->ofproto_class->group_dealloc(group);
+}
+
+void
+ofproto_group_ref(struct ofgroup *group)
+{
+    if (group) {
+        ovs_refcount_ref(&group->ref_count);
+    }
+}
+
+void
+ofproto_group_unref(struct ofgroup *group)
+{
+    if (group && ovs_refcount_unref(&group->ref_count) == 1) {
+        ovsrcu_postpone(group_destroy_cb, group);
+    }
+}
+
 static uint32_t get_provider_meter_id(const struct ofproto *,
                                       uint32_t of_meter_id);
 
@@ -5334,16 +5358,19 @@ handle_meter_request(struct ofconn *ofconn, const struct ofp_header *request,
     return 0;
 }
 
+/* If the group exists, this function increments the groups's reference count.
+ *
+ * Make sure to call ofproto_group_unref() after no longer needing to maintain
+ * a reference to the group. */
 bool
 ofproto_group_lookup(const struct ofproto *ofproto, uint32_t group_id,
                      struct ofgroup **group)
-    OVS_TRY_RDLOCK(true, (*group)->rwlock)
 {
     ovs_rwlock_rdlock(&ofproto->groups_rwlock);
     HMAP_FOR_EACH_IN_BUCKET (*group, hmap_node,
                              hash_int(group_id, 0), &ofproto->groups) {
         if ((*group)->group_id == group_id) {
-            ovs_rwlock_rdlock(&(*group)->rwlock);
+            ofproto_group_ref(*group);
             ovs_rwlock_unlock(&ofproto->groups_rwlock);
             return true;
         }
@@ -5352,24 +5379,16 @@ ofproto_group_lookup(const struct ofproto *ofproto, uint32_t group_id,
     return false;
 }
 
-void
-ofproto_group_release(struct ofgroup *group)
-    OVS_RELEASES(group->rwlock)
-{
-    ovs_rwlock_unlock(&group->rwlock);
-}
-
 static bool
 ofproto_group_write_lookup(const struct ofproto *ofproto, uint32_t group_id,
                            struct ofgroup **group)
     OVS_TRY_WRLOCK(true, ofproto->groups_rwlock)
-    OVS_TRY_WRLOCK(true, (*group)->rwlock)
 {
     ovs_rwlock_wrlock(&ofproto->groups_rwlock);
     HMAP_FOR_EACH_IN_BUCKET (*group, hmap_node,
                              hash_int(group_id, 0), &ofproto->groups) {
         if ((*group)->group_id == group_id) {
-            ovs_rwlock_wrlock(&(*group)->rwlock);
+            ofproto_group_ref(*group);
             return true;
         }
     }
@@ -5432,7 +5451,6 @@ group_get_ref_count(struct ofgroup *group)
 
 static void
 append_group_stats(struct ofgroup *group, struct list *replies)
-    OVS_REQ_RDLOCK(group->rwlock)
 {
     struct ofputil_group_stats ogs;
     struct ofproto *ofproto = group->ofproto;
@@ -5476,15 +5494,15 @@ handle_group_request(struct ofconn *ofconn,
     if (group_id == OFPG_ALL) {
         ovs_rwlock_rdlock(&ofproto->groups_rwlock);
         HMAP_FOR_EACH (group, hmap_node, &ofproto->groups) {
-            ovs_rwlock_rdlock(&group->rwlock);
+            ofproto_group_ref(group);
             cb(group, &replies);
-            ovs_rwlock_unlock(&group->rwlock);
+            ofproto_group_unref(group);
         }
         ovs_rwlock_unlock(&ofproto->groups_rwlock);
     } else {
         if (ofproto_group_lookup(ofproto, group_id, &group)) {
             cb(group, &replies);
-            ofproto_group_release(group);
+            ofproto_group_unref(group);
         }
     }
     ofconn_send_replies(ofconn, &replies);
@@ -5584,6 +5602,43 @@ handle_queue_get_config_request(struct ofconn *ofconn,
    return 0;
 }
 
+static enum ofperr
+init_group(struct ofproto *ofproto, struct ofputil_group_mod *gm,
+           struct ofgroup **ofgroup)
+{
+    enum ofperr error;
+
+    if (gm->group_id > OFPG_MAX) {
+        return OFPERR_OFPGMFC_INVALID_GROUP;
+    }
+    if (gm->type > OFPGT11_FF) {
+        return OFPERR_OFPGMFC_BAD_TYPE;
+    }
+
+    *ofgroup = ofproto->ofproto_class->group_alloc();
+    if (!*ofgroup) {
+        VLOG_WARN_RL(&rl, "%s: failed to allocate group", ofproto->name);
+        return OFPERR_OFPGMFC_OUT_OF_GROUPS;
+    }
+
+    (*ofgroup)->ofproto  = ofproto;
+    (*ofgroup)->group_id = gm->group_id;
+    (*ofgroup)->type     = gm->type;
+    (*ofgroup)->created = (*ofgroup)->modified = time_msec();
+    ovs_refcount_init(&(*ofgroup)->ref_count);
+
+    list_move(&(*ofgroup)->buckets, &gm->buckets);
+    (*ofgroup)->n_buckets = list_size(&(*ofgroup)->buckets);
+
+    /* Construct called BEFORE any locks are held. */
+    error = ofproto->ofproto_class->group_construct(*ofgroup);
+    if (error) {
+        ofputil_bucket_list_destroy(&(*ofgroup)->buckets);
+        ofproto->ofproto_class->group_dealloc(*ofgroup);
+    }
+    return error;
+}
+
 /* Implements OFPGC11_ADD
  * in which no matching flow already exists in the flow table.
  *
@@ -5603,33 +5658,10 @@ add_group(struct ofproto *ofproto, struct ofputil_group_mod *gm)
     struct ofgroup *ofgroup;
     enum ofperr error;
 
-    if (gm->group_id > OFPG_MAX) {
-        return OFPERR_OFPGMFC_INVALID_GROUP;
-    }
-    if (gm->type > OFPGT11_FF) {
-        return OFPERR_OFPGMFC_BAD_TYPE;
-    }
-
     /* Allocate new group and initialize it. */
-    ofgroup = ofproto->ofproto_class->group_alloc();
-    if (!ofgroup) {
-        VLOG_WARN_RL(&rl, "%s: failed to create group", ofproto->name);
-        return OFPERR_OFPGMFC_OUT_OF_GROUPS;
-    }
-
-    ovs_rwlock_init(&ofgroup->rwlock);
-    ofgroup->ofproto  = ofproto;
-    ofgroup->group_id = gm->group_id;
-    ofgroup->type     = gm->type;
-    ofgroup->created = ofgroup->modified = time_msec();
-
-    list_move(&ofgroup->buckets, &gm->buckets);
-    ofgroup->n_buckets = list_size(&ofgroup->buckets);
-
-    /* Construct called BEFORE any locks are held. */
-    error = ofproto->ofproto_class->group_construct(ofgroup);
+    error = init_group(ofproto, gm, &ofgroup);
     if (error) {
-        goto free_out;
+        return error;
     }
 
     /* We wrlock as late as possible to minimize the time we jam any other
@@ -5659,76 +5691,67 @@ add_group(struct ofproto *ofproto, struct ofputil_group_mod *gm)
  unlock_out:
     ovs_rwlock_unlock(&ofproto->groups_rwlock);
     ofproto->ofproto_class->group_destruct(ofgroup);
- free_out:
     ofputil_bucket_list_destroy(&ofgroup->buckets);
     ofproto->ofproto_class->group_dealloc(ofgroup);
 
     return error;
 }
 
+static void
+replace_group(struct ofproto *ofproto, struct ofgroup *ofgroup,
+              struct ofgroup *new_ofgroup)
+{
+    hmap_remove(&ofproto->groups, &ofgroup->hmap_node);
+    hmap_insert(&ofproto->groups, &new_ofgroup->hmap_node,
+                hash_int(new_ofgroup->group_id, 0));
+    if (ofgroup->type != new_ofgroup->type) {
+        ofproto->n_groups[ofgroup->type]--;
+        ofproto->n_groups[new_ofgroup->type]++;
+    }
+    ofproto_group_unref(ofgroup);
+}
+
 /* Implements OFPFC_MODIFY.  Returns 0 on success or an OpenFlow error code on
  * failure.
  *
+ * Note that the group is re-created and then replaces the old group in
+ * ofproto's ofgroup hash map. Thus, the group is never altered while users of
+ * the xlate module hold a pointer to the group.
+ *
  * 'ofconn' is used to retrieve the packet buffer specified in fm->buffer_id,
  * if any. */
 static enum ofperr
 modify_group(struct ofproto *ofproto, struct ofputil_group_mod *gm)
 {
-    struct ofgroup *ofgroup;
-    struct ofgroup *victim;
+    struct ofgroup *ofgroup, *new_ofgroup;
     enum ofperr error;
 
-    if (gm->group_id > OFPG_MAX) {
-        return OFPERR_OFPGMFC_INVALID_GROUP;
-    }
-
-    if (gm->type > OFPGT11_FF) {
-        return OFPERR_OFPGMFC_BAD_TYPE;
-    }
-
-    victim = ofproto->ofproto_class->group_alloc();
-    if (!victim) {
-        VLOG_WARN_RL(&rl, "%s: failed to allocate group", ofproto->name);
-        return OFPERR_OFPGMFC_OUT_OF_GROUPS;
+    error = init_group(ofproto, gm, &new_ofgroup);
+    if (error) {
+        return error;
     }
 
     if (!ofproto_group_write_lookup(ofproto, gm->group_id, &ofgroup)) {
         error = OFPERR_OFPGMFC_UNKNOWN_GROUP;
-        goto free_out;
+        ofproto_group_unref(new_ofgroup);
+        return error;
     }
-    /* Both group's and its container's write locks held now.
-     * Also, n_groups[] is protected by ofproto->groups_rwlock. */
+
+    /* Ofproto's group write lock is held now. */
     if (ofgroup->type != gm->type
         && ofproto->n_groups[gm->type] >= ofproto->ogf.max_groups[gm->type]) {
         error = OFPERR_OFPGMFC_OUT_OF_GROUPS;
-        goto unlock_out;
-    }
-
-    *victim = *ofgroup;
-    list_move(&victim->buckets, &ofgroup->buckets);
-
-    ofgroup->type = gm->type;
-    list_move(&ofgroup->buckets, &gm->buckets);
-    ofgroup->n_buckets = list_size(&ofgroup->buckets);
-
-    error = ofproto->ofproto_class->group_modify(ofgroup, victim);
-    if (!error) {
-        ofputil_bucket_list_destroy(&victim->buckets);
-        ofproto->n_groups[victim->type]--;
-        ofproto->n_groups[ofgroup->type]++;
-        ofgroup->modified = time_msec();
+        ofproto_group_unref(new_ofgroup);
     } else {
-        ofputil_bucket_list_destroy(&ofgroup->buckets);
+        /* The group creation time does not change during modification. */
+        new_ofgroup->created = ofgroup->created;
 
-        *ofgroup = *victim;
-        list_move(&ofgroup->buckets, &victim->buckets);
+        replace_group(ofproto, ofgroup, new_ofgroup);
+        ofproto->ofproto_class->group_modify(new_ofgroup);
     }
 
- unlock_out:
-    ovs_rwlock_unlock(&ofgroup->rwlock);
     ovs_rwlock_unlock(&ofproto->groups_rwlock);
- free_out:
-    ofproto->ofproto_class->group_dealloc(victim);
+    ofproto_group_unref(ofgroup);
     return error;
 }
 
@@ -5745,19 +5768,11 @@ delete_group__(struct ofproto *ofproto, struct ofgroup *ofgroup)
     fm.out_group = ofgroup->group_id;
     handle_flow_mod__(ofproto, NULL, &fm, NULL);
 
-    /* Must wait until existing readers are done,
-     * while holding the container's write lock at the same time. */
-    ovs_rwlock_wrlock(&ofgroup->rwlock);
     hmap_remove(&ofproto->groups, &ofgroup->hmap_node);
     /* No-one can find this group any more. */
     ofproto->n_groups[ofgroup->type]--;
     ovs_rwlock_unlock(&ofproto->groups_rwlock);
-
-    ofproto->ofproto_class->group_destruct(ofgroup);
-    ofputil_bucket_list_destroy(&ofgroup->buckets);
-    ovs_rwlock_unlock(&ofgroup->rwlock);
-    ovs_rwlock_destroy(&ofgroup->rwlock);
-    ofproto->ofproto_class->group_dealloc(ofgroup);
+    ofproto_group_unref(ofgroup);
 }
 
 /* Implements OFPGC_DELETE. */
-- 
1.7.9.5




More information about the dev mailing list