[ovs-dev] [PATCH v5] ofproto: Add reference count for Openflow groups.
Ryan Wilson
wryan at nicira.com
Wed May 21 21:08:15 UTC 2014
When adding support for OpenFlow group and bucket stats, a group entry is added
to the xlate_cache. If the main thread removes the group from an ofproto, we
need to guarantee that the group remains accessible to users of
xlate_group_actions and the xlate_cache, as the xlate_cache will not be cleaned
up until the corresponding datapath flows are revalidated.
To make modify_group compatible with group reference counting, the group is
re-created and then replaces the old group in ofproto's ofgroup hash map. Thus,
the group is never altered while users of the xlate module hold a pointer to the
group. This also eliminates the need for ofgroup's lock as all properties of
ofgroup are read-only.
Signed-off-by: Ryan Wilson <wryan at nicira.com>
---
v2: Fixed bug with group stats all buckets, cleaned up ofgroup unref code,
added Andy Zhou's test for more complete test coverage
v3: Split group reference count, support for group and bucket stats into 2
patches. Commit Andy Zhou's test patch separately.
v4: Addressed Joe's comments making it more clear why group refcount is needed
for the xlate cache.
v5: Added Joe's comments in git commit for more accuracy regarding xlate cache,
edited modify_group in ofproto.c to be compatible with group ref counting
---
ofproto/ofproto-dpif-xlate.c | 4 +-
ofproto/ofproto-dpif.c | 22 ++---
ofproto/ofproto-dpif.h | 16 ++++
ofproto/ofproto-provider.h | 25 +++---
ofproto/ofproto.c | 195 +++++++++++++++++++++++-------------------
5 files changed, 143 insertions(+), 119 deletions(-)
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index a87db54..2095f37 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -854,7 +854,7 @@ group_is_alive(const struct xlate_ctx *ctx, uint32_t group_id, int depth)
hit = group_first_live_bucket(ctx, group, depth) != NULL;
- group_dpif_release(group);
+ group_dpif_unref(group);
return hit;
}
@@ -2202,7 +2202,7 @@ xlate_group_action__(struct xlate_ctx *ctx, struct group_dpif *group)
default:
OVS_NOT_REACHED();
}
- group_dpif_release(group);
+ group_dpif_unref(group);
ctx->in_group = false;
}
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 7fdd71f..c79602c 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -3596,18 +3596,9 @@ group_destruct(struct ofgroup *group_)
}
static enum ofperr
-group_modify(struct ofgroup *group_, struct ofgroup *victim_)
+group_modify(struct ofgroup *group_)
{
struct ofproto_dpif *ofproto = ofproto_dpif_cast(group_->ofproto);
- struct group_dpif *group = group_dpif_cast(group_);
- struct group_dpif *victim = group_dpif_cast(victim_);
-
- ovs_mutex_lock(&group->stats_mutex);
- if (victim->up.n_buckets < group->up.n_buckets) {
- group_destruct__(group);
- }
- group_construct_stats(group);
- ovs_mutex_unlock(&group->stats_mutex);
ofproto->backer->need_revalidate = REV_FLOW_TABLE;
@@ -3629,6 +3620,10 @@ group_get_stats(const struct ofgroup *group_, struct ofputil_group_stats *ogs)
return 0;
}
+/* If the group exists, this function increments the group's reference count.
+ *
+ * Make sure to call group_dpif_unref() after no longer needing to maintain
+ * a reference to the group. */
bool
group_dpif_lookup(struct ofproto_dpif *ofproto, uint32_t group_id,
struct group_dpif **group)
@@ -3645,13 +3640,6 @@ group_dpif_lookup(struct ofproto_dpif *ofproto, uint32_t group_id,
}
void
-group_dpif_release(struct group_dpif *group)
- OVS_RELEASES(group->up.rwlock)
-{
- ofproto_group_release(&group->up);
-}
-
-void
group_dpif_get_buckets(const struct group_dpif *group,
const struct list **buckets)
{
diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h
index e49616e..345117a 100644
--- a/ofproto/ofproto-dpif.h
+++ b/ofproto/ofproto-dpif.h
@@ -232,6 +232,22 @@ BUILD_ASSERT_DECL(N_TABLES >= 2 && N_TABLES <= 255);
/* struct rule_dpif has struct rule as it's first member. */
#define RULE_CAST(RULE) ((struct rule *)RULE)
+#define GROUP_CAST(GROUP) ((struct ofgroup *)GROUP)
+
+static inline struct group_dpif* group_dpif_ref(struct group_dpif *group)
+{
+ if (group) {
+ ofproto_group_ref(GROUP_CAST(group));
+ }
+ return group;
+}
+
+static inline void group_dpif_unref(struct group_dpif *group)
+{
+ if (group) {
+ ofproto_group_unref(GROUP_CAST(group));
+ }
+}
static inline void rule_dpif_ref(struct rule_dpif *rule)
{
diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index 141adec..7a52b13 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -474,20 +474,22 @@ bool ofoperation_has_out_port(const struct ofoperation *, ofp_port_t out_port)
* With few exceptions, ofproto implementations may look at these fields but
* should not modify them. */
struct ofgroup {
- /* The rwlock is used to prevent groups from being deleted while child
- * threads are using them to xlate flows. A read lock means the
- * group is currently being used. A write lock means the group is
- * in the process of being deleted or updated. Note that since
- * a read lock on the groups container is held while searching, and
- * a group is ever write locked only while holding a write lock
- * on the container, the user's of groups will never face a group
- * in the write locked state. */
- struct ovs_rwlock rwlock OVS_ACQ_AFTER(ofproto_mutex);
struct hmap_node hmap_node; /* In struct ofproto's "groups" hmap. */
struct ofproto *ofproto; /* The ofproto that contains this group. */
uint32_t group_id;
enum ofp11_group_type type; /* One of OFPGT_*. */
+ /* Number of references.
+ *
+ * This is needed to keep track of references to the group in the xlate
+ * module.
+ *
+ * If the main thread removes the group from an ofproto, we need to
+ * guarantee that the group remains accessible to users of
+ * xlate_group_actions and the xlate_cache, as the xlate_cache will not be
+ * cleaned up until the corresponding datapath flows are revalidated. */
+ struct ovs_refcount ref_count;
+
long long int created; /* Creation time. */
long long int modified; /* Time of last modification. */
@@ -502,6 +504,9 @@ bool ofproto_group_lookup(const struct ofproto *ofproto, uint32_t group_id,
void ofproto_group_release(struct ofgroup *group)
OVS_RELEASES(group->rwlock);
+void ofproto_group_ref(struct ofgroup *);
+void ofproto_group_unref(struct ofgroup *);
+
/* ofproto class structure, to be defined by each ofproto implementation.
*
*
@@ -1684,7 +1689,7 @@ struct ofproto_class {
void (*group_destruct)(struct ofgroup *);
void (*group_dealloc)(struct ofgroup *);
- enum ofperr (*group_modify)(struct ofgroup *, struct ofgroup *victim);
+ enum ofperr (*group_modify)(struct ofgroup *);
enum ofperr (*group_get_stats)(const struct ofgroup *,
struct ofputil_group_stats *);
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index e728921..fb0244a 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -2647,6 +2647,30 @@ ofproto_rule_unref(struct rule *rule)
}
}
+static void
+group_destroy_cb(struct ofgroup *group)
+{
+ group->ofproto->ofproto_class->group_destruct(group);
+ ofputil_bucket_list_destroy(&group->buckets);
+ group->ofproto->ofproto_class->group_dealloc(group);
+}
+
+void
+ofproto_group_ref(struct ofgroup *group)
+{
+ if (group) {
+ ovs_refcount_ref(&group->ref_count);
+ }
+}
+
+void
+ofproto_group_unref(struct ofgroup *group)
+{
+ if (group && ovs_refcount_unref(&group->ref_count) == 1) {
+ ovsrcu_postpone(group_destroy_cb, group);
+ }
+}
+
static uint32_t get_provider_meter_id(const struct ofproto *,
uint32_t of_meter_id);
@@ -5334,16 +5358,19 @@ handle_meter_request(struct ofconn *ofconn, const struct ofp_header *request,
return 0;
}
+/* If the group exists, this function increments the group's reference count.
+ *
+ * Make sure to call ofproto_group_unref() after no longer needing to maintain
+ * a reference to the group. */
bool
ofproto_group_lookup(const struct ofproto *ofproto, uint32_t group_id,
struct ofgroup **group)
- OVS_TRY_RDLOCK(true, (*group)->rwlock)
{
ovs_rwlock_rdlock(&ofproto->groups_rwlock);
HMAP_FOR_EACH_IN_BUCKET (*group, hmap_node,
hash_int(group_id, 0), &ofproto->groups) {
if ((*group)->group_id == group_id) {
- ovs_rwlock_rdlock(&(*group)->rwlock);
+ ofproto_group_ref(*group);
ovs_rwlock_unlock(&ofproto->groups_rwlock);
return true;
}
@@ -5352,24 +5379,16 @@ ofproto_group_lookup(const struct ofproto *ofproto, uint32_t group_id,
return false;
}
-void
-ofproto_group_release(struct ofgroup *group)
- OVS_RELEASES(group->rwlock)
-{
- ovs_rwlock_unlock(&group->rwlock);
-}
-
static bool
ofproto_group_write_lookup(const struct ofproto *ofproto, uint32_t group_id,
struct ofgroup **group)
OVS_TRY_WRLOCK(true, ofproto->groups_rwlock)
- OVS_TRY_WRLOCK(true, (*group)->rwlock)
{
ovs_rwlock_wrlock(&ofproto->groups_rwlock);
HMAP_FOR_EACH_IN_BUCKET (*group, hmap_node,
hash_int(group_id, 0), &ofproto->groups) {
if ((*group)->group_id == group_id) {
- ovs_rwlock_wrlock(&(*group)->rwlock);
+ ofproto_group_ref(*group);
return true;
}
}
@@ -5432,7 +5451,6 @@ group_get_ref_count(struct ofgroup *group)
static void
append_group_stats(struct ofgroup *group, struct list *replies)
- OVS_REQ_RDLOCK(group->rwlock)
{
struct ofputil_group_stats ogs;
struct ofproto *ofproto = group->ofproto;
@@ -5476,15 +5494,15 @@ handle_group_request(struct ofconn *ofconn,
if (group_id == OFPG_ALL) {
ovs_rwlock_rdlock(&ofproto->groups_rwlock);
HMAP_FOR_EACH (group, hmap_node, &ofproto->groups) {
- ovs_rwlock_rdlock(&group->rwlock);
+ ofproto_group_ref(group);
cb(group, &replies);
- ovs_rwlock_unlock(&group->rwlock);
+ ofproto_group_unref(group);
}
ovs_rwlock_unlock(&ofproto->groups_rwlock);
} else {
if (ofproto_group_lookup(ofproto, group_id, &group)) {
cb(group, &replies);
- ofproto_group_release(group);
+ ofproto_group_unref(group);
}
}
ofconn_send_replies(ofconn, &replies);
@@ -5584,6 +5602,43 @@ handle_queue_get_config_request(struct ofconn *ofconn,
return 0;
}
+static enum ofperr
+init_group(struct ofproto *ofproto, struct ofputil_group_mod *gm,
+ struct ofgroup **ofgroup)
+{
+ enum ofperr error;
+
+ if (gm->group_id > OFPG_MAX) {
+ return OFPERR_OFPGMFC_INVALID_GROUP;
+ }
+ if (gm->type > OFPGT11_FF) {
+ return OFPERR_OFPGMFC_BAD_TYPE;
+ }
+
+ *ofgroup = ofproto->ofproto_class->group_alloc();
+ if (!*ofgroup) {
+ VLOG_WARN_RL(&rl, "%s: failed to allocate group", ofproto->name);
+ return OFPERR_OFPGMFC_OUT_OF_GROUPS;
+ }
+
+ (*ofgroup)->ofproto = ofproto;
+ (*ofgroup)->group_id = gm->group_id;
+ (*ofgroup)->type = gm->type;
+ (*ofgroup)->created = (*ofgroup)->modified = time_msec();
+ ovs_refcount_init(&(*ofgroup)->ref_count);
+
+ list_move(&(*ofgroup)->buckets, &gm->buckets);
+ (*ofgroup)->n_buckets = list_size(&(*ofgroup)->buckets);
+
+ /* Construct called BEFORE any locks are held. */
+ error = ofproto->ofproto_class->group_construct(*ofgroup);
+ if (error) {
+ ofputil_bucket_list_destroy(&(*ofgroup)->buckets);
+ ofproto->ofproto_class->group_dealloc(*ofgroup);
+ }
+ return error;
+}
+
/* Implements OFPGC11_ADD
* in which no matching flow already exists in the flow table.
*
@@ -5603,33 +5658,10 @@ add_group(struct ofproto *ofproto, struct ofputil_group_mod *gm)
struct ofgroup *ofgroup;
enum ofperr error;
- if (gm->group_id > OFPG_MAX) {
- return OFPERR_OFPGMFC_INVALID_GROUP;
- }
- if (gm->type > OFPGT11_FF) {
- return OFPERR_OFPGMFC_BAD_TYPE;
- }
-
/* Allocate new group and initialize it. */
- ofgroup = ofproto->ofproto_class->group_alloc();
- if (!ofgroup) {
- VLOG_WARN_RL(&rl, "%s: failed to create group", ofproto->name);
- return OFPERR_OFPGMFC_OUT_OF_GROUPS;
- }
-
- ovs_rwlock_init(&ofgroup->rwlock);
- ofgroup->ofproto = ofproto;
- ofgroup->group_id = gm->group_id;
- ofgroup->type = gm->type;
- ofgroup->created = ofgroup->modified = time_msec();
-
- list_move(&ofgroup->buckets, &gm->buckets);
- ofgroup->n_buckets = list_size(&ofgroup->buckets);
-
- /* Construct called BEFORE any locks are held. */
- error = ofproto->ofproto_class->group_construct(ofgroup);
+ error = init_group(ofproto, gm, &ofgroup);
if (error) {
- goto free_out;
+ return error;
}
/* We wrlock as late as possible to minimize the time we jam any other
@@ -5659,76 +5691,67 @@ add_group(struct ofproto *ofproto, struct ofputil_group_mod *gm)
unlock_out:
ovs_rwlock_unlock(&ofproto->groups_rwlock);
ofproto->ofproto_class->group_destruct(ofgroup);
- free_out:
ofputil_bucket_list_destroy(&ofgroup->buckets);
ofproto->ofproto_class->group_dealloc(ofgroup);
return error;
}
+static void
+replace_group(struct ofproto *ofproto, struct ofgroup *ofgroup,
+ struct ofgroup *new_ofgroup)
+{
+ hmap_remove(&ofproto->groups, &ofgroup->hmap_node);
+ hmap_insert(&ofproto->groups, &new_ofgroup->hmap_node,
+ hash_int(new_ofgroup->group_id, 0));
+ if (ofgroup->type != new_ofgroup->type) {
+ ofproto->n_groups[ofgroup->type]--;
+ ofproto->n_groups[new_ofgroup->type]++;
+ }
+ ofproto_group_unref(ofgroup);
+}
+
/* Implements OFPFC_MODIFY. Returns 0 on success or an OpenFlow error code on
* failure.
*
+ * Note that the group is re-created and then replaces the old group in
+ * ofproto's ofgroup hash map. Thus, the group is never altered while users of
+ * the xlate module hold a pointer to the group.
+ *
* 'ofconn' is used to retrieve the packet buffer specified in fm->buffer_id,
* if any. */
static enum ofperr
modify_group(struct ofproto *ofproto, struct ofputil_group_mod *gm)
{
- struct ofgroup *ofgroup;
- struct ofgroup *victim;
+ struct ofgroup *ofgroup, *new_ofgroup;
enum ofperr error;
- if (gm->group_id > OFPG_MAX) {
- return OFPERR_OFPGMFC_INVALID_GROUP;
- }
-
- if (gm->type > OFPGT11_FF) {
- return OFPERR_OFPGMFC_BAD_TYPE;
- }
-
- victim = ofproto->ofproto_class->group_alloc();
- if (!victim) {
- VLOG_WARN_RL(&rl, "%s: failed to allocate group", ofproto->name);
- return OFPERR_OFPGMFC_OUT_OF_GROUPS;
+ error = init_group(ofproto, gm, &new_ofgroup);
+ if (error) {
+ return error;
}
if (!ofproto_group_write_lookup(ofproto, gm->group_id, &ofgroup)) {
error = OFPERR_OFPGMFC_UNKNOWN_GROUP;
- goto free_out;
+ ofproto_group_unref(new_ofgroup);
+ return error;
}
- /* Both group's and its container's write locks held now.
- * Also, n_groups[] is protected by ofproto->groups_rwlock. */
+
+ /* Ofproto's group write lock is held now. */
if (ofgroup->type != gm->type
&& ofproto->n_groups[gm->type] >= ofproto->ogf.max_groups[gm->type]) {
error = OFPERR_OFPGMFC_OUT_OF_GROUPS;
- goto unlock_out;
- }
-
- *victim = *ofgroup;
- list_move(&victim->buckets, &ofgroup->buckets);
-
- ofgroup->type = gm->type;
- list_move(&ofgroup->buckets, &gm->buckets);
- ofgroup->n_buckets = list_size(&ofgroup->buckets);
-
- error = ofproto->ofproto_class->group_modify(ofgroup, victim);
- if (!error) {
- ofputil_bucket_list_destroy(&victim->buckets);
- ofproto->n_groups[victim->type]--;
- ofproto->n_groups[ofgroup->type]++;
- ofgroup->modified = time_msec();
+ ofproto_group_unref(new_ofgroup);
} else {
- ofputil_bucket_list_destroy(&ofgroup->buckets);
+ /* The group creation time does not change during modification. */
+ new_ofgroup->created = ofgroup->created;
- *ofgroup = *victim;
- list_move(&ofgroup->buckets, &victim->buckets);
+ replace_group(ofproto, ofgroup, new_ofgroup);
+ ofproto->ofproto_class->group_modify(new_ofgroup);
}
- unlock_out:
- ovs_rwlock_unlock(&ofgroup->rwlock);
ovs_rwlock_unlock(&ofproto->groups_rwlock);
- free_out:
- ofproto->ofproto_class->group_dealloc(victim);
+ ofproto_group_unref(ofgroup);
return error;
}
@@ -5745,19 +5768,11 @@ delete_group__(struct ofproto *ofproto, struct ofgroup *ofgroup)
fm.out_group = ofgroup->group_id;
handle_flow_mod__(ofproto, NULL, &fm, NULL);
- /* Must wait until existing readers are done,
- * while holding the container's write lock at the same time. */
- ovs_rwlock_wrlock(&ofgroup->rwlock);
hmap_remove(&ofproto->groups, &ofgroup->hmap_node);
/* No-one can find this group any more. */
ofproto->n_groups[ofgroup->type]--;
ovs_rwlock_unlock(&ofproto->groups_rwlock);
-
- ofproto->ofproto_class->group_destruct(ofgroup);
- ofputil_bucket_list_destroy(&ofgroup->buckets);
- ovs_rwlock_unlock(&ofgroup->rwlock);
- ovs_rwlock_destroy(&ofgroup->rwlock);
- ofproto->ofproto_class->group_dealloc(ofgroup);
+ ofproto_group_unref(ofgroup);
}
/* Implements OFPGC_DELETE. */
--
1.7.9.5
More information about the dev
mailing list