[ovs-dev] [PATCH] ofproto: add refcount to ofproto to fix crash at ofproto_destroy
hepeng.0320
hepeng.0320 at bytedance.com
Thu Jul 16 13:55:17 UTC 2020
From: Peng He <hepeng.0320 at bytedance.com>
The call stack of rule_destroy_cb:
remove_rules_postponed (one grace period)
-> remove_rule_rcu
-> remove_rule_rcu__
-> ofproto_rule_unref -> ref count != 1
-> ... more grace periods.
-> rule_destroy_cb (> 2 grace periods)
-> free
Currently ofproto_destory waits only two grace periods, this means when
rule_destroy_cb is called, the rule->ofproto might be a dangling pointer.
This patch adds a refcount for ofproto to ensure ofproto exists when it
is needed to call free functions.
This patch fixes the crashes found:
Program terminated with signal SIGSEGV, Segmentation fault.
0 0x0000562a55169e49 in rule_destroy_cb (rule=0x562a598be2c0) at ofproto/ofproto.c:2956
1 0x0000562a552623d6 in ovsrcu_call_postponed () at lib/ovs-rcu.c:348
2 0x0000562a55262504 in ovsrcu_postpone_thread (arg=<optimized out>) at lib/ovs-rcu.c:364
3 0x0000562a55264aef in ovsthread_wrapper (aux_=<optimized out>) at lib/ovs-thread.c:383
4 0x00007febe715a4a4 in start_thread (arg=0x7febe4c1c700) at pthread_create.c:456
5 0x00007febe6990d0f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
(gdb) p rule->ofproto->ofproto_class
$3 = (const struct ofproto_class *) 0x0
Also:
0 ofproto_dpif_credit_table_stats (ofproto=0x5583583332b0, table_id=0 '\000', n_matches=5, n_misses=0) at ofproto/ofproto-dpif.c:4310
1 0x0000558354c68514 in xlate_push_stats_entry (entry=entry at entry=0x7ff488031398, stats=stats at entry=0x7ff49af30b60) at ofproto/ofproto-dpif-xlate-cache.c:99
2 0x0000558354c686f3 in xlate_push_stats (xcache=<optimized out>, stats=stats at entry=0x7ff49af30b60) at ofproto/ofproto-dpif-xlate-cache.c:181
3 0x0000558354c5411a in revalidate_ukey (udpif=udpif at entry=0x5583583baba0, ukey=ukey at entry=0x7ff47809f770, stats=stats at entry=0x7ff49af32128, odp_actions=odp_actions at entry=0x7ff49af30c50, reval_seq=reval_seq at entry=275670456,
recircs=recircs at entry=0x7ff49af30cd0) at ofproto/ofproto-dpif-upcall.c:2292
4 0x0000558354c57cbc in revalidate (revalidator=<optimized out>) at ofproto/ofproto-dpif-upcall.c:2681
5 0x0000558354c57f8e in udpif_revalidator (arg=0x5583583d2a90) at ofproto/ofproto-dpif-upcall.c:934
6 0x0000558354d24aef in ovsthread_wrapper (aux_=<optimized out>) at lib/ovs-thread.c:383
7 0x00007ff4a479d4a4 in start_thread (arg=0x7ff49af35700) at pthread_create.c:456
8 0x00007ff4a3fd3d0f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
(gdb) p ofproto->up.ofproto_class
$3 = (const struct ofproto_class *) 0x0
Signed-off-by: Peng He <hepeng.0320 at bytedance.com>
---
ofproto/ofproto-dpif-xlate-cache.c | 1 +
ofproto/ofproto-dpif.c | 22 +++++++++--------
ofproto/ofproto-provider.h | 2 ++
ofproto/ofproto.c | 39 +++++++++++++++++++++++++++---
ofproto/ofproto.h | 4 +++
5 files changed, 54 insertions(+), 14 deletions(-)
diff --git a/ofproto/ofproto-dpif-xlate-cache.c b/ofproto/ofproto-dpif-xlate-cache.c
index dcc91cb38..0deee365d 100644
--- a/ofproto/ofproto-dpif-xlate-cache.c
+++ b/ofproto/ofproto-dpif-xlate-cache.c
@@ -209,6 +209,7 @@ xlate_cache_clear_entry(struct xc_entry *entry)
{
switch (entry->type) {
case XC_TABLE:
+ ofproto_unref(&(entry->table.ofproto->up));
break;
case XC_RULE:
ofproto_rule_unref(&entry->rule->up);
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 4f0638f23..6480b3c78 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -4414,11 +4414,12 @@ rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto,
}
if (xcache) {
struct xc_entry *entry;
-
- entry = xlate_cache_add_entry(xcache, XC_TABLE);
- entry->table.ofproto = ofproto;
- entry->table.id = *table_id;
- entry->table.match = true;
+ if (ofproto_try_ref(&ofproto->up)) {
+ entry = xlate_cache_add_entry(xcache, XC_TABLE);
+ entry->table.ofproto = ofproto;
+ entry->table.id = *table_id;
+ entry->table.match = true;
+ }
}
return rule;
}
@@ -4450,11 +4451,12 @@ rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto,
}
if (xcache) {
struct xc_entry *entry;
-
- entry = xlate_cache_add_entry(xcache, XC_TABLE);
- entry->table.ofproto = ofproto;
- entry->table.id = next_id;
- entry->table.match = (rule != NULL);
+ if (ofproto_try_ref(&ofproto->up)) {
+ entry = xlate_cache_add_entry(xcache, XC_TABLE);
+ entry->table.ofproto = ofproto;
+ entry->table.id = next_id;
+ entry->table.match = (rule != NULL);
+ }
}
if (rule) {
goto out; /* Match. */
diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index afecb24cb..34074fd62 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -139,6 +139,8 @@ struct ofproto {
/* Variable length mf_field mapping. Stores all configured variable length
* meta-flow fields (struct mf_field) in a switch. */
struct vl_mff_map vl_mff_map;
+
+ struct ovs_refcount refcount;
};
void ofproto_init_tables(struct ofproto *, int n_tables);
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index 59f06aa94..704ac4146 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -474,6 +474,24 @@ ofproto_bump_tables_version(struct ofproto *ofproto)
ofproto->tables_version);
}
+void
+ofproto_ref(struct ofproto *ofproto)
+{
+ ovs_refcount_ref(&ofproto->refcount);
+}
+
+bool
+ofproto_try_ref(struct ofproto *ofproto)
+{
+ return ovs_refcount_try_ref_rcu(&ofproto->refcount);
+}
+
+void
+ofproto_unref(struct ofproto *ofproto)
+{
+ ovs_refcount_unref(&ofproto->refcount);
+}
+
int
ofproto_create(const char *datapath_name, const char *datapath_type,
struct ofproto **ofprotop)
@@ -545,6 +563,7 @@ ofproto_create(const char *datapath_name, const char *datapath_type,
ovs_mutex_init(&ofproto->vl_mff_map.mutex);
cmap_init(&ofproto->vl_mff_map.cmap);
+ ovs_refcount_init(&ofproto->refcount);
error = ofproto->ofproto_class->construct(ofproto);
if (error) {
@@ -1696,14 +1715,24 @@ ofproto_destroy__(struct ofproto *ofproto)
ofproto->ofproto_class->dealloc(ofproto);
}
-/* Destroying rules is doubly deferred, must have 'ofproto' around for them.
- * - 1st we defer the removal of the rules from the classifier
- * - 2nd we defer the actual destruction of the rules. */
+/* destroying a rule has a very long calling chains, and may have
+ * to wait multiple grace periods:
+ *
+ * for example:
+ * remove_rules_postponed -> remove_rule_rcu -> ofproto_rule_unref
+ * -> ovsrcu_postpone(rule_destroy_cb, rule)
+ *
+ * So we have to check if the ofproto refcount reaches to 1 for sure
+ * all the rules have been destoryed.
+ */
static void
ofproto_destroy_defer__(struct ofproto *ofproto)
OVS_EXCLUDED(ofproto_mutex)
{
- ovsrcu_postpone(ofproto_destroy__, ofproto);
+ if (ovs_refcount_read(&ofproto->refcount) != 1) {
+ ovsrcu_postpone(ofproto_destroy_defer__, ofproto);
+ } else
+ ovsrcu_postpone(ofproto_destroy__, ofproto);
}
void
@@ -2927,6 +2956,7 @@ ofproto_rule_destroy__(struct rule *rule)
cls_rule_destroy(CONST_CAST(struct cls_rule *, &rule->cr));
rule_actions_destroy(rule_get_actions(rule));
ovs_mutex_destroy(&rule->mutex);
+ ofproto_unref(rule->ofproto);
rule->ofproto->ofproto_class->rule_dealloc(rule);
}
@@ -5265,6 +5295,7 @@ ofproto_rule_create(struct ofproto *ofproto, struct cls_rule *cr,
/* Initialize base state. */
*CONST_CAST(struct ofproto **, &rule->ofproto) = ofproto;
+ ofproto_ref(ofproto);
cls_rule_move(CONST_CAST(struct cls_rule *, &rule->cr), cr);
ovs_refcount_init(&rule->ref_count);
diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h
index 2dd253167..3d3937b35 100644
--- a/ofproto/ofproto.h
+++ b/ofproto/ofproto.h
@@ -562,6 +562,10 @@ int ofproto_port_get_cfm_status(const struct ofproto *,
enum ofputil_table_miss ofproto_table_get_miss_config(const struct ofproto *,
uint8_t table_id);
+void ofproto_ref(struct ofproto *p);
+void ofproto_unref(struct ofproto *p);
+bool ofproto_try_ref(struct ofproto *p);
+
#ifdef __cplusplus
}
#endif
--
2.20.1
More information about the dev
mailing list