[ovs-dev] [PATCH] ofproto: Fix resource usage explosion due to removal of large number of flows.

Ilya Maximets i.maximets at ovn.org
Mon Nov 22 15:23:10 UTC 2021


While removing flows, removal itself is deferred, so classifier changes
performed already from the RCU thread.  This way every deferred removal
triggers classifier change and reallocation of a pvector.  Freeing of
old version of a pvector is postponed.  Since all this is happening
from an RCU thread, all these copies of the same pvector will be freed
only after the next grace period.

Below is the example output of the 'valgrind --tool=massif' from an OVN
deployment, where copies of that pvector took 5 GB of memory while
processing a bundled flow removal:

 -------------------------------------------------------------------
   n        time(i)         total(B)   useful-heap(B) extra-heap(B)
 -------------------------------------------------------------------
  89 176,257,987,954    5,329,763,160    5,318,171,607    11,591,553
 99.78% (5,318,171,607B) (heap allocation functions) malloc/new/new[]
 ->98.45% (5,247,008,392B) xmalloc__ (util.c:137)
 |->98.17% (5,232,137,408B) pvector_impl_dup (pvector.c:48)
 ||->98.16% (5,231,472,896B) pvector_remove (pvector.c:159)
 |||->98.16% (5,231,472,800B) destroy_subtable (classifier.c:1558)
 ||||->98.16% (5,231,472,800B) classifier_remove (classifier.c:792)
 |||| ->98.16% (5,231,472,800B) classifier_remove_assert (classifier.c:832)
 ||||  ->98.16% (5,231,472,800B) remove_rule_rcu__ (ofproto.c:2978)
 ||||   ->98.16% (5,231,472,800B) remove_rule_rcu (ofproto.c:2990)
 ||||    ->98.16% (5,231,472,800B) ovsrcu_call_postponed (ovs-rcu.c:346)
 ||||     ->98.16% (5,231,472,800B) ovsrcu_postpone_thread (ovs-rcu.c:362)
 ||||      ->98.16% (5,231,472,800B) ovsthread_wrapper
 ||||       ->98.16% (5,231,472,800B) start_thread
 ||||        ->98.16% (5,231,472,800B) clone

Collecting all the flows to be removed and postponing removal for
all of them together to avoid the problem.  This way all removals
will trigger only a single pvector re-allocation greatly reducing
the CPU and memory usage.

Reported-by: Vladislav Odintsov <odivlad at gmail.com>
Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2021-November/389538.html
Signed-off-by: Ilya Maximets <i.maximets at ovn.org>
---
 ofproto/ofproto-provider.h |  4 ++++
 ofproto/ofproto.c          | 31 +++++++++++++++++++++++++++++--
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index 57c7d17cb..04c97102f 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -66,6 +66,7 @@ struct bfd_cfg;
 struct meter;
 struct ofoperation;
 struct ofproto_packet_out;
+struct rule_collection;
 struct smap;
 
 extern struct ovs_mutex ofproto_mutex;
@@ -115,6 +116,9 @@ struct ofproto {
     /* List of expirable flows, in all flow tables. */
     struct ovs_list expirable OVS_GUARDED_BY(ofproto_mutex);
 
+    /* List of flows to remove from flow tables. */
+    struct rule_collection *to_remove OVS_GUARDED_BY(ofproto_mutex);
+
     /* Meter table.  */
     struct ofputil_meter_features meter_features;
     struct hmap meters;             /* uint32_t indexed 'struct meter *'.  */
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index bd6103b1c..cbc376f63 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -213,6 +213,8 @@ static void ofproto_rule_insert__(struct ofproto *, struct rule *)
     OVS_REQUIRES(ofproto_mutex);
 static void ofproto_rule_remove__(struct ofproto *, struct rule *)
     OVS_REQUIRES(ofproto_mutex);
+static void remove_rules_postponed(struct rule_collection *)
+    OVS_REQUIRES(ofproto_mutex);
 
 /* The source of an OpenFlow request.
  *
@@ -530,6 +532,8 @@ ofproto_create(const char *datapath_name, const char *datapath_type,
     hindex_init(&ofproto->cookies);
     hmap_init(&ofproto->learned_cookies);
     ovs_list_init(&ofproto->expirable);
+    ofproto->to_remove = xzalloc(sizeof *ofproto->to_remove);
+    rule_collection_init(ofproto->to_remove);
     ofproto->connmgr = connmgr_create(ofproto, datapath_name, datapath_name);
     ofproto->min_mtu = INT_MAX;
     cmap_init(&ofproto->groups);
@@ -1631,6 +1635,7 @@ ofproto_flush__(struct ofproto *ofproto, bool del)
     }
     ofproto_group_delete_all__(ofproto);
     meter_delete_all(ofproto);
+    remove_rules_postponed(ofproto->to_remove);
     /* XXX: Concurrent handler threads may insert new learned flows based on
      * learn actions of the now deleted flows right after we release
      * 'ofproto_mutex'. */
@@ -1682,6 +1687,11 @@ ofproto_destroy__(struct ofproto *ofproto)
     ovs_assert(hmap_is_empty(&ofproto->learned_cookies));
     hmap_destroy(&ofproto->learned_cookies);
 
+    ovs_mutex_lock(&ofproto_mutex);
+    rule_collection_destroy(ofproto->to_remove);
+    free(ofproto->to_remove);
+    ovs_mutex_unlock(&ofproto_mutex);
+
     ofproto->ofproto_class->dealloc(ofproto);
 }
 
@@ -1878,6 +1888,9 @@ ofproto_run(struct ofproto *p)
 
     connmgr_run(p->connmgr, handle_openflow);
 
+    ovs_mutex_lock(&ofproto_mutex);
+    remove_rules_postponed(p->to_remove);
+    ovs_mutex_unlock(&ofproto_mutex);
     return error;
 }
 
@@ -4437,6 +4450,20 @@ rule_criteria_destroy(struct rule_criteria *criteria)
     criteria->version = OVS_VERSION_NOT_REMOVED; /* Mark as destroyed. */
 }
 
+/* Adds rules to the 'to_remove' collection, so they can be destroyed
+ * later all together.  Destroys 'rules'. */
+static void
+rules_mark_for_removal(struct ofproto *ofproto, struct rule_collection *rules)
+    OVS_REQUIRES(ofproto_mutex)
+{
+    struct rule *rule;
+
+    RULE_COLLECTION_FOR_EACH (rule, rules) {
+        rule_collection_add(ofproto->to_remove, rule);
+    }
+    rule_collection_destroy(rules);
+}
+
 /* Schedules postponed removal of rules, destroys 'rules'. */
 static void
 remove_rules_postponed(struct rule_collection *rules)
@@ -5833,7 +5860,7 @@ modify_flows_finish(struct ofproto *ofproto, struct ofproto_flow_mod *ofm,
             }
         }
         learned_cookies_flush(ofproto, &dead_cookies);
-        remove_rules_postponed(old_rules);
+        rules_mark_for_removal(ofproto, old_rules);
     }
 
     return error;
@@ -5941,7 +5968,7 @@ delete_flows_finish__(struct ofproto *ofproto,
             learned_cookies_dec(ofproto, rule_get_actions(rule),
                                 &dead_cookies);
         }
-        remove_rules_postponed(rules);
+        rules_mark_for_removal(ofproto, rules);
 
         learned_cookies_flush(ofproto, &dead_cookies);
     }
-- 
2.31.1



More information about the dev mailing list