[ovs-dev] [PATCH ovn] ovn-controller: Fix use of dangling pointers in I-P runtime_data.

Dumitru Ceara dceara at redhat.com
Tue Oct 29 15:11:27 UTC 2019


The incremental processing engine might stop a run before the
en_runtime_data node is processed. In such cases the ed_runtime_data
fields might contain pointers to already deleted SB records. For
example, if a port binding corresponding to a patch port is removed from
the SB database and the incremental processing engine aborts before the
en_runtime_data node is processed then the corresponding local_datapath
hashtable entry in ed_runtime_data is stale and will store a pointer to
the already freed sbrec_port_binding record.

This will cause invalid memory accesses in various places (e.g.,
pinctrl_run() -> prepare_ipv6_ras()).

To fix the issue we need a way to track how each node was processed
during an engine run. This commit transforms the 'changed' field in
struct engine_node in a 'state' field. Possible node states are:
- "Stale": data in the node is not up to date with the DB.
- "Updated": data in the node is valid but was updated during
  the last run of the engine.
- "Valid": data in the node is valid and didn't change during
  the last run of the engine.
- "Aborted": during the last run, processing was aborted for
  this node.

The commit also simplifies the logic of calling engine_run and
engine_need_run in order to reduce the number of external variables
required to track the result of the last engine execution.

Functions that need to be called from the main loop and depend on
various data contents of the engine's nodes are now called only if
the data is up to date.

CC: Han Zhou <hzhou8 at ebay.com>
Fixes: a6b7d9f4f047 ("ovn-controller: Fix flow installation latency
caused by recompute.")
Signed-off-by: Dumitru Ceara <dceara at redhat.com>
---
 controller/ovn-controller.c | 144 +++++++++++++++++++++++++-------------------
 lib/inc-proc-eng.c          | 117 +++++++++++++++++++++++++++++------
 lib/inc-proc-eng.h          |  47 ++++++++++++---
 3 files changed, 221 insertions(+), 87 deletions(-)

diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 9ab98be..8e07d5e 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -758,10 +758,10 @@ en_ofctrl_is_connected_run(struct engine_node *node)
         (struct ed_type_ofctrl_is_connected *)node->data;
     if (data->connected != ofctrl_is_connected()) {
         data->connected = !data->connected;
-        node->changed = true;
+        engine_set_node_state(node, EN_UPDATED);
         return;
     }
-    node->changed = false;
+    engine_set_node_state(node, EN_VALID);
 }
 
 struct ed_type_addr_sets {
@@ -811,7 +811,7 @@ en_addr_sets_run(struct engine_node *node)
     addr_sets_init(as_table, &as->addr_sets);
 
     as->change_tracked = false;
-    node->changed = true;
+    engine_set_node_state(node, EN_UPDATED);
 }
 
 static bool
@@ -830,11 +830,14 @@ addr_sets_sb_address_set_handler(struct engine_node *node)
     addr_sets_update(as_table, &as->addr_sets, &as->new,
                      &as->deleted, &as->updated);
 
-    node->changed = !sset_is_empty(&as->new) || !sset_is_empty(&as->deleted)
-                    || !sset_is_empty(&as->updated);
+    if (!sset_is_empty(&as->new) || !sset_is_empty(&as->deleted) ||
+            !sset_is_empty(&as->updated)) {
+        engine_set_node_state(node, EN_UPDATED);
+    } else {
+        engine_set_node_state(node, EN_VALID);
+    }
 
     as->change_tracked = true;
-    node->changed = true;
     return true;
 }
 
@@ -885,7 +888,7 @@ en_port_groups_run(struct engine_node *node)
     port_groups_init(pg_table, &pg->port_groups);
 
     pg->change_tracked = false;
-    node->changed = true;
+    engine_set_node_state(node, EN_UPDATED);
 }
 
 static bool
@@ -904,11 +907,14 @@ port_groups_sb_port_group_handler(struct engine_node *node)
     port_groups_update(pg_table, &pg->port_groups, &pg->new,
                      &pg->deleted, &pg->updated);
 
-    node->changed = !sset_is_empty(&pg->new) || !sset_is_empty(&pg->deleted)
-                    || !sset_is_empty(&pg->updated);
+    if (!sset_is_empty(&pg->new) || !sset_is_empty(&pg->deleted) ||
+            !sset_is_empty(&pg->updated)) {
+        engine_set_node_state(node, EN_UPDATED);
+    } else {
+        engine_set_node_state(node, EN_VALID);
+    }
 
     pg->change_tracked = true;
-    node->changed = true;
     return true;
 }
 
@@ -1091,7 +1097,7 @@ en_runtime_data_run(struct engine_node *node)
     update_ct_zones(local_lports, local_datapaths, ct_zones,
                     ct_zone_bitmap, pending_ct_zones);
 
-    node->changed = true;
+    engine_set_node_state(node, EN_UPDATED);
 }
 
 static bool
@@ -1157,10 +1163,10 @@ en_mff_ovn_geneve_run(struct engine_node *node)
     enum mf_field_id mff_ovn_geneve = ofctrl_get_mf_field_id();
     if (data->mff_ovn_geneve != mff_ovn_geneve) {
         data->mff_ovn_geneve = mff_ovn_geneve;
-        node->changed = true;
+        engine_set_node_state(node, EN_UPDATED);
         return;
     }
-    node->changed = false;
+    engine_set_node_state(node, EN_VALID);
 }
 
 struct ed_type_flow_output {
@@ -1322,7 +1328,7 @@ en_flow_output_run(struct engine_node *node)
                  active_tunnels,
                  flow_table);
 
-    node->changed = true;
+    engine_set_node_state(node, EN_UPDATED);
 }
 
 static bool
@@ -1404,7 +1410,7 @@ flow_output_sb_logical_flow_handler(struct engine_node *node)
               flow_table, group_table, meter_table, lfrr,
               conj_id_ofs);
 
-    node->changed = true;
+    engine_set_node_state(node, EN_UPDATED);
     return handled;
 }
 
@@ -1427,7 +1433,7 @@ flow_output_sb_mac_binding_handler(struct engine_node *node)
     lflow_handle_changed_neighbors(sbrec_port_binding_by_name,
             mac_binding_table, flow_table);
 
-    node->changed = true;
+    engine_set_node_state(node, EN_UPDATED);
     return true;
 }
 
@@ -1531,7 +1537,7 @@ flow_output_sb_port_binding_handler(struct engine_node *node)
             chassis, ct_zones, local_datapaths,
             active_tunnels, flow_table);
 
-    node->changed = true;
+    engine_set_node_state(node, EN_UPDATED);
     return true;
 }
 
@@ -1580,7 +1586,7 @@ flow_output_sb_multicast_group_handler(struct engine_node *node)
             mff_ovn_geneve, chassis, ct_zones, local_datapaths,
             flow_table);
 
-    node->changed = true;
+    engine_set_node_state(node, EN_UPDATED);
     return true;
 
 }
@@ -1694,7 +1700,9 @@ _flow_output_resource_ref_handler(struct engine_node *node,
                     conj_id_ofs, &changed)) {
             return false;
         }
-        node->changed = changed || node->changed;
+        if (changed) {
+            engine_set_node_state(node, EN_UPDATED);
+        }
     }
     SSET_FOR_EACH (ref_name, updated) {
         if (!lflow_handle_changed_ref(ref_type, ref_name,
@@ -1707,7 +1715,9 @@ _flow_output_resource_ref_handler(struct engine_node *node,
                     conj_id_ofs, &changed)) {
             return false;
         }
-        node->changed = changed || node->changed;
+        if (changed) {
+            engine_set_node_state(node, EN_UPDATED);
+        }
     }
     SSET_FOR_EACH (ref_name, new) {
         if (!lflow_handle_changed_ref(ref_type, ref_name,
@@ -1720,7 +1730,9 @@ _flow_output_resource_ref_handler(struct engine_node *node,
                     conj_id_ofs, &changed)) {
             return false;
         }
-        node->changed = changed || node->changed;
+        if (changed) {
+            engine_set_node_state(node, EN_UPDATED);
+        }
     }
 
     return true;
@@ -1942,8 +1954,6 @@ main(int argc, char *argv[])
                              &pending_pkt);
 
     uint64_t engine_run_id = 0;
-    uint64_t old_engine_run_id = 0;
-    bool engine_run_done = true;
 
     unsigned int ovs_cond_seqno = UINT_MAX;
     unsigned int ovnsb_cond_seqno = UINT_MAX;
@@ -1952,10 +1962,11 @@ main(int argc, char *argv[])
     exiting = false;
     restart = false;
     while (!exiting) {
+        engine_run_id++;
+
         update_sb_db(ovs_idl_loop.idl, ovnsb_idl_loop.idl);
         update_ssl_config(ovsrec_ssl_table_get(ovs_idl_loop.idl));
         ofctrl_set_probe_interval(get_ofctrl_probe_interval(ovs_idl_loop.idl));
-        old_engine_run_id = engine_run_id;
 
         struct ovsdb_idl_txn *ovs_idl_txn = ovsdb_idl_loop_run(&ovs_idl_loop);
         unsigned int new_ovs_cond_seqno
@@ -2044,15 +2055,13 @@ main(int argc, char *argv[])
                              * this round of engine_run and continue processing
                              * acculated changes incrementally later when
                              * ofctrl_can_put() returns true. */
-                            if (engine_run_done) {
+                            if (!engine_aborted(&en_flow_output)) {
                                 engine_set_abort_recompute(true);
-                                engine_run_done = engine_run(&en_flow_output,
-                                                             ++engine_run_id);
+                                engine_run(&en_flow_output, engine_run_id);
                             }
                         } else {
                             engine_set_abort_recompute(false);
-                            engine_run_done = true;
-                            engine_run(&en_flow_output, ++engine_run_id);
+                            engine_run(&en_flow_output, engine_run_id);
                         }
                     }
                     stopwatch_stop(CONTROLLER_LOOP_STOPWATCH_NAME,
@@ -2066,28 +2075,42 @@ main(int argc, char *argv[])
                                     ovnsb_idl_loop.idl),
                                 sbrec_sb_global_table_get(ovnsb_idl_loop.idl));
                     }
-                    ofctrl_put(&ed_flow_output.flow_table,
-                               &ed_runtime_data.pending_ct_zones,
-                               sbrec_meter_table_get(ovnsb_idl_loop.idl),
-                               get_nb_cfg(sbrec_sb_global_table_get(
-                                              ovnsb_idl_loop.idl)),
-                               en_flow_output.changed);
-                    pinctrl_run(ovnsb_idl_txn,
-                                sbrec_datapath_binding_by_key,
-                                sbrec_port_binding_by_datapath,
-                                sbrec_port_binding_by_key,
-                                sbrec_port_binding_by_name,
-                                sbrec_mac_binding_by_lport_ip,
-                                sbrec_igmp_group,
-                                sbrec_ip_multicast,
-                                sbrec_dns_table_get(ovnsb_idl_loop.idl),
-                                sbrec_controller_event_table_get(
-                                    ovnsb_idl_loop.idl),
-                                br_int, chassis,
-                                &ed_runtime_data.local_datapaths,
-                                &ed_runtime_data.active_tunnels);
 
-                    if (en_runtime_data.changed) {
+                    /* We need to make sure that at least the runtime data
+                     * (e.g., local datapaths, ct zones) are fresh before
+                     * calling ofctrl_put and pinctrl_run to avoid using
+                     * potentially freed references to database records.
+                     */
+                    if (engine_node_data_valid(&en_runtime_data,
+                                               engine_run_id)) {
+                        ofctrl_put(&ed_flow_output.flow_table,
+                                   &ed_runtime_data.pending_ct_zones,
+                                   sbrec_meter_table_get(ovnsb_idl_loop.idl),
+                                   get_nb_cfg(sbrec_sb_global_table_get(
+                                                   ovnsb_idl_loop.idl)),
+                                   engine_node_data_changed(&en_flow_output,
+                                                            engine_run_id));
+                        pinctrl_run(ovnsb_idl_txn,
+                                    sbrec_datapath_binding_by_key,
+                                    sbrec_port_binding_by_datapath,
+                                    sbrec_port_binding_by_key,
+                                    sbrec_port_binding_by_name,
+                                    sbrec_mac_binding_by_lport_ip,
+                                    sbrec_igmp_group,
+                                    sbrec_ip_multicast,
+                                    sbrec_dns_table_get(ovnsb_idl_loop.idl),
+                                    sbrec_controller_event_table_get(
+                                        ovnsb_idl_loop.idl),
+                                    br_int, chassis,
+                                    &ed_runtime_data.local_datapaths,
+                                    &ed_runtime_data.active_tunnels);
+                    }
+
+                    /* If the runtime data changed we might need to update
+                     * the tables we need to monitor from the SB DB.
+                     */
+                    if (engine_node_data_changed(&en_runtime_data,
+                                                 engine_run_id)) {
                         update_sb_monitors(ovnsb_idl_loop.idl, chassis,
                                            &ed_runtime_data.local_lports,
                                            &ed_runtime_data.local_datapaths);
@@ -2095,17 +2118,16 @@ main(int argc, char *argv[])
                 }
 
             }
-            if (old_engine_run_id == engine_run_id || !engine_run_done) {
-                if (!engine_run_done || engine_need_run(&en_flow_output)) {
-                    VLOG_DBG("engine did not run, force recompute next time: "
-                             "br_int %p, chassis %p", br_int, chassis);
-                    engine_set_force_recompute(true);
-                    poll_immediate_wake();
-                } else {
-                    VLOG_DBG("engine did not run, and it was not needed"
-                             " either: br_int %p, chassis %p",
-                             br_int, chassis);
-                }
+            if (engine_need_run(&en_flow_output, engine_run_id)) {
+                VLOG_DBG("engine did not run, force recompute next time: "
+                            "br_int %p, chassis %p", br_int, chassis);
+                engine_set_force_recompute(true);
+                poll_immediate_wake();
+            } else if (engine_aborted(&en_flow_output)) {
+                VLOG_DBG("engine was aborted, force recompute next time: "
+                         "br_int %p, chassis %p", br_int, chassis);
+                engine_set_force_recompute(true);
+                poll_immediate_wake();
             } else {
                 engine_set_force_recompute(false);
             }
diff --git a/lib/inc-proc-eng.c b/lib/inc-proc-eng.c
index 1064a08..6ab4148 100644
--- a/lib/inc-proc-eng.c
+++ b/lib/inc-proc-eng.c
@@ -34,6 +34,13 @@ static bool engine_force_recompute = false;
 static bool engine_abort_recompute = false;
 static const struct engine_context *engine_context;
 
+static const char *engine_node_state_name[EN_STATE_MAX] = {
+    [EN_STALE]   = "Stale",
+    [EN_UPDATED] = "Updated",
+    [EN_VALID]   = "Valid",
+    [EN_ABORTED] = "Aborted",
+};
+
 void
 engine_set_force_recompute(bool val)
 {
@@ -128,24 +135,72 @@ engine_ovsdb_node_add_index(struct engine_node *node, const char *name,
     ed->n_indexes ++;
 }
 
+void
+engine_set_node_state_at(struct engine_node *node,
+                         enum engine_node_state state,
+                         const char *where)
+{
+    if (node->state == state) {
+        return;
+    }
+
+    VLOG_DBG("%s: node: %s (run-id %lu), old_state %s, new_state %s",
+             where, node->name, node->run_id,
+             engine_node_state_name[node->state],
+             engine_node_state_name[state]);
+
+    node->state = state;
+}
+
+bool
+engine_node_data_valid(struct engine_node *node, uint64_t run_id)
+{
+    return node->run_id == run_id &&
+        (node->state == EN_UPDATED || node->state == EN_VALID);
+}
+
+bool
+engine_node_data_changed(struct engine_node *node, uint64_t run_id)
+{
+    return node->run_id == run_id && node->state == EN_UPDATED;
+}
+
 bool
+engine_aborted(struct engine_node *node)
+{
+    return node->state == EN_ABORTED;
+}
+
+void
 engine_run(struct engine_node *node, uint64_t run_id)
 {
     if (node->run_id == run_id) {
-        return true;
+        /* The node was already updated in this run (could be input for
+         * multiple other nodes). Stop processing.
+         */
+        return;
     }
+
+    /* Initialize the node for this run. */
     node->run_id = run_id;
+    engine_set_node_state(node, EN_STALE);
 
-    node->changed = false;
     if (!node->n_inputs) {
+        /* Run the node handler which might change state. */
         node->run(node);
-        VLOG_DBG("node: %s, changed: %d", node->name, node->changed);
-        return true;
+        return;
     }
 
     for (size_t i = 0; i < node->n_inputs; i++) {
-        if (!engine_run(node->inputs[i].node, run_id)) {
-            return false;
+        engine_run(node->inputs[i].node, run_id);
+        if (!engine_node_data_valid(node->inputs[i].node, run_id)) {
+            /* If the input node aborted computation, move to EN_ABORTED to
+             * propagate the result, otherwise stay in EN_STALE.
+             */
+            if (engine_aborted(node->inputs[i].node)) {
+                engine_set_node_state(node, EN_ABORTED);
+            }
+            return;
         }
     }
 
@@ -155,9 +210,14 @@ engine_run(struct engine_node *node, uint64_t run_id)
     if (engine_force_recompute) {
         need_recompute = true;
     } else {
+        /* If one of the inputs updated data then we need to recompute the
+         * current node too.
+         */
         for (size_t i = 0; i < node->n_inputs; i++) {
-            if (node->inputs[i].node->changed) {
+            if (node->inputs[i].node->state == EN_UPDATED) {
                 need_compute = true;
+
+                /* Trigger a recompute if we don't have a change handler. */
                 if (!node->inputs[i].change_handler) {
                     need_recompute = true;
                     break;
@@ -171,46 +231,67 @@ engine_run(struct engine_node *node, uint64_t run_id)
                  engine_force_recompute ? "forced" : "triggered");
         if (engine_abort_recompute) {
             VLOG_DBG("node: %s, recompute aborted", node->name);
-            return false;
+            engine_set_node_state(node, EN_ABORTED);
+            return;
         }
+        /* Run the node handler which might change state. */
         node->run(node);
-    } else if (need_compute) {
+        return;
+    }
+
+    if (need_compute) {
         for (size_t i = 0; i < node->n_inputs; i++) {
-            if (node->inputs[i].node->changed) {
+            /* If the input node data changed call its change handler. */
+            if (node->inputs[i].node->state == EN_UPDATED) {
                 VLOG_DBG("node: %s, handle change for input %s",
                          node->name, node->inputs[i].node->name);
+
+                /* If the input change can't be handled incrementally, run
+                 * the node handler.
+                 */
                 if (!node->inputs[i].change_handler(node)) {
                     VLOG_DBG("node: %s, can't handle change for input %s, "
                              "fall back to recompute",
                              node->name, node->inputs[i].node->name);
                     if (engine_abort_recompute) {
                         VLOG_DBG("node: %s, recompute aborted", node->name);
-                        return false;
+                        engine_set_node_state(node, EN_ABORTED);
+                        return;
                     }
+                    /* Run the node handler which might change state. */
                     node->run(node);
-                    break;
+                    return;
                 }
             }
         }
     }
 
-    VLOG_DBG("node: %s, changed: %d", node->name, node->changed);
-    return true;
+    /* If we reached this point, either the node was updated or its state is
+     * still valid.
+     */
+    if (!engine_node_data_changed(node, run_id)) {
+        engine_set_node_state(node, EN_VALID);
+    }
 }
 
 bool
-engine_need_run(struct engine_node *node)
+engine_need_run(struct engine_node *node, uint64_t run_id)
 {
     size_t i;
 
+    if (node->run_id == run_id) {
+        return false;
+    }
+
     if (!node->n_inputs) {
         node->run(node);
-        VLOG_DBG("input node: %s, changed: %d", node->name, node->changed);
-        return node->changed;
+        VLOG_DBG("input node: %s, state: %s", node->name,
+                 engine_node_state_name[node->state]);
+        return node->state == EN_UPDATED;
     }
 
     for (i = 0; i < node->n_inputs; i++) {
-        if (engine_need_run(node->inputs[i].node)) {
+        if (engine_need_run(node->inputs[i].node, run_id)) {
             return true;
         }
     }
diff --git a/lib/inc-proc-eng.h b/lib/inc-proc-eng.h
index 3a69dc2..9def911 100644
--- a/lib/inc-proc-eng.h
+++ b/lib/inc-proc-eng.h
@@ -82,6 +82,20 @@ struct engine_node_input {
     bool (*change_handler)(struct engine_node *node);
 };
 
+enum engine_node_state {
+    EN_STALE,     /* Data in the node is not up to date with the DB. */
+    EN_UPDATED,   /* Data in the node is valid but was updated during the
+                   * last run.
+                   */
+    EN_VALID,     /* Data in the node is valid and didn't change during the
+                   * last run.
+                   */
+    EN_ABORTED,   /* During the last run, processing was aborted for
+                   * this node.
+                   */
+    EN_STATE_MAX,
+};
+
 struct engine_node {
     /* A unique id to distinguish each iteration of the engine_run(). */
     uint64_t run_id;
@@ -102,8 +116,8 @@ struct engine_node {
      * node. */
     void *data;
 
-    /* Whether the data changed in the last engine run. */
-    bool changed;
+    /* State of the node after the last engine run. */
+    enum engine_node_state state;
 
     /* Method to initialize data. It may be NULL. */
     void (*init)(struct engine_node *);
@@ -121,9 +135,9 @@ struct engine_node {
 void engine_init(struct engine_node *);
 
 /* Execute the processing recursively, which should be called in the main
- * loop. Returns true if the execution is compelte, false if it is aborted,
- * which could happen when engine_abort_recompute is set. */
-bool engine_run(struct engine_node *, uint64_t run_id);
+ * loop. Updates the engine node's states accordingly.
+ */
+void engine_run(struct engine_node *, uint64_t run_id);
 
 /* Clean up the data for the engine nodes recursively. It calls each node's
  * cleanup() method if not NULL. It should be called before the program
@@ -132,7 +146,7 @@ void engine_cleanup(struct engine_node *);
 
 /* Check if engine needs to run, i.e. any change to be processed. */
 bool
-engine_need_run(struct engine_node *);
+engine_need_run(struct engine_node *, uint64_t run_id);
 
 /* Get the input node with <name> for <node> */
 struct engine_node * engine_get_input(const char *input_name,
@@ -159,6 +173,22 @@ const struct engine_context * engine_get_context(void);
 
 void engine_set_context(const struct engine_context *);
 
+void engine_set_node_state_at(struct engine_node *node,
+                              enum engine_node_state state,
+                              const char *where);
+/* Return true if the node's data is up to date with the database contents. */
+bool engine_node_data_valid(struct engine_node *node, uint64_t run_id);
+
+/* Return true if during the last engine run the node's data was updated. */
+bool engine_node_data_changed(struct engine_node *node, uint64_t run_id);
+
+/* Returns true if during the last engine run we had to abort processing. */
+bool engine_aborted(struct engine_node *node);
+
+/* Set the state of the node and log changes. */
+#define engine_set_node_state(node, state) \
+    engine_set_node_state_at(node, state, OVS_SOURCE_LOCATOR)
+
 struct ed_ovsdb_index {
     const char *name;
     struct ovsdb_idl_index *index;
@@ -184,6 +214,7 @@ void engine_ovsdb_node_add_index(struct engine_node *, const char *name,
     struct engine_node en_##NAME = { \
         .name = NAME_STR, \
         .data = &ed_##NAME, \
+        .state = EN_STALE, \
         .init = en_##NAME##_init, \
         .run = en_##NAME##_run, \
         .cleanup = en_##NAME##_cleanup, \
@@ -198,10 +229,10 @@ en_##DB_NAME##_##TBL_NAME##_run(struct engine_node *node) \
     const struct DB_NAME##rec_##TBL_NAME##_table *table = \
         EN_OVSDB_GET(node); \
     if (DB_NAME##rec_##TBL_NAME##_table_track_get_first(table)) { \
-        node->changed = true; \
+        engine_set_node_state(node, EN_UPDATED); \
         return; \
     } \
-    node->changed = false; \
+    engine_set_node_state(node, EN_VALID); \
 } \
 static void (*en_##DB_NAME##_##TBL_NAME##_init)(struct engine_node *node) \
             = NULL; \
-- 
1.8.3.1



More information about the dev mailing list