[ovs-dev] [PATCH v4 1/2] ovn-controller: Fix busy loop when sb disconnected.

Han Zhou zhouhan at gmail.com
Tue Apr 16 18:42:03 UTC 2019


From: Han Zhou <hzhou8 at ebay.com>

In the main loop, if the SB DB is disconnected when there is a pending
transaction, there can be busy loop causing 100% CPU of ovn-controller,
until SB DB is connected again.

The root cause is that when a transaction is pending, ovsdb_idl_loop_run()
will return NULL for ovnsb_idl_txn, and chassis_run() returns NULL when
ovnsb_idl_txn is NULL, so the condition if (br_int && chassis) is not
satisfied and so ofctrl_run() is not executed in the main loop. If there
is any message pending from br-int.mgmt, such as OFPTYPE_BARRIER_REPLY or
OFPTYPE_ECHO_REQUEST, the main loop will be woken up again and again
because those messages are not processed because ofctrl_run() is not
invoked.

This patch fixes the problem by moving ofctrl_run() above and run it
whenever br_int is not NULL, and not care about chassis because this
function doesn't depend on it.

It also moves out sbrec_chassis_set_nb_cfg() from the "if (ovs_idl_txn)"
just to avoid adding more indentation of the whole block to avoid >79
line length.

Note: the changes of this patch is better to be shown with "-w" because
most of them are indent changes.

Acked-by: Numan Siddique <nusiddiq at redhat.com>
Acked-by: Mark Michelson <mmichels at redhat.com>
Signed-off-by: Han Zhou <hzhou8 at ebay.com>
---
v3->v4: rebase on master

 ovn/controller/ovn-controller.c | 97 ++++++++++++++++++++++-------------------
 1 file changed, 51 insertions(+), 46 deletions(-)

diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c
index ad626f6..cf4907d 100644
--- a/ovn/controller/ovn-controller.c
+++ b/ovn/controller/ovn-controller.c
@@ -722,38 +722,40 @@ main(int argc, char *argv[])
                             &active_tunnels, &local_datapaths,
                             &local_lports, &local_lport_ids);
             }
-            if (br_int && chassis) {
-                struct shash addr_sets = SHASH_INITIALIZER(&addr_sets);
-                addr_sets_init(sbrec_address_set_table_get(ovnsb_idl_loop.idl),
-                               &addr_sets);
-                struct shash port_groups = SHASH_INITIALIZER(&port_groups);
-                port_groups_init(
-                    sbrec_port_group_table_get(ovnsb_idl_loop.idl),
-                    &port_groups);
-
-                patch_run(ovs_idl_txn,
-                          ovsrec_bridge_table_get(ovs_idl_loop.idl),
-                          ovsrec_open_vswitch_table_get(ovs_idl_loop.idl),
-                          ovsrec_port_table_get(ovs_idl_loop.idl),
-                          sbrec_port_binding_table_get(ovnsb_idl_loop.idl),
-                          br_int, chassis);
 
+            if (br_int) {
                 enum mf_field_id mff_ovn_geneve = ofctrl_run(
                     br_int, &pending_ct_zones);
 
-                pinctrl_run(ovnsb_idl_txn,
-                            sbrec_datapath_binding_by_key,
-                            sbrec_port_binding_by_datapath,
-                            sbrec_port_binding_by_key,
-                            sbrec_port_binding_by_name,
-                            sbrec_mac_binding_by_lport_ip,
-                            sbrec_dns_table_get(ovnsb_idl_loop.idl),
-                            br_int, chassis,
-                            &local_datapaths, &active_tunnels);
-                update_ct_zones(&local_lports, &local_datapaths, &ct_zones,
-                                ct_zone_bitmap, &pending_ct_zones);
-                if (ovs_idl_txn) {
-                    if (ofctrl_can_put()) {
+                if (chassis) {
+                    struct shash addr_sets = SHASH_INITIALIZER(&addr_sets);
+                    addr_sets_init(
+                        sbrec_address_set_table_get(ovnsb_idl_loop.idl),
+                        &addr_sets);
+                    struct shash port_groups = SHASH_INITIALIZER(&port_groups);
+                    port_groups_init(
+                        sbrec_port_group_table_get(ovnsb_idl_loop.idl),
+                        &port_groups);
+
+                    patch_run(ovs_idl_txn,
+                              ovsrec_bridge_table_get(ovs_idl_loop.idl),
+                              ovsrec_open_vswitch_table_get(ovs_idl_loop.idl),
+                              ovsrec_port_table_get(ovs_idl_loop.idl),
+                              sbrec_port_binding_table_get(ovnsb_idl_loop.idl),
+                              br_int, chassis);
+
+                    pinctrl_run(ovnsb_idl_txn,
+                                sbrec_datapath_binding_by_key,
+                                sbrec_port_binding_by_datapath,
+                                sbrec_port_binding_by_key,
+                                sbrec_port_binding_by_name,
+                                sbrec_mac_binding_by_lport_ip,
+                                sbrec_dns_table_get(ovnsb_idl_loop.idl),
+                                br_int, chassis,
+                                &local_datapaths, &active_tunnels);
+                    update_ct_zones(&local_lports, &local_datapaths, &ct_zones,
+                                    ct_zone_bitmap, &pending_ct_zones);
+                    if (ovs_idl_txn && ofctrl_can_put()) {
                         stopwatch_start(CONTROLLER_LOOP_STOPWATCH_NAME,
                                         time_msec());
 
@@ -807,28 +809,31 @@ main(int argc, char *argv[])
                             sbrec_chassis_set_nb_cfg(chassis, cur_cfg);
                         }
                     }
-                }
 
-                if (pending_pkt.conn) {
-                    char *error = ofctrl_inject_pkt(br_int, pending_pkt.flow_s,
-                                                    &addr_sets, &port_groups);
-                    if (error) {
-                        unixctl_command_reply_error(pending_pkt.conn, error);
-                        free(error);
-                    } else {
-                        unixctl_command_reply(pending_pkt.conn, NULL);
+                    if (pending_pkt.conn) {
+                        char *error = ofctrl_inject_pkt(br_int,
+                                                        pending_pkt.flow_s,
+                                                        &addr_sets,
+                                                        &port_groups);
+                        if (error) {
+                            unixctl_command_reply_error(pending_pkt.conn,
+                                                        error);
+                            free(error);
+                        } else {
+                            unixctl_command_reply(pending_pkt.conn, NULL);
+                        }
+                        pending_pkt.conn = NULL;
+                        free(pending_pkt.flow_s);
                     }
-                    pending_pkt.conn = NULL;
-                    free(pending_pkt.flow_s);
-                }
 
-                update_sb_monitors(ovnsb_idl_loop.idl, chassis,
-                                   &local_lports, &local_datapaths);
+                    update_sb_monitors(ovnsb_idl_loop.idl, chassis,
+                                       &local_lports, &local_datapaths);
 
-                expr_const_sets_destroy(&addr_sets);
-                shash_destroy(&addr_sets);
-                expr_const_sets_destroy(&port_groups);
-                shash_destroy(&port_groups);
+                    expr_const_sets_destroy(&addr_sets);
+                    shash_destroy(&addr_sets);
+                    expr_const_sets_destroy(&port_groups);
+                    shash_destroy(&port_groups);
+                }
             }
 
             /* If we haven't handled the pending packet insertion
-- 
2.1.0



More information about the dev mailing list