[ovs-dev] [PATCH v2 1/2] ovn-controller: Fix busy loop when sb disconnected.

Han Zhou zhouhan at gmail.com
Wed Apr 3 17:07:06 UTC 2019


From: Han Zhou <hzhou8 at ebay.com>

In the main loop, if the SB DB is disconnected when there is a pending
transaction, there can be busy loop causing 100% CPU of ovn-controller,
until SB DB is connected again.

The root cause is that when a transaction is pending, ovsdb_idl_loop_run()
will return NULL for ovnsb_idl_txn, and chassis_run() returns NULL when
ovnsb_idl_txn is NULL, so the condition if (br_int && chassis) is not
satisfied and so ofctrl_run() is not executed in the main loop. If there
is any message pending from br-int.mgmt, such as OFPTYPE_BARRIER_REPLY or
OFPTYPE_ECHO_REQUEST, the main loop will be woken up again and again
because those messages are not processed because ofctrl_run() is not
invoked.

This patch fixes the problem by moving ofctrl_run() above and run it
whenever br_int is not NULL, and not care about chassis because this
function doesn't depend on it.

Note: the changes of this patch is better to be shown with "-w" because
most of them are indent changes.

Signed-off-by: Han Zhou <hzhou8 at ebay.com>
---
Notes:
    v1->v2: Updates according to Numan's comments.

 ovn/controller/ovn-controller.c | 197 ++++++++++++++++++++--------------------
 1 file changed, 100 insertions(+), 97 deletions(-)

diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c
index 882cc19..e924909 100644
--- a/ovn/controller/ovn-controller.c
+++ b/ovn/controller/ovn-controller.c
@@ -721,116 +721,119 @@ main(int argc, char *argv[])
                             &active_tunnels, &local_datapaths,
                             &local_lports, &local_lport_ids);
             }
-            if (br_int && chassis) {
-                struct shash addr_sets = SHASH_INITIALIZER(&addr_sets);
-                addr_sets_init(sbrec_address_set_table_get(ovnsb_idl_loop.idl),
-                               &addr_sets);
-                struct shash port_groups = SHASH_INITIALIZER(&port_groups);
-                port_groups_init(
-                    sbrec_port_group_table_get(ovnsb_idl_loop.idl),
-                    &port_groups);
-
-                patch_run(ovs_idl_txn,
-                          ovsrec_bridge_table_get(ovs_idl_loop.idl),
-                          ovsrec_open_vswitch_table_get(ovs_idl_loop.idl),
-                          ovsrec_port_table_get(ovs_idl_loop.idl),
-                          sbrec_port_binding_table_get(ovnsb_idl_loop.idl),
-                          br_int, chassis);
 
+            if (br_int) {
                 enum mf_field_id mff_ovn_geneve = ofctrl_run(
                     br_int, &pending_ct_zones);
 
-                pinctrl_run(ovnsb_idl_txn, sbrec_chassis_by_name,
-                            sbrec_datapath_binding_by_key,
-                            sbrec_port_binding_by_datapath,
-                            sbrec_port_binding_by_key,
-                            sbrec_port_binding_by_name,
-                            sbrec_mac_binding_by_lport_ip,
-                            sbrec_dns_table_get(ovnsb_idl_loop.idl),
-                            br_int, chassis,
-                            &local_datapaths, &active_tunnels);
-                update_ct_zones(&local_lports, &local_datapaths, &ct_zones,
-                                ct_zone_bitmap, &pending_ct_zones);
-                if (ovs_idl_txn) {
-                    if (ofctrl_can_put()) {
-                        stopwatch_start(CONTROLLER_LOOP_STOPWATCH_NAME,
-                                        time_msec());
-
-                        commit_ct_zones(br_int, &pending_ct_zones);
-
-                        struct hmap flow_table = HMAP_INITIALIZER(&flow_table);
-                        lflow_run(
-                            sbrec_chassis_by_name,
-                            sbrec_multicast_group_by_name_datapath,
-                            sbrec_port_binding_by_name,
-                            sbrec_dhcp_options_table_get(ovnsb_idl_loop.idl),
-                            sbrec_dhcpv6_options_table_get(ovnsb_idl_loop.idl),
-                            sbrec_logical_flow_table_get(ovnsb_idl_loop.idl),
-                            sbrec_mac_binding_table_get(ovnsb_idl_loop.idl),
-                            chassis,
-                            &local_datapaths, &addr_sets,
-                            &port_groups, &active_tunnels, &local_lport_ids,
-                            &flow_table, &group_table, &meter_table);
-
-                        if (chassis_id) {
-                            bfd_run(
-                                sbrec_chassis_by_name,
+                if (chassis) {
+                    struct shash addr_sets = SHASH_INITIALIZER(&addr_sets);
+                    addr_sets_init(sbrec_address_set_table_get(ovnsb_idl_loop.idl),
+                                   &addr_sets);
+                    struct shash port_groups = SHASH_INITIALIZER(&port_groups);
+                    port_groups_init(
+                        sbrec_port_group_table_get(ovnsb_idl_loop.idl),
+                        &port_groups);
+
+                    patch_run(ovs_idl_txn,
+                              ovsrec_bridge_table_get(ovs_idl_loop.idl),
+                              ovsrec_open_vswitch_table_get(ovs_idl_loop.idl),
+                              ovsrec_port_table_get(ovs_idl_loop.idl),
+                              sbrec_port_binding_table_get(ovnsb_idl_loop.idl),
+                              br_int, chassis);
+
+                    pinctrl_run(ovnsb_idl_txn, sbrec_chassis_by_name,
+                                sbrec_datapath_binding_by_key,
                                 sbrec_port_binding_by_datapath,
-                                ovsrec_interface_table_get(ovs_idl_loop.idl),
+                                sbrec_port_binding_by_key,
+                                sbrec_port_binding_by_name,
+                                sbrec_mac_binding_by_lport_ip,
+                                sbrec_dns_table_get(ovnsb_idl_loop.idl),
                                 br_int, chassis,
-                                sbrec_sb_global_table_get(ovnsb_idl_loop.idl),
-                                &local_datapaths);
+                                &local_datapaths, &active_tunnels);
+                    update_ct_zones(&local_lports, &local_datapaths, &ct_zones,
+                                    ct_zone_bitmap, &pending_ct_zones);
+                    if (ovs_idl_txn) {
+                        if (ofctrl_can_put()) {
+                            stopwatch_start(CONTROLLER_LOOP_STOPWATCH_NAME,
+                                            time_msec());
+
+                            commit_ct_zones(br_int, &pending_ct_zones);
+
+                            struct hmap flow_table = HMAP_INITIALIZER(&flow_table);
+                            lflow_run(
+                                sbrec_chassis_by_name,
+                                sbrec_multicast_group_by_name_datapath,
+                                sbrec_port_binding_by_name,
+                                sbrec_dhcp_options_table_get(ovnsb_idl_loop.idl),
+                                sbrec_dhcpv6_options_table_get(ovnsb_idl_loop.idl),
+                                sbrec_logical_flow_table_get(ovnsb_idl_loop.idl),
+                                sbrec_mac_binding_table_get(ovnsb_idl_loop.idl),
+                                chassis,
+                                &local_datapaths, &addr_sets,
+                                &port_groups, &active_tunnels, &local_lport_ids,
+                                &flow_table, &group_table, &meter_table);
+
+                            if (chassis_id) {
+                                bfd_run(
+                                    sbrec_chassis_by_name,
+                                    sbrec_port_binding_by_datapath,
+                                    ovsrec_interface_table_get(ovs_idl_loop.idl),
+                                    br_int, chassis,
+                                    sbrec_sb_global_table_get(ovnsb_idl_loop.idl),
+                                    &local_datapaths);
+                            }
+                            physical_run(
+                                sbrec_chassis_by_name,
+                                sbrec_port_binding_by_name,
+                                sbrec_multicast_group_table_get(
+                                    ovnsb_idl_loop.idl),
+                                sbrec_port_binding_table_get(ovnsb_idl_loop.idl),
+                                mff_ovn_geneve,
+                                br_int, chassis, &ct_zones,
+                                &local_datapaths, &local_lports,
+                                &active_tunnels,
+                                &flow_table);
+
+                            stopwatch_stop(CONTROLLER_LOOP_STOPWATCH_NAME,
+                                           time_msec());
+
+                            ofctrl_put(&flow_table, &pending_ct_zones,
+                                       sbrec_meter_table_get(ovnsb_idl_loop.idl),
+                                       get_nb_cfg(sbrec_sb_global_table_get(
+                                                      ovnsb_idl_loop.idl)));
+
+                            hmap_destroy(&flow_table);
                         }
-                        physical_run(
-                            sbrec_chassis_by_name,
-                            sbrec_port_binding_by_name,
-                            sbrec_multicast_group_table_get(
-                                ovnsb_idl_loop.idl),
-                            sbrec_port_binding_table_get(ovnsb_idl_loop.idl),
-                            mff_ovn_geneve,
-                            br_int, chassis, &ct_zones,
-                            &local_datapaths, &local_lports,
-                            &active_tunnels,
-                            &flow_table);
-
-                        stopwatch_stop(CONTROLLER_LOOP_STOPWATCH_NAME,
-                                       time_msec());
-
-                        ofctrl_put(&flow_table, &pending_ct_zones,
-                                   sbrec_meter_table_get(ovnsb_idl_loop.idl),
-                                   get_nb_cfg(sbrec_sb_global_table_get(
-                                                  ovnsb_idl_loop.idl)));
-
-                        hmap_destroy(&flow_table);
-                    }
-                    if (ovnsb_idl_txn) {
-                        int64_t cur_cfg = ofctrl_get_cur_cfg();
-                        if (cur_cfg && cur_cfg != chassis->nb_cfg) {
-                            sbrec_chassis_set_nb_cfg(chassis, cur_cfg);
+                        if (ovnsb_idl_txn) {
+                            int64_t cur_cfg = ofctrl_get_cur_cfg();
+                            if (cur_cfg && cur_cfg != chassis->nb_cfg) {
+                                sbrec_chassis_set_nb_cfg(chassis, cur_cfg);
+                            }
                         }
                     }
-                }
 
-                if (pending_pkt.conn) {
-                    char *error = ofctrl_inject_pkt(br_int, pending_pkt.flow_s,
-                                                    &addr_sets, &port_groups);
-                    if (error) {
-                        unixctl_command_reply_error(pending_pkt.conn, error);
-                        free(error);
-                    } else {
-                        unixctl_command_reply(pending_pkt.conn, NULL);
+                    if (pending_pkt.conn) {
+                        char *error = ofctrl_inject_pkt(br_int, pending_pkt.flow_s,
+                                                        &addr_sets, &port_groups);
+                        if (error) {
+                            unixctl_command_reply_error(pending_pkt.conn, error);
+                            free(error);
+                        } else {
+                            unixctl_command_reply(pending_pkt.conn, NULL);
+                        }
+                        pending_pkt.conn = NULL;
+                        free(pending_pkt.flow_s);
                     }
-                    pending_pkt.conn = NULL;
-                    free(pending_pkt.flow_s);
-                }
 
-                update_sb_monitors(ovnsb_idl_loop.idl, chassis,
-                                   &local_lports, &local_datapaths);
+                    update_sb_monitors(ovnsb_idl_loop.idl, chassis,
+                                       &local_lports, &local_datapaths);
 
-                expr_const_sets_destroy(&addr_sets);
-                shash_destroy(&addr_sets);
-                expr_const_sets_destroy(&port_groups);
-                shash_destroy(&port_groups);
+                    expr_const_sets_destroy(&addr_sets);
+                    shash_destroy(&addr_sets);
+                    expr_const_sets_destroy(&port_groups);
+                    shash_destroy(&port_groups);
+                }
             }
 
             /* If we haven't handled the pending packet insertion
-- 
2.1.0



More information about the dev mailing list