[ovs-dev] [PATCH v2 1/2] ovn-controller: Fix busy loop when sb disconnected.

Numan Siddique nusiddiq at redhat.com
Wed Apr 3 17:51:08 UTC 2019


On Wed, Apr 3, 2019 at 10:37 PM Han Zhou <zhouhan at gmail.com> wrote:

> From: Han Zhou <hzhou8 at ebay.com>
>
> In the main loop, if the SB DB is disconnected when there is a pending
> transaction, there can be busy loop causing 100% CPU of ovn-controller,
> until SB DB is connected again.
>
> The root cause is that when a transaction is pending, ovsdb_idl_loop_run()
> will return NULL for ovnsb_idl_txn, and chassis_run() returns NULL when
> ovnsb_idl_txn is NULL, so the condition if (br_int && chassis) is not
> satisfied and so ofctrl_run() is not executed in the main loop. If there
> is any message pending from br-int.mgmt, such as OFPTYPE_BARRIER_REPLY or
> OFPTYPE_ECHO_REQUEST, the main loop will be woken up again and again
> because those messages are not processed because ofctrl_run() is not
> invoked.
>
> This patch fixes the problem by moving ofctrl_run() above and run it
> whenever br_int is not NULL, and not care about chassis because this
> function doesn't depend on it.
>
> Note: the changes of this patch is better to be shown with "-w" because
> most of them are indent changes.
>
> Signed-off-by: Han Zhou <hzhou8 at ebay.com>
>

Acked-by: Numan Siddique <nusiddiq at redhat.com>


> ---
> Notes:
>     v1->v2: Updates according to Numan's comments.
>
>  ovn/controller/ovn-controller.c | 197
> ++++++++++++++++++++--------------------
>  1 file changed, 100 insertions(+), 97 deletions(-)
>
> diff --git a/ovn/controller/ovn-controller.c
> b/ovn/controller/ovn-controller.c
> index 882cc19..e924909 100644
> --- a/ovn/controller/ovn-controller.c
> +++ b/ovn/controller/ovn-controller.c
> @@ -721,116 +721,119 @@ main(int argc, char *argv[])
>                              &active_tunnels, &local_datapaths,
>                              &local_lports, &local_lport_ids);
>              }
> -            if (br_int && chassis) {
> -                struct shash addr_sets = SHASH_INITIALIZER(&addr_sets);
> -
> addr_sets_init(sbrec_address_set_table_get(ovnsb_idl_loop.idl),
> -                               &addr_sets);
> -                struct shash port_groups =
> SHASH_INITIALIZER(&port_groups);
> -                port_groups_init(
> -                    sbrec_port_group_table_get(ovnsb_idl_loop.idl),
> -                    &port_groups);
> -
> -                patch_run(ovs_idl_txn,
> -                          ovsrec_bridge_table_get(ovs_idl_loop.idl),
> -                          ovsrec_open_vswitch_table_get(ovs_idl_loop.idl),
> -                          ovsrec_port_table_get(ovs_idl_loop.idl),
> -
> sbrec_port_binding_table_get(ovnsb_idl_loop.idl),
> -                          br_int, chassis);
>
> +            if (br_int) {
>                  enum mf_field_id mff_ovn_geneve = ofctrl_run(
>                      br_int, &pending_ct_zones);
>
> -                pinctrl_run(ovnsb_idl_txn, sbrec_chassis_by_name,
> -                            sbrec_datapath_binding_by_key,
> -                            sbrec_port_binding_by_datapath,
> -                            sbrec_port_binding_by_key,
> -                            sbrec_port_binding_by_name,
> -                            sbrec_mac_binding_by_lport_ip,
> -                            sbrec_dns_table_get(ovnsb_idl_loop.idl),
> -                            br_int, chassis,
> -                            &local_datapaths, &active_tunnels);
> -                update_ct_zones(&local_lports, &local_datapaths,
> &ct_zones,
> -                                ct_zone_bitmap, &pending_ct_zones);
> -                if (ovs_idl_txn) {
> -                    if (ofctrl_can_put()) {
> -                        stopwatch_start(CONTROLLER_LOOP_STOPWATCH_NAME,
> -                                        time_msec());
> -
> -                        commit_ct_zones(br_int, &pending_ct_zones);
> -
> -                        struct hmap flow_table =
> HMAP_INITIALIZER(&flow_table);
> -                        lflow_run(
> -                            sbrec_chassis_by_name,
> -                            sbrec_multicast_group_by_name_datapath,
> -                            sbrec_port_binding_by_name,
> -
> sbrec_dhcp_options_table_get(ovnsb_idl_loop.idl),
> -
> sbrec_dhcpv6_options_table_get(ovnsb_idl_loop.idl),
> -
> sbrec_logical_flow_table_get(ovnsb_idl_loop.idl),
> -
> sbrec_mac_binding_table_get(ovnsb_idl_loop.idl),
> -                            chassis,
> -                            &local_datapaths, &addr_sets,
> -                            &port_groups, &active_tunnels,
> &local_lport_ids,
> -                            &flow_table, &group_table, &meter_table);
> -
> -                        if (chassis_id) {
> -                            bfd_run(
> -                                sbrec_chassis_by_name,
> +                if (chassis) {
> +                    struct shash addr_sets =
> SHASH_INITIALIZER(&addr_sets);
> +
> addr_sets_init(sbrec_address_set_table_get(ovnsb_idl_loop.idl),
> +                                   &addr_sets);
> +                    struct shash port_groups =
> SHASH_INITIALIZER(&port_groups);
> +                    port_groups_init(
> +                        sbrec_port_group_table_get(ovnsb_idl_loop.idl),
> +                        &port_groups);
> +
> +                    patch_run(ovs_idl_txn,
> +                              ovsrec_bridge_table_get(ovs_idl_loop.idl),
> +
> ovsrec_open_vswitch_table_get(ovs_idl_loop.idl),
> +                              ovsrec_port_table_get(ovs_idl_loop.idl),
> +
> sbrec_port_binding_table_get(ovnsb_idl_loop.idl),
> +                              br_int, chassis);
> +
> +                    pinctrl_run(ovnsb_idl_txn, sbrec_chassis_by_name,
> +                                sbrec_datapath_binding_by_key,
>                                  sbrec_port_binding_by_datapath,
> -
> ovsrec_interface_table_get(ovs_idl_loop.idl),
> +                                sbrec_port_binding_by_key,
> +                                sbrec_port_binding_by_name,
> +                                sbrec_mac_binding_by_lport_ip,
> +                                sbrec_dns_table_get(ovnsb_idl_loop.idl),
>                                  br_int, chassis,
> -
> sbrec_sb_global_table_get(ovnsb_idl_loop.idl),
> -                                &local_datapaths);
> +                                &local_datapaths, &active_tunnels);
> +                    update_ct_zones(&local_lports, &local_datapaths,
> &ct_zones,
> +                                    ct_zone_bitmap, &pending_ct_zones);
> +                    if (ovs_idl_txn) {
> +                        if (ofctrl_can_put()) {
> +
> stopwatch_start(CONTROLLER_LOOP_STOPWATCH_NAME,
> +                                            time_msec());
> +
> +                            commit_ct_zones(br_int, &pending_ct_zones);
> +
> +                            struct hmap flow_table =
> HMAP_INITIALIZER(&flow_table);
> +                            lflow_run(
> +                                sbrec_chassis_by_name,
> +                                sbrec_multicast_group_by_name_datapath,
> +                                sbrec_port_binding_by_name,
> +
> sbrec_dhcp_options_table_get(ovnsb_idl_loop.idl),
> +
> sbrec_dhcpv6_options_table_get(ovnsb_idl_loop.idl),
> +
> sbrec_logical_flow_table_get(ovnsb_idl_loop.idl),
> +
> sbrec_mac_binding_table_get(ovnsb_idl_loop.idl),
> +                                chassis,
> +                                &local_datapaths, &addr_sets,
> +                                &port_groups, &active_tunnels,
> &local_lport_ids,
> +                                &flow_table, &group_table, &meter_table);
> +
> +                            if (chassis_id) {
> +                                bfd_run(
> +                                    sbrec_chassis_by_name,
> +                                    sbrec_port_binding_by_datapath,
> +
> ovsrec_interface_table_get(ovs_idl_loop.idl),
> +                                    br_int, chassis,
> +
> sbrec_sb_global_table_get(ovnsb_idl_loop.idl),
> +                                    &local_datapaths);
> +                            }
> +                            physical_run(
> +                                sbrec_chassis_by_name,
> +                                sbrec_port_binding_by_name,
> +                                sbrec_multicast_group_table_get(
> +                                    ovnsb_idl_loop.idl),
> +
> sbrec_port_binding_table_get(ovnsb_idl_loop.idl),
> +                                mff_ovn_geneve,
> +                                br_int, chassis, &ct_zones,
> +                                &local_datapaths, &local_lports,
> +                                &active_tunnels,
> +                                &flow_table);
> +
> +                            stopwatch_stop(CONTROLLER_LOOP_STOPWATCH_NAME,
> +                                           time_msec());
> +
> +                            ofctrl_put(&flow_table, &pending_ct_zones,
> +
>  sbrec_meter_table_get(ovnsb_idl_loop.idl),
> +
>  get_nb_cfg(sbrec_sb_global_table_get(
> +
> ovnsb_idl_loop.idl)));
> +
> +                            hmap_destroy(&flow_table);
>                          }
> -                        physical_run(
> -                            sbrec_chassis_by_name,
> -                            sbrec_port_binding_by_name,
> -                            sbrec_multicast_group_table_get(
> -                                ovnsb_idl_loop.idl),
> -
> sbrec_port_binding_table_get(ovnsb_idl_loop.idl),
> -                            mff_ovn_geneve,
> -                            br_int, chassis, &ct_zones,
> -                            &local_datapaths, &local_lports,
> -                            &active_tunnels,
> -                            &flow_table);
> -
> -                        stopwatch_stop(CONTROLLER_LOOP_STOPWATCH_NAME,
> -                                       time_msec());
> -
> -                        ofctrl_put(&flow_table, &pending_ct_zones,
> -
>  sbrec_meter_table_get(ovnsb_idl_loop.idl),
> -                                   get_nb_cfg(sbrec_sb_global_table_get(
> -                                                  ovnsb_idl_loop.idl)));
> -
> -                        hmap_destroy(&flow_table);
> -                    }
> -                    if (ovnsb_idl_txn) {
> -                        int64_t cur_cfg = ofctrl_get_cur_cfg();
> -                        if (cur_cfg && cur_cfg != chassis->nb_cfg) {
> -                            sbrec_chassis_set_nb_cfg(chassis, cur_cfg);
> +                        if (ovnsb_idl_txn) {
> +                            int64_t cur_cfg = ofctrl_get_cur_cfg();
> +                            if (cur_cfg && cur_cfg != chassis->nb_cfg) {
> +                                sbrec_chassis_set_nb_cfg(chassis,
> cur_cfg);
> +                            }
>                          }
>                      }
> -                }
>
> -                if (pending_pkt.conn) {
> -                    char *error = ofctrl_inject_pkt(br_int,
> pending_pkt.flow_s,
> -                                                    &addr_sets,
> &port_groups);
> -                    if (error) {
> -                        unixctl_command_reply_error(pending_pkt.conn,
> error);
> -                        free(error);
> -                    } else {
> -                        unixctl_command_reply(pending_pkt.conn, NULL);
> +                    if (pending_pkt.conn) {
> +                        char *error = ofctrl_inject_pkt(br_int,
> pending_pkt.flow_s,
> +                                                        &addr_sets,
> &port_groups);
> +                        if (error) {
> +                            unixctl_command_reply_error(pending_pkt.conn,
> error);
> +                            free(error);
> +                        } else {
> +                            unixctl_command_reply(pending_pkt.conn, NULL);
> +                        }
> +                        pending_pkt.conn = NULL;
> +                        free(pending_pkt.flow_s);
>                      }
> -                    pending_pkt.conn = NULL;
> -                    free(pending_pkt.flow_s);
> -                }
>
> -                update_sb_monitors(ovnsb_idl_loop.idl, chassis,
> -                                   &local_lports, &local_datapaths);
> +                    update_sb_monitors(ovnsb_idl_loop.idl, chassis,
> +                                       &local_lports, &local_datapaths);
>
> -                expr_const_sets_destroy(&addr_sets);
> -                shash_destroy(&addr_sets);
> -                expr_const_sets_destroy(&port_groups);
> -                shash_destroy(&port_groups);
> +                    expr_const_sets_destroy(&addr_sets);
> +                    shash_destroy(&addr_sets);
> +                    expr_const_sets_destroy(&port_groups);
> +                    shash_destroy(&port_groups);
> +                }
>              }
>
>              /* If we haven't handled the pending packet insertion
> --
> 2.1.0
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>


More information about the dev mailing list