[ovs-dev] [PATCH v2 1/2] ovn-controller: Fix busy loop when sb disconnected.
Numan Siddique
nusiddiq at redhat.com
Wed Apr 3 17:51:08 UTC 2019
On Wed, Apr 3, 2019 at 10:37 PM Han Zhou <zhouhan at gmail.com> wrote:
> From: Han Zhou <hzhou8 at ebay.com>
>
> In the main loop, if the SB DB is disconnected when there is a pending
> transaction, there can be busy loop causing 100% CPU of ovn-controller,
> until SB DB is connected again.
>
> The root cause is that when a transaction is pending, ovsdb_idl_loop_run()
> will return NULL for ovnsb_idl_txn, and chassis_run() returns NULL when
> ovnsb_idl_txn is NULL, so the condition if (br_int && chassis) is not
> satisfied and so ofctrl_run() is not executed in the main loop. If there
> is any message pending from br-int.mgmt, such as OFPTYPE_BARRIER_REPLY or
> OFPTYPE_ECHO_REQUEST, the main loop will be woken up again and again
> because those messages are not processed because ofctrl_run() is not
> invoked.
>
> This patch fixes the problem by moving ofctrl_run() above and run it
> whenever br_int is not NULL, and not care about chassis because this
> function doesn't depend on it.
>
> Note: the changes of this patch is better to be shown with "-w" because
> most of them are indent changes.
>
> Signed-off-by: Han Zhou <hzhou8 at ebay.com>
>
Acked-by: Numan Siddique <nusiddiq at redhat.com>
> ---
> Notes:
> v1->v2: Updates according to Numan's comments.
>
> ovn/controller/ovn-controller.c | 197
> ++++++++++++++++++++--------------------
> 1 file changed, 100 insertions(+), 97 deletions(-)
>
> diff --git a/ovn/controller/ovn-controller.c
> b/ovn/controller/ovn-controller.c
> index 882cc19..e924909 100644
> --- a/ovn/controller/ovn-controller.c
> +++ b/ovn/controller/ovn-controller.c
> @@ -721,116 +721,119 @@ main(int argc, char *argv[])
> &active_tunnels, &local_datapaths,
> &local_lports, &local_lport_ids);
> }
> - if (br_int && chassis) {
> - struct shash addr_sets = SHASH_INITIALIZER(&addr_sets);
> -
> addr_sets_init(sbrec_address_set_table_get(ovnsb_idl_loop.idl),
> - &addr_sets);
> - struct shash port_groups =
> SHASH_INITIALIZER(&port_groups);
> - port_groups_init(
> - sbrec_port_group_table_get(ovnsb_idl_loop.idl),
> - &port_groups);
> -
> - patch_run(ovs_idl_txn,
> - ovsrec_bridge_table_get(ovs_idl_loop.idl),
> - ovsrec_open_vswitch_table_get(ovs_idl_loop.idl),
> - ovsrec_port_table_get(ovs_idl_loop.idl),
> -
> sbrec_port_binding_table_get(ovnsb_idl_loop.idl),
> - br_int, chassis);
>
> + if (br_int) {
> enum mf_field_id mff_ovn_geneve = ofctrl_run(
> br_int, &pending_ct_zones);
>
> - pinctrl_run(ovnsb_idl_txn, sbrec_chassis_by_name,
> - sbrec_datapath_binding_by_key,
> - sbrec_port_binding_by_datapath,
> - sbrec_port_binding_by_key,
> - sbrec_port_binding_by_name,
> - sbrec_mac_binding_by_lport_ip,
> - sbrec_dns_table_get(ovnsb_idl_loop.idl),
> - br_int, chassis,
> - &local_datapaths, &active_tunnels);
> - update_ct_zones(&local_lports, &local_datapaths,
> &ct_zones,
> - ct_zone_bitmap, &pending_ct_zones);
> - if (ovs_idl_txn) {
> - if (ofctrl_can_put()) {
> - stopwatch_start(CONTROLLER_LOOP_STOPWATCH_NAME,
> - time_msec());
> -
> - commit_ct_zones(br_int, &pending_ct_zones);
> -
> - struct hmap flow_table =
> HMAP_INITIALIZER(&flow_table);
> - lflow_run(
> - sbrec_chassis_by_name,
> - sbrec_multicast_group_by_name_datapath,
> - sbrec_port_binding_by_name,
> -
> sbrec_dhcp_options_table_get(ovnsb_idl_loop.idl),
> -
> sbrec_dhcpv6_options_table_get(ovnsb_idl_loop.idl),
> -
> sbrec_logical_flow_table_get(ovnsb_idl_loop.idl),
> -
> sbrec_mac_binding_table_get(ovnsb_idl_loop.idl),
> - chassis,
> - &local_datapaths, &addr_sets,
> - &port_groups, &active_tunnels,
> &local_lport_ids,
> - &flow_table, &group_table, &meter_table);
> -
> - if (chassis_id) {
> - bfd_run(
> - sbrec_chassis_by_name,
> + if (chassis) {
> + struct shash addr_sets =
> SHASH_INITIALIZER(&addr_sets);
> +
> addr_sets_init(sbrec_address_set_table_get(ovnsb_idl_loop.idl),
> + &addr_sets);
> + struct shash port_groups =
> SHASH_INITIALIZER(&port_groups);
> + port_groups_init(
> + sbrec_port_group_table_get(ovnsb_idl_loop.idl),
> + &port_groups);
> +
> + patch_run(ovs_idl_txn,
> + ovsrec_bridge_table_get(ovs_idl_loop.idl),
> +
> ovsrec_open_vswitch_table_get(ovs_idl_loop.idl),
> + ovsrec_port_table_get(ovs_idl_loop.idl),
> +
> sbrec_port_binding_table_get(ovnsb_idl_loop.idl),
> + br_int, chassis);
> +
> + pinctrl_run(ovnsb_idl_txn, sbrec_chassis_by_name,
> + sbrec_datapath_binding_by_key,
> sbrec_port_binding_by_datapath,
> -
> ovsrec_interface_table_get(ovs_idl_loop.idl),
> + sbrec_port_binding_by_key,
> + sbrec_port_binding_by_name,
> + sbrec_mac_binding_by_lport_ip,
> + sbrec_dns_table_get(ovnsb_idl_loop.idl),
> br_int, chassis,
> -
> sbrec_sb_global_table_get(ovnsb_idl_loop.idl),
> - &local_datapaths);
> + &local_datapaths, &active_tunnels);
> + update_ct_zones(&local_lports, &local_datapaths,
> &ct_zones,
> + ct_zone_bitmap, &pending_ct_zones);
> + if (ovs_idl_txn) {
> + if (ofctrl_can_put()) {
> +
> stopwatch_start(CONTROLLER_LOOP_STOPWATCH_NAME,
> + time_msec());
> +
> + commit_ct_zones(br_int, &pending_ct_zones);
> +
> + struct hmap flow_table =
> HMAP_INITIALIZER(&flow_table);
> + lflow_run(
> + sbrec_chassis_by_name,
> + sbrec_multicast_group_by_name_datapath,
> + sbrec_port_binding_by_name,
> +
> sbrec_dhcp_options_table_get(ovnsb_idl_loop.idl),
> +
> sbrec_dhcpv6_options_table_get(ovnsb_idl_loop.idl),
> +
> sbrec_logical_flow_table_get(ovnsb_idl_loop.idl),
> +
> sbrec_mac_binding_table_get(ovnsb_idl_loop.idl),
> + chassis,
> + &local_datapaths, &addr_sets,
> + &port_groups, &active_tunnels,
> &local_lport_ids,
> + &flow_table, &group_table, &meter_table);
> +
> + if (chassis_id) {
> + bfd_run(
> + sbrec_chassis_by_name,
> + sbrec_port_binding_by_datapath,
> +
> ovsrec_interface_table_get(ovs_idl_loop.idl),
> + br_int, chassis,
> +
> sbrec_sb_global_table_get(ovnsb_idl_loop.idl),
> + &local_datapaths);
> + }
> + physical_run(
> + sbrec_chassis_by_name,
> + sbrec_port_binding_by_name,
> + sbrec_multicast_group_table_get(
> + ovnsb_idl_loop.idl),
> +
> sbrec_port_binding_table_get(ovnsb_idl_loop.idl),
> + mff_ovn_geneve,
> + br_int, chassis, &ct_zones,
> + &local_datapaths, &local_lports,
> + &active_tunnels,
> + &flow_table);
> +
> + stopwatch_stop(CONTROLLER_LOOP_STOPWATCH_NAME,
> + time_msec());
> +
> + ofctrl_put(&flow_table, &pending_ct_zones,
> +
> sbrec_meter_table_get(ovnsb_idl_loop.idl),
> +
> get_nb_cfg(sbrec_sb_global_table_get(
> +
> ovnsb_idl_loop.idl)));
> +
> + hmap_destroy(&flow_table);
> }
> - physical_run(
> - sbrec_chassis_by_name,
> - sbrec_port_binding_by_name,
> - sbrec_multicast_group_table_get(
> - ovnsb_idl_loop.idl),
> -
> sbrec_port_binding_table_get(ovnsb_idl_loop.idl),
> - mff_ovn_geneve,
> - br_int, chassis, &ct_zones,
> - &local_datapaths, &local_lports,
> - &active_tunnels,
> - &flow_table);
> -
> - stopwatch_stop(CONTROLLER_LOOP_STOPWATCH_NAME,
> - time_msec());
> -
> - ofctrl_put(&flow_table, &pending_ct_zones,
> -
> sbrec_meter_table_get(ovnsb_idl_loop.idl),
> - get_nb_cfg(sbrec_sb_global_table_get(
> - ovnsb_idl_loop.idl)));
> -
> - hmap_destroy(&flow_table);
> - }
> - if (ovnsb_idl_txn) {
> - int64_t cur_cfg = ofctrl_get_cur_cfg();
> - if (cur_cfg && cur_cfg != chassis->nb_cfg) {
> - sbrec_chassis_set_nb_cfg(chassis, cur_cfg);
> + if (ovnsb_idl_txn) {
> + int64_t cur_cfg = ofctrl_get_cur_cfg();
> + if (cur_cfg && cur_cfg != chassis->nb_cfg) {
> + sbrec_chassis_set_nb_cfg(chassis,
> cur_cfg);
> + }
> }
> }
> - }
>
> - if (pending_pkt.conn) {
> - char *error = ofctrl_inject_pkt(br_int,
> pending_pkt.flow_s,
> - &addr_sets,
> &port_groups);
> - if (error) {
> - unixctl_command_reply_error(pending_pkt.conn,
> error);
> - free(error);
> - } else {
> - unixctl_command_reply(pending_pkt.conn, NULL);
> + if (pending_pkt.conn) {
> + char *error = ofctrl_inject_pkt(br_int,
> pending_pkt.flow_s,
> + &addr_sets,
> &port_groups);
> + if (error) {
> + unixctl_command_reply_error(pending_pkt.conn,
> error);
> + free(error);
> + } else {
> + unixctl_command_reply(pending_pkt.conn, NULL);
> + }
> + pending_pkt.conn = NULL;
> + free(pending_pkt.flow_s);
> }
> - pending_pkt.conn = NULL;
> - free(pending_pkt.flow_s);
> - }
>
> - update_sb_monitors(ovnsb_idl_loop.idl, chassis,
> - &local_lports, &local_datapaths);
> + update_sb_monitors(ovnsb_idl_loop.idl, chassis,
> + &local_lports, &local_datapaths);
>
> - expr_const_sets_destroy(&addr_sets);
> - shash_destroy(&addr_sets);
> - expr_const_sets_destroy(&port_groups);
> - shash_destroy(&port_groups);
> + expr_const_sets_destroy(&addr_sets);
> + shash_destroy(&addr_sets);
> + expr_const_sets_destroy(&port_groups);
> + shash_destroy(&port_groups);
> + }
> }
>
> /* If we haven't handled the pending packet insertion
> --
> 2.1.0
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
More information about the dev
mailing list