[ovs-dev] What's the problem of high cpu usage of pmd thread in ovs-dpdk version 2.13

Simon Jones batmanustc at gmail.com
Sat Oct 9 06:35:35 UTC 2021


This is perf result, it shows mlx5_rx_burst_vec function is called
frequently. But there is no packet and no ovs-flows in datapath.


[root at host01 temp]# cat /tmp/perf_record_a/perf_report_perf_record_1.txt
> # To display the perf.data header info, please use --header/--header-only
> options.
> #
> #
> # Total Lost Samples: 1764
> #
> # Samples: 20K of event 'cycles:ppp'
> # Event count (approx.): 365753201591
> #
> # Overhead  Command       Shared Object       Symbol
> # ........  ............  ..................
>  .......................................
> #
>     39.22%  pmd-c01/id:7  ovs-vswitchd        [.] mlx5_rx_burst_vec
>      9.44%  pmd-c01/id:7  [vdso]              [.] __vdso_clock_gettime
>      8.80%  pmd-c01/id:7  ovs-vswitchd        [.] ccmap_try_inc
>      7.46%  pmd-c01/id:7  ovs-vswitchd        [.] ccmap_dec
>      6.87%  pmd-c01/id:7  ovs-vswitchd        [.] ofp_to_string__
>      6.52%  pmd-c01/id:7  [kernel.kallsyms]   [k] nmi
>      5.43%  pmd-c01/id:7  ovs-vswitchd        [.]
> rstp_set_bridge_forward_delay
>      4.55%  pmd-c01/id:7  ovs-vswitchd        [.] ct_dpif_format_entry
>      4.22%  pmd-c01/id:7  ovs-vswitchd        [.] ct_dpif_parse_tuple
>      1.57%  pmd-c01/id:7  ovs-vswitchd        [.] ofp_print_bit_names
>      1.48%  pmd-c01/id:7  ovs-vswitchd        [.] hindex_remove
>      1.32%  pmd-c01/id:7  ovs-vswitchd        [.]
> ct_dpif_parse_zone_limit_tuple
>      0.98%  pmd-c01/id:7  libc-2.17.so        [.] __clock_gettime
>      0.60%  pmd-c01/id:7  ovs-vswitchd        [.]
> rstp_set_bridge_transmit_hold_count
>      0.43%  pmd-c01/id:7  ovs-vswitchd        [.] rstp_set_bridge_address
>      0.41%  pmd-c01/id:7  libpthread-2.17.so  [.] __pthread_once
>      0.15%  pmd-c01/id:7  ovs-vswitchd        [.] search_sse_4
>      0.12%  pmd-c01/id:7  ovs-vswitchd        [.] 0x0000000000bd1a4f
>      0.09%  pmd-c01/id:7  ovs-vswitchd        [.] 0x0000000000bd1a4a
>      0.07%  pmd-c01/id:7  [kernel.kallsyms]   [k] read_tsc
>      0.06%  pmd-c01/id:7  [kernel.kallsyms]   [k] _raw_qspin_lock
>      0.04%  pmd-c01/id:7  ovs-vswitchd        [.] ofpacts_format
>      0.03%  pmd-c01/id:7  [kernel.kallsyms]   [k] update_vsyscall
>      0.02%  pmd-c01/id:7  ovs-vswitchd        [.] __gmon_start__ at plt
>      0.02%  11_scheduler  beam.smp            [.] 0x00000000000ed12f
>      0.02%  pmd-c01/id:7  ovs-vswitchd        [.] ipf_preprocess_conntrack
>      0.01%  pmd-c01/id:7  ovs-vswitchd        [.] cmap_try_insert
>      0.01%  11_scheduler  beam.smp            [.] 0x00000000000ed140
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k]
> retint_userspace_restore_args
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] irq_return
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] _raw_spin_lock_irqsave
>      0.00%  11_scheduler  [kernel.kallsyms]   [k] sk_run_filter
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] apic_timer_interrupt
>      0.00%  11_scheduler  beam.smp            [.] 0x00000000000ed12b
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] trigger_load_balance
>      0.00%  11_scheduler  beam.smp            [.] 0x00000000000ed13e
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] __x86_indirect_thunk_rax
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] tick_do_update_jiffies64
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] __local_bh_enable
>      0.00%  pmd-c01/id:7  ovs-vswitchd        [.] ofpmp_append
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] native_write_msr_safe
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] notifier_call_chain
>      0.00%  11_scheduler  beam.smp            [.] 0x00000000000729e4
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k]
> ktime_get_update_offsets_now
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] pvclock_gtod_notify
>      0.00%  pmd-c01/id:7  libpthread-2.17.so  [.] pthread_getspecific
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] change_pte_range
>      0.00%  11_scheduler  [kernel.kallsyms]   [k]
> retint_userspace_restore_args
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] raw_notifier_call_chain
>      0.00%  11_scheduler  [kernel.kallsyms]   [k] account_entity_dequeue
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] irq_exit
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] clockevents_program_event
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] calc_global_load
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] update_cfs_shares
>      0.00%  11_scheduler  libc-2.17.so        [.] _getopt_long_only_r
>      0.00%  pmd-c01/id:7  ovs-vswitchd        [.] ofpacts_get_meter
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] run_timer_softirq
>      0.00%  pmd-c01/id:7  libpthread-2.17.so  [.] pthread_mutex_unlock
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] update_fast_timekeeper
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] handle_mm_fault
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] _raw_qspin_lock_irq
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] idle_cpu
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] smp_apic_timer_interrupt
>      0.00%  11_scheduler  [kernel.kallsyms]   [k] __schedule
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] account_entity_dequeue
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] cpuacct_account_field
>      0.00%  11_scheduler  [kernel.kallsyms]   [k] seccomp_bpf_load
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] intel_pstate_update_util
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] tick_sched_timer
>      0.00%  11_scheduler  [kernel.kallsyms]   [k] intel_pstate_update_util
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] hrtimer_forward
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] __hrtimer_run_queues
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] rcu_process_gp_end
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] timerqueue_del
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] ktime_get
>      0.00%  11_scheduler  [kernel.kallsyms]   [k] update_curr
>      0.00%  11_scheduler  beam.smp            [.] 0x00000000000ed136
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] profile_tick
>      0.00%  pmd-c01/id:7  libpthread-2.17.so  [.] pthread_mutex_trylock
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] call_softirq
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] irq_enter
>      0.00%  11_scheduler  [kernel.kallsyms]   [k] get_futex_key
>      0.00%  11_scheduler  [kernel.kallsyms]   [k] irq_return
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] __update_cpu_load
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] native_read_msr_safe
>      0.00%  11_scheduler  [kernel.kallsyms]   [k] pick_next_entity
>      0.00%  11_scheduler  [kernel.kallsyms]   [k] rcu_note_context_switch
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] account_entity_enqueue
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] task_tick_fair
>      0.00%  pmd-c01/id:7  ovs-vswitchd        [.] pinsched_run
>      0.00%  11_scheduler  [kernel.kallsyms]   [k] system_call_after_swapgs
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] x86_pmu_disable
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] hrtimer_run_queues
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] update_min_vruntime
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] set_normalized_timespec
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] tick_program_event
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] update_wall_time
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] ioremap_page_range
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] acpi_map_lookup
>      0.00%  pmd-c01/id:7  [kernel.kallsyms]   [k] intel_bts_enable_local
>
>
> #
> # (Tip: Create an archive with symtabs to analyse on other machine: perf
> archive)
> #
> [root at host01 temp]# cat /tmp/perf_record_a/pmds.txt
> pmd-c01/id:7 with PID 23444 in on pCPU 1
> [root at host01 temp]# cat /tmp/perf_record_a/p
> perf_report_perf_record_1.txt     perf_report_perf_record_-g_1.txt
>  pmds.txt
> [root at host01 temp]# cat
> /tmp/perf_record_a/perf_report_perf_record_-g_1.txt
> # To display the perf.data header info, please use --header/--header-only
> options.
> #
> #
> # Total Lost Samples: 2582
> #
> # Samples: 30K of event 'cycles:ppp'
> # Event count (approx.): 223748262979
> #
> # Children      Self  Command       Shared Object       Symbol
> # ........  ........  ............  ..................
>  ..............................................
> #
>     42.51%    37.30%  pmd-c01/id:7  ovs-vswitchd        [.]
> mlx5_rx_burst_vec
>             |
>             |--35.83%--mlx5_rx_burst_vec
>             |
>             |--3.85%--0x7f23473fb700
>             |          0x154dde0
>             |          |
>             |           --3.73%--mlx5_rx_burst_vec
>             |
>              --2.74%--0
>                        0x508c2ac0
>                        mlx5_rx_burst_vec
>
>     15.20%     0.00%  pmd-c01/id:7  [unknown]           [.]
> 0000000000000000
>             |
>             ---0
>                |
>                |--4.37%--__clock_gettime
>                |          __vdso_clock_gettime
>                |
>                |--3.83%--ccmap_try_inc
>                |
>                |--3.28%--0x508c2ac0
>                |          mlx5_rx_burst_vec
>                |
>                |--2.46%--ct_dpif_parse_tuple
>                |
>                 --0.92%--ct_dpif_parse_zone_limit_tuple
>
>      9.54%     9.21%  pmd-c01/id:7  ovs-vswitchd        [.] ccmap_dec
>             |
>              --8.97%--ccmap_dec
>
>      9.49%     0.00%  pmd-c01/id:7  [unknown]           [.]
> 0x000000000154dde0
>             |
>             ---0x154dde0
>                |
>                |--4.35%--rstp_set_bridge_forward_delay
>                |
>                |--4.03%--mlx5_rx_burst_vec
>                |
>                 --0.78%--rstp_set_bridge_transmit_hold_count
>
>      9.37%     0.00%  pmd-c01/id:7  [unknown]           [.]
> 0x00007f23473fb700
>             |
>             ---0x7f23473fb700
>                0x154dde0
>                |
>                |--4.32%--rstp_set_bridge_forward_delay
>                |
>                |--3.95%--mlx5_rx_burst_vec
>                |
>                 --0.78%--rstp_set_bridge_transmit_hold_count
>
>      8.88%     6.31%  pmd-c01/id:7  ovs-vswitchd        [.] ofp_to_string__
>             |
>             |--6.04%--ofp_to_string__
>             |
>              --2.13%--0x18
>                        |
>                         --1.78%--ofp_to_string__
>
>      8.55%     8.23%  pmd-c01/id:7  ovs-vswitchd        [.] ccmap_try_inc
>             |
>             |--4.55%--ccmap_try_inc
>             |
>              --3.98%--0
>                        |
>                         --3.65%--ccmap_try_inc
>
>      8.44%     1.60%  pmd-c01/id:7  libc-2.17.so        [.]
> __clock_gettime
>             |
>             |--6.83%--__clock_gettime
>             |          |
>             |           --6.68%--__vdso_clock_gettime
>             |
>              --0.64%--0x18
>
>      8.25%     7.53%  pmd-c01/id:7  [vdso]              [.]
> __vdso_clock_gettime
>             |
>             |--4.29%--0
>             |          __clock_gettime
>             |          __vdso_clock_gettime
>             |
>             |--0.72%--__vdso_clock_gettime
>             |
>              --0.60%--0x18
>                        |
>                         --0.60%--__clock_gettime
>                                   __vdso_clock_gettime
>
>      7.58%     7.58%  pmd-c01/id:7  [kernel.kallsyms]   [k] nmi
>             |
>             |--4.33%--mlx5_rx_burst_vec
>             |
>             |--0.94%--0
>             |          |
>             |           --0.54%--0x508c2ac0
>             |                     mlx5_rx_burst_vec
>             |
>             |--0.60%--0x7f23473fb700
>             |          0x154dde0
>             |
>              --0.56%--0x18
>
>      5.83%     4.73%  pmd-c01/id:7  ovs-vswitchd        [.]
> rstp_set_bridge_forward_delay
>             |
>             |--3.80%--0x7f23473fb700
>             |          0x154dde0
>             |          |
>             |           --3.69%--rstp_set_bridge_forward_delay
>             |
>             |--1.11%--rstp_set_bridge_forward_delay
>             |
>              --0.91%--0x7f2317ffd100
>                        rstp_set_bridge_forward_delay
>
>      5.19%     5.08%  pmd-c01/id:7  ovs-vswitchd        [.]
> ct_dpif_format_entry
>             |
>             ---ct_dpif_format_entry
>
>      4.72%     0.00%  pmd-c01/id:7  [unknown]           [.]
> 0x0000000000000018
>             |
>             ---0x18
>                |
>                |--2.67%--ofp_to_string__
>                |
>                 --1.03%--__clock_gettime
>                           |
>                            --0.65%--__vdso_clock_gettime
>
>      3.79%     3.53%  pmd-c01/id:7  ovs-vswitchd        [.]
> ct_dpif_parse_tuple
>             |
>             |--2.76%--0
>             |          |
>             |           --2.29%--ct_dpif_parse_tuple
>             |
>              --1.03%--ct_dpif_parse_tuple
>
>      3.28%     0.00%  pmd-c01/id:7  [unknown]           [.]
> 0x00000000508c2ac0
>             |
>             ---0x508c2ac0
>                mlx5_rx_burst_vec
>
>      2.65%     0.00%  pmd-c01/id:7  [unknown]           [.]
> 0x00007f2317ffd100
>             |
>             ---0x7f2317ffd100
>                |
>                |--1.38%--rstp_set_bridge_forward_delay
>                |
>                 --0.99%--hindex_remove
>
>      1.98%     1.46%  pmd-c01/id:7  ovs-vswitchd        [.]
> ofp_print_bit_names
>             |
>             |--1.08%--ofp_print_bit_names
>             |
>              --0.90%--ofp_to_string__
>
>      1.85%     1.27%  pmd-c01/id:7  ovs-vswitchd        [.]
> ct_dpif_parse_zone_limit_tuple
>             |
>              --1.41%--ct_dpif_parse_zone_limit_tuple
>
>      1.75%     1.21%  pmd-c01/id:7  ovs-vswitchd        [.] hindex_remove
>             |
>             |--1.20%--0x7f2317ffd100
>             |          |
>             |           --0.78%--hindex_remove
>             |
>              --0.54%--hindex_remove
>
>      1.23%     0.86%  pmd-c01/id:7  ovs-vswitchd        [.]
> rstp_set_bridge_transmit_hold_count
>      1.12%     0.00%  11_scheduler  libc-2.17.so        [.]
> _getopt_long_only_r
>             |
>              --1.12%--_getopt_long_only_r
>                        |
>                         --0.74%--tracesys
>
>      0.96%     0.00%  11_scheduler  [kernel.kallsyms]   [k] tracesys
>             |
>              --0.95%--tracesys
>
>      0.73%     0.56%  pmd-c01/id:7  libpthread-2.17.so  [.] __pthread_once
>             |
>              --0.56%--0x18
>                        |
>                         --0.55%--ofp_to_string__
>

(ovsdpdk-vswitchd)[root at host01 openvswitch-2.13.4]# ovs-ofctl dump-flows
> br_data
>  cookie=0xaae523e5bd8b5026, duration=66518.807s, table=0, n_packets=0,
> n_bytes=0, priority=4,in_port="phy-br_data",dl_vlan=1
> actions=mod_vlan_vid:1100,NORMAL
>  cookie=0xaae523e5bd8b5026, duration=66524.833s, table=0, n_packets=0,
> n_bytes=0, priority=2,in_port="phy-br_data" actions=drop
>  cookie=0xaae523e5bd8b5026, duration=66525.025s, table=0, n_packets=0,
> n_bytes=0, priority=0 actions=NORMAL
> (ovsdpdk-vswitchd)[root at host01 openvswitch-2.13.4]# ovs-appctl
> dpctl/dump-flows
> 2021-10-09T06:33:08Z|00001|daemon_unix|WARN|/run/openvswitch/ovs-vswitchd.pid:
> open: No such file or directory
> ovs-appctl: cannot read pidfile "/run/openvswitch/ovs-vswitchd.pid" (No
> such file or directory)
>

----
Simon Jones


Simon Jones <batmanustc at gmail.com> 于2021年10月9日周六 下午1:22写道:

> Hi all,
>
> I'm using ovs-dpdk version 2.13 in openstack(openstack is deploy by
> kolla-ansible(https://docs.openstack.org/kolla-ansible/latest/)).
>
> After I reboot hipervisor, I found high cpu usage of pmd thread in
> ovs-dpdk. But there is no large log or large packets, how to know what's
> the problem?
>
> I want to use perf like this:
> https://access.redhat.com/documentation/en-us/red_hat_openstack_platform/10/html/ovs-dpdk_end_to_end_troubleshooting_guide/troubleshoot_ovs_dpdk_pmd_cpu_usage_with_perf_and_collect_and_send_the_troubleshooting_data
> But it's hang there.
>
> So I want to know, if it's my config error?
> Like there should be some other config in Open_vSwitch.other_config of
> ovsdb?
>
> Thank you!!!
>
> ```
> No more log:
> [root at host01 ~]# tailf /var/log/kolla/openvswitch/ovs
> ovsdb-server.log  ovs-vswitchd.log
> [root at host01 ~]# tailf /var/log/kolla/openvswitch/ovs-vswitchd.log
> 2021-10-09T02:41:16.175Z|00351|dpif_netdev|WARN|There's no available
> (non-isolated) pmd thread on numa node 0. Queue 0 on port 'enp5s0f0' will
> be assigned to the pmd on core 1 (numa node 1). Expect reduced performance.
> 2021-10-09T02:41:16.177Z|00352|bridge|INFO|bridge br_mgmt: added interface
> enp5s0f1.1001 on port 1
> 2021-10-09T02:41:16.181Z|00353|bridge|INFO|bridge br_mgmt: using datapath
> ID 0000043f72a49981
> 2021-10-09T02:41:16.181Z|00354|rconn|INFO|br_mgmt<->tcp:127.0.0.1:6633:
> disconnecting
> 2021-10-09T02:41:16.187Z|00355|netdev_linux|WARN|error sending Ethernet
> packet on enp5s0f1.1001: Network is down
> 2021-10-09T02:41:17.210Z|00356|rconn|INFO|br_mgmt<->tcp:127.0.0.1:6633:
> connecting...
> 2021-10-09T02:41:18.035Z|00357|rconn|INFO|br_mgmt<->tcp:127.0.0.1:6633:
> connected
> ^C
>
> no packet on this NIC:
> [root at host01 ~]# tcpdump -i enp5s0f0 -c 100
> tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
> listening on enp5s0f0, link-type EN10MB (Ethernet), capture size 262144
> bytes
> ^C
> 0 packets captured
> 0 packets received by filter
> 0 packets dropped by kernel
>
> top result:
>   PID USER      PR  NI    VIRT    RES    SHR S %CPU %MEM     TIME+ COMMAND
> 23387 root      20   0  256.9g 467624  24984 S  96.1  0.7 882:43.45
> ovs-vswitchd
>
> top -H -p 23387 result:
>   PID USER      PR  NI    VIRT    RES    SHR S %CPU %MEM     TIME+ COMMAND
> 23444 root      20   0  256.9g 467624  24984 R 95.7  0.7 883:35.70
> pmd-c01/id:7
>
> (ovsdpdk-db)[root at host01 openvswitch-2.13.4]# ovs-vsctl show | grep -C 4
> enp5s0f0
>         Port phy-br_data
>             Interface phy-br_data
>                 type: patch
>                 options: {peer=int-br_data}
>         Port enp5s0f0                                 (This is dpdk type
> netdev, which is PMD by pmd-c01)
>             Interface enp5s0f0
>                 type: dpdk
>                 options: {dpdk-devargs="0000:05:00.0"}
>         Port br_data
>             Interface br_data
> (ovsdpdk-db)[root at host01 openvswitch-2.13.4]# ovs-vsctl list Open_vSwitch
> _uuid               : 427f600b-7a06-46e9-b273-5e63e08b1c72
> bridges             : [606f95bb-0637-4df9-809a-6b85f618ac89,
> ce70bf43-ebad-4540-a7e3-e22a6e82fc4d, eae3413a-c859-44b3-a0dc-3b11c1f60a77,
> fa0eeef2-c15f-4f49-8ad6-72deb0d2b120]
> cur_cfg             : 90
> datapath_types      : [netdev, system]
> datapaths           : {}
> db_version          : []
> dpdk_initialized    : true
> dpdk_version        : "DPDK 19.11.8"
> external_ids        : {}
> iface_types         : [dpdk, dpdkr, dpdkvhostuser, dpdkvhostuserclient,
> erspan, geneve, gre, internal, ip6erspan, ip6gre, lisp, patch, stt, system,
> tap, vxlan]
> manager_options     : [65e211d5-a792-4f28-9fab-4eb42034acaf]
> next_cfg            : 90
> other_config        : {dpdk-extra=" --proc-type primary ",
> dpdk-hugepage-dir="/dev/hugepages", dpdk-init=True, dpdk-lcore-mask="0x1",
> dpdk-mem-channels="4", dpdk-socket-mem="1024", pmd-cpu-mask="0x2"}
> ovs_version         : []
> ssl                 : []
> statistics          : {}
> system_type         : []
> system_version      : []
> ```
>
>
> ----
> Simon Jones
>


More information about the dev mailing list