[ovs-dev] [PATCH ovn] ofctrl: Fix the assert seen when flood removing flows.

numans at ovn.org numans at ovn.org
Tue Feb 16 19:48:29 UTC 2021


From: Numan Siddique <numans at ovn.org>

In one of the scaled deployments, ovn-controller is asserting with the
below stack trace

***
 (gdb) bt
   0  raise () from /lib64/libc.so.6
   1  abort () from /lib64/libc.so.6
   2  ovs_abort_valist ("%s: assertion %s failed in %s()") at lib/util.c:419
   3  vlog_abort_valist ("%s: assertion %s failed in %s()") at lib/vlog.c:1249
   4  vlog_abort ("%s: assertion %s failed in %s()") at lib/vlog.c:1263
   5  ovs_assert_failure (where="controller/ofctrl.c:1198",
                          function="flood_remove_flows_for_sb_uuid",
                          condition="ovs_list_is_empty(&f->list_node)") at lib/util.c:86
   6  flood_remove_flows_for_sb_uuid (sb_uuid=...538,
        flood_remove_nodes=...ed0) at controller/ofctrl.c:1205
   7  flood_remove_flows_for_sb_uuid (sb_uuid=...898,
        flood_remove_nodes=...ed0) at controller/ofctrl.c:1230
   8  flood_remove_flows_for_sb_uuid (sb_uuid=...bf0,
        flood_remove_nodes=...ed0) at controller/ofctrl.c:1230
   9  ofctrl_flood_remove_flows (flood_remove_nodes=...ed0) at controller/ofctrl.c:1250
   10 lflow_handle_changed_ref (ref_type=REF_TYPE_PORTGROUP,
        ref_name= "5564_pg_64...bac") at controller/lflow.c:612
   11 _flow_output_resource_ref_handler (ref_type=REF_TYPE_PORTGROUP)
        at controller/ovn-controller.c:2181
   12 engine_compute () at lib/inc-proc-eng.c:306
   13 engine_run_node (recompute_allowed=true) at lib/inc-proc-eng.c:352
   14 engine_run (recompute_allowed=true) at lib/inc-proc-eng.c:377
   15 main () at controller/ovn-controller.c:2794
***

This assertion is seen when a port group gets updated and it is referenced by many
logical flows (with conj actions).  The function ofctrl_flood_remove_flows(), calls
flood_remove_flows_for_sb_uuid() for each sb uuid in the hmap - flood_remove_nodes
using HMAP_FOR_EACH (flood_remove_nodes). flood_remove_flows_for_sb_uuid() also takes
the hmap 'flood_remove_nodes' as an argument and it inserts few items into it when
it has to call itself recursively.  When an item is inserted, its possible that the
hmap may get expanded.  And if this happens, the HMAP_FOR_EACH () skips few entries
causing some of the desired flows not getting cleared.

Later when ofctrl_add_or_append_flow() is called, there would be multiple
'struct sb_flow_ref' references for the same desired flow.  And this causes the
above assertion later when the same port group gets updated.

This patch fixes this issue by cloning the hmap 'flood_remove_nodes' and using it to
iterate the flood remove nodes.  Also a test case is added to cover this scenario.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1928012
Suggested-by: Ilya Maximetes <i.maximets at ovn.org>
Signed-off-by: Numan Siddique <numans at ovn.org>
---
 controller/ofctrl.c | 15 ++++++++-
 tests/ovn.at        | 75 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 89 insertions(+), 1 deletion(-)

diff --git a/controller/ofctrl.c b/controller/ofctrl.c
index 9d62e1260..9de912478 100644
--- a/controller/ofctrl.c
+++ b/controller/ofctrl.c
@@ -1247,10 +1247,23 @@ ofctrl_flood_remove_flows(struct ovn_desired_flow_table *flow_table,
                           struct hmap *flood_remove_nodes)
 {
     struct ofctrl_flood_remove_node *ofrn;
+
+    /* flood_remove_flows_for_sb_uuid() will modify the 'flood_remove_nodes'
+     * hash map by inserting new items, so we can't use it for iteration.
+     * Copying the sb_uuids into an array. */
+    struct uuid *sb_uuids;
+    int i, n = 0;
+    sb_uuids = xmalloc(hmap_count(flood_remove_nodes) * sizeof *sb_uuids);
+    struct hmap flood_remove_uuids = HMAP_INITIALIZER(&flood_remove_uuids);
     HMAP_FOR_EACH (ofrn, hmap_node, flood_remove_nodes) {
-        flood_remove_flows_for_sb_uuid(flow_table, &ofrn->sb_uuid,
+        sb_uuids[n++] = ofrn->sb_uuid;
+    }
+
+    for (i = 0; i < n; i++) {
+        flood_remove_flows_for_sb_uuid(flow_table, &sb_uuids[i],
                                        flood_remove_nodes);
     }
+    free(sb_uuids);
 
     /* remove any related group and meter info */
     HMAP_FOR_EACH (ofrn, hmap_node, flood_remove_nodes) {
diff --git a/tests/ovn.at b/tests/ovn.at
index 55ea6d170..a02098bac 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -24049,3 +24049,78 @@ wait_column "true" nb:Logical_Switch_Port up name=lsp1
 
 OVN_CLEANUP([hv1])
 AT_CLEANUP
+
+# Test case to check that ovn-controller doesn't assert when
+# handling port group updates.
+AT_SETUP([ovn -- No ovn-controller assert for port group updates])
+ovn_start
+
+net_add n1
+sim_add hv1
+as hv1
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.10
+
+as hv1
+ovs-vsctl \
+    -- add-port br-int vif1 \
+    -- set Interface vif1 external_ids:iface-id=sw0-port1 \
+    ofport-request=1
+
+check ovn-nbctl ls-add sw0
+check ovn-nbctl lsp-add sw0 sw0-port1
+check ovn-nbctl lsp-set-addresses sw0-port1 "10:14:00:00:00:01 192.168.0.2"
+
+check ovn-nbctl lsp-add sw0 sw0-port2
+check ovn-nbctl lsp-add sw0 sw0-port3
+check ovn-nbctl lsp-add sw0 sw0-port4
+check ovn-nbctl lsp-add sw0 sw0-port5
+check ovn-nbctl lsp-add sw0 sw0-port6
+check ovn-nbctl lsp-add sw0 sw0-port7
+
+ovn-nbctl create address_set name=as1
+ovn-nbctl set address_set . addresses="10.0.0.10,10.0.0.11,10.0.0.12"
+
+ovn-nbctl pg-add pg1 sw0-port1 sw0-port2 sw0-port3
+ovn-nbctl acl-add pg1 to-lport 1002 "outport == @pg1 && ip4.dst == \$as1 && icmp4" drop
+ovn-nbctl acl-add pg1 to-lport 1002 "outport == @pg1 && ip4.dst == \$as1 && tcp && tcp.dst >=10000 && tcp.dst <= 20000" drop
+ovn-nbctl acl-add pg1 to-lport 1002 "outport == @pg1 && ip4.dst == \$as1 && udp && udp.dst >=10000 && udp.dst <= 20000" drop
+
+ovn-nbctl pg-add pg2 sw0-port2 sw0-port3 sw0-port4 sw0-port5
+ovn-nbctl acl-add pg2 to-lport 1002 "outport == @pg2 && ip4.dst == \$as1 && icmp4" allow-related
+ovn-nbctl acl-add pg2 to-lport 1002 "outport == @pg2 && ip4.dst == \$as1 && tcp && tcp.dst >=30000 && tcp.dst <= 40000" drop
+ovn-nbctl acl-add pg2 to-lport 1002 "outport == @pg2 && ip4.dst == \$as1 && udp && udp.dst >=30000 && udp.dst <= 40000" drop
+
+ovn-nbctl pg-add pg3 sw0-port1 sw0-port5
+ovn-nbctl acl-add pg3 to-lport 1002 "outport == @pg3 && ip4.dst == \$as1 && icmp4" allow-related
+ovn-nbctl acl-add pg3 to-lport 1002 "outport == @pg3 && ip4.dst == \$as1 && tcp && tcp.dst >=20000 && tcp.dst <= 30000" allow-related
+ovn-nbctl acl-add pg3 to-lport 1002 "outport == @pg3 && ip4.dst == \$as1 && udp && udp.dst >=20000 && udp.dst <= 30000" allow-related
+
+AS_BOX([Delete and add the port groups multiple times])
+
+i=0
+while [[ $i -lt 10 ]]
+do
+    ovn-nbctl --wait=sb clear port_Group pg1 ports
+    ovn-nbctl --wait=sb clear port_Group pg2 ports
+    ovn-nbctl --wait=sb clear port_Group pg3 ports
+    ovn-nbctl --wait=sb pg-set-ports pg1 sw0-port1
+    ovn-nbctl --wait=sb pg-set-ports pg1 sw0-port1 sw0-port4
+    ovn-nbctl --wait=sb pg-set-ports pg1 sw0-port1 sw0-port4 sw0-port5
+
+    ovn-nbctl --wait=sb pg-set-ports pg2 sw0-port2
+    ovn-nbctl --wait=sb pg-set-ports pg2 sw0-port2 sw0-port6
+    ovn-nbctl --wait=sb pg-set-ports pg2 sw0-port2 sw0-port6 sw0-port7
+
+    ovn-nbctl --wait=sb pg-set-ports pg3 sw0-port1
+    ovn-nbctl --wait=sb pg-set-ports pg3 sw0-port1 sw0-port3
+    ovn-nbctl --wait=sb pg-set-ports pg3 sw0-port1 sw0-port3 sw0-port6
+
+    i=$((i+1))
+    # Make sure that ovn-controller has not asserted.
+    AT_CAPTURE_FILE([hv1/ovn-controller.log])
+    AT_CHECK([test 0 = $(grep -c "assertion ovs_list_is_empty(&f->list_node) failed in flood_remove_flows_for_sb_uuid()" hv1/ovn-controller.log)])
+done
+
+OVN_CLEANUP([hv1])
+AT_CLEANUP
-- 
2.29.2



More information about the dev mailing list