[ovs-dev] [PATCH ovn] ofctrl: Don't append actions for conj flows if called twice for same flow uuid.

numans at ovn.org numans at ovn.org
Sun Feb 21 11:34:24 UTC 2021


From: Numan Siddique <numans at ovn.org>

In one of the scaled deployments, ovn-controller is asserting with the
below stack trace

    ***
     (gdb) bt
       0  raise () from /lib64/libc.so.6
       1  abort () from /lib64/libc.so.6
       2  ovs_abort_valist ("%s: assertion %s failed in %s()") at lib/util.c:419
       3  vlog_abort_valist ("%s: assertion %s failed in %s()") at lib/vlog.c:1249
       4  vlog_abort ("%s: assertion %s failed in %s()") at lib/vlog.c:1263
       5  ovs_assert_failure (where="controller/ofctrl.c:1216",
                              function="flood_remove_flows_for_sb_uuid",
                              condition="ovs_list_is_empty(&f->list_node)") at lib/util.c:86
       6  flood_remove_flows_for_sb_uuid (sb_uuid=...134,
            flood_remove_nodes=...ef0) at controller/ofctrl.c:1216
       9  ofctrl_flood_remove_flows (flood_remove_nodes=...ef0) at controller/ofctrl.c:1280
       10 lflow_handle_changed_ref (ref_type=REF_TYPE_PORTGROUP,
            ref_name= "5564_pg_14...aac") at controller/lflow.c:522
       11 _flow_output_resource_ref_handler (ref_type=REF_TYPE_PORTGROUP)
            at controller/ovn-controller.c:2220
       12 engine_compute () at lib/inc-proc-eng.c:306
       13 engine_run_node (recompute_allowed=true) at lib/inc-proc-eng.c:352
       14 engine_run (recompute_allowed=true) at lib/inc-proc-eng.c:377
       15 main () at controller/ovn-controller.c:2887
    ***

The reason for this assert is different from the assertion bug fixed in
the commit 858d1dd716("ofctrl: Fix the assert seen when flood removing
flows.")

The above assertion is seen if lflow.c calls ofctrl_add_or_append_flow()
twice for the same flow uuid 'S'.  When this function is called for
the first time, a new desired flow 'f' is created and 'f->references'
will have [(S, f)].  When it is called for the 2nd time, the list
'f->references' is updated as [(S, f), (S,f)].  Later when
flood_remove_flows_for_sb_uuid() is called for 'S', the above assertion
is seen.

This scenarion can happen when ovn-controller receives an update message
from SB ovsdb-server in which a new logical flow 'F' belonging to a
datapath 'D' is added which would result in conjunction flows.  If this
datapath 'D' is added to 'local_datapaths' in the same loop, then
logical flow 'F' is first processed in lflow_add_flows_for_datapath()
and the second time in lflow_handle_changed_flows().

This issue can be fixed in 2 ways
1. Flood remove all the tracked flows in lflow_handle_changed_flows()
   first and then process the modified or newly added logical flows.

2. In the function ofctrl_add_or_append_flow(), check if there is
   already a reference for the flow uuid 'S' in the existing desired
   flows and only append the actions if it doesn't exist.

This patch has taken the approach (2) as that seems better and
effecient.

Signed-off-by: Numan Siddique <numans at ovn.org>
---
 controller/ofctrl.c |  38 +++++++++++++---
 tests/ovn.at        | 104 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 135 insertions(+), 7 deletions(-)

diff --git a/controller/ofctrl.c b/controller/ofctrl.c
index 415d9b7e16..ccfb8b4627 100644
--- a/controller/ofctrl.c
+++ b/controller/ofctrl.c
@@ -231,6 +231,8 @@ static struct desired_flow *desired_flow_lookup_conjunctive(
     struct ovn_desired_flow_table *,
     const struct ovn_flow *target);
 static void desired_flow_destroy(struct desired_flow *);
+static bool check_sb_uuid_in_flow_references(const struct desired_flow *f,
+                                             const struct uuid *sb_uuid);
 
 static struct installed_flow *installed_flow_lookup(
     const struct ovn_flow *target);
@@ -1076,9 +1078,24 @@ ofctrl_add_or_append_flow(struct ovn_desired_flow_table *desired_flows,
     existing = desired_flow_lookup_conjunctive(desired_flows, &f->flow);
     if (existing) {
         /* There's already a flow with this particular match and action
-         * 'conjunction'. Append the action to that flow rather than
-         * adding a new flow.
-         */
+         * 'conjunction'.  Check that if the desired flow
+         * 'existing->references' has 'struct sb_flow_ref' entry for 'sb_uuid'.
+         * If so, then log a warning and return.  Its possible that
+         * 'ofctrl_add_or_append_flow()' is called twice for the same
+         * 'sb_uuid'. */
+        if (check_sb_uuid_in_flow_references(existing, sb_uuid)) {
+            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+            char *f_str = ovn_flow_to_string(&f->flow);
+            char *existingf_str = ovn_flow_to_string(&existing->flow);
+            VLOG_WARN_RL(&rl, "The existing flow [%s] already has a reference "
+                         "for SB UUID : "UUID_FMT". Ignoring the flow [%s]",
+                         existingf_str, UUID_ARGS(sb_uuid), f_str);
+            free(f_str);
+            free(existingf_str);
+            return;
+        }
+
+        /* Append the action to that flow rather than adding a new flow. */
         uint64_t compound_stub[64 / 8];
         struct ofpbuf compound;
         ofpbuf_use_stub(&compound, compound_stub, sizeof(compound_stub));
@@ -1372,13 +1389,12 @@ desired_flow_lookup(struct ovn_desired_flow_table *flow_table,
 }
 
 static bool
-flow_lookup_match_uuid_cb(const struct desired_flow *candidate,
-                          const void *arg)
+check_sb_uuid_in_flow_references(const struct desired_flow *f,
+                                 const struct uuid *sb_uuid)
 {
-    const struct uuid *sb_uuid = arg;
     struct sb_flow_ref *sfr;
 
-    LIST_FOR_EACH (sfr, sb_list, &candidate->references) {
+    LIST_FOR_EACH (sfr, sb_list, &f->references) {
         if (uuid_equals(sb_uuid, &sfr->sb_uuid)) {
             return true;
         }
@@ -1386,6 +1402,14 @@ flow_lookup_match_uuid_cb(const struct desired_flow *candidate,
     return false;
 }
 
+static bool
+flow_lookup_match_uuid_cb(const struct desired_flow *candidate,
+                          const void *arg)
+{
+    const struct uuid *sb_uuid = arg;
+    return check_sb_uuid_in_flow_references(candidate, sb_uuid);
+}
+
 /* Finds and returns a desired_flow in 'flow_table' whose key is identical to
  * 'target''s key, or NULL if there is none.
  *
diff --git a/tests/ovn.at b/tests/ovn.at
index 9bc73680b7..737c109d65 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -24213,6 +24213,110 @@ done
 OVN_CLEANUP([hv1])
 AT_CLEANUP
 
+# Test case to check that ovn-controller doesn't assert when
+# handling conjunction flows.  When ovn-controller claims
+# the first port of a logical switch datapath, it programs the flows
+# for this datapath incrementally (without full recompute).  If
+# suppose, in the same SB update from ovsdb-server, a logical flow is added
+# which results in conjunction action, then this logical flow is also
+# handled incrementally.  The newly added logical flow is processed
+# twice which results in wrong oflows and it results in an assertion
+# in ovn-controller.  Test this ovn-controller handles this scenario
+# properly and doesn't assert.
+AT_SETUP([ovn -- No ovn-controller assert when generating conjunction flows])
+ovn_start
+
+net_add n1
+sim_add hv1
+as hv1
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.10
+
+as hv1
+ovs-vsctl \
+    -- add-port br-int vif1 \
+    -- set Interface vif1 external_ids:iface-id=sw0-p1 \
+    ofport-request=1
+
+check as hv1
+ovs-vsctl set open . external_ids:ovn-monitor-all=true
+
+check ovn-nbctl ls-add sw0
+check ovn-nbctl pg-add pg1
+check ovn-nbctl pg-add pg2
+check ovn-nbctl lsp-add sw0 sw0-p2
+check ovn-nbctl lsp-set-addresses sw0-p2 "00:00:00:00:00:02 192.168.47.2"
+check ovn-nbctl lsp-add sw0 sw0-p3
+check ovn-nbctl lsp-set-addresses sw0-p3 "00:00:00:00:00:03 192.168.47.3"
+
+# Pause ovn-northd. When it is resumed, all the below NB updates
+# will be sent in one transaction.
+
+check as northd ovn-appctl -t ovn-northd pause
+check as northd-backup ovn-appctl -t ovn-northd pause
+
+check ovn-nbctl lsp-add sw0 sw0-p1
+check ovn-nbctl lsp-set-addresses sw0-p1 "00:00:00:00:00:01 192.168.47.1"
+check ovn-nbctl pg-set-ports pg1 sw0-p1 sw0-p2
+check ovn-nbctl pg-set-ports pg2 sw0-p3
+check ovn-nbctl acl-add pg1 to-lport 1002 "outport == @pg1 && ip4 && ip4.src == \$pg2_ip4 && udp && udp.dst >= 1 && udp.dst <= 65535" allow-related
+
+# resume ovn-northd now. This should result in a single update message
+# from SB ovsdb-server to ovn-controller for all the above NB updates.
+check as northd ovn-appctl -t ovn-northd resume
+
+AS_BOX([Wait for sw0-p1 to be up])
+wait_for_ports_up sw0-p1
+
+# When the port group pg1 is updated, it should not result in
+# any assert in ovn-controller.
+ovn-nbctl --wait=hv pg-set-ports pg1 sw0-p1 sw0-p2 sw0-p3
+AT_CHECK([kill -0 $(cat hv1/ovn-controller.pid)])
+check ovn-nbctl --wait=hv sync
+
+# Check OVS flows are installed properly.
+AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=45 | ofctl_strip_all | \
+    grep "priority=2002" | grep conjunction | \
+    sed 's/conjunction([[^)]]*)/conjunction()/g' | sort], [0], [dnl
+ table=45, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x10/0xfff0 actions=conjunction()
+ table=45, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x100/0xff00 actions=conjunction()
+ table=45, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x1000/0xf000 actions=conjunction()
+ table=45, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x2/0xfffe actions=conjunction()
+ table=45, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x20/0xffe0 actions=conjunction()
+ table=45, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x200/0xfe00 actions=conjunction()
+ table=45, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x2000/0xe000 actions=conjunction()
+ table=45, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x4/0xfffc actions=conjunction()
+ table=45, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x40/0xffc0 actions=conjunction()
+ table=45, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x400/0xfc00 actions=conjunction()
+ table=45, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x4000/0xc000 actions=conjunction()
+ table=45, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x8/0xfff8 actions=conjunction()
+ table=45, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x80/0xff80 actions=conjunction()
+ table=45, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x800/0xf800 actions=conjunction()
+ table=45, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x8000/0x8000 actions=conjunction()
+ table=45, priority=2002,udp,reg0=0x100/0x100,metadata=0x1,nw_src=192.168.47.3,tp_dst=1 actions=conjunction()
+ table=45, priority=2002,udp,reg0=0x100/0x100,reg15=0x3,metadata=0x1,nw_src=192.168.47.3 actions=conjunction()
+ table=45, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x10/0xfff0 actions=conjunction()
+ table=45, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x100/0xff00 actions=conjunction()
+ table=45, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x1000/0xf000 actions=conjunction()
+ table=45, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x2/0xfffe actions=conjunction()
+ table=45, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x20/0xffe0 actions=conjunction()
+ table=45, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x200/0xfe00 actions=conjunction()
+ table=45, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x2000/0xe000 actions=conjunction()
+ table=45, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x4/0xfffc actions=conjunction()
+ table=45, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x40/0xffc0 actions=conjunction()
+ table=45, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x400/0xfc00 actions=conjunction()
+ table=45, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x4000/0xc000 actions=conjunction()
+ table=45, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x8/0xfff8 actions=conjunction()
+ table=45, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x80/0xff80 actions=conjunction()
+ table=45, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x800/0xf800 actions=conjunction()
+ table=45, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=0x8000/0x8000 actions=conjunction()
+ table=45, priority=2002,udp,reg0=0x80/0x80,metadata=0x1,nw_src=192.168.47.3,tp_dst=1 actions=conjunction()
+ table=45, priority=2002,udp,reg0=0x80/0x80,reg15=0x3,metadata=0x1,nw_src=192.168.47.3 actions=conjunction()
+])
+
+OVN_CLEANUP([hv1])
+AT_CLEANUP
+
 AT_SETUP([ovn -- OVN FDB (MAC learning) - 2 HVs, 2 LS, 1 LR ])
 ovn_start
 
-- 
2.29.2



More information about the dev mailing list