[ovs-dev] [PATCH v2 08/10] ovn-controller: port-binding incremental processing for physical flows

Han Zhou zhouhan at gmail.com
Thu May 24 22:10:49 UTC 2018


This patch implements change handler for port-binding in flow_output
for physical flows computing, so that physical flow computing will
be incremental.

This patch together with previous incremental processing engine
related changes supports incremental processing for lflow changes
and port-binding changes of lports on other HVs, which are the most
common scenarios in a cloud where workloads come up and down.

In ovn-scale-test env [1], the total execution time of creating and
binding 10k ports on 1k HVs with 40 lswitches and 8 lrouters
(5 lswitches/lrouter) decreased from 3h40m to 1h50m because of the
reduced CPU usage on HVs. The CPU time of ovn-controller for creating
and binding an additional 500 lports (on top of the already existing
10k lports) decreased by 90% compared with master. End-to-end latency
for one extra port on top of the 10k lports, measured from port
creation until all flows are installed on all related HVs, also
improved significantly, from 20.6s to 7.3s.

[1] https://github.com/openvswitch/ovn-scale-test

Signed-off-by: Han Zhou <hzhou8 at ebay.com>
---
 ovn/controller/ovn-controller.c |  76 ++++++++++++++++++++++-
 ovn/controller/physical.c       | 129 +++++++++++++++++++++++++++++-----------
 ovn/controller/physical.h       |   8 +++
 3 files changed, 178 insertions(+), 35 deletions(-)

diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c
index 215146b..e939d03 100644
--- a/ovn/controller/ovn-controller.c
+++ b/ovn/controller/ovn-controller.c
@@ -903,6 +903,80 @@ flow_output_sb_logical_flow_handler(struct engine_node *node)
     return handled;
 }
 
+static bool
+flow_output_sb_port_binding_handler(struct engine_node *node)
+{
+    struct controller_ctx *ctx = (struct controller_ctx *)node->context;
+    struct ed_type_runtime_data *data =
+        (struct ed_type_runtime_data *)engine_get_input(
+                "runtime_data", node)->data;
+    struct hmap *local_datapaths = &data->local_datapaths;
+    struct sset *active_tunnels = &data->active_tunnels;
+    struct chassis_index *chassis_index = &data->chassis_index;
+    struct simap *ct_zones = &data->ct_zones;
+    const struct ovsrec_bridge *br_int = get_br_int(ctx);
+
+    const char *chassis_id = get_chassis_id(ctx->ovs_idl);
+
+
+    const struct sbrec_chassis *chassis = NULL;
+    if (chassis_id) {
+        chassis = get_chassis(ctx->ovnsb_idl, chassis_id);
+    }
+
+    ovs_assert(br_int && chassis);
+
+    struct ed_type_flow_output *fo =
+        (struct ed_type_flow_output *)node->data;
+    struct ovn_desired_flow_table *flow_table = &fo->flow_table;
+
+    /* XXX: for now we handle port-binding changes for physical flow
+     * processing only, but port-binding changes can have an impact on
+     * logical flow processing, too, in the following circumstances:
+     *
+     *  - When a port-binding for a lport is inserted/deleted but the lflow
+     *    using that lport doesn't change.
+     *
+     *    This is likely to happen only when the lport name is used by ACL
+     *    match condition, which is specified by the user. Even in that case,
+     *    when the port is actually bound on the chassis it will trigger a
+     *    recompute on that chassis, since the ovs interface is updated. So
+     *    the only situation this has a real impact is when the user defines
+     *    an ACL that includes an lport other than the ingress/egress lport:
+     *
+     *    to-lport 1000 'outport=="A" && inport=="B"' allow-related
+     *
+     *    If "B" is created and bound after the ACL is created, the ACL may not
+     *    take effect on the chassis where "A" is bound, until a recompute is
+     *    triggered there later.
+     *
+     *  - When is_chassis_resident is used in lflow. In this case the port
+     *    binding is patch type, since this condition is used only for lrouter
+     *    ports. In current "runtime_data" handling, port-binding changes of
+     *    patch ports always trigger recomputing. So it is fine even if we do
+     *    not handle it here.
+     *
+     *  - When a mac-binding doesn't change but the port-binding related to
+     *    that mac-binding is deleted. In this case the neighbor flow generated
+     *    for the mac-binding should be deleted. This would not cause any real
+     *    issue for now, since mac-binding change triggers recomputing.
+     *
+     * To address the above issues, we will need to maintain a mapping between
+     * lport names and the lflows that use them, and reprocess the related
+     * lflows when a port-binding corresponding to a lport name changes.
+     */
+
+    enum mf_field_id mff_ovn_geneve = ofctrl_get_mf_field_id();
+    physical_handle_port_binding_changes(flow_table,
+                                         ctx, mff_ovn_geneve,
+                                         chassis, ct_zones,
+                                         local_datapaths,
+                                         chassis_index, active_tunnels);
+
+    node->changed = true;
+    return true;
+}
+
 int
 main(int argc, char *argv[])
 {
@@ -988,7 +1062,7 @@ main(int argc, char *argv[])
     engine_add_input(&en_flow_output, &en_sb_encap, NULL);
     engine_add_input(&en_flow_output, &en_sb_multicast_group, NULL);
     engine_add_input(&en_flow_output, &en_sb_datapath_binding, NULL);
-    engine_add_input(&en_flow_output, &en_sb_port_binding, NULL);
+    engine_add_input(&en_flow_output, &en_sb_port_binding, flow_output_sb_port_binding_handler);
     engine_add_input(&en_flow_output, &en_sb_mac_binding, NULL);
     engine_add_input(&en_flow_output, &en_sb_logical_flow, flow_output_sb_logical_flow_handler);
     engine_add_input(&en_flow_output, &en_sb_dhcp_options, NULL);
diff --git a/ovn/controller/physical.c b/ovn/controller/physical.c
index 8ae14cd..d860686 100644
--- a/ovn/controller/physical.c
+++ b/ovn/controller/physical.c
@@ -360,7 +360,7 @@ consider_port_binding(struct ovn_desired_flow_table *flow_table,
         ofpact_finish_CLONE(ofpacts_p, &clone);
 
         ofctrl_add_flow(flow_table, OFTABLE_LOG_TO_PHY, 100, 0,
-                        &match, ofpacts_p, hc_uuid);
+                        &match, ofpacts_p, &binding->header_.uuid);
         return;
     }
 
@@ -428,7 +428,7 @@ consider_port_binding(struct ovn_desired_flow_table *flow_table,
         }
 
         ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, 100, 0,
-                        &match, ofpacts_p, hc_uuid);
+                        &match, ofpacts_p, &binding->header_.uuid);
 
         goto out;
     }
@@ -571,7 +571,8 @@ consider_port_binding(struct ovn_desired_flow_table *flow_table,
         /* Resubmit to first logical ingress pipeline table. */
         put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, ofpacts_p);
         ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG,
-                        tag ? 150 : 100, 0, &match, ofpacts_p, hc_uuid);
+                        tag ? 150 : 100, 0, &match, ofpacts_p,
+                        &binding->header_.uuid);
 
         if (!tag && (!strcmp(binding->type, "localnet")
                      || !strcmp(binding->type, "l2gateway"))) {
@@ -581,7 +582,8 @@ consider_port_binding(struct ovn_desired_flow_table *flow_table,
              * action. */
             ofpbuf_pull(ofpacts_p, ofpacts_orig_size);
             match_set_dl_tci_masked(&match, 0, htons(VLAN_CFI));
-            ofctrl_add_flow(flow_table, 0, 100, 0, &match, ofpacts_p, hc_uuid);
+            ofctrl_add_flow(flow_table, 0, 100, 0, &match, ofpacts_p,
+                            &binding->header_.uuid);
         }
 
         /* Table 65, Priority 100.
@@ -609,7 +611,7 @@ consider_port_binding(struct ovn_desired_flow_table *flow_table,
             ofpact_put_STRIP_VLAN(ofpacts_p);
         }
         ofctrl_add_flow(flow_table, OFTABLE_LOG_TO_PHY, 100, 0,
-                        &match, ofpacts_p, hc_uuid);
+                        &match, ofpacts_p, &binding->header_.uuid);
     } else if (!tun && !is_ha_remote) {
         /* Remote port connected by localnet port */
         /* Table 33, priority 100.
@@ -632,7 +634,7 @@ consider_port_binding(struct ovn_desired_flow_table *flow_table,
         /* Resubmit to table 33. */
         put_resubmit(OFTABLE_LOCAL_OUTPUT, ofpacts_p);
         ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, 100, 0,
-                        &match, ofpacts_p, hc_uuid);
+                        &match, ofpacts_p, &binding->header_.uuid);
     } else {
         /* Remote port connected by tunnel */
 
@@ -723,7 +725,7 @@ consider_port_binding(struct ovn_desired_flow_table *flow_table,
             ofpact_finish_BUNDLE(ofpacts_p, &bundle);
         }
         ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 100, 0,
-                        &match, ofpacts_p, hc_uuid);
+                        &match, ofpacts_p, &binding->header_.uuid);
     }
 out:
     if (gateway_chassis) {
@@ -737,9 +739,7 @@ consider_mc_group(struct ovn_desired_flow_table *flow_table,
                   const struct simap *ct_zones,
                   struct hmap *local_datapaths,
                   const struct sbrec_chassis *chassis,
-                  const struct sbrec_multicast_group *mc,
-                  struct ofpbuf *ofpacts_p,
-                  struct ofpbuf *remote_ofpacts_p)
+                  const struct sbrec_multicast_group *mc)
 {
     uint32_t dp_key = mc->datapath->tunnel_key;
     if (!get_local_datapath(local_datapaths, dp_key)) {
@@ -765,8 +765,10 @@ consider_mc_group(struct ovn_desired_flow_table *flow_table,
      *      would happen on every hypervisor in the multicast group,
      *      effectively duplicating the packet.)
      */
-    ofpbuf_clear(ofpacts_p);
-    ofpbuf_clear(remote_ofpacts_p);
+    struct ofpbuf ofpacts;
+    ofpbuf_init(&ofpacts, 0);
+    struct ofpbuf remote_ofpacts;
+    ofpbuf_init(&remote_ofpacts, 0);
     for (size_t i = 0; i < mc->n_ports; i++) {
         struct sbrec_port_binding *port = mc->ports[i];
 
@@ -780,20 +782,20 @@ consider_mc_group(struct ovn_desired_flow_table *flow_table,
 
         int zone_id = simap_get(ct_zones, port->logical_port);
         if (zone_id) {
-            put_load(zone_id, MFF_LOG_CT_ZONE, 0, 32, ofpacts_p);
+            put_load(zone_id, MFF_LOG_CT_ZONE, 0, 32, &ofpacts);
         }
 
         if (!strcmp(port->type, "patch")) {
             put_load(port->tunnel_key, MFF_LOG_OUTPORT, 0, 32,
-                     remote_ofpacts_p);
-            put_resubmit(OFTABLE_CHECK_LOOPBACK, remote_ofpacts_p);
+                     &remote_ofpacts);
+            put_resubmit(OFTABLE_CHECK_LOOPBACK, &remote_ofpacts);
         } else if (simap_contains(&localvif_to_ofport,
                            (port->parent_port && *port->parent_port)
                            ? port->parent_port : port->logical_port)
                    || (!strcmp(port->type, "l3gateway")
                        && port->chassis == chassis)) {
-            put_load(port->tunnel_key, MFF_LOG_OUTPORT, 0, 32, ofpacts_p);
-            put_resubmit(OFTABLE_CHECK_LOOPBACK, ofpacts_p);
+            put_load(port->tunnel_key, MFF_LOG_OUTPORT, 0, 32, &ofpacts);
+            put_resubmit(OFTABLE_CHECK_LOOPBACK, &ofpacts);
         } else if (port->chassis && !get_localnet_port(local_datapaths,
                                          mc->datapath->tunnel_key)) {
             /* Add remote chassis only when localnet port not exist,
@@ -808,14 +810,14 @@ consider_mc_group(struct ovn_desired_flow_table *flow_table,
      *
      * Handle output to the local logical ports in the multicast group, if
      * any. */
-    bool local_ports = ofpacts_p->size > 0;
+    bool local_ports = ofpacts.size > 0;
     if (local_ports) {
         /* Following delivery to local logical ports, restore the multicast
          * group as the logical output port. */
-        put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32, ofpacts_p);
+        put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32, &ofpacts);
 
         ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, 100, 0,
-                        &match, ofpacts_p, hc_uuid);
+                        &match, &ofpacts, &mc->header_.uuid);
     }
 
     /* Table 32, priority 100.
@@ -823,12 +825,12 @@ consider_mc_group(struct ovn_desired_flow_table *flow_table,
      *
      * Handle output to the remote chassis in the multicast group, if
      * any. */
-    if (!sset_is_empty(&remote_chassis) || remote_ofpacts_p->size > 0) {
-        if (remote_ofpacts_p->size > 0) {
+    if (!sset_is_empty(&remote_chassis) || remote_ofpacts.size > 0) {
+        if (remote_ofpacts.size > 0) {
             /* Following delivery to logical patch ports, restore the
              * multicast group as the logical output port. */
             put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32,
-                     remote_ofpacts_p);
+                     &remote_ofpacts);
         }
 
         const char *chassis_name;
@@ -842,20 +844,22 @@ consider_mc_group(struct ovn_desired_flow_table *flow_table,
 
             if (!prev || tun->type != prev->type) {
                 put_encapsulation(mff_ovn_geneve, tun, mc->datapath,
-                                  mc->tunnel_key, remote_ofpacts_p);
+                                  mc->tunnel_key, &remote_ofpacts);
                 prev = tun;
             }
-            ofpact_put_OUTPUT(remote_ofpacts_p)->port = tun->ofport;
+            ofpact_put_OUTPUT(&remote_ofpacts)->port = tun->ofport;
         }
 
-        if (remote_ofpacts_p->size) {
+        if (remote_ofpacts.size) {
             if (local_ports) {
-                put_resubmit(OFTABLE_LOCAL_OUTPUT, remote_ofpacts_p);
+                put_resubmit(OFTABLE_LOCAL_OUTPUT, &remote_ofpacts);
             }
             ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 100, 0,
-                            &match, remote_ofpacts_p, hc_uuid);
+                            &match, &remote_ofpacts, &mc->header_.uuid);
         }
     }
+    ofpbuf_uninit(&ofpacts);
+    ofpbuf_uninit(&remote_ofpacts);
     sset_destroy(&remote_chassis);
 }
 
@@ -870,6 +874,68 @@ update_ofports(struct simap *old, struct simap *new)
     return changed;
 }
 
+static void
+reconsider_mc_group_for_pb(struct ovn_desired_flow_table *flow_table,
+                           struct controller_ctx *ctx,
+                           const struct sbrec_port_binding *pb,
+                           const char *mc_name,
+                           enum mf_field_id mff_ovn_geneve,
+                           const struct sbrec_chassis *chassis,
+                           const struct simap *ct_zones,
+                           struct hmap *local_datapaths)
+{
+    const struct sbrec_multicast_group *mc
+        = mcgroup_lookup_by_dp_name(ctx->ovnsb_idl, pb->datapath, mc_name);
+    if (mc) {
+        ofctrl_remove_flows(flow_table, &mc->header_.uuid);
+
+        consider_mc_group(flow_table, mff_ovn_geneve, ct_zones,
+                          local_datapaths, chassis, mc);
+    } else {
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+        VLOG_WARN_RL(&rl, "MC group %s is not found in datapath: "UUID_FMT,
+                     mc_name, UUID_ARGS(&pb->datapath->header_.uuid));
+    }
+}
+
+void
+physical_handle_port_binding_changes(struct ovn_desired_flow_table *flow_table,
+                                     struct controller_ctx *ctx,
+                                     enum mf_field_id mff_ovn_geneve,
+                                     const struct sbrec_chassis *chassis,
+                                     const struct simap *ct_zones,
+                                     struct hmap *local_datapaths,
+                                     struct chassis_index *chassis_index,
+                                     struct sset *active_tunnels)
+{
+    const struct sbrec_port_binding *binding;
+    struct ofpbuf ofpacts;
+    ofpbuf_init(&ofpacts, 0);
+    SBREC_PORT_BINDING_FOR_EACH_TRACKED (binding, ctx->ovnsb_idl) {
+        if (sbrec_port_binding_is_deleted(binding)) {
+            ofctrl_remove_flows(flow_table, &binding->header_.uuid);
+        } else {
+            if (!sbrec_port_binding_is_new(binding)) {
+                ofctrl_remove_flows(flow_table, &binding->header_.uuid);
+
+                reconsider_mc_group_for_pb(flow_table,
+                                           ctx, binding, "_MC_flood",
+                                           mff_ovn_geneve, chassis,
+                                           ct_zones, local_datapaths);
+                reconsider_mc_group_for_pb(flow_table,
+                                           ctx, binding, "_MC_unknown",
+                                           mff_ovn_geneve, chassis,
+                                           ct_zones, local_datapaths);
+            }
+            consider_port_binding(flow_table, ctx, mff_ovn_geneve, ct_zones,
+                                  chassis_index, active_tunnels,
+                                  local_datapaths, binding, chassis,
+                                  &ofpacts);
+        }
+    }
+
+}
+
 void
 physical_run(struct ovn_desired_flow_table *flow_table,
              struct controller_ctx *ctx, enum mf_field_id mff_ovn_geneve,
@@ -1013,16 +1079,11 @@ physical_run(struct ovn_desired_flow_table *flow_table,
 
     /* Handle output to multicast groups, in tables 32 and 33. */
     const struct sbrec_multicast_group *mc;
-    struct ofpbuf remote_ofpacts;
-    ofpbuf_init(&remote_ofpacts, 0);
     SBREC_MULTICAST_GROUP_FOR_EACH (mc, ctx->ovnsb_idl) {
         consider_mc_group(flow_table, mff_ovn_geneve, ct_zones,
-                          local_datapaths, chassis, mc, &ofpacts,
-                          &remote_ofpacts);
+                          local_datapaths, chassis, mc);
     }
 
-    ofpbuf_uninit(&remote_ofpacts);
-
     /* Table 0, priority 100.
      * ======================
      *
diff --git a/ovn/controller/physical.h b/ovn/controller/physical.h
index 7274825..88c5cda 100644
--- a/ovn/controller/physical.h
+++ b/ovn/controller/physical.h
@@ -52,5 +52,13 @@ void physical_run(struct ovn_desired_flow_table *,
                   const struct sset *local_lports,
                   struct chassis_index *chassis_index,
                   struct sset *active_tunnels);
+void physical_handle_port_binding_changes(struct ovn_desired_flow_table *,
+                                          struct controller_ctx *,
+                                          enum mf_field_id mff_ovn_geneve,
+                                          const struct sbrec_chassis *,
+                                          const struct simap *ct_zones,
+                                          struct hmap *local_datapaths,
+                                          struct chassis_index *,
+                                          struct sset *active_tunnels);
 
 #endif /* ovn/physical.h */
-- 
2.1.0



More information about the dev mailing list