[ovs-dev] [PATCH ovn v7 8/9] ovn-controller: Use the tracked runtime data changes for flow calculation.

Han Zhou hzhou at ovn.org
Tue May 26 21:59:54 UTC 2020


On Tue, May 26, 2020 at 6:57 AM Numan Siddique <numans at ovn.org> wrote:
>
>
>
> On Thu, May 21, 2020 at 12:42 PM Han Zhou <hzhou at ovn.org> wrote:
>>
>> On Wed, May 20, 2020 at 12:40 PM <numans at ovn.org> wrote:
>> >
>> > From: Venkata Anil <anilvenkata at redhat.com>
>> >
>> > This patch processes the logical flows of tracked datapaths
>> > and tracked logical ports. To handle the tracked logical port
>> > changes, reference of logical flows to port bindings is maintained.
>> >
>> > Acked-by: Mark Michelson <mmichels at redhat.com>
>> > Co-Authored-by: Numan Siddique <numans at ovn.org>
>> > Signed-off-by: Venkata Anil <anilvenkata at redhat.com>
>> > Signed-off-by: Numan Siddique <numans at ovn.org>
>> > ---
>> >  controller/lflow.c          |  82 +++++++++++++++++++++++-
>> >  controller/lflow.h          |  14 +++-
>> >  controller/ovn-controller.c | 124 ++++++++++++++++++++----------------
>> >  controller/physical.h       |   3 +-
>> >  tests/ovn-performance.at    |   4 +-
>> >  5 files changed, 168 insertions(+), 59 deletions(-)
>> >
>> > diff --git a/controller/lflow.c b/controller/lflow.c
>> > index 01214a3a6..45bf4aa4b 100644
>> > --- a/controller/lflow.c
>> > +++ b/controller/lflow.c
>> > @@ -258,7 +258,7 @@ lflow_resource_destroy_lflow(struct
>> lflow_resource_ref *lfrr,
>> >  /* Adds the logical flows from the Logical_Flow table to flow tables.
*/
>> >  static void
>> >  add_logical_flows(struct lflow_ctx_in *l_ctx_in,
>> > -                  struct lflow_ctx_out *l_ctx_out)
>> > +                        struct lflow_ctx_out *l_ctx_out)
>> >  {
>> >      const struct sbrec_logical_flow *lflow;
>> >
>> > @@ -649,6 +649,8 @@ consider_logical_flow(const struct
sbrec_logical_flow
>> *lflow,
>> >                  int64_t dp_id = lflow->logical_datapath->tunnel_key;
>> >                  char buf[16];
>> >                  snprintf(buf, sizeof(buf), "%"PRId64"_%"PRId64, dp_id,
>> port_id);
>> > +                lflow_resource_add(l_ctx_out->lfrr,
>> REF_TYPE_PORTBINDING, buf,
>> > +                                   &lflow->header_.uuid);
>> >                  if (!sset_contains(l_ctx_in->local_lport_ids, buf)) {
>> >                      VLOG_DBG("lflow "UUID_FMT
>> >                               " port %s in match is not local, skip",
>> > @@ -847,3 +849,81 @@ lflow_destroy(void)
>> >      expr_symtab_destroy(&symtab);
>> >      shash_destroy(&symtab);
>> >  }
>> > +
>> > +bool
>> > +lflow_evaluate_pb_changes(const struct sbrec_port_binding_table
>> *pb_table)
>> > +{
>> > +    const struct sbrec_port_binding *binding;
>> > +    SBREC_PORT_BINDING_TABLE_FOR_EACH_TRACKED (binding, pb_table) {
>> > +        if (!strcmp(binding->type, "remote")) {
>>
>> "remote" can be handled just like any regular VIF, just that it would
never
>> be bound on the current chassis.
>
>
> I didn't handle this comment in v8 as I would need to look into the remote
code again
> to understand it a bit.
>
> The present approach is a bit inefficient because it would resort to full
recompute when
> any port bindings of type remote change. Are you fine if I handle this in
the next patch
> series, which I've started working on?
>
Hi Numan,

I think it would be better to address it in this series because "remote"
port binding changes were handled incrementally even before this series.
Without this, any remote port changes will trigger recompute on all HVs in
all AZs. It is in fact the "easy" part because we are sure that "remote" is
never going to be bound or unbound from current chassis.

> If I understand correctly from the comment, if any remote port binding
changes,
> we need to add this to the tracked_dp_bindings, so that
lflow_handle_flows_for_lport()
> will handle this?

I assume that we don't need to handle anything extra, just keep the
original change-handler for port-bindings in flow_output. What do you think?

>
> Thanks
> Numan
>
>>
>> > +            return false;
>> > +        }
>> > +    }
>> > +
>> > +    return true;
>> > +}
>> > +
>> > +bool
>> > +lflow_add_flows_for_datapath(const struct sbrec_datapath_binding *dp,
>> > +                             struct lflow_ctx_in *l_ctx_in,
>> > +                             struct lflow_ctx_out *l_ctx_out)
>> > +{
>> > +    bool handled = true;
>> > +    struct hmap dhcp_opts = HMAP_INITIALIZER(&dhcp_opts);
>> > +    struct hmap dhcpv6_opts = HMAP_INITIALIZER(&dhcpv6_opts);
>> > +    const struct sbrec_dhcp_options *dhcp_opt_row;
>> > +    SBREC_DHCP_OPTIONS_TABLE_FOR_EACH (dhcp_opt_row,
>> > +                                       l_ctx_in->dhcp_options_table) {
>> > +        dhcp_opt_add(&dhcp_opts, dhcp_opt_row->name,
dhcp_opt_row->code,
>> > +                     dhcp_opt_row->type);
>> > +    }
>> > +
>> > +
>> > +    const struct sbrec_dhcpv6_options *dhcpv6_opt_row;
>> > +    SBREC_DHCPV6_OPTIONS_TABLE_FOR_EACH (dhcpv6_opt_row,
>> > +
l_ctx_in->dhcpv6_options_table)
>> {
>> > +       dhcp_opt_add(&dhcpv6_opts, dhcpv6_opt_row->name,
>> dhcpv6_opt_row->code,
>> > +                    dhcpv6_opt_row->type);
>> > +    }
>> > +
>> > +    struct hmap nd_ra_opts = HMAP_INITIALIZER(&nd_ra_opts);
>> > +    nd_ra_opts_init(&nd_ra_opts);
>> > +
>> > +    struct controller_event_options controller_event_opts;
>> > +    controller_event_opts_init(&controller_event_opts);
>> > +
>> > +    struct sbrec_logical_flow *lf_row =
>> sbrec_logical_flow_index_init_row(
>> > +        l_ctx_in->sbrec_logical_flow_by_logical_datapath);
>> > +    sbrec_logical_flow_index_set_logical_datapath(lf_row, dp);
>> > +
>> > +    const struct sbrec_logical_flow *lflow;
>> > +    SBREC_LOGICAL_FLOW_FOR_EACH_EQUAL (
>> > +        lflow, lf_row,
l_ctx_in->sbrec_logical_flow_by_logical_datapath)
>> {
>> > +        if (!consider_logical_flow(lflow, &dhcp_opts, &dhcpv6_opts,
>> > +                                   &nd_ra_opts,
&controller_event_opts,
>> > +                                   l_ctx_in, l_ctx_out)) {
>> > +            handled = false;
>> > +            break;
>> > +        }
>> > +    }
>> > +
>> > +    dhcp_opts_destroy(&dhcp_opts);
>> > +    dhcp_opts_destroy(&dhcpv6_opts);
>> > +    nd_ra_opts_destroy(&nd_ra_opts);
>> > +    controller_event_opts_destroy(&controller_event_opts);
>> > +    return handled;
>> > +}
>> > +
>> > +bool
>> > +lflow_handle_flows_for_lport(const struct sbrec_port_binding *pb,
>> > +                             struct lflow_ctx_in *l_ctx_in,
>> > +                             struct lflow_ctx_out *l_ctx_out)
>> > +{
>> > +    int64_t dp_id = pb->datapath->tunnel_key;
>> > +    char pb_ref_name[16];
>> > +    snprintf(pb_ref_name, sizeof(pb_ref_name), "%"PRId64"_%"PRId64,
>> > +             dp_id, pb->tunnel_key);
>> > +    bool changed = true;
>> > +    return lflow_handle_changed_ref(REF_TYPE_PORTBINDING, pb_ref_name,
>> > +                                    l_ctx_in, l_ctx_out, &changed);
>> > +}
>> > diff --git a/controller/lflow.h b/controller/lflow.h
>> > index f02f709d7..00c1b5c47 100644
>> > --- a/controller/lflow.h
>> > +++ b/controller/lflow.h
>> > @@ -48,6 +48,9 @@ struct sbrec_dhcp_options_table;
>> >  struct sbrec_dhcpv6_options_table;
>> >  struct sbrec_logical_flow_table;
>> >  struct sbrec_mac_binding_table;
>> > +struct sbrec_port_binding_table;
>> > +struct sbrec_datapath_binding;
>> > +struct sbrec_port_binding;
>> >  struct simap;
>> >  struct sset;
>> >  struct uuid;
>> > @@ -72,7 +75,8 @@ struct uuid;
>> >
>> >  enum ref_type {
>> >      REF_TYPE_ADDRSET,
>> > -    REF_TYPE_PORTGROUP
>> > +    REF_TYPE_PORTGROUP,
>> > +    REF_TYPE_PORTBINDING
>> >  };
>> >
>> >  /* Maintains the relationship for a pair of named resource and
>> > @@ -117,6 +121,7 @@ void lflow_resource_clear(struct lflow_resource_ref
>> *);
>> >
>> >  struct lflow_ctx_in {
>> >      struct ovsdb_idl_index *sbrec_multicast_group_by_name_datapath;
>> > +    struct ovsdb_idl_index *sbrec_logical_flow_by_logical_datapath;
>> >      struct ovsdb_idl_index *sbrec_port_binding_by_name;
>> >      const struct sbrec_dhcp_options_table *dhcp_options_table;
>> >      const struct sbrec_dhcpv6_options_table *dhcpv6_options_table;
>> > @@ -157,4 +162,11 @@ void lflow_destroy(void);
>> >
>> >  void lflow_expr_destroy(struct hmap *lflow_expr_cache);
>> >
>> > +bool lflow_add_flows_for_datapath(const struct sbrec_datapath_binding
*,
>> > +                                  struct lflow_ctx_in *,
>> > +                                  struct lflow_ctx_out *);
>> > +bool lflow_handle_flows_for_lport(const struct sbrec_port_binding *,
>> > +                                  struct lflow_ctx_in *,
>> > +                                  struct lflow_ctx_out *);
>> > +bool lflow_evaluate_pb_changes(const struct sbrec_port_binding_table
*);
>> >  #endif /* controller/lflow.h */
>> > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
>> > index dc790f0f7..e53f750bf 100644
>> > --- a/controller/ovn-controller.c
>> > +++ b/controller/ovn-controller.c
>> > @@ -1440,6 +1440,11 @@ static void init_lflow_ctx(struct engine_node
>> *node,
>> >                  engine_get_input("SB_port_binding", node),
>> >                  "name");
>> >
>> > +    struct ovsdb_idl_index *sbrec_logical_flow_by_dp =
>> > +        engine_ovsdb_node_get_index(
>> > +                engine_get_input("SB_logical_flow", node),
>> > +                "logical_datapath");
>> > +
>> >      struct ovsdb_idl_index *sbrec_mc_group_by_name_dp =
>> >          engine_ovsdb_node_get_index(
>> >                  engine_get_input("SB_multicast_group", node),
>> > @@ -1487,6 +1492,8 @@ static void init_lflow_ctx(struct engine_node
*node,
>> >
>> >      l_ctx_in->sbrec_multicast_group_by_name_datapath =
>> >          sbrec_mc_group_by_name_dp;
>> > +    l_ctx_in->sbrec_logical_flow_by_logical_datapath =
>> > +        sbrec_logical_flow_by_dp;
>> >      l_ctx_in->sbrec_port_binding_by_name = sbrec_port_binding_by_name;
>> >      l_ctx_in->dhcp_options_table  = dhcp_table;
>> >      l_ctx_in->dhcpv6_options_table = dhcpv6_table;
>> > @@ -1641,7 +1648,8 @@ flow_output_sb_mac_binding_handler(struct
>> engine_node *node, void *data)
>> >  }
>> >
>> >  static bool
>> > -flow_output_sb_port_binding_handler(struct engine_node *node, void
*data)
>> > +flow_output_sb_port_binding_handler(struct engine_node *node,
>> > +                                    void *data)
>> >  {
>> >      struct ed_type_runtime_data *rt_data =
>> >          engine_get_input_data("runtime_data", node);
>> > @@ -1649,56 +1657,21 @@ flow_output_sb_port_binding_handler(struct
>> engine_node *node, void *data)
>> >      struct ed_type_flow_output *fo = data;
>> >      struct ovn_desired_flow_table *flow_table = &fo->flow_table;
>> >
>> > -    /* XXX: now we handle port-binding changes for physical flow
>> processing
>> > -     * only, but port-binding change can have impact to logical flow
>> > -     * processing, too, in below circumstances:
>> > -     *
>> > -     *  - When a port-binding for a lport is inserted/deleted but the
>> lflow
>> > -     *    using that lport doesn't change.
>> > -     *
>> > -     *    This can happen only when the lport name is used by ACL
match
>> > -     *    condition, which is specified by user. Even in that case, if
>> the port
>> > -     *    is actually bound on the current chassis it will trigger
>> recompute on
>> > -     *    that chassis since ovs interface would be updated. So the
only
>> > -     *    situation this would have real impact is when user defines
an
>> ACL
>> > -     *    that includes lport that is not on current chassis, and
there
>> is a
>> > -     *    port-binding creation/deletion related to that lport.e.g.:
an
>> ACL is
>> > -     *    defined:
>> > -     *
>> > -     *    to-lport 1000 'outport=="A" && inport=="B"' allow-related
>> > -     *
>> > -     *    If "A" is on current chassis, but "B" is lport that hasn't
been
>> > -     *    created yet. When a lport "B" is created and bound on
another
>> > -     *    chassis, the ACL will not take effect on the current chassis
>> until a
>> > -     *    recompute is triggered later. This case doesn't seem to be a
>> problem
>> > -     *    for real world use cases because usually lport is created
>> before
>> > -     *    being referenced by name in ACLs.
>> > -     *
>> > -     *  - When is_chassis_resident(<lport>) is used in lflow. In this
>> case the
>> > -     *    port binding is not a regular VIF. It can be either "patch"
or
>> > -     *    "external", with ha-chassis-group assigned.  In current
>> > -     *    "runtime_data" handling, port-binding changes for these
types
>> always
>> > -     *    trigger recomputing. So it is fine even if we do not handle
it
>> here.
>> > -     *    (due to the ovsdb tracking support for referenced table
>> changes,
>> > -     *    ha-chassis-group changes will appear as port-binding
change).
>> > -     *
>> > -     *  - When a mac-binding doesn't change but the port-binding
related
>> to
>> > -     *    that mac-binding is deleted. In this case the neighbor flow
>> generated
>> > -     *    for the mac-binding should be deleted. This would not cause
>> any real
>> > -     *    issue for now, since the port-binding related to
mac-binding is
>> > -     *    always logical router port, and any change to logical router
>> port
>> > -     *    would just trigger recompute.
>> > -     *
>> > -     * Although there is no correctness issue so far (except the
unusual
>> ACL
>> > -     * use case, which doesn't seem to be a real problem), it might be
>> better
>> > -     * to handle this more gracefully, without the need to consider
these
>> > -     * tricky scenarios.  One approach is to maintain a mapping
between
>> lport
>> > -     * names and the lflows that uses them, and reprocess the related
>> lflows
>> > -     * when related port-bindings change.
>> > -     */
>> >      struct physical_ctx p_ctx;
>> >      init_physical_ctx(node, rt_data, &p_ctx);
>> >
>> > +    if (!lflow_evaluate_pb_changes(p_ctx.port_binding_table)) {
>> > +        /* If this returns false, it means there is an impact on
>> > +         * the logical flow processing because of some changes in
>> > +         * port bindings. Return false so that recompute is triggered
>> > +         * for this stage. */
>> > +        return false;
>> > +    }
>> > +
>> > +    /* We handle port-binding changes for physical flow processing
>> > +     * only. flow_output runtime data handler takes care of processing
>> > +     * logical flows for any port binding changes.
>> > +     */
>> >      physical_handle_port_binding_changes(&p_ctx, flow_table);
>> >
>> >      engine_set_node_state(node, EN_UPDATED);
>> > @@ -1786,6 +1759,8 @@ _flow_output_resource_ref_handler(struct
>> engine_node *node, void *data,
>> >              updated = &pg_data->updated;
>> >              deleted = &pg_data->deleted;
>> >              break;
>> > +
>> > +        case REF_TYPE_PORTBINDING:
>> >          default:
>> >              OVS_NOT_REACHED();
>> >      }
>> > @@ -1909,16 +1884,52 @@ flow_output_runtime_data_handler(struct
>> engine_node *node,
>> >          return false;
>> >      }
>> >
>> > -    if (!hmap_is_empty(&tracked_data->tracked_dp_bindings)) {
>> > -        /* We are not yet handling the tracked datapath binding
>> > -         * changes. Return false to trigger full recompute. */
>> > -        return false;
>> > +    if (hmap_is_empty(&tracked_data->tracked_dp_bindings)) {
>> > +        if (tracked_data->local_lports_changed) {
>> > +            engine_set_node_state(node, EN_UPDATED);
>> > +        }
>> > +        return true;
>> >      }
>> >
>> > -    if (tracked_data->local_lports_changed) {
>> > +    struct lflow_ctx_in l_ctx_in;
>> > +    struct lflow_ctx_out l_ctx_out;
>> > +    struct ed_type_flow_output *fo = data;
>> > +    struct ed_type_runtime_data *rt_data =
>> > +        engine_get_input_data("runtime_data", node);
>> > +    init_lflow_ctx(node, rt_data, fo, &l_ctx_in, &l_ctx_out);
>> > +
>> > +    struct physical_ctx p_ctx;
>> > +    init_physical_ctx(node, rt_data, &p_ctx);
>> > +
>> > +    bool handled = true;
>> > +    struct tracked_binding_datapath *tdp;
>> > +    HMAP_FOR_EACH (tdp, node, &tracked_data->tracked_dp_bindings) {
>> > +        if (tdp->is_new) {
>> > +            handled = lflow_add_flows_for_datapath(tdp->dp, &l_ctx_in,
>> > +                                                   &l_ctx_out);
>> > +            if (!handled) {
>> > +                break;
>> > +            }
>> > +        } else {
>> > +            struct tracked_binding_lport *lport;
>> > +            LIST_FOR_EACH (lport, list_node, &tdp->lports_head) {
>> > +                if (!lflow_handle_flows_for_lport(lport->pb,
&l_ctx_in,
>> > +                                                  &l_ctx_out)) {
>> > +                    handled = false;
>> > +                    break;
>> > +                }
>> > +            }
>> > +            if (!handled) {
>> > +                break;
>> > +            }
>> > +        }
>> > +    }
>> > +
>> > +    if (handled) {
>> >          engine_set_node_state(node, EN_UPDATED);
>> >      }
>> > -    return true;
>> > +
>> > +    return handled;
>> >  }
>> >
>> >  struct ovn_controller_exit_args {
>> > @@ -1976,6 +1987,9 @@ main(int argc, char *argv[])
>> >          = chassis_index_create(ovnsb_idl_loop.idl);
>> >      struct ovsdb_idl_index *sbrec_multicast_group_by_name_datapath
>> >          = mcast_group_index_create(ovnsb_idl_loop.idl);
>> > +    struct ovsdb_idl_index *sbrec_logical_flow_by_logical_datapath
>> > +        = ovsdb_idl_index_create1(ovnsb_idl_loop.idl,
>> > +
>>  &sbrec_logical_flow_col_logical_datapath);
>> >      struct ovsdb_idl_index *sbrec_port_binding_by_name
>> >          = ovsdb_idl_index_create1(ovnsb_idl_loop.idl,
>> >
 &sbrec_port_binding_col_logical_port);
>> > @@ -2125,6 +2139,8 @@ main(int argc, char *argv[])
>> >      engine_ovsdb_node_add_index(&en_sb_chassis, "name",
>> sbrec_chassis_by_name);
>> >      engine_ovsdb_node_add_index(&en_sb_multicast_group,
"name_datapath",
>> >
 sbrec_multicast_group_by_name_datapath);
>> > +    engine_ovsdb_node_add_index(&en_sb_logical_flow,
"logical_datapath",
>> > +
 sbrec_logical_flow_by_logical_datapath);
>> >      engine_ovsdb_node_add_index(&en_sb_port_binding, "name",
>> >                                  sbrec_port_binding_by_name);
>> >      engine_ovsdb_node_add_index(&en_sb_port_binding, "key",
>> > diff --git a/controller/physical.h b/controller/physical.h
>> > index 9ca34436a..481f03901 100644
>> > --- a/controller/physical.h
>> > +++ b/controller/physical.h
>> > @@ -33,6 +33,7 @@ struct ovsrec_bridge;
>> >  struct simap;
>> >  struct sbrec_multicast_group_table;
>> >  struct sbrec_port_binding_table;
>> > +struct sbrec_port_binding;
>> >  struct sset;
>> >
>> >  /* OVN Geneve option information.
>> > @@ -61,7 +62,7 @@ struct physical_ctx {
>> >  void physical_register_ovs_idl(struct ovsdb_idl *);
>> >  void physical_run(struct physical_ctx *,
>> >                    struct ovn_desired_flow_table *);
>> > -void physical_handle_port_binding_changes(struct physical_ctx *,
>> > +void physical_handle_port_binding_changes(struct physical_ctx *p_ctx,
>> >                                            struct
ovn_desired_flow_table
>> *);
>> >  void physical_handle_mc_group_changes(struct physical_ctx *,
>> >                                        struct ovn_desired_flow_table
*);
>> > diff --git a/tests/ovn-performance.at b/tests/ovn-performance.at
>> > index 6e064e64f..a12757e18 100644
>> > --- a/tests/ovn-performance.at
>> > +++ b/tests/ovn-performance.at
>> > @@ -353,8 +353,8 @@ for i in 1 2; do
>> >      )
>> >
>> >      # Bind port $lp and wait for it to come up
>> > -    OVN_CONTROLLER_EXPECT_HIT_COND(
>> > -        [hv$i hv$j], [lflow_run], [>0 =0],
>> > +    OVN_CONTROLLER_EXPECT_NO_HIT(
>> > +        [hv$i hv$j], [lflow_run],
>> >          [as hv$i ovs-vsctl add-port br-int $vif -- set Interface $vif
>> external-ids:iface-id=$lp &&
>> >           ovn-nbctl wait-until Logical_Switch_Port $lp 'up=true' &&
>> >           ovn-nbctl --wait=hv sync]
>> > --
>> > 2.26.2
>> >
>> > _______________________________________________
>> > dev mailing list
>> > dev at openvswitch.org
>> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>> _______________________________________________
>> dev mailing list
>> dev at openvswitch.org
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>>


More information about the dev mailing list