[ovs-dev] [PATCH ovn v10 5/8] ovn-controller: I-P for ct zone and OVS interface changes in flow output stage.
Dumitru Ceara
dceara at redhat.com
Tue Jun 9 22:08:43 UTC 2020
On 6/9/20 7:42 PM, Numan Siddique wrote:
>
>
> On Tue, Jun 9, 2020 at 2:31 PM Dumitru Ceara <dceara at redhat.com
> <mailto:dceara at redhat.com>> wrote:
>
> On 6/9/20 10:51 AM, Numan Siddique wrote:
> >
> >
> > On Mon, Jun 8, 2020 at 11:29 PM Dumitru Ceara <dceara at redhat.com
> <mailto:dceara at redhat.com>
> > <mailto:dceara at redhat.com <mailto:dceara at redhat.com>>> wrote:
> >
> > On 6/8/20 3:50 PM, numans at ovn.org <mailto:numans at ovn.org>
> <mailto:numans at ovn.org <mailto:numans at ovn.org>> wrote:
> > > From: Numan Siddique <numans at ovn.org <mailto:numans at ovn.org>
> <mailto:numans at ovn.org <mailto:numans at ovn.org>>>
> > >
> > > This patch handles ct zone changes and OVS interface changes
> > incrementally
> > > in the flow output stage.
> > >
> > > Any changes to ct zone can be handled by running physical_run()
> > instead of running
> > > flow_output_run(). And any changes to OVS interfaces can be
> either
> > handled
> > > incrementally (for OVS interfaces representing VIFs) or just
> running
> > > physical_run() (for tunnel and other types of interfaces).
> > >
> > > To better handle this, a new engine node 'physical_flow_changes'
> > is added which
> > > handles changes to ct zone and OVS interfaces.
> > >
> > > Signed-off-by: Numan Siddique <numans at ovn.org
> <mailto:numans at ovn.org> <mailto:numans at ovn.org <mailto:numans at ovn.org>>>
> >
> > Hi Numan,
> >
> > Overall this patch looks ok to me. I do have a few
> > comments/questions below.
> >
> > Thanks,
> > Dumitru
> >
> > > ---
> > > controller/binding.c | 23 +-----
> > > controller/binding.h | 24 +++++-
> > > controller/ovn-controller.c | 144
> > +++++++++++++++++++++++++++++++++++-
> > > controller/physical.c | 51 +++++++++++++
> > > controller/physical.h | 5 +-
> > > 5 files changed, 222 insertions(+), 25 deletions(-)
> > >
> > > diff --git a/controller/binding.c b/controller/binding.c
> > > index 71063fe9a..c2d9a4c6b 100644
> > > --- a/controller/binding.c
> > > +++ b/controller/binding.c
> > > @@ -503,7 +503,7 @@ remove_local_lport_ids(const struct
> > sbrec_port_binding *binding_rec,
> > > * 'struct local_binding' is used. A shash of these local
> bindings is
> > > * maintained with the 'external_ids:iface-id' as the key
> to the
> > shash.
> > > *
> > > - * struct local_binding has 3 main fields:
> > > + * struct local_binding (defined in binding.h) has 3 main
> fields:
> > > * - type
> > > * - OVS interface row object
> > > * - Port_Binding row object
> > > @@ -554,21 +554,6 @@ remove_local_lport_ids(const struct
> > sbrec_port_binding *binding_rec,
> > > * - For each 'virtual' Port Binding (of type BT_VIRTUAL)
> > provided its parent
> > > * is bound to this chassis.
> > > */
> > > -enum local_binding_type {
> > > - BT_VIF,
> > > - BT_CONTAINER,
> > > - BT_VIRTUAL
> > > -};
> > > -
> > > -struct local_binding {
> > > - char *name;
> > > - enum local_binding_type type;
> > > - const struct ovsrec_interface *iface;
> > > - const struct sbrec_port_binding *pb;
> > > -
> > > - /* shash of 'struct local_binding' representing
> children. */
> > > - struct shash children;
> > > -};
> > >
> > > static struct local_binding *
> > > local_binding_create(const char *name, const struct
> > ovsrec_interface *iface,
> > > @@ -590,12 +575,6 @@ local_binding_add(struct shash
> > *local_bindings, struct local_binding *lbinding)
> > > shash_add(local_bindings, lbinding->name, lbinding);
> > > }
> > >
> > > -static struct local_binding *
> > > -local_binding_find(struct shash *local_bindings, const char
> *name)
> > > -{
> > > - return shash_find_data(local_bindings, name);
> > > -}
> > > -
> > > static void
> > > local_binding_destroy(struct local_binding *lbinding)
> > > {
> > > diff --git a/controller/binding.h b/controller/binding.h
> > > index f7ace6faf..21118ecd4 100644
> > > --- a/controller/binding.h
> > > +++ b/controller/binding.h
> > > @@ -18,6 +18,7 @@
> > > #define OVN_BINDING_H 1
> > >
> > > #include <stdbool.h>
> > > +#include "openvswitch/shash.h"
> > >
> > > struct hmap;
> > > struct ovsdb_idl;
> > > @@ -32,7 +33,6 @@ struct sbrec_chassis;
> > > struct sbrec_port_binding_table;
> > > struct sset;
> > > struct sbrec_port_binding;
> > > -struct shash;
> > >
> > > struct binding_ctx_in {
> > > struct ovsdb_idl_txn *ovnsb_idl_txn;
> > > @@ -60,6 +60,28 @@ struct binding_ctx_out {
> > > struct smap *local_iface_ids;
> > > };
> > >
> > > +enum local_binding_type {
> > > + BT_VIF,
> > > + BT_CONTAINER,
> > > + BT_VIRTUAL
> > > +};
> > > +
> > > +struct local_binding {
> > > + char *name;
> > > + enum local_binding_type type;
> > > + const struct ovsrec_interface *iface;
> > > + const struct sbrec_port_binding *pb;
> > > +
> > > + /* shash of 'struct local_binding' representing
> children. */
> > > + struct shash children;
> > > +};
> > > +
> > > +static inline struct local_binding *
> > > +local_binding_find(struct shash *local_bindings, const char
> *name)
> > > +{
> > > + return shash_find_data(local_bindings, name);
> > > +}
> > > +
> > > void binding_register_ovs_idl(struct ovsdb_idl *);
> > > void binding_run(struct binding_ctx_in *, struct
> binding_ctx_out *);
> > > bool binding_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
> > > diff --git a/controller/ovn-controller.c
> b/controller/ovn-controller.c
> > > index 687a33c88..795a1c297 100644
> > > --- a/controller/ovn-controller.c
> > > +++ b/controller/ovn-controller.c
> > > @@ -1368,8 +1368,13 @@ static void init_physical_ctx(struct
> > engine_node *node,
> > >
> > > ovs_assert(br_int && chassis);
> > >
> > > + struct ovsrec_interface_table *iface_table =
> > > + (struct ovsrec_interface_table *)EN_OVSDB_GET(
> > > + engine_get_input("OVS_interface", node));
> > > +
> > > struct ed_type_ct_zones *ct_zones_data =
> > > engine_get_input_data("ct_zones", node);
> > > +
> > > struct simap *ct_zones = &ct_zones_data->current;
> > >
> > > p_ctx->sbrec_port_binding_by_name =
> sbrec_port_binding_by_name;
> > > @@ -1377,12 +1382,14 @@ static void init_physical_ctx(struct
> > engine_node *node,
> > > p_ctx->mc_group_table = multicast_group_table;
> > > p_ctx->br_int = br_int;
> > > p_ctx->chassis_table = chassis_table;
> > > + p_ctx->iface_table = iface_table;
> > > p_ctx->chassis = chassis;
> > > p_ctx->active_tunnels = &rt_data->active_tunnels;
> > > p_ctx->local_datapaths = &rt_data->local_datapaths;
> > > p_ctx->local_lports = &rt_data->local_lports;
> > > p_ctx->ct_zones = ct_zones;
> > > p_ctx->mff_ovn_geneve = ed_mff_ovn_geneve->mff_ovn_geneve;
> > > + p_ctx->local_bindings = &rt_data->local_bindings;
> > > }
> > >
> > > static void init_lflow_ctx(struct engine_node *node,
> > > @@ -1790,6 +1797,124 @@ flow_output_port_groups_handler(struct
> > engine_node *node, void *data)
> > > return _flow_output_resource_ref_handler(node, data,
> > REF_TYPE_PORTGROUP);
> > > }
> > >
> > > +/* Engine node en_physical_flow_changes indicates whether
> > > + * there is a need to
> > > + * - recompute only physical flows or
> > > + * - we can incrementally process the physical flows.
> > > + *
> > > + * en_physical_flow_changes is an input to flow_output
> engine node.
> > > + * If the engine node 'en_physical_flow_changes' gets
> updated during
> > > + * engine run, it means the handler for this -
> > > + * flow_output_physical_flow_changes_handler() will either
> > > + * - recompute the physical flows by calling
> 'physical_run() or
> > > + * - incrementlly process some of the changes for
> physical flow
> > > + * calculation. Right now we handle OVS interfaces changes
> > > + * for physical flow computation.
> > > + *
> > > + * When ever a port binding happens, the follow up
> > > + * activity is the zone id allocation for that port binding.
> > > + * With this intermediate engine node, we avoid full
> recomputation.
> > > + * Instead we do physical flow computation (either full
> recomputation
> > > + * by calling physical_run() or handling the changes
> incrementally.
> > > + *
> > > + * Hence this is an intermediate engine node to indicate the
> > > + * flow_output engine to recomputes/compute the physical flows.
> > > + *
> > > + * TODO 1. Ideally this engine node should
> recompute/compute the
> > physica
> >
> > Typo: s/physica/physical.
> >
> > > + * flows instead of relegating it to the flow_output node.
> > > + * But this requires splitting the flow_output node to
> > > + * logical_flow_output and physical_flow_output.
> > > + *
> > > + * TODO 2. We can further optimise the en_ct_zone changes to
> > > + * compute the phsyical flows for changed zone ids.
> > > + *
> > > + * TODO 3: physical.c has a global simap -localvif_to_ofport
> > which stores the
> > > + * local OVS interfaces and the ofport numbers. Ideally
> > this should be
> > > + * part of the engine data.
> > > + */
> > > +struct ed_type_pfc_data {
> > > + bool recompute_physical_flows;
> > > + bool ovs_ifaces_changed;
> > > +};
> > > +
> > > +static void
> > > +en_physical_flow_changes_clear_tracked_data(void *data_)
> > > +{
> > > + struct ed_type_pfc_data *data = data_;
> > > + data->recompute_physical_flows = false;
> > > + data->ovs_ifaces_changed = false;
> > > +}
> > > +
> >
> > Nothing wrong here but it seems a bit confusing that we have a
> function
> > called *_clear_tracked_data() which just resets the node's data.
> >
> > This makes me think more about the name we used in patch 4,
> > "clear_tracked_data", and wonder if we need a less specific
> name for it.
> > Would "reset_data" be more accurate?
> >
> >
> > I don't know. reset_data() would give the impression that it will
> reset
> > the whole engine data.
> >
>
> My argument against clear_tracked_data is that
> en_physical_flow_changes_clear_tracked_data() doesn't clear any tracked
> data. It clears some of the generic data stored in the node.
>
>
> In earlier versions, there was a separate variable for tracked_data.
> Now the tracked data is part of engine data. So it's up to the node
> to how to want to maintain and manage the tracked data.
>
> IMO, the recompute_physical_flows and ovs_ifaces_changed are tracked data,
> because these are updated and cleared during the engine run.
>
> Thanks
> Numan
>
Ok, it's just a matter of naming and if you think clear_tracked_data()
is the way to go, I'm ok with it :)
Thanks,
Dumitru
>
> Otoh my personal interpretation of "reset_data" would be "reset all data
> that needs to be reset". But I don't have a very strong opinion about
> this I guess.
>
> >
> >
> > > +static void *
> > > +en_physical_flow_changes_init(struct engine_node *node
> OVS_UNUSED,
> > > + struct engine_arg *arg
> OVS_UNUSED)
> > > +{
> > > + struct ed_type_pfc_data *data = xzalloc(sizeof *data);
> > > + return data;
> > > +}
> > > +
> > > +static void
> > > +en_physical_flow_changes_cleanup(void *data OVS_UNUSED)
> > > +{
> > > +}
> > > +
> > > +/* Indicate to the flow_output engine that we need to recompute
> > physical
> > > + * flows. */
> > > +static void
> > > +en_physical_flow_changes_run(struct engine_node *node, void
> *data)
> > > +{
> > > + struct ed_type_pfc_data *pfc_tdata = data;
> > > + pfc_tdata->recompute_physical_flows = true;
> > > + engine_set_node_state(node, EN_UPDATED);
> > > +}
> > > +
> > > +/* There are OVS interface changes. Indicate to the flow_output
> > engine
> > > + * to handle these OVS interface changes for physical flow
> > computations. */
> > > +static bool
> > > +physical_flow_changes_ovs_iface_handler(struct engine_node
> *node,
> > void *data)
> > > +{
> > > + struct ed_type_pfc_data *pfc_tdata = data;
> > > + pfc_tdata->ovs_ifaces_changed = true;
> > > + engine_set_node_state(node, EN_UPDATED);
> > > + return true;
> > > +}
> > > +
> > > +static bool
> > > +flow_output_physical_flow_changes_handler(struct engine_node
> > *node, void *data)
> > > +{
> > > + struct ed_type_runtime_data *rt_data =
> > > + engine_get_input_data("runtime_data", node);
> > > +
> > > + struct ed_type_flow_output *fo = data;
> > > + struct physical_ctx p_ctx;
> > > + init_physical_ctx(node, rt_data, &p_ctx);
> > > +
> > > + engine_set_node_state(node, EN_UPDATED);
> > > + struct ed_type_pfc_data *pfc_data =
> > > + engine_get_input_data("physical_flow_changes", node);
> > > +
> > > + if (pfc_data->recompute_physical_flows) {
> > > + /* This indicates that we need to recompute the
> physical
> > flows. */
> > > + physical_run(&p_ctx, &fo->flow_table);
> > > + return true;
> > > + }
> > > +
> > > + if (pfc_data->ovs_ifaces_changed) {
> > > + /* There are OVS interface changes. Try to handle them
> > > + * incrementally. */
> > > + return physical_handle_ovs_iface_changes(&p_ctx,
> > &fo->flow_table);
> > > + }
> > > +
> >
> > Can it happen that we have both pfc_data->ovs_ifaces_changed and
> > pfc_data->recompute_physical_flows true? If so, shouldn't the
> order of
> > the two if statements above be reversed?
> >
> >
> > If both are true, then recompute_physical_flows() has to take
> precedence
> > because there is no need to call
> physical_handle_ovs_iface_changes() then.
> > As physical_run() is a recompute of physical flows.
>
> Ah got it, thanks for the explanation!
>
> Thanks,
> Dumitru
>
> >
> > Thanks
> > Numan
> >
> >
> > > + return true;
> > > +}
> > > +
> > > +static bool
> > > +flow_output_noop_handler(struct engine_node *node OVS_UNUSED,
> > > + void *data OVS_UNUSED)
> > > +{
> > > + return true;
> > > +}
> > > +
> >
> > Please see the comment about noop_handler in patch 2.
> >
> > > struct ovn_controller_exit_args {
> > > bool *exiting;
> > > bool *restart;
> > > @@ -1914,6 +2039,8 @@ main(int argc, char *argv[])
> > > ENGINE_NODE(runtime_data, "runtime_data");
> > > ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
> > > ENGINE_NODE(ofctrl_is_connected, "ofctrl_is_connected");
> > > + ENGINE_NODE_WITH_CLEAR_TRACK_DATA(physical_flow_changes,
> > > + "physical_flow_changes");
> > > ENGINE_NODE(flow_output, "flow_output");
> > > ENGINE_NODE(addr_sets, "addr_sets");
> > > ENGINE_NODE(port_groups, "port_groups");
> > > @@ -1933,13 +2060,28 @@ main(int argc, char *argv[])
> > > engine_add_input(&en_port_groups, &en_sb_port_group,
> > > port_groups_sb_port_group_handler);
> > >
> > > + /* Engine node physical_flow_changes indicates whether
> > > + * we can recompute only physical flows or we can
> > > + * incrementally process the physical flows.
> > > + */
> > > + engine_add_input(&en_physical_flow_changes, &en_ct_zones,
> > > + NULL);
> > > + engine_add_input(&en_physical_flow_changes,
> &en_ovs_interface,
> > > + physical_flow_changes_ovs_iface_handler);
> > > +
> > > engine_add_input(&en_flow_output, &en_addr_sets,
> > > flow_output_addr_sets_handler);
> > > engine_add_input(&en_flow_output, &en_port_groups,
> > > flow_output_port_groups_handler);
> > > engine_add_input(&en_flow_output, &en_runtime_data, NULL);
> > > - engine_add_input(&en_flow_output, &en_ct_zones, NULL);
> > > engine_add_input(&en_flow_output, &en_mff_ovn_geneve,
> NULL);
> > > + engine_add_input(&en_flow_output,
> &en_physical_flow_changes,
> > > +
> flow_output_physical_flow_changes_handler);
> > > +
> > > + /* We need this input nodes for only data. Hence the noop
> > handler. */
> > > + engine_add_input(&en_flow_output, &en_ct_zones,
> > flow_output_noop_handler);
> > > + engine_add_input(&en_flow_output, &en_ovs_interface,
> > > + flow_output_noop_handler);
> > >
> > > engine_add_input(&en_flow_output, &en_ovs_open_vswitch,
> NULL);
> > > engine_add_input(&en_flow_output, &en_ovs_bridge, NULL);
> > > diff --git a/controller/physical.c b/controller/physical.c
> > > index f06313b9d..240ec158e 100644
> > > --- a/controller/physical.c
> > > +++ b/controller/physical.c
> > > @@ -1780,6 +1780,57 @@ physical_run(struct physical_ctx *p_ctx,
> > > simap_destroy(&new_tunnel_to_ofport);
> > > }
> > >
> > > +bool
> > > +physical_handle_ovs_iface_changes(struct physical_ctx *p_ctx,
> > > + struct ovn_desired_flow_table
> > *flow_table)
> > > +{
> > > + const struct ovsrec_interface *iface_rec;
> > > + OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec,
> > p_ctx->iface_table) {
> > > + if (!strcmp(iface_rec->type, "geneve") ||
> > > + !strcmp(iface_rec->type, "patch")) {
> > > + return false;
> > > + }
> > > + }
> > > +
> > > + struct ofpbuf ofpacts;
> > > + ofpbuf_init(&ofpacts, 0);
> > > +
> > > + OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec,
> > p_ctx->iface_table) {
> > > + const char *iface_id =
> smap_get(&iface_rec->external_ids,
> > "iface-id");
> > > + if (!iface_id) {
> > > + continue;
> > > + }
> > > +
> > > + const struct local_binding *lb =
> > > + local_binding_find(p_ctx->local_bindings,
> iface_id);
> > > +
> > > + if (!lb || !lb->pb) {
> > > + continue;
> > > + }
> > > +
> > > + int64_t ofport = iface_rec->n_ofport ?
> *iface_rec->ofport
> > : 0;
> > > + if (ovsrec_interface_is_deleted(iface_rec)) {
> > > + ofctrl_remove_flows(flow_table,
> &lb->pb->header_.uuid);
> > > + simap_find_and_delete(&localvif_to_ofport,
> iface_id);
> > > + } else {
> > > + if (!ovsrec_interface_is_new(iface_rec)) {
> > > + ofctrl_remove_flows(flow_table,
> > &lb->pb->header_.uuid);
> > > + }
> > > +
> > > + simap_put(&localvif_to_ofport, iface_id, ofport);
> > > +
> consider_port_binding(p_ctx->sbrec_port_binding_by_name,
> > > + p_ctx->mff_ovn_geneve,
> > p_ctx->ct_zones,
> > > + p_ctx->active_tunnels,
> > > + p_ctx->local_datapaths,
> > > + lb->pb, p_ctx->chassis,
> > > + flow_table, &ofpacts);
> > > + }
> > > + }
> > > +
> > > + ofpbuf_uninit(&ofpacts);
> > > + return true;
> > > +}
> > > +
> > > bool
> > > get_tunnel_ofport(const char *chassis_name, char *encap_ip,
> > ofp_port_t *ofport)
> > > {
> > > diff --git a/controller/physical.h b/controller/physical.h
> > > index dadf84ea1..9ca34436a 100644
> > > --- a/controller/physical.h
> > > +++ b/controller/physical.h
> > > @@ -49,11 +49,13 @@ struct physical_ctx {
> > > const struct ovsrec_bridge *br_int;
> > > const struct sbrec_chassis_table *chassis_table;
> > > const struct sbrec_chassis *chassis;
> > > + const struct ovsrec_interface_table *iface_table;
> > > const struct sset *active_tunnels;
> > > struct hmap *local_datapaths;
> > > struct sset *local_lports;
> > > const struct simap *ct_zones;
> > > enum mf_field_id mff_ovn_geneve;
> > > + struct shash *local_bindings;
> > > };
> > >
> > > void physical_register_ovs_idl(struct ovsdb_idl *);
> > > @@ -63,7 +65,8 @@ void
> physical_handle_port_binding_changes(struct
> > physical_ctx *,
> > > struct
> > ovn_desired_flow_table *);
> > > void physical_handle_mc_group_changes(struct physical_ctx *,
> > > struct
> > ovn_desired_flow_table *);
> > > -
> > > +bool physical_handle_ovs_iface_changes(struct physical_ctx *,
> > > + struct
> > ovn_desired_flow_table *);
> > > bool get_tunnel_ofport(const char *chassis_name, char
> *encap_ip,
> > > ofp_port_t *ofport);
> > > #endif /* controller/physical.h */
> > >
> >
> > _______________________________________________
> > dev mailing list
> > dev at openvswitch.org <mailto:dev at openvswitch.org>
> <mailto:dev at openvswitch.org <mailto:dev at openvswitch.org>>
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org <mailto:dev at openvswitch.org>
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
More information about the dev
mailing list