[ovs-dev] [PATCH ovn v3 5/5] controller: Improve ct zone handling.

Han Zhou zhouhan at gmail.com
Mon Aug 2 07:27:34 UTC 2021


On Tue, Jul 27, 2021 at 7:48 PM <numans at ovn.org> wrote:
>
> From: Numan Siddique <numans at ovn.org>
>
> Prior to this patch, ovn-controller generates a zone id for each
> OVS interface which has external_ids:iface-id set even if there is
> no corresponding logical port for it.  This patch now changes the
> way we allocate the zone id.  A zone id is allocated only if
> there is an OVS interface with external_ids:iface-id and the
> corresponding logical port is claimed.  We use the runtime data
> (rt_data->lbinding_data.lports) for this.
>
> This patch also improves the ct_zones_runtime_data_handler()
> by using the tracked datapath data to allocate the zone ids
> for newly claimed lports or to free up the zone id for
> the released lports instead of triggering a full recompute.
>
> And finally this patch also adds a ct zone handler for pflow_output
> engine.  This handler falls back to recompute if the ct zone engine
> was recomputed.  Otherwise it returns true.
>
> Signed-off-by: Numan Siddique <numans at ovn.org>
> ---
>  controller/ovn-controller.c | 133 +++++++++++++++++++++++++++---------
>  lib/inc-proc-eng.h          |   4 ++
>  tests/ovn.at                |   2 +-
>  3 files changed, 106 insertions(+), 33 deletions(-)
>
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index a5169ec0e..50a42c33f 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -624,8 +624,32 @@ add_pending_ct_zone_entry(struct shash
*pending_ct_zones,
>      }
>  }
>
> +static bool
> +alloc_id_to_ct_zone(const char *zone_name, struct simap *ct_zones,
> +                    unsigned long *ct_zone_bitmap, int *scan_start,
> +                    struct shash *pending_ct_zones)
> +{
> +    /* We assume that there are 64K zones and that we own them all. */
> +    int zone = bitmap_scan(ct_zone_bitmap, 0, *scan_start, MAX_CT_ZONES
+ 1);
> +    if (zone == MAX_CT_ZONES + 1) {
> +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
> +        VLOG_WARN_RL(&rl, "exhausted all ct zones");
> +        return false;
> +    }
> +
> +    *scan_start = zone + 1;
> +
> +    add_pending_ct_zone_entry(pending_ct_zones, CT_ZONE_OF_QUEUED,
> +                              zone, true, zone_name);
> +
> +    bitmap_set1(ct_zone_bitmap, zone);
> +    simap_put(ct_zones, zone_name, zone);
> +    return true;
> +}
> +
>  static void
> -update_ct_zones(const struct sset *lports, const struct hmap
*local_datapaths,
> +update_ct_zones(const struct shash *binding_lports,
> +                const struct hmap *local_datapaths,
>                  struct simap *ct_zones, unsigned long *ct_zone_bitmap,
>                  struct shash *pending_ct_zones)
>  {
> @@ -636,8 +660,9 @@ update_ct_zones(const struct sset *lports, const
struct hmap *local_datapaths,
>      struct simap req_snat_zones = SIMAP_INITIALIZER(&req_snat_zones);
>      unsigned long unreq_snat_zones[BITMAP_N_LONGS(MAX_CT_ZONES)];
>
> -    SSET_FOR_EACH(user, lports) {
> -        sset_add(&all_users, user);
> +    struct shash_node *shash_node;
> +    SHASH_FOR_EACH (shash_node, binding_lports) {
> +        sset_add(&all_users, shash_node->name);
>      }
>
>      /* Local patched datapath (gateway routers) need zones assigned. */
> @@ -725,26 +750,12 @@ update_ct_zones(const struct sset *lports, const
struct hmap *local_datapaths,
>      /* Assign a unique zone id for each logical port and two zones
>       * to a gateway router. */
>      SSET_FOR_EACH(user, &all_users) {
> -        int zone;
> -
>          if (simap_contains(ct_zones, user)) {
>              continue;
>          }
>
> -        /* We assume that there are 64K zones and that we own them all.
*/
> -        zone = bitmap_scan(ct_zone_bitmap, 0, scan_start, MAX_CT_ZONES +
1);
> -        if (zone == MAX_CT_ZONES + 1) {
> -            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1,
1);
> -            VLOG_WARN_RL(&rl, "exhausted all ct zones");
> -            break;
> -        }
> -        scan_start = zone + 1;
> -
> -        add_pending_ct_zone_entry(pending_ct_zones, CT_ZONE_OF_QUEUED,
> -                                  zone, true, user);
> -
> -        bitmap_set1(ct_zone_bitmap, zone);
> -        simap_put(ct_zones, user, zone);
> +        alloc_id_to_ct_zone(user, ct_zones, ct_zone_bitmap, &scan_start,
> +                            pending_ct_zones);
>      }
>
>      simap_destroy(&req_snat_zones);
> @@ -1769,6 +1780,9 @@ struct ed_type_ct_zones {
>      unsigned long bitmap[BITMAP_N_LONGS(MAX_CT_ZONES)];
>      struct shash pending;
>      struct simap current;
> +
> +    /* Tracked data. */
> +    bool recomputed;
>  };
>
>  static void *
> @@ -1791,6 +1805,13 @@ en_ct_zones_init(struct engine_node *node, struct
engine_arg *arg OVS_UNUSED)
>      return data;
>  }
>
> +static void
> +en_ct_zones_clear_tracked_data(void *data_)
> +{
> +    struct ed_type_ct_zones *data = data_;
> +    data->recomputed = false;
> +}
> +
>  static void
>  en_ct_zones_cleanup(void *data)
>  {
> @@ -1807,11 +1828,12 @@ en_ct_zones_run(struct engine_node *node, void
*data)
>      struct ed_type_runtime_data *rt_data =
>          engine_get_input_data("runtime_data", node);
>
> -    update_ct_zones(&rt_data->local_lports, &rt_data->local_datapaths,
> +    update_ct_zones(&rt_data->lbinding_data.lports,
&rt_data->local_datapaths,
>                      &ct_zones_data->current, ct_zones_data->bitmap,
>                      &ct_zones_data->pending);
>
>
> +    ct_zones_data->recomputed = true;
>      engine_set_node_state(node, EN_UPDATED);
>  }
>
> @@ -1869,7 +1891,7 @@ ct_zones_datapath_binding_handler(struct
engine_node *node, void *data)
>  }
>
>  static bool
> -ct_zones_runtime_data_handler(struct engine_node *node, void *data
OVS_UNUSED)
> +ct_zones_runtime_data_handler(struct engine_node *node, void *data)
>  {
>      struct ed_type_runtime_data *rt_data =
>          engine_get_input_data("runtime_data", node);
> @@ -1879,24 +1901,51 @@ ct_zones_runtime_data_handler(struct engine_node
*node, void *data OVS_UNUSED)
>          return false;
>      }
>
> -    /* If local_lports have changed then fall back to full recompute. */
> -    if (rt_data->local_lports_changed) {
> -        return false;
> -    }
> +    struct ed_type_ct_zones *ct_zones_data = data;
>
>      struct hmap *tracked_dp_bindings = &rt_data->tracked_dp_bindings;
>      struct tracked_datapath *tdp;
> +    int scan_start = 0;
> +
> +    bool updated = false;
> +
>      HMAP_FOR_EACH (tdp, node, tracked_dp_bindings) {
>          if (tdp->tracked_type == TRACKED_RESOURCE_NEW) {
>              /* A new datapath has been added. Fall back to full
recompute. */
>              return false;
>          }
>
> -        /* When an lport is claimed or released because of port binding,
> -         * changes we don't have to compute the ct zone entries for
these.
> -         * That is because we generate the ct zone entries for each local
> -         * OVS interface which has external_ids:iface-id set.  For the
local
> -         * OVS interface changes, rt_data->local_ports_changed will be
true. */
> +        struct shash_node *shash_node;
> +        SHASH_FOR_EACH (shash_node, &tdp->lports) {
> +            struct tracked_lport *t_lport = shash_node->data;
> +            if (t_lport->tracked_type == TRACKED_RESOURCE_NEW) {
> +                if (!simap_contains(&ct_zones_data->current,
> +                                    t_lport->pb->logical_port)) {
> +                    alloc_id_to_ct_zone(t_lport->pb->logical_port,
> +                                        &ct_zones_data->current,
> +                                        ct_zones_data->bitmap,
&scan_start,
> +                                        &ct_zones_data->pending);
> +                    updated = true;
> +                }
> +            } else if (t_lport->tracked_type ==
TRACKED_RESOURCE_REMOVED) {
> +                struct simap_node *ct_zone =
> +                    simap_find(&ct_zones_data->current,
> +                               t_lport->pb->logical_port);
> +                if (ct_zone) {
> +                    add_pending_ct_zone_entry(
> +                        &ct_zones_data->pending, CT_ZONE_DB_QUEUED,
> +                        ct_zone->data, false, ct_zone->name);
> +
> +                    bitmap_set0(ct_zones_data->bitmap, ct_zone->data);
> +                    simap_delete(&ct_zones_data->current, ct_zone);
> +                    updated = true;
> +                }
> +            }

nit: The enum en_tracked_resource_type has three values, but here only two
were checked. I understand that t_lport->tracked_type would never be
"UPDATED" in the current implementation, but it is better to have a final
else (or use switch/case) to assert with OVS_NOT_REACHED. With this
addressed:

Acked-by: Han Zhou <hzhou at ovn.org>

> +        }
> +    }
> +
> +    if (updated) {
> +        engine_set_node_state(node, EN_UPDATED);
>      }
>
>      return true;
> @@ -2758,6 +2807,25 @@ pflow_output_runtime_data_handler(struct
engine_node *node, void *data)
>      return true;
>  }
>
> +static bool
> +pflow_output_ct_zones_handler(struct engine_node *node OVS_UNUSED,
> +                                    void *data OVS_UNUSED)
> +{
> +    struct ed_type_ct_zones *ct_zones_data =
> +        engine_get_input_data("ct_zones", node);
> +
> +    /* If ct_zones engine node was recomputed, then fall back to full
> +     * recompute of pflow_output.  Otherwise there is no need to do
> +     * anything for the following reasons:
> +     *   - When an lport is claimed, ct zone handler for the
> +     *     runtime_data handler allocates the zone id for the lport (and
it is
> +     *     saved in the br-int external_ids).
> +     *   - pflow_output handler for the runtime data  adds
> +     *     the physical flows for the claimed lport.
> +     * */
> +    return !ct_zones_data->recomputed;
> +}
> +
>  static void *
>  en_flow_output_init(struct engine_node *node OVS_UNUSED,
>                      struct engine_arg *arg OVS_UNUSED)
> @@ -3013,7 +3081,7 @@ main(int argc, char *argv[])
>      stopwatch_create(BFD_RUN_STOPWATCH_NAME, SW_MS);
>
>      /* Define inc-proc-engine nodes. */
> -    ENGINE_NODE_CUSTOM_DATA(ct_zones, "ct_zones");
> +    ENGINE_NODE_CUSTOM_WITH_CLEAR_TRACK_DATA(ct_zones, "ct_zones");
>      ENGINE_NODE_WITH_CLEAR_TRACK_DATA(runtime_data, "runtime_data");
>      ENGINE_NODE(non_vif_data, "non_vif_data");
>      ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
> @@ -3054,7 +3122,8 @@ main(int argc, char *argv[])
>       */
>      engine_add_input(&en_pflow_output, &en_non_vif_data,
>                       NULL);
> -    engine_add_input(&en_pflow_output, &en_ct_zones, NULL);
> +    engine_add_input(&en_pflow_output, &en_ct_zones,
> +                     pflow_output_ct_zones_handler);
>      engine_add_input(&en_pflow_output, &en_sb_chassis,
>                       pflow_lflow_output_sb_chassis_handler);
>
> diff --git a/lib/inc-proc-eng.h b/lib/inc-proc-eng.h
> index 7e9f5bb70..859b30a71 100644
> --- a/lib/inc-proc-eng.h
> +++ b/lib/inc-proc-eng.h
> @@ -302,6 +302,10 @@ void engine_ovsdb_node_add_index(struct engine_node
*, const char *name,
>  #define ENGINE_NODE_CUSTOM_DATA(NAME, NAME_STR) \
>      ENGINE_NODE_DEF(NAME, NAME_STR)
>
> +#define ENGINE_NODE_CUSTOM_WITH_CLEAR_TRACK_DATA(NAME, NAME_STR) \
> +    ENGINE_NODE_CUSTOM_DATA(NAME, NAME_STR) \
> +    en_##NAME.clear_tracked_data = en_##NAME##_clear_tracked_data;
> +
>  #define ENGINE_NODE(NAME, NAME_STR) \
>      static bool (*en_##NAME##_is_valid)(struct engine_node *node) =
NULL; \
>      ENGINE_NODE_DEF(NAME, NAME_STR)
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 13d860f10..26c4dc3f6 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -23349,7 +23349,7 @@ OVS_WAIT_UNTIL([test $(ovs-ofctl dump-flows
br-int table=38,metadata=${sw0_dpkey
>  reg15=0x${p1_dpkey} | grep REG13 | wc -l) -eq 0])
>
>  p1_zoneid=$(as hv1 ovs-vsctl get bridge br-int
external_ids:ct-zone-sw0-p1 | sed 's/"//g')
> -AT_CHECK([test ! -z $p1_zoneid])
> +AT_CHECK([test -z $p1_zoneid])
>
>  OVN_CLEANUP([hv1])
>  AT_CLEANUP
> --
> 2.31.1
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev


More information about the dev mailing list