[ovs-dev] [PATCH ovn v2 4/4] ovn-controller: Fix port group conjunction flow explosion problem.

Mark Gray mark.d.gray at redhat.com
Thu Apr 29 16:10:47 UTC 2021


On 28/04/2021 19:41, Han Zhou wrote:
> On Wed, Apr 28, 2021 at 7:06 AM Mark Gray <mark.d.gray at redhat.com> wrote:
>>
>> On 27/04/2021 18:29, Han Zhou wrote:
>>> On Tue, Apr 27, 2021 at 8:43 AM Mark Gray <mark.d.gray at redhat.com>
> wrote:
>>>>
>>>> On 22/04/2021 21:14, Han Zhou wrote:
>>>>> For an ACL with match: outport == @PG && ip4.src == $PG_AS, given
> below
>>>>> scale:
>>>>>
>>>>> P: PG size
>>>>> LP: number of local lports
>>>>> D: number of all datapaths (logical switches)
>>>>> LD: number of datapaths that contain local lports
>>>>>
>>>>> With current OVN implementation, the total number of OF flows is:
>>>>>     LP + (P * D) + D
>>>>>
>>>>> The reason is, firstly, datapath is not part of the conjunction, so
> for
>>>>> each datapath the lflow is reparsed.
>>>>>
>>>>> Secondly, although ovn-controller tries to filter out the flows that
> are
>>>>> for non-local lports, with the conjunction match, the logic that
> filters
>>>>> out non-local flows doesn't work for the conjunction part that doesn't
>>>>> have the lport in the match (the P * D part). When there is only one
>>>>> port on each LS it is fine, because no conjunction will be used
> because
>>>>> SB port groups are splited per datapath, so each port group would have
>>>> suggest "split per datapath"
>>>
>>> Ack
>>>
>>>>> only one port. However, when more than one ports are on each LS the
> flow
>>>>> explosion happens.
>>>>>
>>>>> This patch deal with the second reason above, by refining the SB port
>>>>> groups to store only locally bound lports: empty const sets will not
>>>>> generate any flows. This reduces the related flow number from
>>>>> LP + (P * D) + D to LP + (P * LD) + LD.
>>>>>
>>>>> Since LD is expected to be small, so even if it is a multiplier, the
>>>>> total number is larged reduced.  In particular, in ovn-k8s use cases
> the
>>>> suggest "reduced significantly"
>>>
>>> Ack
>>>
>>>>> LD is always 1, so the formular above becomes LP + P + LD.
>>>>>
>>>> s/formular/formula
>>>
>>> Ack
>>>
>>>>> With a scale of 1k k8s nodes, each has 4 ports for the same PG: P =
> 4k,
>>>>> LP = 4, D = 1k, LD = 1. The current implementation generates ~4m
> flows.
>>>>> With this patch it becomes only ~4k.
>>>> Cool!
>>>>>
>>>>> Reported-by: Girish Moodalbail <gmoodalbail at nvidia.com>
>>>>> Reported-at:
>>> https://mail.openvswitch.org/pipermail/ovs-dev/2021-March/381082.html
>>>>> Reported-by: Dumitru Ceara <dceara at redhat.com>
>>>>> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1944098
>>>>> Signed-off-by: Han Zhou <hzhou at ovn.org>
>>>>
>>>> I tested this as well and it seemed to work as expected.
>>>
>>> Thanks for the test!
>>>
>>>>> ---
>>>>> v1->v2: fix memory leaks found by address sanitizer
>>>>>
>>>>>  controller/binding.c        |  20 ++++
>>>>>  controller/binding.h        |   9 ++
>>>>>  controller/ovn-controller.c | 217
> ++++++++++++++++++++++++++++++------
>>>>>  include/ovn/expr.h          |   2 +-
>>>>>  lib/expr.c                  |   8 +-
>>>>>  tests/ovn.at                |  53 +++++++++
>>>>>  tests/test-ovn.c            |  12 +-
>>>>>  utilities/ovn-trace.c       |   4 +-
>>>>>  8 files changed, 283 insertions(+), 42 deletions(-)
>>>>>
>>>>> diff --git a/controller/binding.c b/controller/binding.c
>>>>> index 514f5f33f..5aca964cc 100644
>>>>> --- a/controller/binding.c
>>>>> +++ b/controller/binding.c
>>>>> @@ -2987,3 +2987,23 @@ cleanup:
>>>>>
>>>>>      return b_lport;
>>>>>  }
>>>>> +
>>>>> +struct sset *
>>>>> +binding_collect_local_binding_lports(struct local_binding_data
>>> *lbinding_data)
>>>>> +{
>>>>> +    struct sset *lports = xzalloc(sizeof *lports);
>>>>> +    sset_init(lports);
>>>>> +    struct shash_node *shash_node;
>>>>> +    SHASH_FOR_EACH (shash_node, &lbinding_data->lports) {
>>>>> +        struct binding_lport *b_lport = shash_node->data;
>>>>> +        sset_add(lports, b_lport->name);
>>>>> +    }
>>>>> +    return lports;
>>>>> +}
>>>>> +
>>>>> +void
>>>>> +binding_destroy_local_binding_lports(struct sset *lports)
>>>>> +{
>>>>> +    sset_destroy(lports);
>>>>> +    free(lports);
>>>>> +}
>>>>> diff --git a/controller/binding.h b/controller/binding.h
>>>>> index 4fc9ef207..31f0352a0 100644
>>>>> --- a/controller/binding.h
>>>>> +++ b/controller/binding.h
>>>>> @@ -128,4 +128,13 @@ void binding_seqno_run(struct local_binding_data
>>> *lbinding_data);
>>>>>  void binding_seqno_install(struct local_binding_data *lbinding_data);
>>>>>  void binding_seqno_flush(void);
>>>>>  void binding_dump_local_bindings(struct local_binding_data *, struct
>>> ds *);
>>>>> +
>>>>> +/* Generates a sset of lport names from local_binding_data.
>>>>> + * Note: the caller is responsible for destroying and freeing the
>>> returned
>>>>> + * sset, by calling binding_collect_local_binding_lports(). */
>>>> I think this^ should say binding_destroy_local_binding_lports()?
>>>
>>> Oops. My bad.
>>>
>>>>> +struct sset *binding_collect_local_binding_lports(struct
>>> local_binding_data *);
>>>>> +
>>>>> +/* Destroy and free the lports sset returned by
>>>>> + * binding_collect_local_binding_lports(). */
>>>>> +void binding_destroy_local_binding_lports(struct sset *lports);
>>>>>  #endif /* controller/binding.h */
>>>>> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
>>>>> index 7320bd56c..c6ba9ff88 100644
>>>>> --- a/controller/ovn-controller.c
>>>>> +++ b/controller/ovn-controller.c
>>>>> @@ -1341,7 +1341,7 @@ addr_sets_init(const struct
>>> sbrec_address_set_table *address_set_table,
>>>>>      SBREC_ADDRESS_SET_TABLE_FOR_EACH (as, address_set_table) {
>>>>>          expr_const_sets_add(addr_sets, as->name,
>>>>>                              (const char *const *) as->addresses,
>>>>> -                            as->n_addresses, true);
>>>>> +                            as->n_addresses, true, NULL);
>>>>>      }
>>>>>  }
>>>>>
>>>>> @@ -1358,7 +1358,7 @@ addr_sets_update(const struct
>>> sbrec_address_set_table *address_set_table,
>>>>>          } else {
>>>>>              expr_const_sets_add(addr_sets, as->name,
>>>>>                                  (const char *const *) as->addresses,
>>>>> -                                as->n_addresses, true);
>>>>> +                                as->n_addresses, true, NULL);
>>>>>              if (sbrec_address_set_is_new(as)) {
>>>>>                  sset_add(new, as->name);
>>>>>              } else {
>>>>> @@ -1367,6 +1367,7 @@ addr_sets_update(const struct
>>> sbrec_address_set_table *address_set_table,
>>>>>          }
>>>>>      }
>>>>>  }
>>>>> +
>>>>>  static void
>>>>>  en_addr_sets_run(struct engine_node *node, void *data)
>>>>>  {
>>>>> @@ -1415,20 +1416,72 @@ addr_sets_sb_address_set_handler(struct
>>> engine_node *node, void *data)
>>>>>  }
>>>>>
>>>>>  struct ed_type_port_groups{
>>>>> -    struct shash port_groups;
>>>>> +    /* A copy of SB port_groups, each converted as a sset for
>>> efficient lport
>>>>> +     * lookup. */
>>>>> +    struct shash port_group_ssets;
>>>>> +
>>>>> +    /* Const sets containing local lports, used for expr parsing. */
>>>>> +    struct shash port_groups_cs_local;
>>>>> +
>>>>>      bool change_tracked;
>>>>>      struct sset new;
>>>>>      struct sset deleted;
>>>>>      struct sset updated;
>>>>>  };
>>>>>
>>>>> +static void
>>>>> +port_group_ssets_add_or_update(struct shash *port_group_ssets,
>>>>> +                               const struct sbrec_port_group *pg)
>>>>> +{
>>>>> +    struct sset *lports = shash_find_data(port_group_ssets,
> pg->name);
>>>>> +    if (lports) {
>>>>> +        sset_clear(lports);
>>>>> +    } else {
>>>>> +        lports = xzalloc(sizeof *lports);
>>>>> +        sset_init(lports);
>>>>> +        shash_add(port_group_ssets, pg->name, lports);
>>>>> +    }
>>>>> +
>>>>> +    for (size_t i = 0; i < pg->n_ports; i++) {
>>>>> +        sset_add(lports, pg->ports[i]);
>>>>> +    }
>>>>> +}
>>>>> +
>>>>> +static void
>>>>> +port_group_ssets_delete(struct shash *port_group_ssets,
>>>>> +                        const char *pg_name)
>>>>> +{
>>>>> +    struct shash_node *node = shash_find(port_group_ssets, pg_name);
>>>>> +    if (node) {
>>>>> +        struct sset *lports = node->data;
>>>>> +        shash_delete(port_group_ssets, node);
>>>>> +        sset_destroy(lports);
>>>>> +        free(lports);
>>>>> +    }
>>>>> +}
>>>>> +
>>>>> +/* Delete and free all ssets in port_group_ssets, but not
>>>>> + * destroying the shash itself. */
>>>>> +static void
>>>>> +port_group_ssets_clear(struct shash *port_group_ssets)
>>>>> +{
>>>>> +    struct shash_node *node, *next;
>>>>> +    SHASH_FOR_EACH_SAFE (node, next, port_group_ssets) {
>>>>> +        struct sset *lports = node->data;
>>>>> +        shash_delete(port_group_ssets, node);
>>>>> +        sset_destroy(lports);
>>>>> +        free(lports);
>>>>> +    }
>>>>> +}
>>>>> +
>>>>>  static void *
>>>>>  en_port_groups_init(struct engine_node *node OVS_UNUSED,
>>>>>                      struct engine_arg *arg OVS_UNUSED)
>>>>>  {
>>>>>      struct ed_type_port_groups *pg = xzalloc(sizeof *pg);
>>>>>
>>>>> -    shash_init(&pg->port_groups);
>>>>> +    shash_init(&pg->port_group_ssets);
>>>>> +    shash_init(&pg->port_groups_cs_local);
>>>>>      pg->change_tracked = false;
>>>>>      sset_init(&pg->new);
>>>>>      sset_init(&pg->deleted);
>>>>> @@ -1440,41 +1493,52 @@ static void
>>>>>  en_port_groups_cleanup(void *data)
>>>>>  {
>>>>>      struct ed_type_port_groups *pg = data;
>>>>> -    expr_const_sets_destroy(&pg->port_groups);
>>>>> -    shash_destroy(&pg->port_groups);
>>>>> +
>>>>> +    expr_const_sets_destroy(&pg->port_groups_cs_local);
>>>>> +    shash_destroy(&pg->port_groups_cs_local);
>>>>> +
>>>>> +    port_group_ssets_clear(&pg->port_group_ssets);
>>>>> +    shash_destroy(&pg->port_group_ssets);
>>>>> +
>>>>>      sset_destroy(&pg->new);
>>>>>      sset_destroy(&pg->deleted);
>>>>>      sset_destroy(&pg->updated);
>>>>>  }
>>>>>
>>>>> -/* Iterate port groups in the southbound database.  Create and update
>>> the
>>>>> - * corresponding symtab entries as necessary. */
>>>>> - static void
>>>>> +static void
>>>>>  port_groups_init(const struct sbrec_port_group_table
> *port_group_table,
>>>>> -                 struct shash *port_groups)
>>>>> +                 const struct sset *local_lports,
>>>>> +                 struct shash *port_group_ssets,
>>>>> +                 struct shash *port_groups_cs_local)
>>>>>  {
>>>>>      const struct sbrec_port_group *pg;
>>>>>      SBREC_PORT_GROUP_TABLE_FOR_EACH (pg, port_group_table) {
>>>>> -        expr_const_sets_add(port_groups, pg->name,
>>>>> +        port_group_ssets_add_or_update(port_group_ssets, pg);
>>>>> +        expr_const_sets_add(port_groups_cs_local, pg->name,
>>>>>                              (const char *const *) pg->ports,
>>>>> -                            pg->n_ports, false);
>>>>> +                            pg->n_ports, false, local_lports);
>>>>>      }
>>>>>  }
>>>>>
>>>>>  static void
>>>>>  port_groups_update(const struct sbrec_port_group_table
>>> *port_group_table,
>>>>> -                   struct shash *port_groups, struct sset *new,
>>>>> -                   struct sset *deleted, struct sset *updated)
>>>>> +                   const struct sset *local_lports,
>>>>> +                   struct shash *port_group_ssets,
>>>>> +                   struct shash *port_groups_cs_local,
>>>>> +                   struct sset *new, struct sset *deleted,
>>>>> +                   struct sset *updated)
>>>>>  {
>>>>>      const struct sbrec_port_group *pg;
>>>>>      SBREC_PORT_GROUP_TABLE_FOR_EACH_TRACKED (pg, port_group_table) {
>>>>>          if (sbrec_port_group_is_deleted(pg)) {
>>>>> -            expr_const_sets_remove(port_groups, pg->name);
>>>>> +            expr_const_sets_remove(port_groups_cs_local, pg->name);
>>>>> +            port_group_ssets_delete(port_group_ssets, pg->name);
>>>>>              sset_add(deleted, pg->name);
>>>>>          } else {
>>>>> -            expr_const_sets_add(port_groups, pg->name,
>>>>> +            expr_const_sets_add(port_groups_cs_local, pg->name,
>>>>>                                  (const char *const *) pg->ports,
>>>>> -                                pg->n_ports, false);
>>>>> +                                pg->n_ports, false, local_lports);
>>>>> +            port_group_ssets_add_or_update(port_group_ssets, pg);
>>>>
>>>> Bit of a nit: maybe the port_group_ssets_() suite of functions could
>>>> follow the same pattern as expr_const_sets_() by s/ssets/sets and
>>>> changing the verbs. It keeps it consistent and is basically working on
>>>> the same type of data sructure.
>>>>
>>>> e.g.
>>>>
>>>> port_group_sets_add()
>>>> port_group_sets_remove()
>>>> port_group_sets_destroy()
>>>>
>>>
>>> port_group_ssets is different from expr_const_sets. port_groups_ssets
> is a
>>> struct shash with each value item being a struct sset. expr_const_sets
> is
>>> also a struct shash but each item is an array of strings. I think
> different
>>> naming patterns would help remind that they are different.
>>>
>>>>>              if (sbrec_port_group_is_new(pg)) {
>>>>>                  sset_add(new, pg->name);
>>>>>              } else {
>>>>> @@ -1485,22 +1549,36 @@ port_groups_update(const struct
>>> sbrec_port_group_table *port_group_table,
>>>>>  }
>>>>>
>>>>>  static void
>>>>> -en_port_groups_run(struct engine_node *node, void *data)
>>>>> +en_port_groups_clear_tracked_data(void *data_)
>>>>>  {
>>>>> -    struct ed_type_port_groups *pg = data;
>>>>> -
>>>>> +    struct ed_type_port_groups *pg = data_;
>>>>>      sset_clear(&pg->new);
>>>>>      sset_clear(&pg->deleted);
>>>>>      sset_clear(&pg->updated);
>>>>> -    expr_const_sets_destroy(&pg->port_groups);
>>>>> +    pg->change_tracked = false;
>>>>> +}
>>>>> +
>>>>> +static void
>>>>> +en_port_groups_run(struct engine_node *node, void *data)
>>>>> +{
>>>>> +    struct ed_type_port_groups *pg = data;
>>>>> +
>>>>> +    expr_const_sets_destroy(&pg->port_groups_cs_local);
>>>>> +    port_group_ssets_clear(&pg->port_group_ssets);
>>>>>
>>>>>      struct sbrec_port_group_table *pg_table =
>>>>>          (struct sbrec_port_group_table *)EN_OVSDB_GET(
>>>>>              engine_get_input("SB_port_group", node));
>>>>>
>>>>> -    port_groups_init(pg_table, &pg->port_groups);
>>>>> +    struct ed_type_runtime_data *rt_data =
>>>>> +        engine_get_input_data("runtime_data", node);
>>>>> +
>>>>> +    struct sset *local_b_lports =
> binding_collect_local_binding_lports(
>>>>> +        &rt_data->lbinding_data);
>>>>> +    port_groups_init(pg_table, local_b_lports, &pg->port_group_ssets,
>>>>> +                     &pg->port_groups_cs_local);
>>>>> +    binding_destroy_local_binding_lports(local_b_lports);
>>>>>
>>>>> -    pg->change_tracked = false;
>>>>>      engine_set_node_state(node, EN_UPDATED);
>>>>>  }
>>>>>
>>>>> @@ -1509,16 +1587,19 @@ port_groups_sb_port_group_handler(struct
>>> engine_node *node, void *data)
>>>>>  {
>>>>>      struct ed_type_port_groups *pg = data;
>>>>>
>>>>> -    sset_clear(&pg->new);
>>>>> -    sset_clear(&pg->deleted);
>>>>> -    sset_clear(&pg->updated);
>>>>> -
>>>>>      struct sbrec_port_group_table *pg_table =
>>>>>          (struct sbrec_port_group_table *)EN_OVSDB_GET(
>>>>>              engine_get_input("SB_port_group", node));
>>>>>
>>>>> -    port_groups_update(pg_table, &pg->port_groups, &pg->new,
>>>>> -                     &pg->deleted, &pg->updated);
>>>>> +    struct ed_type_runtime_data *rt_data =
>>>>> +        engine_get_input_data("runtime_data", node);
>>>>> +
>>>>> +    struct sset *local_b_lports =
> binding_collect_local_binding_lports(
>>>>> +        &rt_data->lbinding_data);
>>>>> +    port_groups_update(pg_table, local_b_lports,
> &pg->port_group_ssets,
>>>>> +                       &pg->port_groups_cs_local, &pg->new,
>>> &pg->deleted,
>>>>> +                       &pg->updated);
>>>>> +    binding_destroy_local_binding_lports(local_b_lports);
>>>>>
>>>>>      if (!sset_is_empty(&pg->new) || !sset_is_empty(&pg->deleted) ||
>>>>>              !sset_is_empty(&pg->updated)) {
>>>>> @@ -1531,6 +1612,74 @@ port_groups_sb_port_group_handler(struct
>>> engine_node *node, void *data)
>>>>>      return true;
>>>>>  }
>>>>>
>>>>> +static bool
>>>>> +port_groups_runtime_data_handler(struct engine_node *node, void
> *data)
>>>>> +{
>>>>> +    struct ed_type_port_groups *pg = data;
>>>>> +
>>>>> +    struct sbrec_port_group_table *pg_table =
>>>>> +        (struct sbrec_port_group_table *)EN_OVSDB_GET(
>>>>> +            engine_get_input("SB_port_group", node));
>>>>> +
>>>>> +    struct ed_type_runtime_data *rt_data =
>>>>> +        engine_get_input_data("runtime_data", node);
>>>>> +
>>>>> +    if (!rt_data->tracked) {
>>>>> +        return false;
>>>>> +    }
>>>>> +
>>>>> +    if (hmap_is_empty(&rt_data->tracked_dp_bindings)) {
>>>>> +        goto out;
>>>>> +    }
>>>>> +
>>>>> +    struct sset *local_b_lports =
> binding_collect_local_binding_lports(
>>>>> +        &rt_data->lbinding_data);
>>>>> +
>>>>> +    const struct sbrec_port_group *pg_sb;
>>>>> +    SBREC_PORT_GROUP_TABLE_FOR_EACH (pg_sb, pg_table) {
>>>>> +
>>>> Remove blank line^
>>>
>>> Ack
>>>
>>>>> +        struct sset *pg_lports =
> shash_find_data(&pg->port_group_ssets,
>>>>> +                                                 pg_sb->name);
>>>>
>>>> Correct me if I am wrong but I think this^ is the only reason you need
>>>> the 'port_group_ssets' data structure? Perhaps you could remove the
> need
>>>> for it by implementing expr_const_sets_find() function? It would
>>>> simplify the code a lot as you would only need 'port_groups_cs_local'.
>>>> If you could do this, you could also rename that to 'port_groups_local'
>>>
>>> In fact, what's more important of the usage of "port_group_ssets" is the
>>> check in the below loop:
>>>
>>>                  if (sset_contains(pg_lports, lport->pb->logical_port))
> {
>>> ...
>>>
>>> It is to quickly check if a local port binding is affecting that PG.
>>> Without the "port_group_ssets" we will have to do a O(n) iteration of
> each
>>> lport of a PG. This is also briefly mentioned in the definition:
>>>
>>>     /* A copy of SB port_groups, each converted as a sset for efficient
>>> lport
>>>      * lookup. */
>>>     struct shash port_group_ssets;
>>>
>>> It can't be replaced by 'expr_const_sets_find' because they contain
>>> different information (local lports v.s. all lports) and also different
>>> data structure (array v.s. sset).
>>
>> Yes, I actually realized that when I was thinking about it again
>> yesterday evening. There are two things that were confusing me
>> yesterday: 1) there are now three layers of abstraction in this code
>> (en_port_groups_, port_groups_, and port_groups_ssets_). I get it after
>> looking at it for a few times but its not immediately clear because the
>> names are similar. 2) The loops in the function
>> (port_groups_runtime_data_handler) are quite complicated due to having 3
>> nesting levels. I dont think there is anything functionally wrong with
>> it but I was hoping to simplify it somehow and I thought we could remove
>> the 'port_groups_ssets' abstraction.
>>
>> For 1) maybe alot of this code could be organised into a port_groups.c/h
>> file?
> 
> I agree that the naming can be confusing. Let me explain.
> 
> - en_port_groups is the I-P engine node for storing port_groups, so
> en_port_groups_xxx functions are standard I-P engine interfaces.
> - port_groups_xxx_handler functions are also following the I-P patterns for
> change handlers.
> 
> The current pattern is to have the above interfaces in ovn-controller.c for
> each I-P node. The functions that may be moved to a separate module are:
> 
> - port_group_ssets_xxx functions are for operating the shash
> port_group_ssets, which should be straightforward.
> - port_groups_init and port_groups_update. These may be the confusing part.
> They are just helper functions used by the handlers.
> 
> These two parts can be moved to a separate module if preferred. But I'd
> keep them here in this patch to make it easier to follow what's changed in
> this patch. I can add a follow-up patch to refactor these interfaces and
> move to a separate file. What do you think?

Yes, this would be good.
> 
> 
>>
>> For 2)Why can we not just iterate over the port groups in the sb
>> database and call port_groups_update()? Is this an optimization?
> 
> The loop in port_groups_runtime_data_handler() is to handle local port
> binding changes (as part of runtime_data). For any changed local bindings,
> the loop checks if the change is related to any PG, if so the PG needs to
> be reprocessed, even if there are no PG changes. On the other hand,
> port_groups_update() is used in port_groups_sb_port_group_handler(), which
> handles PG changes. This may be the tricky part for I-P, in that you have
> to consider each individual input change and join with other input data.
> 
> I agree that the loop looks a little complex, but it is because of the way
> local port binding changes are tracked, which requires two nested loops to
> iterate the change. We might improve the data structure to make it easier
> to iterate the tracked changes but I'd rather not touch that part in this
> patch.

Ok, when it gets moved out, to a seperate file. It should be easier to
add tests and potentially restructure.

> 
>>>
>>>>> +        ovs_assert(pg_lports);
>>>>> +
>>>>> +        struct tracked_binding_datapath *tdp;
>>>>> +        bool need_update = false;
>>>>> +        HMAP_FOR_EACH (tdp, node, &rt_data->tracked_dp_bindings) {
>>>>> +            struct shash_node *shash_node;
>>>>> +            SHASH_FOR_EACH (shash_node, &tdp->lports) {
>>>>> +                struct tracked_binding_lport *lport =
> shash_node->data;
>>>>> +                if (sset_contains(pg_lports,
> lport->pb->logical_port))
>>> {
>>>>> +                    /* At least one local port-binding change is
>>> related to the
>>>>> +                     * port_group, so the port_group_cs_local needs
>>> update. */
>>>>> +                    need_update = true;
>>>>> +                    break;
>>>>> +                }
>>>>> +            }
>>>>> +            if (need_update) {
>>>>> +                break;
>>>>> +            }
>>>>> +        }
>>>>> +        if (need_update) {
>>>>> +            expr_const_sets_add(&pg->port_groups_cs_local,
> pg_sb->name,
>>>>> +                                (const char *const *) pg_sb->ports,
>>>>> +                                pg_sb->n_ports, false,
> local_b_lports);
>>>>> +            sset_add(&pg->updated, pg_sb->name);
>>>>> +        }
>>>>> +    }
>>>>> +
>>>>> +    binding_destroy_local_binding_lports(local_b_lports);
>>>>> +
>>>>> +out:
>>>>> +    if (!sset_is_empty(&pg->new) || !sset_is_empty(&pg->deleted) ||
>>>>> +            !sset_is_empty(&pg->updated)) {
>>>>> +        engine_set_node_state(node, EN_UPDATED);
>>>>> +    } else {
>>>>> +        engine_set_node_state(node, EN_UNCHANGED);
>>>>> +    }
>>>>> +    pg->change_tracked = true;
>>>>> +    return true;
>>>>> +}
>>>>> +
>>>>>  /* Connection tracking zones. */
>>>>>  struct ed_type_ct_zones {
>>>>>      unsigned long bitmap[BITMAP_N_LONGS(MAX_CT_ZONES)];
>>>>> @@ -1880,7 +2029,7 @@ static void init_lflow_ctx(struct engine_node
>>> *node,
>>>>>
>>>>>      struct ed_type_port_groups *pg_data =
>>>>>          engine_get_input_data("port_groups", node);
>>>>> -    struct shash *port_groups = &pg_data->port_groups;
>>>>> +    struct shash *port_groups = &pg_data->port_groups_cs_local;
>>>>>
>>>>>      l_ctx_in->sbrec_multicast_group_by_name_datapath =
>>>>>          sbrec_mc_group_by_name_dp;
>>>>> @@ -2540,7 +2689,7 @@ main(int argc, char *argv[])
>>>>>                                        "physical_flow_changes");
>>>>>      ENGINE_NODE(flow_output, "flow_output");
>>>>>      ENGINE_NODE(addr_sets, "addr_sets");
>>>>> -    ENGINE_NODE(port_groups, "port_groups");
>>>>> +    ENGINE_NODE_WITH_CLEAR_TRACK_DATA(port_groups, "port_groups");
>>>>>
>>>>>  #define SB_NODE(NAME, NAME_STR) ENGINE_NODE_SB(NAME, NAME_STR);
>>>>>      SB_NODES
>>>>> @@ -2556,6 +2705,10 @@ main(int argc, char *argv[])
>>>>>                       addr_sets_sb_address_set_handler);
>>>>>      engine_add_input(&en_port_groups, &en_sb_port_group,
>>>>>                       port_groups_sb_port_group_handler);
>>>>> +    /* port_groups computation requires runtime_data's lbinding_data
>>> for the
>>>>> +     * locally bound ports. */
>>>>> +    engine_add_input(&en_port_groups, &en_runtime_data,
>>>>> +                     port_groups_runtime_data_handler);
>>>>>
>>>>>      /* Engine node physical_flow_changes indicates whether
>>>>>       * we can recompute only physical flows or we can
>>>>> @@ -2986,7 +3139,7 @@ main(int argc, char *argv[])
>>>>>                      engine_get_data(&en_port_groups);
>>>>>                  if (br_int && chassis && as_data && pg_data) {
>>>>>                      char *error = ofctrl_inject_pkt(br_int,
>>> pending_pkt.flow_s,
>>>>> -                        &as_data->addr_sets, &pg_data->port_groups);
>>>>> +                        &as_data->addr_sets,
>>> &pg_data->port_groups_cs_local);
>>>>>                      if (error) {
>>>>>                          unixctl_command_reply_error(pending_pkt.conn,
>>> error);
>>>>>                          free(error);
>>>>> diff --git a/include/ovn/expr.h b/include/ovn/expr.h
>>>>> index 032370058..2f7ecbc9c 100644
>>>>> --- a/include/ovn/expr.h
>>>>> +++ b/include/ovn/expr.h
>>>>> @@ -547,7 +547,7 @@ void expr_constant_set_destroy(struct
>>> expr_constant_set *cs);
>>>>>
>>>>>  void expr_const_sets_add(struct shash *const_sets, const char *name,
>>>>>                           const char * const *values, size_t n_values,
>>>>> -                         bool convert_to_integer);
>>>>> +                         bool convert_to_integer, const struct sset
>>> *filter);
>>>>>  void expr_const_sets_remove(struct shash *const_sets, const char
>>> *name);
>>>>>  void expr_const_sets_destroy(struct shash *const_sets);
>>>>>
>>>>> diff --git a/lib/expr.c b/lib/expr.c
>>>>> index f061a8fbe..b42d78d3e 100644
>>>>> --- a/lib/expr.c
>>>>> +++ b/lib/expr.c
>>>>> @@ -1065,7 +1065,7 @@ expr_constant_set_destroy(struct
>>> expr_constant_set *cs)
>>>>>  void
>>>>
>>>>>  expr_const_sets_add(struct shash *const_sets, const char *name,
>>>>>                      const char *const *values, size_t n_values,
>>>>> -                    bool convert_to_integer)
>>>>> +                    bool convert_to_integer, const struct sset
> *filter)
>>>>>  {
>>>>>      /* Replace any existing entry for this name. */
>>>>>      expr_const_sets_remove(const_sets, name);
>>>>> @@ -1075,6 +1075,7 @@ expr_const_sets_add(struct shash *const_sets,
>>> const char *name,
>>>>>      cs->n_values = 0;
>>>>>      cs->values = xmalloc(n_values * sizeof *cs->values);
>>>>>      if (convert_to_integer) {
>>>>> +        ovs_assert(!filter);
>>>> Because the parameters of this function do not apply to both the
> integer
>>>> and the string case, this implies all the callers must know if they are
>>>> dealing with integers or strings. I'm wondering should this function be
>>>> split into something like:
>>>>
>>>> expr_const_sets_add_ints()
>>>> expr_const_sets_add_strings()
>>>>
>>>> and maybe even:
>>>>
>>>> expr_const_sets_add_filtered_strings()
>>>>
>>>> I just looked through the code and this seems to be the case - in fact
>>>> 'convert_to_integer' basically means that this is an integer. What do
>>>> you think?
>>>>
>>>
>>> Agree. I thought about the same thing but was lazy :). I can change it
> in
>>> the next version of the patch.
>>
>> Great
>>
>>>
>>>>>          cs->type = EXPR_C_INTEGER;
>>>>>          for (size_t i = 0; i < n_values; i++) {
>>>>>              /* Use the lexer to convert each constant set into the
>>> proper
>>>>> @@ -1100,6 +1101,11 @@ expr_const_sets_add(struct shash *const_sets,
>>> const char *name,
>>>>>      } else {
>>>>>          cs->type = EXPR_C_STRING;
>>>>>          for (size_t i = 0; i < n_values; i++) {
>>>>> +            if (filter && !sset_find(filter, values[i])) {
>>>>> +                VLOG_DBG("Skip constant set entry '%s' for '%s'",
>>>>> +                         values[i], name);
>>>>> +                continue;
>>>>> +            }
>>>>>              union expr_constant *c = &cs->values[cs->n_values++];
>>>>>              c->string = xstrdup(values[i]);
>>>>>          }
>>>>> diff --git a/tests/ovn.at b/tests/ovn.at
>>>>> index 55444bbd7..aa1d1cf17 100644
>>>>> --- a/tests/ovn.at
>>>>> +++ b/tests/ovn.at
>>>>> @@ -26265,3 +26265,56 @@ primary lport : [[sw0p2]]
>>>>>  OVN_CLEANUP([hv1], [hv2])
>>>>>  AT_CLEANUP
>>>>>  ])
>>>>> +
>>>>> +# Tests the efficiency of conjunction flow generation when port
> groups
>>> are used
>>>>> +# by ACLs. Make sure there is no open flow explosion in table 45 for
>>> an ACL
>>>>> +# with self-referencing match condition of a port group:
>>>>> +#
>>>>> +# "outport == @pg1 && ip4.src == $pg1_ip4"
>>>>> +#
>>>>> +# 10 LSes (ls[0-9]), each has 2 LSPs (lsp[0-9][01]). All LSPs belong
>>> to port
>>>>> +# group pg1, but only LSPs of LS0 are bound on HV1.
>>>>> +#
>>>>> +# The expected number of conjunction flows is 2 + 20 = 22.
>>>>
>>>> A brief explanation of why that number is expected should help our
>>>> future selves.
>>>
>>> Ack
>>>
>>>>> +OVN_FOR_EACH_NORTHD([
>>>>> +AT_SETUP([ovn -- ACL with Port Group conjunction flow efficiency])
>>>>> +ovn_start
>>>>> +
>>>>> +net_add n1
>>>>> +
>>>>> +sim_add hv1
>>>>> +as hv1
>>>>
>>>> Maybe this test should be run with 2 hypervisors as it will catch any
>>>> corner cases that may come up due to the locality of ports and
>>>> datapaths? What do you think?
>>>>
>>>
>>> In fact we have at least another 2 test cases, "Port Group" and "ACLs on
>>> Port Groups", both have 3 HVs, so I think the coverage is good. I could
>>> have added the flow count check in existing cases but it seems more
> clear
>>> to add a new test case focusing on the flow count efficiency only, so I
>>> added this new test case and I think one HV is enough for this purpose.
>>> What do you think?
>>>
>>
>> Ok, I had a look at those tests and this seems Ok. Your strategy makes
>> sense.
>>
>>> Thanks again for all your comments. I will update with v3.
>>>
>>> Han
>>>
>>>>> +ovs-vsctl add-br br-phys
>>>>> +ovn_attach n1 br-phys 192.168.0.1
>>>>> +
>>>>> +ovn-nbctl lr-add lr0
>>>>> +
>>>>> +for i in $(seq 0 9); do
>>>>> +    ovn-nbctl ls-add ls$i
>>>>> +    ovn-nbctl lrp-add lr0 lrp_lr0_ls$i aa:bb:bb:00:00:0$i
>>> 192.168.${i}.1/24
>>>>> +
>>>>> +    ovn-nbctl lsp-add ls$i lsp_ls${i}_lr0 -- \
>>>>> +        lsp-set-addresses lsp_ls${i}_lr0 router -- \
>>>>> +        lsp-set-type lsp_ls${i}_lr0 router -- \
>>>>> +        lsp-set-options lsp_ls${i}_lr0 router-port=lrp_lr0_ls${i}
>>>>> +
>>>>> +    for j in 0 1; do
>>>>> +        ovn-nbctl lsp-add ls$i lsp${i}-${j} -- \
>>>>> +            lsp-set-addresses lsp${i}-${j} "aa:aa:aa:00:0$i:0$j
>>> 192.168.$i.1$j"
>>>>> +    done
>>>>> +done
>>>>> +
>>>>> +ovn-nbctl pg-add pg1
>>>>> +ovn-nbctl pg-set-ports pg1 $(for i in 0 1 2 3 4 5 6 7 8 9; do for j
> in
>>> 0 1; do echo lsp${i}-${j}; done; done)
>>>>> +
>>>>> +ovn-nbctl --type=port-group acl-add pg1 to-lport 100 "outport == @pg1
>>> && ip4.src == \$pg1_ip4" allow
>>>>> +
>>>>> +ovs-vsctl add-port br-int lsp0-0 -- set interface lsp0-0
>>> external_ids:iface-id=lsp0-0
>>>>> +ovs-vsctl add-port br-int lsp0-1 -- set interface lsp0-1
>>> external_ids:iface-id=lsp0-1
>>>>> +
>>>>> +check ovn-nbctl --wait=hv sync
>>>>> +AT_CHECK([test $(ovs-ofctl dump-flows br-int table=45 | grep
>>> conjunction | wc -l) == 22])
>>>>> +
>>>>> +OVN_CLEANUP([hv1])
>>>>> +AT_CLEANUP
>>>>> +])
>>>>> diff --git a/tests/test-ovn.c b/tests/test-ovn.c
>>>>> index 202a96c5d..55dbbb6b4 100644
>>>>> --- a/tests/test-ovn.c
>>>>> +++ b/tests/test-ovn.c
>>>>> @@ -227,10 +227,10 @@ create_addr_sets(struct shash *addr_sets)
>>>>>      };
>>>>>      static const char *const addrs4[] = { NULL };
>>>>>
>>>>> -    expr_const_sets_add(addr_sets, "set1", addrs1, 3, true);
>>>>> -    expr_const_sets_add(addr_sets, "set2", addrs2, 3, true);
>>>>> -    expr_const_sets_add(addr_sets, "set3", addrs3, 3, true);
>>>>> -    expr_const_sets_add(addr_sets, "set4", addrs4, 0, true);
>>>>> +    expr_const_sets_add(addr_sets, "set1", addrs1, 3, true, NULL);
>>>>> +    expr_const_sets_add(addr_sets, "set2", addrs2, 3, true, NULL);
>>>>> +    expr_const_sets_add(addr_sets, "set3", addrs3, 3, true, NULL);
>>>>> +    expr_const_sets_add(addr_sets, "set4", addrs4, 0, true, NULL);
>>>>>  }
>>>>>
>>>>>  static void
>>>>> @@ -243,8 +243,8 @@ create_port_groups(struct shash *port_groups)
>>>>>      };
>>>>>      static const char *const pg2[] = { NULL };
>>>>>
>>>>> -    expr_const_sets_add(port_groups, "0_pg1", pg1, 3, false);
>>>>> -    expr_const_sets_add(port_groups, "0_pg_empty", pg2, 0, false);
>>>>> +    expr_const_sets_add(port_groups, "0_pg1", pg1, 3, false, NULL);
>>>>> +    expr_const_sets_add(port_groups, "0_pg_empty", pg2, 0, false,
>>> NULL);
>>>>>  }
>>>>>
>>>>>  static bool
>>>>> diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c
>>>>> index 6b883886f..63b640db9 100644
>>>>> --- a/utilities/ovn-trace.c
>>>>> +++ b/utilities/ovn-trace.c
>>>>> @@ -820,7 +820,7 @@ read_address_sets(void)
>>>>>      SBREC_ADDRESS_SET_FOR_EACH (sbas, ovnsb_idl) {
>>>>>          expr_const_sets_add(&address_sets, sbas->name,
>>>>>                             (const char *const *) sbas->addresses,
>>>>> -                           sbas->n_addresses, true);
>>>>> +                           sbas->n_addresses, true, NULL);
>>>>>      }
>>>>>  }
>>>>>
>>>>> @@ -833,7 +833,7 @@ read_port_groups(void)
>>>>>      SBREC_PORT_GROUP_FOR_EACH (sbpg, ovnsb_idl) {
>>>>>          expr_const_sets_add(&port_groups, sbpg->name,
>>>>>                             (const char *const *) sbpg->ports,
>>>>> -                           sbpg->n_ports, false);
>>>>> +                           sbpg->n_ports, false, NULL);
>>>>>      }
>>>>>  }
>>>>>
>>>>>
>>>>
>>>
>>
> 



More information about the dev mailing list