[ovs-dev] [PATCH ovn] ofctrl: Split large group_mod messages up.

Numan Siddique numans at ovn.org
Wed May 13 07:19:27 UTC 2020


On Fri, May 8, 2020 at 2:55 AM Mark Michelson <mmichels at redhat.com> wrote:

> Group mod messages have the possibility of growing very large if OVN
> installs a load balancer with a great many backends. The current
> approach is to send a single ADD message with the entire group contents.
> If the size of this message exceeds UINT16_MAX, then OpenFlow cannot
> properly express the length of the message since the OpenFlow header's
> length is limited to 16 bits.
>
> This patch solves the problem by breaking the message into pieces. The
> first piece is an ADD, and subsequent messages are INSERT_BUCKET
> messages. This way, we end up being able to express the entire size of
> the group through multiple OpenFlow messages.
>
> Signed-off-by: Mark Michelson <mmichels at redhat.com>
>

Hi Mark,

Thanks for addressing this.

The patch LGTM. But there seems to be some memory leak when  I ran with
valgrind.

Can you please take a look.



> ---
>  controller/ofctrl.c | 70 ++++++++++++++++++++++++++++++++++++++++++---
>  tests/ovn.at        | 29 +++++++++++++++++++
>  2 files changed, 95 insertions(+), 4 deletions(-)
>
> diff --git a/controller/ofctrl.c b/controller/ofctrl.c
> index 4b51cd86e..2b98dd6b1 100644
> --- a/controller/ofctrl.c
> +++ b/controller/ofctrl.c
> @@ -930,10 +930,72 @@ encode_group_mod(const struct ofputil_group_mod *gm)
>  }
>
>  static void
> -add_group_mod(const struct ofputil_group_mod *gm, struct ovs_list *msgs)
> +add_group_mod(struct ofputil_group_mod *gm, struct ovs_list *msgs)
>  {
>      struct ofpbuf *msg = encode_group_mod(gm);
> -    ovs_list_push_back(msgs, &msg->list_node);
> +    if (msg->size <= UINT16_MAX) {
> +        ovs_list_push_back(msgs, &msg->list_node);
> +        return;
> +    }
> +    /* This group mod request is too large to fit in a single OF message
> +     * since the header can only specify a 16-bit size. We need to break
> +     * this into multiple group_mods requests.
> +     */
> +
> +    /* Pull the first bucket. All buckets are approximately the same
> length
> +     * since they contain near-identical actions. Using its length can
> give
> +     * us a good approximation of how many buckets we can fit in a single
> +     * OF message.
> +     */
> +    ofpraw_pull_assert(msg);
> +    struct ofp15_group_mod *ogm = ofpbuf_pull(msg, sizeof(*ogm));
> +    struct ofp15_bucket *of_bucket = ofpbuf_pull(msg, sizeof(*of_bucket));
> +    uint16_t bucket_size = ntohs(of_bucket->len);
> +
> +    ofpbuf_uninit(msg);
> +
> +    /* Dividing by 2 here ensures that just in case there are variations
> in
> +     * the size of the buckets, we absolutely will not put too many in our
> +     * new group_mod message.
> +     */
> +    size_t max_buckets = ((UINT16_MAX - sizeof *ogm) / bucket_size) / 2;
> +
> +    ovs_assert(max_buckets < ovs_list_size(&gm->buckets));
> +
> +    uint16_t command = OFPGC15_INSERT_BUCKET;
> +    if (gm->command == OFPGC15_DELETE ||
> +        gm->command == OFPGC15_REMOVE_BUCKET) {
> +        command = OFPGC15_REMOVE_BUCKET;
> +    }
> +    struct ofputil_group_mod split = {
> +        .command = command,
> +        .type = gm->type,
> +        .group_id = gm->group_id,
> +        .command_bucket_id = OFPG15_BUCKET_LAST,
> +    };
> +    ovs_list_init(&split.buckets);
> +
> +    size_t i = 0;
> +    struct ofputil_bucket *bucket;
> +    LIST_FOR_EACH (bucket, list_node, &gm->buckets) {
> +        if (i++ < max_buckets) {
> +            continue;
> +        }
> +        break;
> +    }
> +
> +    ovs_list_splice(&split.buckets, &bucket->list_node, &gm->buckets);
> +
> +    struct ofpbuf *orig = encode_group_mod(gm);
> +    ovs_list_push_back(msgs, &orig->list_node);
> +
> +    /* We call this recursively just in case our new
> +     * INSERT_BUCKET/REMOVE_BUCKET group_mod is still too
> +     * long for an OF message. This will allow for it to
> +     * be broken into pieces, too.
> +     */
> +    add_group_mod(&split, msgs);
> +    ofputil_uninit_group_mod(&split);
>

Do we need to also do  ofputil_uninit_group_mod(gm) ?


Thanks
Numan

 }
>
>
> @@ -1124,7 +1186,7 @@ ofctrl_put(struct ovn_desired_flow_table *flow_table,
>          char *group_string = xasprintf("group_id=%"PRIu32",%s",
>                                         desired->table_id,
>                                         desired->name);
> -        char *error = parse_ofp_group_mod_str(&gm, OFPGC11_ADD,
> group_string,
> +        char *error = parse_ofp_group_mod_str(&gm, OFPGC15_ADD,
> group_string,
>                                                NULL, NULL,
> &usable_protocols);
>          if (!error) {
>              add_group_mod(&gm, &msgs);
> @@ -1243,7 +1305,7 @@ ofctrl_put(struct ovn_desired_flow_table *flow_table,
>          enum ofputil_protocol usable_protocols;
>          char *group_string = xasprintf("group_id=%"PRIu32"",
>                                         installed->table_id);
> -        char *error = parse_ofp_group_mod_str(&gm, OFPGC11_DELETE,
> +        char *error = parse_ofp_group_mod_str(&gm, OFPGC15_DELETE,
>                                                group_string, NULL, NULL,
>                                                &usable_protocols);
>          if (!error) {
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 52d994972..f39fda2e4 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -19179,3 +19179,32 @@ OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected])
>
>  OVN_CLEANUP([hv1])
>  AT_CLEANUP
> +
> +AT_SETUP([ovn -- Big Load Balancer])
> +ovn_start
> +
> +ovn-nbctl ls-add ls1
> +ovn-nbctl lsp-add ls1 lsp1
> +
> +net_add n1
> +sim_add hv1
> +
> +as hv1
> +ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.1
> +ovs-vsctl add-port br-int p1 -- set Interface p1
> external-ids:iface-id=lsp1
> +
> +IPS=192.169.0.1:80
> +for i in `seq 1 9` ; do
> +    for j in `seq 1 254` ; do
> +        IPS=${IPS},192.169.$i.$j:80
> +    done
> +done
> +
> +ovn-nbctl lb-add lb0 172.172.0.1:8080 "${IPS}"
> +ovn-nbctl --wait=hv ls-lb-add ls1 lb0
> +
> +AT_CHECK([test 2287 = `ovs-ofctl dump-group-stats br-int | grep -o bucket
> | wc -l`])
> +
> +OVN_CLEANUP([hv1])
> +AT_CLEANUP
> --
> 2.25.4
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
>


More information about the dev mailing list