[ovs-dev] [PATCH v3 3/6] ovn: l3ha, handling of multiple gateway chassis

Russell Bryant russell at ovn.org
Fri Jul 7 19:09:27 UTC 2017


review part 2 of this one ...

On Wed, Jul 5, 2017 at 9:45 AM, Miguel Angel Ajo <majopela at redhat.com> wrote:
> This patch handles multiple gateway_chassis with in chassisredirect
> ports, any gateway with a chassis redirect port will implement the
> rules to de-encapsulate incomming packets for such port.
>
> And hosts targetting a remote chassisredirect port will setup a
> bundle(active_backup, ..) action to each tunnel port, in the given
> priority order.
>
> Signed-off-by: Miguel Angel Ajo <majopela at redhat.com>
> Signed-off-by: Venkata Anil Kommaddi <vkommadi at redhat.com>
> ---
>  ovn/controller/automake.mk      |   2 +
>  ovn/controller/binding.c        |  13 +-
>  ovn/controller/binding.h        |   1 +
>  ovn/controller/gchassis.c       | 176 +++++++++++++++++++++
>  ovn/controller/gchassis.h       |  63 ++++++++
>  ovn/controller/lflow.c          |   7 +-
>  ovn/controller/lport.h          |   4 +
>  ovn/controller/ovn-controller.c |  15 +-
>  ovn/controller/physical.c       | 124 ++++++++++++---
>  ovn/controller/physical.h       |   4 +-
>  tests/ovn.at                    | 330 +++++++++++++++++++++++++++++++++++++++-
>  11 files changed, 710 insertions(+), 29 deletions(-)
>  create mode 100644 ovn/controller/gchassis.c
>  create mode 100644 ovn/controller/gchassis.h
>
> diff --git a/ovn/controller/automake.mk b/ovn/controller/automake.mk
> index 8c6a787..d3828f5 100644
> --- a/ovn/controller/automake.mk
> +++ b/ovn/controller/automake.mk
> @@ -6,6 +6,8 @@ ovn_controller_ovn_controller_SOURCES = \
>         ovn/controller/chassis.h \
>         ovn/controller/encaps.c \
>         ovn/controller/encaps.h \
> +       ovn/controller/gchassis.c \
> +       ovn/controller/gchassis.h \
>         ovn/controller/lflow.c \
>         ovn/controller/lflow.h \
>         ovn/controller/lport.c \
> diff --git a/ovn/controller/binding.c b/ovn/controller/binding.c
> index bb76608..f5526fd 100644
> --- a/ovn/controller/binding.c
> +++ b/ovn/controller/binding.c
> @@ -15,6 +15,7 @@
>
>  #include <config.h>
>  #include "binding.h"
> +#include "gchassis.h"
>  #include "lflow.h"
>  #include "lport.h"
>
> @@ -26,6 +27,7 @@
>  #include "lib/vswitch-idl.h"
>  #include "openvswitch/hmap.h"
>  #include "openvswitch/vlog.h"
> +#include "ovn/lib/chassis-index.h"
>  #include "ovn/lib/ovn-sb-idl.h"
>  #include "ovn-controller.h"
>
> @@ -394,12 +396,15 @@ consider_local_datapath(struct controller_ctx *ctx,
>                                 false, local_datapaths);
>          }
>      } else if (!strcmp(binding_rec->type, "chassisredirect")) {
> -        const char *chassis_id = smap_get(&binding_rec->options,
> -                                          "redirect-chassis");
> -        our_chassis = chassis_id && !strcmp(chassis_id, chassis_rec->name);
> -        if (our_chassis) {
> +        if (gateway_chassis_in_pb_contains(binding_rec, chassis_rec)) {
>              add_local_datapath(ldatapaths, lports, binding_rec->datapath,
>                                 false, local_datapaths);
> +            /* XXX this should only be set to true if our chassis
> +             * (chassis_rec) is the master for this chassisredirect port
> +             * but for now we'll bind it only when not bound, this is
> +             * handled in subsequent patches */
> +            our_chassis = !binding_rec->chassis ||
> +                chassis_rec == binding_rec->chassis;
>          }
>      } else if (!strcmp(binding_rec->type, "l3gateway")) {
>          const char *chassis_id = smap_get(&binding_rec->options,
> diff --git a/ovn/controller/binding.h b/ovn/controller/binding.h
> index 3bfa7d1..136b3a7 100644
> --- a/ovn/controller/binding.h
> +++ b/ovn/controller/binding.h
> @@ -20,6 +20,7 @@
>  #include <stdbool.h>
>
>  struct controller_ctx;
> +struct chassis_index;
>  struct hmap;
>  struct ldatapath_index;
>  struct lport_index;
> diff --git a/ovn/controller/gchassis.c b/ovn/controller/gchassis.c
> new file mode 100644
> index 0000000..27d8f66
> --- /dev/null
> +++ b/ovn/controller/gchassis.c
> @@ -0,0 +1,176 @@
> +/* Copyright (c) 2017, Red Hat, Inc.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#include <config.h>
> +
> +#include "gchassis.h"
> +#include "lport.h"
> +#include "openvswitch/vlog.h"
> +#include "ovn/lib/chassis-index.h"
> +#include "ovn/lib/ovn-sb-idl.h"
> +
> +VLOG_DEFINE_THIS_MODULE(gchassis);
> +
> +/* gateway_chassis ordering
> + */
> +static int
> +compare_chassis_prio_(const void *a_, const void *b_)
> +{
> +    const struct gateway_chassis *gc_a = a_;
> +    const struct gateway_chassis *gc_b = b_;
> +    int prio_diff = gc_b->db->priority - gc_a->db->priority;
> +    if (!prio_diff) {
> +        return strcmp(gc_a->db->name, gc_b->db->name);
> +    }
> +    return prio_diff;
> +}
> +
> +struct ovs_list*
> +gateway_chassis_get_ordered(const struct sbrec_port_binding *binding,
> +                            const struct chassis_index *chassis_index)
> +{
> +    const char *redir_chassis_str;
> +    const struct sbrec_chassis *redirect_chassis = NULL;
> +
> +    /* XXX: redirect-chassis SBDB option handling is supported for backwards
> +     * compatibility with N-1 version of ovn-northd, this support can
> +     * be removed in OVS 2.9 where Gateway_Chassis list on the port binding
> +     * will allways be populated by northd */
> +    redir_chassis_str = smap_get(&binding->options, "redirect-chassis");
> +
> +    if (redir_chassis_str) {
> +        redirect_chassis = chassis_lookup_by_name(chassis_index,
> +                                                  redir_chassis_str);
> +        if (!redirect_chassis) {
> +            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
> +            VLOG_WARN_RL(&rl, "chassis name (%s) in redirect-chassis option "
> +                              "of logical port %s not known",
> +                              redir_chassis_str, binding->logical_port);
> +        }
> +    }
> +
> +    if (!redirect_chassis && binding->n_gateway_chassis == 0) {
> +        return NULL;
> +    }
> +
> +    struct gateway_chassis *gateway_chassis = NULL;
> +    int n=0;
> +
> +    if (binding->n_gateway_chassis) {
> +        gateway_chassis = xmalloc(sizeof *gateway_chassis *
> +                                  binding->n_gateway_chassis);
> +        for (n=0; n < binding->n_gateway_chassis; n++) {
> +            gateway_chassis[n].db = binding->gateway_chassis[n];
> +            gateway_chassis[n].virtual_gwc = false;
> +        }
> +        qsort(gateway_chassis, n, sizeof *gateway_chassis,
> +              compare_chassis_prio_);
> +    } else if (redirect_chassis) {
> +        /* When only redirect_chassis is available, return a single
> +         * virtual entry that it's not on OVSDB, this way the code
> +         * handling the returned list will be uniform, regardless
> +         * of gateway_chassis being populated or redirect-chassis option
> +         * being used */
> +        gateway_chassis = xmalloc(sizeof *gateway_chassis);
> +        struct sbrec_gateway_chassis *gwc =
> +            xzalloc(sizeof *gateway_chassis->db);
> +        sbrec_gateway_chassis_init(gwc);
> +        gwc->name = xasprintf("%s_%s", binding->logical_port,
> +                                       redirect_chassis->name);
> +        gwc->chassis = (struct sbrec_chassis *)redirect_chassis;
> +        gateway_chassis->db = gwc;
> +        gateway_chassis->virtual_gwc = true;
> +        n++;
> +    }
> +
> +    struct ovs_list *list = NULL;
> +    if (n) {
> +        list = xmalloc(sizeof *list);
> +        ovs_list_init(list);
> +
> +        int i;
> +        for (i=0; i<n; i++) {
> +            ovs_list_push_back(list, &gateway_chassis[i].node);
> +        }
> +    }
> +
> +    return list;
> +}
> +
> +bool
> +gateway_chassis_contains(struct ovs_list *gateway_chassis,
> +                         const struct sbrec_chassis *chassis) {
> +    struct gateway_chassis *chassis_item;
> +    if (gateway_chassis) {
> +        LIST_FOR_EACH (chassis_item, node, gateway_chassis) {
> +            if (chassis_item->db->chassis &&
> +                !strcmp(chassis_item->db->chassis->name, chassis->name)) {
> +                return true;
> +            }
> +        }
> +    }
> +    return false;
> +}
> +
> +void
> +gateway_chassis_destroy(struct ovs_list *list)
> +{
> +    if (!list) {
> +        return;
> +    }
> +
> +    /* XXX: This loop is for backwards compatibility with redirect-chassis
> +     * which we insert as a single virtual Gateway_Chassis on the ordered
> +     * list */
> +    struct gateway_chassis *chassis_item;
> +    LIST_FOR_EACH (chassis_item, node, list) {
> +        if (chassis_item->virtual_gwc) {
> +            free(chassis_item->db->name);
> +            free((void *)chassis_item->db);
> +        }
> +    }
> +
> +    free(ovs_list_front(list));
> +    free(list);
> +}
> +
> +bool
> +gateway_chassis_in_pb_contains(const struct sbrec_port_binding *binding,
> +                               const struct sbrec_chassis *chassis)
> +{
> +    if (!binding || !chassis) {
> +        return false;
> +    }
> +
> +    /* XXX: redirect-chassis handling for backwards compatibility,
> +     * with older ovs-northd during upgrade phase, can be removed

s/ovs-northd/ovn-northd/

> +     * for OVS 2.9 */

I think we can remove this, because I'm not aware of anyone using the
southbound db directly.  It's possible that you could use
ovn-controller and the southbound db only though, so we'll we have to
watch out for people starting to use it that way.  If that starts
happening, we'll have to be more strict about removing things from the
southbound db.  Just FYI.  :-)

> +    const char *redirect_chassis = smap_get(&binding->options,
> +                                            "redirect-chassis");
> +    if (binding->n_gateway_chassis) {
> +        int n;
> +        for (n=0; n < binding->n_gateway_chassis; n++) {

Spaces around =.

> +            if (binding->gateway_chassis[n]->chassis &&
> +                !strcmp(binding->gateway_chassis[n]->chassis->name,
> +                        chassis->name)) {
> +                return true;
> +            }
> +        }
> +    } else if (redirect_chassis) {
> +        return !strcmp(redirect_chassis, chassis->name);
> +    }
> +
> +    return false;
> +}
> diff --git a/ovn/controller/gchassis.h b/ovn/controller/gchassis.h
> new file mode 100644
> index 0000000..46d5e42
> --- /dev/null
> +++ b/ovn/controller/gchassis.h
> @@ -0,0 +1,63 @@
> +/* Copyright (c) 2017 Red Hat, Inc.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#ifndef OVN_GCHASSIS_H
> +#define OVN_GCHASSIS_H 1
> +
> +#include <stdint.h>
> +#include "lib/uuid.h"
> +#include "openvswitch/hmap.h"
> +#include "openvswitch/list.h"
> +
> +struct chassis_index;
> +struct ovsdb_idl;
> +struct sbrec_chassis;
> +struct sbrec_gateway_chassis;
> +struct sbrec_port_binding;
> +
> +
> +/* Gateway_Chassis management
> + * ==========================
> + *
> + * The following structure and methods handle ordering of Gateway_Chassis
> + * entries in a chassisredirect port. And parsing redirect-chassis option
> + * for backwards compatibility with older (N-1 version of ovn-northd).
> + */
> +struct gateway_chassis {
> +    struct ovs_list node;
> +    const struct sbrec_gateway_chassis *db; /* sbrec row for the gwc */
> +    bool virtual_gwc; /* db entry not from SBDB, but from redirect-chassis */
> +};
> +
> +/* Gets, and orders by priority/name the list of Gateway_Chassis */
> +struct ovs_list *gateway_chassis_get_ordered(
> +        const struct sbrec_port_binding *binding,
> +        const struct chassis_index *chassis_index);
> +
> +/* Checks if an specific chassis is contained in the gateway_chassis
> + * list */
> +bool gateway_chassis_contains(struct ovs_list *gateway_chassis,
> +                              const struct sbrec_chassis *chassis);
> +
> +/* Destroy a gateway_chassis list from memory */
> +void gateway_chassis_destroy(struct ovs_list *list);
> +
> +/* Checks if a chassis is referenced in the port_binding gateway_chassis
> + * list or redirect-chassis option (backwards compatibility) */
> +bool gateway_chassis_in_pb_contains(
> +        const struct sbrec_port_binding *binding,
> +        const struct sbrec_chassis *chassis);
> +
> +#endif /* ovn/controller/gchassis.h */
> diff --git a/ovn/controller/lflow.c b/ovn/controller/lflow.c
> index b1b4b23..8cc5e7e 100644
> --- a/ovn/controller/lflow.c
> +++ b/ovn/controller/lflow.c
> @@ -14,6 +14,7 @@
>   */
>
>  #include <config.h>
> +#include "gchassis.h"
>  #include "lflow.h"
>  #include "lport.h"
>  #include "ofctrl.h"
> @@ -96,7 +97,11 @@ is_chassis_resident_cb(const void *c_aux_, const char *port_name)
>
>      const struct sbrec_port_binding *pb
>          = lport_lookup_by_name(c_aux->lports, port_name);
> -    return pb && pb->chassis && pb->chassis == c_aux->chassis;
> +    if (pb && pb->chassis && pb->chassis == c_aux->chassis) {
> +        return true;
> +    } else {
> +        return gateway_chassis_in_pb_contains(pb, c_aux->chassis);

I wonder if this works?  Won't every chassis in the group return true,
then?  Maybe only the highest priority gateway_chassis should return
true?

Though I realize that this behavior changes later in the series once
BFD is enabled and is_chassis_resident is made aware of gateway
active/backup state ...

> +    }
>  }
>
>  static bool
> diff --git a/ovn/controller/lport.h b/ovn/controller/lport.h
> index fe0e430..8937ecb 100644
> --- a/ovn/controller/lport.h
> +++ b/ovn/controller/lport.h
> @@ -17,10 +17,14 @@
>  #define OVN_LPORT_H 1
>
>  #include <stdint.h>
> +#include "lib/uuid.h"
>  #include "openvswitch/hmap.h"
> +#include "openvswitch/list.h"
>
>  struct ovsdb_idl;
> +struct sbrec_chassis;
>  struct sbrec_datapath_binding;
> +struct sbrec_port_binding;
>
>  /* Database indexes.
>   * =================
> diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c
> index 00b4cda..3e6c86c 100644
> --- a/ovn/controller/ovn-controller.c
> +++ b/ovn/controller/ovn-controller.c
> @@ -40,6 +40,7 @@
>  #include "openvswitch/vconn.h"
>  #include "openvswitch/vlog.h"
>  #include "ovn/actions.h"
> +#include "ovn/lib/chassis-index.h"
>  #include "ovn/lib/ovn-sb-idl.h"
>  #include "ovn/lib/ovn-util.h"
>  #include "patch.h"
> @@ -148,6 +149,10 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
>      struct ovsdb_idl_condition mg = OVSDB_IDL_CONDITION_INIT(&mg);
>      struct ovsdb_idl_condition dns = OVSDB_IDL_CONDITION_INIT(&dns);
>      sbrec_port_binding_add_clause_type(&pb, OVSDB_F_EQ, "patch");
> +    /* XXX: We can optimize this, if we find a way to only monitor
> +     * ports that have a Gateway_Chassis that point's to our own
> +     * chassis */
> +    sbrec_port_binding_add_clause_type(&pb, OVSDB_F_EQ, "chassisredirect");

This should be possible, and if it isn't, let's fix it.  :-)

I'm OK with it as a follow-up, though.

>      if (chassis) {
>          /* This should be mostly redundant with the other clauses for port
>           * bindings, but it allows us to catch any ports that are assigned to
> @@ -165,10 +170,6 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
>          sbrec_port_binding_add_clause_options(&pb, OVSDB_F_INCLUDES, &l2);
>          const struct smap l3 = SMAP_CONST1(&l3, "l3gateway-chassis", id);
>          sbrec_port_binding_add_clause_options(&pb, OVSDB_F_INCLUDES, &l3);
> -        const struct smap redirect = SMAP_CONST1(&redirect,
> -                                                 "redirect-chassis", id);
> -        sbrec_port_binding_add_clause_options(&pb, OVSDB_F_INCLUDES,
> -                                              &redirect);
>      }
>      if (local_ifaces) {
>          const char *name;
> @@ -627,9 +628,12 @@ main(int argc, char *argv[])
>          struct ldatapath_index ldatapaths;
>          struct lport_index lports;
>          struct mcgroup_index mcgroups;
> +        struct chassis_index chassis_index;
> +
>          ldatapath_index_init(&ldatapaths, ctx.ovnsb_idl);
>          lport_index_init(&lports, ctx.ovnsb_idl);
>          mcgroup_index_init(&mcgroups, ctx.ovnsb_idl);
> +        chassis_index_init(&chassis_index, ctx.ovnsb_idl);
>
>          const struct sbrec_chassis *chassis = NULL;
>          if (chassis_id) {
> @@ -662,7 +666,8 @@ main(int argc, char *argv[])
>
>                      physical_run(&ctx, mff_ovn_geneve,
>                                   br_int, chassis, &ct_zones, &lports,
> -                                 &flow_table, &local_datapaths, &local_lports);
> +                                 &flow_table, &local_datapaths, &local_lports,
> +                                 &chassis_index);
>
>                      ofctrl_put(&flow_table, &pending_ct_zones,
>                                 get_nb_cfg(ctx.ovnsb_idl));

It looks like there's a pretty big memory leak here.  It looks like
All of chassis_index is leaked each time through this loop.

> diff --git a/ovn/controller/physical.c b/ovn/controller/physical.c
> index f2d9676..00e12e6 100644
> --- a/ovn/controller/physical.c
> +++ b/ovn/controller/physical.c
> @@ -17,16 +17,21 @@
>  #include "binding.h"
>  #include "byte-order.h"
>  #include "flow.h"
> +#include "gchassis.h"
>  #include "lflow.h"
>  #include "lport.h"
> +#include "lib/bundle.h"
>  #include "lib/poll-loop.h"
> +#include "lib/uuid.h"
>  #include "ofctrl.h"
> +#include "openvswitch/list.h"
>  #include "openvswitch/hmap.h"
>  #include "openvswitch/match.h"
>  #include "openvswitch/ofp-actions.h"
>  #include "openvswitch/ofpbuf.h"
>  #include "openvswitch/vlog.h"
>  #include "ovn-controller.h"
> +#include "ovn/lib/chassis-index.h"
>  #include "ovn/lib/ovn-sb-idl.h"
>  #include "ovn/lib/ovn-util.h"
>  #include "physical.h"
> @@ -289,6 +294,7 @@ static void
>  consider_port_binding(enum mf_field_id mff_ovn_geneve,
>                        const struct simap *ct_zones,
>                        const struct lport_index *lports,
> +                      const struct chassis_index *chassis_index,
>                        struct hmap *local_datapaths,
>                        const struct sbrec_port_binding *binding,
>                        const struct sbrec_chassis *chassis,
> @@ -353,8 +359,12 @@ consider_port_binding(enum mf_field_id mff_ovn_geneve,
>          return;
>      }
>
> +    struct ovs_list *gateway_chassis = NULL;
> +    gateway_chassis = gateway_chassis_get_ordered(binding, chassis_index);

Make one line, or at least one statement, like:

struct ovs_list *gateway_chassis
    = gateway_chassis_get_...

> +
>      if (!strcmp(binding->type, "chassisredirect")
> -        && binding->chassis == chassis) {
> +        && (binding->chassis == chassis ||
> +            gateway_chassis_contains(gateway_chassis, chassis))) {

If we update the port binding to always reflect the active gateway, is
this additional check needed?  What's the benefit of the added flows
on the backup gateways?

>
>          /* Table 33, priority 100.
>           * =======================
> @@ -413,7 +423,8 @@ consider_port_binding(enum mf_field_id mff_ovn_geneve,
>
>          ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, 100, 0,
>                          &match, ofpacts_p);
> -        return;
> +
> +        goto out;
>      }
>
>      /* Find the OpenFlow port for the logical port, as 'ofport'.  This is
> @@ -442,7 +453,7 @@ consider_port_binding(enum mf_field_id mff_ovn_geneve,
>      bool is_remote = false;
>      if (binding->parent_port && *binding->parent_port) {
>          if (!binding->tag) {
> -            return;
> +            goto out;
>          }
>          ofport = u16_to_ofp(simap_get(&localvif_to_ofport,
>                                        binding->parent_port));
> @@ -460,27 +471,34 @@ consider_port_binding(enum mf_field_id mff_ovn_geneve,
>          }
>      }
>
> +    bool is_ha_remote = false;
>      const struct chassis_tunnel *tun = NULL;
>      const struct sbrec_port_binding *localnet_port =
>          get_localnet_port(local_datapaths, dp_key);
>      if (!ofport) {
>          /* It is remote port, may be reached by tunnel or localnet port */
>          is_remote = true;
> -        if (!binding->chassis) {
> -            return;
> -        }
>          if (localnet_port) {
>              ofport = u16_to_ofp(simap_get(&localvif_to_ofport,
>                                            localnet_port->logical_port));
>              if (!ofport) {
> -                return;
> +                goto out;
>              }
>          } else {
> -            tun = chassis_tunnel_find(binding->chassis->name);
> -            if (!tun) {
> -                return;
> +            if (!gateway_chassis || ovs_list_is_short(gateway_chassis)) {
> +                /* It's on a single remote chassis */
> +                if (!binding->chassis) {
> +                    goto out;
> +                }
> +                tun = chassis_tunnel_find(binding->chassis->name);
> +                if (!tun) {
> +                    goto out;
> +                }
> +                ofport = tun->ofport;
> +            } else {
> +                /* It's distributed across the "gateway_chassis" list */
> +                is_ha_remote = true;
>              }
> -            ofport = tun->ofport;
>          }
>      }
>
> @@ -575,7 +593,7 @@ consider_port_binding(enum mf_field_id mff_ovn_geneve,
>          }
>          ofctrl_add_flow(flow_table, OFTABLE_LOG_TO_PHY, 100, 0,
>                          &match, ofpacts_p);
> -    } else if (!tun) {
> +    } else if (!tun && !is_ha_remote) {
>          /* Remote port connected by localnet port */
>          /* Table 33, priority 100.
>           * =======================
> @@ -615,14 +633,84 @@ consider_port_binding(enum mf_field_id mff_ovn_geneve,
>          match_set_metadata(&match, htonll(dp_key));
>          match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0, port_key);
>
> -        put_encapsulation(mff_ovn_geneve, tun, binding->datapath,
> -                          port_key, ofpacts_p);
> +        if (!is_ha_remote) {
> +            /* Setup encapsulation */
> +            put_encapsulation(mff_ovn_geneve, tun, binding->datapath,
> +                              port_key, ofpacts_p);
> +            /* Output to tunnel. */
> +            ofpact_put_OUTPUT(ofpacts_p)->port = ofport;
> +        } else {
> +            struct gateway_chassis *gwc;
> +            /* Make sure all tunnel endpoints use the same encapsulation,
> +             * and set it up */
> +            LIST_FOR_EACH (gwc, node, gateway_chassis) {
> +                if (gwc->db->chassis) {
> +                    if (!tun) {
> +                        tun = chassis_tunnel_find(gwc->db->chassis->name);
> +                    } else {
> +                        struct chassis_tunnel *chassis_tunnel =
> +                            chassis_tunnel_find(gwc->db->chassis->name);
> +                        if (chassis_tunnel &&
> +                            tun->type != chassis_tunnel->type) {
> +                            static struct vlog_rate_limit rl =
> +                                VLOG_RATE_LIMIT_INIT(1, 1);
> +                            VLOG_ERR_RL(&rl, "Port %s has Gateway_Chassis "
> +                                             "with mixed encapsulations, only "
> +                                             "uniform encapsulations are "
> +                                             "supported.",
> +                                        binding->logical_port);
> +                            goto out;
> +                        }
> +                    }
> +                }
> +            }
> +            if (!tun) {
> +                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
> +                VLOG_ERR_RL(&rl, "No tunnel endpoint found for gateways in "
> +                                 "Gateway_Chassis of port %s",
> +                            binding->logical_port);
> +                goto out;
> +            }
>
> -        /* Output to tunnel. */
> -        ofpact_put_OUTPUT(ofpacts_p)->port = ofport;
> +            put_encapsulation(mff_ovn_geneve, tun, binding->datapath,
> +                              port_key, ofpacts_p);
> +
> +            /* Output to tunnels with active/backup */
> +            struct ofpact_bundle *bundle = ofpact_put_BUNDLE(ofpacts_p);
> +
> +            LIST_FOR_EACH (gwc, node, gateway_chassis) {
> +                if (gwc->db->chassis) {
> +                    tun = chassis_tunnel_find(gwc->db->chassis->name);
> +                    if (!tun) {
> +                        continue;
> +                    }
> +                    if (bundle->n_slaves >= BUNDLE_MAX_SLAVES) {
> +                        static struct vlog_rate_limit rl =
> +                                VLOG_RATE_LIMIT_INIT(1, 1);
> +                        VLOG_WARN_RL(&rl, "Remote endpoints for port beyond "
> +                                          "BUNDLE_MAX_SLAVES");
> +                        break;
> +                    }
> +                    ofpbuf_put(ofpacts_p, &tun->ofport,
> +                               sizeof tun->ofport);
> +                    bundle->n_slaves++;
> +                }
> +            }
> +
> +            ofpact_finish_BUNDLE(ofpacts_p, &bundle);
> +            bundle->algorithm = NX_BD_ALG_ACTIVE_BACKUP;
> +            /* Although ACTIVE_BACKUP bundle algorithm seems to ignore
> +             * the next two fields, those are always set */
> +            bundle->basis = 0;
> +            bundle->fields = NX_HASH_FIELDS_ETH_SRC;
> +        }
>          ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 100, 0,
>                          &match, ofpacts_p);
>      }
> +out:
> +    if (gateway_chassis) {
> +        gateway_chassis_destroy(gateway_chassis);
> +    }
>  }
>
>  static void
> @@ -770,7 +858,8 @@ physical_run(struct controller_ctx *ctx, enum mf_field_id mff_ovn_geneve,
>               const struct sbrec_chassis *chassis,
>               const struct simap *ct_zones, struct lport_index *lports,
>               struct hmap *flow_table, struct hmap *local_datapaths,
> -             const struct sset *local_lports)
> +             const struct sset *local_lports,
> +             struct chassis_index *chassis_index)
>  {
>
>      /* This bool tracks physical mapping changes. */
> @@ -891,6 +980,7 @@ physical_run(struct controller_ctx *ctx, enum mf_field_id mff_ovn_geneve,
>      const struct sbrec_port_binding *binding;
>      SBREC_PORT_BINDING_FOR_EACH (binding, ctx->ovnsb_idl) {
>          consider_port_binding(mff_ovn_geneve, ct_zones, lports,
> +                              chassis_index,
>                                local_datapaths, binding, chassis,
>                                &ofpacts, flow_table);
>      }
> diff --git a/ovn/controller/physical.h b/ovn/controller/physical.h
> index 66aa80e..9019621 100644
> --- a/ovn/controller/physical.h
> +++ b/ovn/controller/physical.h
> @@ -33,6 +33,7 @@ struct ovsdb_idl;
>  struct ovsrec_bridge;
>  struct simap;
>  struct sset;
> +struct chassis_index;
>
>  /* OVN Geneve option information.
>   *
> @@ -47,6 +48,7 @@ void physical_run(struct controller_ctx *, enum mf_field_id mff_ovn_geneve,
>                    const struct sbrec_chassis *chassis,
>                    const struct simap *ct_zones, struct lport_index *,
>                    struct hmap *flow_table, struct hmap *local_datapaths,
> -                  const struct sset *local_lports);
> +                  const struct sset *local_lports,
> +                  struct chassis_index *chassis_index);
>
>  #endif /* ovn/physical.h */
> diff --git a/tests/ovn.at b/tests/ovn.at
> index b1dcd6a..58b7780 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -6812,6 +6812,189 @@ OVS_APP_EXIT_AND_WAIT([ovs-vswitchd])
>  OVS_APP_EXIT_AND_WAIT([ovsdb-server])
>  AT_CLEANUP
>
> +AT_SETUP([ovn -- packet test with HA distributed router gateway port])
> +AT_SKIP_IF([test $HAVE_PYTHON = no])
> +ovn_start
> +
> +net_add n1
> +
> +sim_add hv1
> +as hv1
> +ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.1
> +ovs-vsctl -- add-port br-int hv1-vif1 -- \
> +    set interface hv1-vif1 external-ids:iface-id=foo1 \
> +    options:tx_pcap=hv1/vif1-tx.pcap \
> +    options:rxq_pcap=hv1/vif1-rx.pcap \
> +    ofport-request=1
> +
> +sim_add gw1
> +as gw1
> +ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.2
> +
> +sim_add gw2
> +as gw2
> +ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.4
> +
> +sim_add ext1
> +as ext1
> +ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.3
> +ovs-vsctl -- add-port br-int ext1-vif1 -- \
> +    set interface ext1-vif1 external-ids:iface-id=outside1 \
> +    options:tx_pcap=ext1/vif1-tx.pcap \
> +    options:rxq_pcap=ext1/vif1-rx.pcap \
> +    ofport-request=1
> +
> +# Pre-populate the hypervisors' ARP tables so that we don't lose any
> +# packets for ARP resolution (native tunneling doesn't queue packets
> +# for ARP resolution).
> +ovn_populate_arp
> +
> +ovn-nbctl create Logical_Router name=R1
> +
> +ovn-nbctl ls-add foo
> +ovn-nbctl ls-add alice
> +ovn-nbctl ls-add outside
> +
> +# Connect foo to R1
> +ovn-nbctl lrp-add R1 foo 00:00:01:01:02:03 192.168.1.1/24
> +ovn-nbctl lsp-add foo rp-foo -- set Logical_Switch_Port rp-foo \
> +    type=router options:router-port=foo \
> +    -- lsp-set-addresses rp-foo router
> +
> +# Connect alice to R1 as distributed router gateway port on gw1
> +ovn-nbctl lrp-add R1 alice 00:00:02:01:02:03 172.16.1.1/24
> +
> +ovn-nbctl \
> +    --id=@gc0 create Gateway_Chassis name=alice_gw1 \
> +                                     chassis_name=gw1 \
> +                                     priority=20 -- \
> +    --id=@gc1 create Gateway_Chassis name=alice_gw2 \
> +                                     chassis_name=gw2 \
> +                                     priority=10 -- \
> +    set Logical_Router_Port alice 'gateway_chassis=[@gc0, at gc1]'
> +
> +ovn-nbctl lsp-add alice rp-alice -- set Logical_Switch_Port rp-alice \
> +    type=router options:router-port=alice \
> +    -- lsp-set-addresses rp-alice router
> +
> +# Create logical port foo1 in foo
> +ovn-nbctl lsp-add foo foo1 \
> +-- lsp-set-addresses foo1 "f0:00:00:01:02:03 192.168.1.2"
> +
> +# Create logical port outside1 in outside
> +ovn-nbctl lsp-add outside outside1 \
> +-- lsp-set-addresses outside1 "f0:00:00:01:02:04 172.16.1.3"
> +
> +# Create localnet port in alice
> +ovn-nbctl lsp-add alice ln-alice
> +ovn-nbctl lsp-set-addresses ln-alice unknown
> +ovn-nbctl lsp-set-type ln-alice localnet
> +ovn-nbctl lsp-set-options ln-alice network_name=phys
> +
> +# Create localnet port in outside
> +ovn-nbctl lsp-add outside ln-outside
> +ovn-nbctl lsp-set-addresses ln-outside unknown
> +ovn-nbctl lsp-set-type ln-outside localnet
> +ovn-nbctl lsp-set-options ln-outside network_name=phys
> +
> +# Create bridge-mappings on gw1, gw2 and ext1, hv1 doesn't need
> +# mapping to the external network, is the one generating packets
> +as gw1 ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys
> +as gw2 ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys
> +as ext1 ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys
> +
> +AT_CHECK([ovn-nbctl --timeout=3 --wait=sb sync], [0], [ignore])

I wonder if you change wait=sb to wait=hv, if that's good enough to
remove the sleep right after?

> +
> +# Allow some time for ovn-northd and ovn-controller to catch up.
> +# XXX This should be more systematic.
> +sleep 2


> +
> +ip_to_hex() {
> +    printf "%02x%02x%02x%02x" "$@"
> +}
> +
> +reset_pcap_file() {
> +    local iface=$1
> +    local pcap_file=$2
> +    ovs-vsctl -- set Interface $iface options:tx_pcap=dummy-tx.pcap \
> +options:rxq_pcap=dummy-rx.pcap
> +    rm -f ${pcap_file}*.pcap
> +    ovs-vsctl -- set Interface $iface options:tx_pcap=${pcap_file}-tx.pcap \
> +options:rxq_pcap=${pcap_file}-rx.pcap
> +}
> +
> +test_ip_packet()
> +{
> +    local active_gw=$1
> +    local backup_gw=$2
> +
> +    # Send ip packet between foo1 and outside1
> +    src_mac="f00000010203" # foo1 mac
> +    dst_mac="000001010203" # rp-foo mac (internal router leg)
> +    src_ip=`ip_to_hex 192 168 1 2`
> +    dst_ip=`ip_to_hex 172 16 1 3`
> +    packet=${dst_mac}${src_mac}08004500001c0000000040110000${src_ip}${dst_ip}0035111100080000
> +
> +    # ARP request packet to expect at outside1
> +    #arp_request=ffffffffffff${src_mac}08060001080006040001${src_mac}${src_ip}000000000000${dst_ip}
> +
> +    as hv1 ovs-appctl netdev-dummy/receive hv1-vif1 $packet
> +
> +    # Send ARP reply from outside1 back to the router
> +    # XXX: note, we could avoid this if we plug this port into a netns
> +    # and setup the IP address into the port, so the kernel would simply reply

which you could do in tests/system-ovn.at if you wanted ...

> +    src_mac="000002010203"
> +    reply_mac="f00000010204"
> +    dst_ip=`ip_to_hex 172 16 1 3`
> +    src_ip=`ip_to_hex 172 16 1 1`
> +    arp_reply=${src_mac}${reply_mac}08060001080006040002${reply_mac}${dst_ip}${src_mac}${src_ip}
> +
> +    as ext1 ovs-appctl netdev-dummy/receive ext1-vif1 $arp_reply
> +
> +    # Packet to Expect at ext1 chassis, outside1 port
> +    src_mac="000002010203"
> +    dst_mac="f00000010204"
> +    src_ip=`ip_to_hex 192 168 1 2`
> +    dst_ip=`ip_to_hex 172 16 1 3`
> +    expected=${dst_mac}${src_mac}08004500001c000000003f110100${src_ip}${dst_ip}0035111100080000
> +    echo $expected > ext1-vif1.expected
> +
> +    as $active_gw reset_pcap_file br-phys_n1 $active_gw/br-phys_n1
> +    as $backup_gw reset_pcap_file br-phys_n1 $backup_gw/br-phys_n1
> +    as ext1 reset_pcap_file ext1-vif1 ext1/vif1
> +
> +    # Resend packet from foo1 to outside1
> +    as hv1 ovs-appctl netdev-dummy/receive hv1-vif1 $packet
> +
> +    sleep 1
> +
> +    OVN_CHECK_PACKETS([ext1/vif1-tx.pcap], [ext1-vif1.expected])
> +    $PYTHON "$top_srcdir/utilities/ovs-pcap.in" $active_gw/br-phys_n1-tx.pcap  > packets
> +    AT_CHECK([grep $expected packets | sort], [0], [expout])
> +    $PYTHON "$top_srcdir/utilities/ovs-pcap.in" $backup_gw/br-phys_n1-tx.pcap  > packets
> +    AT_CHECK([grep $expected packets | sort], [0], [])
> +}
> +
> +test_ip_packet gw1 gw2
> +
> +ovn-nbctl --wait=hv \
> +    --id=@gc0 create Gateway_Chassis name=alice_gw1 \
> +                                     chassis_name=gw1 \
> +                                     priority=10 -- \
> +    --id=@gc1 create Gateway_Chassis name=alice_gw2 \
> +                                     chassis_name=gw2 \
> +                                     priority=20 -- \
> +    set Logical_Router_Port alice 'gateway_chassis=[@gc0, at gc1]'
> +
> +test_ip_packet gw2 gw1
> +
> +OVN_CLEANUP([hv1],[gw1],[gw2],[ext1])
> +AT_CLEANUP
> +
>  AT_SETUP([ovn -- 1 LR with distributed router gateway port])
>  AT_SKIP_IF([test $HAVE_PYTHON = no])
>  ovn_start
> @@ -6937,7 +7120,11 @@ ovn-sbctl dump-flows
>  echo "---------------------"
>  ovn-sbctl list chassis
>  ovn-sbctl list encap
> -echo "---------------------"
> +echo "------ Gateway_Chassis dump (SBDB) -------"
> +ovn-sbctl list Gateway_Chassis
> +echo "------ Port_Binding chassisredirect -------"
> +ovn-sbctl find Port_Binding type=chassisredirect
> +echo "-------------------------------------------"
>
>  echo "------ hv1 dump ----------"
>  as hv1 ovs-ofctl show br-int
> @@ -6950,6 +7137,7 @@ as hv3 ovs-ofctl show br-int
>  as hv3 ovs-ofctl dump-flows br-int
>  echo "--------------------------"
>
> +
>  # Check that redirect mapping is programmed only on hv2
>  AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=33 | grep =0x3,metadata=0x1 | wc -l], [0], [0
>  ])
> @@ -7546,5 +7734,145 @@ AT_CHECK([ovn-sbctl --bare --columns gateway_chassis find port_binding logical_p
>  ])
>  AT_CHECK([ovn-sbctl --bare --columns options find port_binding logical_port="cr-alice" | grep 'redirect-chassis=gw1' | wc -l], [0], [1
>  ])
> +
> +AT_CLEANUP
> +
> +AT_SETUP([ovn -- 1 LR with HA distributed router gateway port])
> +AT_SKIP_IF([test $HAVE_PYTHON = no])
> +ovn_start
> +
> +net_add n1
> +
> +# create gateways with external network connectivity
> +
> +for i in 1 2; do
> +    sim_add gw$i
> +    as gw$i
> +    ovs-vsctl add-br br-phys
> +    ovn_attach n1 br-phys 192.168.0.$i
> +    ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys
> +done
> +
> +ovn-nbctl ls-add inside
> +ovn-nbctl ls-add outside
> +
> +# create hypervisors with a vif port each to an internal network
> +
> +for i in 1 2; do
> +    sim_add hv$i
> +    as hv$i
> +    ovs-vsctl add-br br-phys
> +    ovn_attach n1 br-phys 192.168.0.1$i
> +    ovs-vsctl -- add-port br-int hv$i-vif1 -- \
> +        set interface hv$i-vif1 external-ids:iface-id=inside$i \
> +        options:tx_pcap=hv$i/vif1-tx.pcap \
> +        options:rxq_pcap=hv$i/vif1-rx.pcap \
> +        ofport-request=1
> +
> +        ovn-nbctl lsp-add inside inside$i \
> +            -- lsp-set-addresses inside$i "f0:00:00:01:22:$i 192.168.1.10$i"
> +
> +done
> +
> +ovn_populate_arp
> +
> +ovn-nbctl create Logical_Router name=R1
> +
> +# Connect inside to R1
> +ovn-nbctl lrp-add R1 inside 00:00:01:01:02:03 192.168.1.1/24
> +ovn-nbctl lsp-add inside rp-inside -- set Logical_Switch_Port rp-inside \
> +    type=router options:router-port=inside \
> +    -- lsp-set-addresses rp-inside router
> +
> +# Connect outside to R1 as distributed router gateway port on gw1+gw2
> +ovn-nbctl lrp-add R1 outside 00:00:02:01:02:04 192.168.0.101/24
> +
> +ovn-nbctl --id=@gc0 create Gateway_Chassis \
> +                    name=outside_gw1 chassis_name=gw1 priority=20 -- \
> +          --id=@gc1 create Gateway_Chassis \
> +                    name=outside_gw2 chassis_name=gw2 priority=10 -- \
> +          set Logical_Router_Port outside 'gateway_chassis=[@gc0, at gc1]'
> +
> +ovn-nbctl lsp-add outside rp-outside -- set Logical_Switch_Port rp-outside \
> +    type=router options:router-port=outside \
> +    -- lsp-set-addresses rp-outside router
> +
> +# Create localnet port in outside
> +ovn-nbctl lsp-add outside ln-outside
> +ovn-nbctl lsp-set-addresses ln-outside unknown
> +ovn-nbctl lsp-set-type ln-outside localnet
> +ovn-nbctl lsp-set-options ln-outside network_name=phys
> +
> +# Allow some time for ovn-northd and ovn-controller to catch up.
> +# XXX This should be more systematic.
> +sleep 2

Add a sync=hv here, instead?

> +
> +echo "---------NB dump-----"
> +ovn-nbctl show
> +echo "---------------------"
> +ovn-nbctl list logical_router
> +echo "---------------------"
> +ovn-nbctl list logical_router_port
> +echo "---------------------"
> +
> +echo "---------SB dump-----"
> +ovn-sbctl list datapath_binding
> +echo "---------------------"
> +ovn-sbctl list port_binding
> +echo "---------------------"
> +ovn-sbctl dump-flows
> +echo "---------------------"
> +ovn-sbctl list chassis
> +ovn-sbctl list encap
> +echo "---------------------"
> +echo "------ Gateway_Chassis dump (SBDB) -------"
> +ovn-sbctl list Gateway_Chassis
> +echo "------ Port_Binding chassisredirect -------"
> +ovn-sbctl find Port_Binding type=chassisredirect
> +echo "-------------------------------------------"
> +
> +
> +hv1_gw1_ofport=$(as hv1 ovs-vsctl --bare --columns ofport find Interface name=ovn-gw1-0)
> +hv1_gw2_ofport=$(as hv1 ovs-vsctl --bare --columns ofport find Interface name=ovn-gw2-0)
> +hv2_gw1_ofport=$(as hv2 ovs-vsctl --bare --columns ofport find Interface name=ovn-gw1-0)
> +hv2_gw2_ofport=$(as hv2 ovs-vsctl --bare --columns ofport find Interface name=ovn-gw2-0)
> +
> +echo $hv1_gw1_ofport
> +echo $hv1_gw2_ofport
> +echo $hv2_gw1_ofport
> +echo $hv2_gw2_ofport
> +
> +echo "--- hv1 ---"
> +as hv1 ovs-ofctl dump-flows br-int table=32
> +
> +echo "--- hv2 ---"
> +as hv2 ovs-ofctl dump-flows br-int table=32
> +
> +AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=32 | grep active_backup | grep slaves:$hv1_gw1_ofport,$hv1_gw2_ofport | wc -l], [0], [1
> +])
> +
> +AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=32 | grep active_backup | grep slaves:$hv2_gw1_ofport,$hv2_gw2_ofport | wc -l], [0], [1
> +])
> +
> +# set higher priority to gw2 instead of gw1, and check for changes
> +
> +ovn-nbctl --id=@gc0 create Gateway_Chassis \
> +                    name=outside_gw1 chassis_name=gw1 priority=10 -- \
> +          --id=@gc1 create Gateway_Chassis \
> +                    name=outside_gw2 chassis_name=gw2 priority=20 -- \
> +          set Logical_Router_Port outside 'gateway_chassis=[@gc0, at gc1]'
> +
> +# XXX: Let the change propagate down to the ovn-controllers
> +sleep 2

A sync=hv should do the trick here.

> +
> +AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=32 | grep active_backup | grep slaves:$hv1_gw2_ofport,$hv1_gw1_ofport | wc -l], [0], [1
> +])
> +
> +AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=32 | grep active_backup | grep slaves:$hv2_gw2_ofport,$hv2_gw1_ofport | wc -l], [0], [1
> +])
> +
> +
> +OVN_CLEANUP([gw1],[gw2],[hv1],[hv2])
> +
>  AT_CLEANUP
>
> --
> 1.8.3.1
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev



-- 
Russell Bryant



More information about the dev mailing list