[ovs-dev] [PATCH v3 3/6] ovn: l3ha, handling of multiple gateway chassis

Miguel Angel Ajo majopela at redhat.com
Wed Jul 5 13:45:15 UTC 2017


This patch handles multiple gateway_chassis with in chassisredirect
ports, any gateway with a chassis redirect port will implement the
rules to de-encapsulate incomming packets for such port.

And hosts targetting a remote chassisredirect port will setup a
bundle(active_backup, ..) action to each tunnel port, in the given
priority order.

Signed-off-by: Miguel Angel Ajo <majopela at redhat.com>
Signed-off-by: Venkata Anil Kommaddi <vkommadi at redhat.com>
---
 ovn/controller/automake.mk      |   2 +
 ovn/controller/binding.c        |  13 +-
 ovn/controller/binding.h        |   1 +
 ovn/controller/gchassis.c       | 176 +++++++++++++++++++++
 ovn/controller/gchassis.h       |  63 ++++++++
 ovn/controller/lflow.c          |   7 +-
 ovn/controller/lport.h          |   4 +
 ovn/controller/ovn-controller.c |  15 +-
 ovn/controller/physical.c       | 124 ++++++++++++---
 ovn/controller/physical.h       |   4 +-
 tests/ovn.at                    | 330 +++++++++++++++++++++++++++++++++++++++-
 11 files changed, 710 insertions(+), 29 deletions(-)
 create mode 100644 ovn/controller/gchassis.c
 create mode 100644 ovn/controller/gchassis.h

diff --git a/ovn/controller/automake.mk b/ovn/controller/automake.mk
index 8c6a787..d3828f5 100644
--- a/ovn/controller/automake.mk
+++ b/ovn/controller/automake.mk
@@ -6,6 +6,8 @@ ovn_controller_ovn_controller_SOURCES = \
 	ovn/controller/chassis.h \
 	ovn/controller/encaps.c \
 	ovn/controller/encaps.h \
+	ovn/controller/gchassis.c \
+	ovn/controller/gchassis.h \
 	ovn/controller/lflow.c \
 	ovn/controller/lflow.h \
 	ovn/controller/lport.c \
diff --git a/ovn/controller/binding.c b/ovn/controller/binding.c
index bb76608..f5526fd 100644
--- a/ovn/controller/binding.c
+++ b/ovn/controller/binding.c
@@ -15,6 +15,7 @@
 
 #include <config.h>
 #include "binding.h"
+#include "gchassis.h"
 #include "lflow.h"
 #include "lport.h"
 
@@ -26,6 +27,7 @@
 #include "lib/vswitch-idl.h"
 #include "openvswitch/hmap.h"
 #include "openvswitch/vlog.h"
+#include "ovn/lib/chassis-index.h"
 #include "ovn/lib/ovn-sb-idl.h"
 #include "ovn-controller.h"
 
@@ -394,12 +396,15 @@ consider_local_datapath(struct controller_ctx *ctx,
                                false, local_datapaths);
         }
     } else if (!strcmp(binding_rec->type, "chassisredirect")) {
-        const char *chassis_id = smap_get(&binding_rec->options,
-                                          "redirect-chassis");
-        our_chassis = chassis_id && !strcmp(chassis_id, chassis_rec->name);
-        if (our_chassis) {
+        if (gateway_chassis_in_pb_contains(binding_rec, chassis_rec)) {
             add_local_datapath(ldatapaths, lports, binding_rec->datapath,
                                false, local_datapaths);
+            /* XXX this should only be set to true if our chassis
+             * (chassis_rec) is the master for this chassisredirect port
+             * but for now we'll bind it only when not bound, this is
+             * handled in subsequent patches */
+            our_chassis = !binding_rec->chassis ||
+                chassis_rec == binding_rec->chassis;
         }
     } else if (!strcmp(binding_rec->type, "l3gateway")) {
         const char *chassis_id = smap_get(&binding_rec->options,
diff --git a/ovn/controller/binding.h b/ovn/controller/binding.h
index 3bfa7d1..136b3a7 100644
--- a/ovn/controller/binding.h
+++ b/ovn/controller/binding.h
@@ -20,6 +20,7 @@
 #include <stdbool.h>
 
 struct controller_ctx;
+struct chassis_index;
 struct hmap;
 struct ldatapath_index;
 struct lport_index;
diff --git a/ovn/controller/gchassis.c b/ovn/controller/gchassis.c
new file mode 100644
index 0000000..27d8f66
--- /dev/null
+++ b/ovn/controller/gchassis.c
@@ -0,0 +1,176 @@
+/* Copyright (c) 2017, Red Hat, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include "gchassis.h"
+#include "lport.h"
+#include "openvswitch/vlog.h"
+#include "ovn/lib/chassis-index.h"
+#include "ovn/lib/ovn-sb-idl.h"
+
+VLOG_DEFINE_THIS_MODULE(gchassis);
+
+/* gateway_chassis ordering
+ */
+static int
+compare_chassis_prio_(const void *a_, const void *b_)
+{
+    const struct gateway_chassis *gc_a = a_;
+    const struct gateway_chassis *gc_b = b_;
+    int prio_diff = gc_b->db->priority - gc_a->db->priority;
+    if (!prio_diff) {
+        return strcmp(gc_a->db->name, gc_b->db->name);
+    }
+    return prio_diff;
+}
+
+struct ovs_list*
+gateway_chassis_get_ordered(const struct sbrec_port_binding *binding,
+                            const struct chassis_index *chassis_index)
+{
+    const char *redir_chassis_str;
+    const struct sbrec_chassis *redirect_chassis = NULL;
+
+    /* XXX: redirect-chassis SBDB option handling is supported for backwards
+     * compatibility with N-1 version of ovn-northd, this support can
+     * be removed in OVS 2.9 where Gateway_Chassis list on the port binding
+     * will allways be populated by northd */
+    redir_chassis_str = smap_get(&binding->options, "redirect-chassis");
+
+    if (redir_chassis_str) {
+        redirect_chassis = chassis_lookup_by_name(chassis_index,
+                                                  redir_chassis_str);
+        if (!redirect_chassis) {
+            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+            VLOG_WARN_RL(&rl, "chassis name (%s) in redirect-chassis option "
+                              "of logical port %s not known",
+                              redir_chassis_str, binding->logical_port);
+        }
+    }
+
+    if (!redirect_chassis && binding->n_gateway_chassis == 0) {
+        return NULL;
+    }
+
+    struct gateway_chassis *gateway_chassis = NULL;
+    int n=0;
+
+    if (binding->n_gateway_chassis) {
+        gateway_chassis = xmalloc(sizeof *gateway_chassis *
+                                  binding->n_gateway_chassis);
+        for (n=0; n < binding->n_gateway_chassis; n++) {
+            gateway_chassis[n].db = binding->gateway_chassis[n];
+            gateway_chassis[n].virtual_gwc = false;
+        }
+        qsort(gateway_chassis, n, sizeof *gateway_chassis,
+              compare_chassis_prio_);
+    } else if (redirect_chassis) {
+        /* When only redirect_chassis is available, return a single
+         * virtual entry that it's not on OVSDB, this way the code
+         * handling the returned list will be uniform, regardless
+         * of gateway_chassis being populated or redirect-chassis option
+         * being used */
+        gateway_chassis = xmalloc(sizeof *gateway_chassis);
+        struct sbrec_gateway_chassis *gwc =
+            xzalloc(sizeof *gateway_chassis->db);
+        sbrec_gateway_chassis_init(gwc);
+        gwc->name = xasprintf("%s_%s", binding->logical_port,
+                                       redirect_chassis->name);
+        gwc->chassis = (struct sbrec_chassis *)redirect_chassis;
+        gateway_chassis->db = gwc;
+        gateway_chassis->virtual_gwc = true;
+        n++;
+    }
+
+    struct ovs_list *list = NULL;
+    if (n) {
+        list = xmalloc(sizeof *list);
+        ovs_list_init(list);
+
+        int i;
+        for (i=0; i<n; i++) {
+            ovs_list_push_back(list, &gateway_chassis[i].node);
+        }
+    }
+
+    return list;
+}
+
+bool
+gateway_chassis_contains(struct ovs_list *gateway_chassis,
+                         const struct sbrec_chassis *chassis) {
+    struct gateway_chassis *chassis_item;
+    if (gateway_chassis) {
+        LIST_FOR_EACH (chassis_item, node, gateway_chassis) {
+            if (chassis_item->db->chassis &&
+                !strcmp(chassis_item->db->chassis->name, chassis->name)) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+void
+gateway_chassis_destroy(struct ovs_list *list)
+{
+    if (!list) {
+        return;
+    }
+
+    /* XXX: This loop is for backwards compatibility with redirect-chassis
+     * which we insert as a single virtual Gateway_Chassis on the ordered
+     * list */
+    struct gateway_chassis *chassis_item;
+    LIST_FOR_EACH (chassis_item, node, list) {
+        if (chassis_item->virtual_gwc) {
+            free(chassis_item->db->name);
+            free((void *)chassis_item->db);
+        }
+    }
+
+    free(ovs_list_front(list));
+    free(list);
+}
+
+bool
+gateway_chassis_in_pb_contains(const struct sbrec_port_binding *binding,
+                               const struct sbrec_chassis *chassis)
+{
+    if (!binding || !chassis) {
+        return false;
+    }
+
+    /* XXX: redirect-chassis handling for backwards compatibility,
+     * with older ovs-northd during upgrade phase, can be removed
+     * for OVS 2.9 */
+    const char *redirect_chassis = smap_get(&binding->options,
+                                            "redirect-chassis");
+    if (binding->n_gateway_chassis) {
+        int n;
+        for (n=0; n < binding->n_gateway_chassis; n++) {
+            if (binding->gateway_chassis[n]->chassis &&
+                !strcmp(binding->gateway_chassis[n]->chassis->name,
+                        chassis->name)) {
+                return true;
+            }
+        }
+    } else if (redirect_chassis) {
+        return !strcmp(redirect_chassis, chassis->name);
+    }
+
+    return false;
+}
diff --git a/ovn/controller/gchassis.h b/ovn/controller/gchassis.h
new file mode 100644
index 0000000..46d5e42
--- /dev/null
+++ b/ovn/controller/gchassis.h
@@ -0,0 +1,63 @@
+/* Copyright (c) 2017 Red Hat, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef OVN_GCHASSIS_H
+#define OVN_GCHASSIS_H 1
+
+#include <stdint.h>
+#include "lib/uuid.h"
+#include "openvswitch/hmap.h"
+#include "openvswitch/list.h"
+
+struct chassis_index;
+struct ovsdb_idl;
+struct sbrec_chassis;
+struct sbrec_gateway_chassis;
+struct sbrec_port_binding;
+
+
+/* Gateway_Chassis management
+ * ==========================
+ *
+ * The following structure and methods handle ordering of Gateway_Chassis
+ * entries in a chassisredirect port. And parsing redirect-chassis option
+ * for backwards compatibility with older (N-1 version of ovn-northd).
+ */
+struct gateway_chassis {
+    struct ovs_list node;
+    const struct sbrec_gateway_chassis *db; /* sbrec row for the gwc */
+    bool virtual_gwc; /* db entry not from SBDB, but from redirect-chassis */
+};
+
+/* Gets, and orders by priority/name the list of Gateway_Chassis */
+struct ovs_list *gateway_chassis_get_ordered(
+        const struct sbrec_port_binding *binding,
+        const struct chassis_index *chassis_index);
+
+/* Checks if an specific chassis is contained in the gateway_chassis
+ * list */
+bool gateway_chassis_contains(struct ovs_list *gateway_chassis,
+                              const struct sbrec_chassis *chassis);
+
+/* Destroy a gateway_chassis list from memory */
+void gateway_chassis_destroy(struct ovs_list *list);
+
+/* Checks if a chassis is referenced in the port_binding gateway_chassis
+ * list or redirect-chassis option (backwards compatibility) */
+bool gateway_chassis_in_pb_contains(
+        const struct sbrec_port_binding *binding,
+        const struct sbrec_chassis *chassis);
+
+#endif /* ovn/controller/gchassis.h */
diff --git a/ovn/controller/lflow.c b/ovn/controller/lflow.c
index b1b4b23..8cc5e7e 100644
--- a/ovn/controller/lflow.c
+++ b/ovn/controller/lflow.c
@@ -14,6 +14,7 @@
  */
 
 #include <config.h>
+#include "gchassis.h"
 #include "lflow.h"
 #include "lport.h"
 #include "ofctrl.h"
@@ -96,7 +97,11 @@ is_chassis_resident_cb(const void *c_aux_, const char *port_name)
 
     const struct sbrec_port_binding *pb
         = lport_lookup_by_name(c_aux->lports, port_name);
-    return pb && pb->chassis && pb->chassis == c_aux->chassis;
+    if (pb && pb->chassis && pb->chassis == c_aux->chassis) {
+        return true;
+    } else {
+        return gateway_chassis_in_pb_contains(pb, c_aux->chassis);
+    }
 }
 
 static bool
diff --git a/ovn/controller/lport.h b/ovn/controller/lport.h
index fe0e430..8937ecb 100644
--- a/ovn/controller/lport.h
+++ b/ovn/controller/lport.h
@@ -17,10 +17,14 @@
 #define OVN_LPORT_H 1
 
 #include <stdint.h>
+#include "lib/uuid.h"
 #include "openvswitch/hmap.h"
+#include "openvswitch/list.h"
 
 struct ovsdb_idl;
+struct sbrec_chassis;
 struct sbrec_datapath_binding;
+struct sbrec_port_binding;
 
 /* Database indexes.
  * =================
diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c
index 00b4cda..3e6c86c 100644
--- a/ovn/controller/ovn-controller.c
+++ b/ovn/controller/ovn-controller.c
@@ -40,6 +40,7 @@
 #include "openvswitch/vconn.h"
 #include "openvswitch/vlog.h"
 #include "ovn/actions.h"
+#include "ovn/lib/chassis-index.h"
 #include "ovn/lib/ovn-sb-idl.h"
 #include "ovn/lib/ovn-util.h"
 #include "patch.h"
@@ -148,6 +149,10 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
     struct ovsdb_idl_condition mg = OVSDB_IDL_CONDITION_INIT(&mg);
     struct ovsdb_idl_condition dns = OVSDB_IDL_CONDITION_INIT(&dns);
     sbrec_port_binding_add_clause_type(&pb, OVSDB_F_EQ, "patch");
+    /* XXX: We can optimize this, if we find a way to only monitor
+     * ports that have a Gateway_Chassis that point's to our own
+     * chassis */
+    sbrec_port_binding_add_clause_type(&pb, OVSDB_F_EQ, "chassisredirect");
     if (chassis) {
         /* This should be mostly redundant with the other clauses for port
          * bindings, but it allows us to catch any ports that are assigned to
@@ -165,10 +170,6 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
         sbrec_port_binding_add_clause_options(&pb, OVSDB_F_INCLUDES, &l2);
         const struct smap l3 = SMAP_CONST1(&l3, "l3gateway-chassis", id);
         sbrec_port_binding_add_clause_options(&pb, OVSDB_F_INCLUDES, &l3);
-        const struct smap redirect = SMAP_CONST1(&redirect,
-                                                 "redirect-chassis", id);
-        sbrec_port_binding_add_clause_options(&pb, OVSDB_F_INCLUDES,
-                                              &redirect);
     }
     if (local_ifaces) {
         const char *name;
@@ -627,9 +628,12 @@ main(int argc, char *argv[])
         struct ldatapath_index ldatapaths;
         struct lport_index lports;
         struct mcgroup_index mcgroups;
+        struct chassis_index chassis_index;
+
         ldatapath_index_init(&ldatapaths, ctx.ovnsb_idl);
         lport_index_init(&lports, ctx.ovnsb_idl);
         mcgroup_index_init(&mcgroups, ctx.ovnsb_idl);
+        chassis_index_init(&chassis_index, ctx.ovnsb_idl);
 
         const struct sbrec_chassis *chassis = NULL;
         if (chassis_id) {
@@ -662,7 +666,8 @@ main(int argc, char *argv[])
 
                     physical_run(&ctx, mff_ovn_geneve,
                                  br_int, chassis, &ct_zones, &lports,
-                                 &flow_table, &local_datapaths, &local_lports);
+                                 &flow_table, &local_datapaths, &local_lports,
+                                 &chassis_index);
 
                     ofctrl_put(&flow_table, &pending_ct_zones,
                                get_nb_cfg(ctx.ovnsb_idl));
diff --git a/ovn/controller/physical.c b/ovn/controller/physical.c
index f2d9676..00e12e6 100644
--- a/ovn/controller/physical.c
+++ b/ovn/controller/physical.c
@@ -17,16 +17,21 @@
 #include "binding.h"
 #include "byte-order.h"
 #include "flow.h"
+#include "gchassis.h"
 #include "lflow.h"
 #include "lport.h"
+#include "lib/bundle.h"
 #include "lib/poll-loop.h"
+#include "lib/uuid.h"
 #include "ofctrl.h"
+#include "openvswitch/list.h"
 #include "openvswitch/hmap.h"
 #include "openvswitch/match.h"
 #include "openvswitch/ofp-actions.h"
 #include "openvswitch/ofpbuf.h"
 #include "openvswitch/vlog.h"
 #include "ovn-controller.h"
+#include "ovn/lib/chassis-index.h"
 #include "ovn/lib/ovn-sb-idl.h"
 #include "ovn/lib/ovn-util.h"
 #include "physical.h"
@@ -289,6 +294,7 @@ static void
 consider_port_binding(enum mf_field_id mff_ovn_geneve,
                       const struct simap *ct_zones,
                       const struct lport_index *lports,
+                      const struct chassis_index *chassis_index,
                       struct hmap *local_datapaths,
                       const struct sbrec_port_binding *binding,
                       const struct sbrec_chassis *chassis,
@@ -353,8 +359,12 @@ consider_port_binding(enum mf_field_id mff_ovn_geneve,
         return;
     }
 
+    struct ovs_list *gateway_chassis = NULL;
+    gateway_chassis = gateway_chassis_get_ordered(binding, chassis_index);
+
     if (!strcmp(binding->type, "chassisredirect")
-        && binding->chassis == chassis) {
+        && (binding->chassis == chassis ||
+            gateway_chassis_contains(gateway_chassis, chassis))) {
 
         /* Table 33, priority 100.
          * =======================
@@ -413,7 +423,8 @@ consider_port_binding(enum mf_field_id mff_ovn_geneve,
 
         ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, 100, 0,
                         &match, ofpacts_p);
-        return;
+
+        goto out;
     }
 
     /* Find the OpenFlow port for the logical port, as 'ofport'.  This is
@@ -442,7 +453,7 @@ consider_port_binding(enum mf_field_id mff_ovn_geneve,
     bool is_remote = false;
     if (binding->parent_port && *binding->parent_port) {
         if (!binding->tag) {
-            return;
+            goto out;
         }
         ofport = u16_to_ofp(simap_get(&localvif_to_ofport,
                                       binding->parent_port));
@@ -460,27 +471,34 @@ consider_port_binding(enum mf_field_id mff_ovn_geneve,
         }
     }
 
+    bool is_ha_remote = false;
     const struct chassis_tunnel *tun = NULL;
     const struct sbrec_port_binding *localnet_port =
         get_localnet_port(local_datapaths, dp_key);
     if (!ofport) {
         /* It is remote port, may be reached by tunnel or localnet port */
         is_remote = true;
-        if (!binding->chassis) {
-            return;
-        }
         if (localnet_port) {
             ofport = u16_to_ofp(simap_get(&localvif_to_ofport,
                                           localnet_port->logical_port));
             if (!ofport) {
-                return;
+                goto out;
             }
         } else {
-            tun = chassis_tunnel_find(binding->chassis->name);
-            if (!tun) {
-                return;
+            if (!gateway_chassis || ovs_list_is_short(gateway_chassis)) {
+                /* It's on a single remote chassis */
+                if (!binding->chassis) {
+                    goto out;
+                }
+                tun = chassis_tunnel_find(binding->chassis->name);
+                if (!tun) {
+                    goto out;
+                }
+                ofport = tun->ofport;
+            } else {
+                /* It's distributed across the "gateway_chassis" list */
+                is_ha_remote = true;
             }
-            ofport = tun->ofport;
         }
     }
 
@@ -575,7 +593,7 @@ consider_port_binding(enum mf_field_id mff_ovn_geneve,
         }
         ofctrl_add_flow(flow_table, OFTABLE_LOG_TO_PHY, 100, 0,
                         &match, ofpacts_p);
-    } else if (!tun) {
+    } else if (!tun && !is_ha_remote) {
         /* Remote port connected by localnet port */
         /* Table 33, priority 100.
          * =======================
@@ -615,14 +633,84 @@ consider_port_binding(enum mf_field_id mff_ovn_geneve,
         match_set_metadata(&match, htonll(dp_key));
         match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0, port_key);
 
-        put_encapsulation(mff_ovn_geneve, tun, binding->datapath,
-                          port_key, ofpacts_p);
+        if (!is_ha_remote) {
+            /* Setup encapsulation */
+            put_encapsulation(mff_ovn_geneve, tun, binding->datapath,
+                              port_key, ofpacts_p);
+            /* Output to tunnel. */
+            ofpact_put_OUTPUT(ofpacts_p)->port = ofport;
+        } else {
+            struct gateway_chassis *gwc;
+            /* Make sure all tunnel endpoints use the same encapsulation,
+             * and set it up */
+            LIST_FOR_EACH (gwc, node, gateway_chassis) {
+                if (gwc->db->chassis) {
+                    if (!tun) {
+                        tun = chassis_tunnel_find(gwc->db->chassis->name);
+                    } else {
+                        struct chassis_tunnel *chassis_tunnel =
+                            chassis_tunnel_find(gwc->db->chassis->name);
+                        if (chassis_tunnel &&
+                            tun->type != chassis_tunnel->type) {
+                            static struct vlog_rate_limit rl =
+                                VLOG_RATE_LIMIT_INIT(1, 1);
+                            VLOG_ERR_RL(&rl, "Port %s has Gateway_Chassis "
+                                             "with mixed encapsulations, only "
+                                             "uniform encapsulations are "
+                                             "supported.",
+                                        binding->logical_port);
+                            goto out;
+                        }
+                    }
+                }
+            }
+            if (!tun) {
+                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+                VLOG_ERR_RL(&rl, "No tunnel endpoint found for gateways in "
+                                 "Gateway_Chassis of port %s",
+                            binding->logical_port);
+                goto out;
+            }
 
-        /* Output to tunnel. */
-        ofpact_put_OUTPUT(ofpacts_p)->port = ofport;
+            put_encapsulation(mff_ovn_geneve, tun, binding->datapath,
+                              port_key, ofpacts_p);
+
+            /* Output to tunnels with active/backup */
+            struct ofpact_bundle *bundle = ofpact_put_BUNDLE(ofpacts_p);
+
+            LIST_FOR_EACH (gwc, node, gateway_chassis) {
+                if (gwc->db->chassis) {
+                    tun = chassis_tunnel_find(gwc->db->chassis->name);
+                    if (!tun) {
+                        continue;
+                    }
+                    if (bundle->n_slaves >= BUNDLE_MAX_SLAVES) {
+                        static struct vlog_rate_limit rl =
+                                VLOG_RATE_LIMIT_INIT(1, 1);
+                        VLOG_WARN_RL(&rl, "Remote endpoints for port beyond "
+                                          "BUNDLE_MAX_SLAVES");
+                        break;
+                    }
+                    ofpbuf_put(ofpacts_p, &tun->ofport,
+                               sizeof tun->ofport);
+                    bundle->n_slaves++;
+                }
+            }
+
+            ofpact_finish_BUNDLE(ofpacts_p, &bundle);
+            bundle->algorithm = NX_BD_ALG_ACTIVE_BACKUP;
+            /* Although ACTIVE_BACKUP bundle algorithm seems to ignore
+             * the next two fields, those are always set */
+            bundle->basis = 0;
+            bundle->fields = NX_HASH_FIELDS_ETH_SRC;
+        }
         ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 100, 0,
                         &match, ofpacts_p);
     }
+out:
+    if (gateway_chassis) {
+        gateway_chassis_destroy(gateway_chassis);
+    }
 }
 
 static void
@@ -770,7 +858,8 @@ physical_run(struct controller_ctx *ctx, enum mf_field_id mff_ovn_geneve,
              const struct sbrec_chassis *chassis,
              const struct simap *ct_zones, struct lport_index *lports,
              struct hmap *flow_table, struct hmap *local_datapaths,
-             const struct sset *local_lports)
+             const struct sset *local_lports,
+             struct chassis_index *chassis_index)
 {
 
     /* This bool tracks physical mapping changes. */
@@ -891,6 +980,7 @@ physical_run(struct controller_ctx *ctx, enum mf_field_id mff_ovn_geneve,
     const struct sbrec_port_binding *binding;
     SBREC_PORT_BINDING_FOR_EACH (binding, ctx->ovnsb_idl) {
         consider_port_binding(mff_ovn_geneve, ct_zones, lports,
+                              chassis_index,
                               local_datapaths, binding, chassis,
                               &ofpacts, flow_table);
     }
diff --git a/ovn/controller/physical.h b/ovn/controller/physical.h
index 66aa80e..9019621 100644
--- a/ovn/controller/physical.h
+++ b/ovn/controller/physical.h
@@ -33,6 +33,7 @@ struct ovsdb_idl;
 struct ovsrec_bridge;
 struct simap;
 struct sset;
+struct chassis_index;
 
 /* OVN Geneve option information.
  *
@@ -47,6 +48,7 @@ void physical_run(struct controller_ctx *, enum mf_field_id mff_ovn_geneve,
                   const struct sbrec_chassis *chassis,
                   const struct simap *ct_zones, struct lport_index *,
                   struct hmap *flow_table, struct hmap *local_datapaths,
-                  const struct sset *local_lports);
+                  const struct sset *local_lports,
+                  struct chassis_index *chassis_index);
 
 #endif /* ovn/physical.h */
diff --git a/tests/ovn.at b/tests/ovn.at
index b1dcd6a..58b7780 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -6812,6 +6812,189 @@ OVS_APP_EXIT_AND_WAIT([ovs-vswitchd])
 OVS_APP_EXIT_AND_WAIT([ovsdb-server])
 AT_CLEANUP
 
+AT_SETUP([ovn -- packet test with HA distributed router gateway port])
+AT_SKIP_IF([test $HAVE_PYTHON = no])
+ovn_start
+
+net_add n1
+
+sim_add hv1
+as hv1
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.1
+ovs-vsctl -- add-port br-int hv1-vif1 -- \
+    set interface hv1-vif1 external-ids:iface-id=foo1 \
+    options:tx_pcap=hv1/vif1-tx.pcap \
+    options:rxq_pcap=hv1/vif1-rx.pcap \
+    ofport-request=1
+
+sim_add gw1
+as gw1
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.2
+
+sim_add gw2
+as gw2
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.4
+
+sim_add ext1
+as ext1
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.3
+ovs-vsctl -- add-port br-int ext1-vif1 -- \
+    set interface ext1-vif1 external-ids:iface-id=outside1 \
+    options:tx_pcap=ext1/vif1-tx.pcap \
+    options:rxq_pcap=ext1/vif1-rx.pcap \
+    ofport-request=1
+
+# Pre-populate the hypervisors' ARP tables so that we don't lose any
+# packets for ARP resolution (native tunneling doesn't queue packets
+# for ARP resolution).
+ovn_populate_arp
+
+ovn-nbctl create Logical_Router name=R1
+
+ovn-nbctl ls-add foo
+ovn-nbctl ls-add alice
+ovn-nbctl ls-add outside
+
+# Connect foo to R1
+ovn-nbctl lrp-add R1 foo 00:00:01:01:02:03 192.168.1.1/24
+ovn-nbctl lsp-add foo rp-foo -- set Logical_Switch_Port rp-foo \
+    type=router options:router-port=foo \
+    -- lsp-set-addresses rp-foo router
+
+# Connect alice to R1 as distributed router gateway port on gw1
+ovn-nbctl lrp-add R1 alice 00:00:02:01:02:03 172.16.1.1/24
+
+ovn-nbctl \
+    --id=@gc0 create Gateway_Chassis name=alice_gw1 \
+                                     chassis_name=gw1 \
+                                     priority=20 -- \
+    --id=@gc1 create Gateway_Chassis name=alice_gw2 \
+                                     chassis_name=gw2 \
+                                     priority=10 -- \
+    set Logical_Router_Port alice 'gateway_chassis=[@gc0, at gc1]'
+
+ovn-nbctl lsp-add alice rp-alice -- set Logical_Switch_Port rp-alice \
+    type=router options:router-port=alice \
+    -- lsp-set-addresses rp-alice router
+
+# Create logical port foo1 in foo
+ovn-nbctl lsp-add foo foo1 \
+-- lsp-set-addresses foo1 "f0:00:00:01:02:03 192.168.1.2"
+
+# Create logical port outside1 in outside
+ovn-nbctl lsp-add outside outside1 \
+-- lsp-set-addresses outside1 "f0:00:00:01:02:04 172.16.1.3"
+
+# Create localnet port in alice
+ovn-nbctl lsp-add alice ln-alice
+ovn-nbctl lsp-set-addresses ln-alice unknown
+ovn-nbctl lsp-set-type ln-alice localnet
+ovn-nbctl lsp-set-options ln-alice network_name=phys
+
+# Create localnet port in outside
+ovn-nbctl lsp-add outside ln-outside
+ovn-nbctl lsp-set-addresses ln-outside unknown
+ovn-nbctl lsp-set-type ln-outside localnet
+ovn-nbctl lsp-set-options ln-outside network_name=phys
+
+# Create bridge-mappings on gw1, gw2 and ext1, hv1 doesn't need
+# mapping to the external network, is the one generating packets
+as gw1 ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys
+as gw2 ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys
+as ext1 ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys
+
+AT_CHECK([ovn-nbctl --timeout=3 --wait=sb sync], [0], [ignore])
+
+# Allow some time for ovn-northd and ovn-controller to catch up.
+# XXX This should be more systematic.
+sleep 2
+
+ip_to_hex() {
+    printf "%02x%02x%02x%02x" "$@"
+}
+
+reset_pcap_file() {
+    local iface=$1
+    local pcap_file=$2
+    ovs-vsctl -- set Interface $iface options:tx_pcap=dummy-tx.pcap \
+options:rxq_pcap=dummy-rx.pcap
+    rm -f ${pcap_file}*.pcap
+    ovs-vsctl -- set Interface $iface options:tx_pcap=${pcap_file}-tx.pcap \
+options:rxq_pcap=${pcap_file}-rx.pcap
+}
+
+test_ip_packet()
+{
+    local active_gw=$1
+    local backup_gw=$2
+
+    # Send ip packet between foo1 and outside1
+    src_mac="f00000010203" # foo1 mac
+    dst_mac="000001010203" # rp-foo mac (internal router leg)
+    src_ip=`ip_to_hex 192 168 1 2`
+    dst_ip=`ip_to_hex 172 16 1 3`
+    packet=${dst_mac}${src_mac}08004500001c0000000040110000${src_ip}${dst_ip}0035111100080000
+
+    # ARP request packet to expect at outside1
+    #arp_request=ffffffffffff${src_mac}08060001080006040001${src_mac}${src_ip}000000000000${dst_ip}
+
+    as hv1 ovs-appctl netdev-dummy/receive hv1-vif1 $packet
+
+    # Send ARP reply from outside1 back to the router
+    # XXX: note, we could avoid this if we plug this port into a netns
+    # and setup the IP address into the port, so the kernel would simply reply
+    src_mac="000002010203"
+    reply_mac="f00000010204"
+    dst_ip=`ip_to_hex 172 16 1 3`
+    src_ip=`ip_to_hex 172 16 1 1`
+    arp_reply=${src_mac}${reply_mac}08060001080006040002${reply_mac}${dst_ip}${src_mac}${src_ip}
+
+    as ext1 ovs-appctl netdev-dummy/receive ext1-vif1 $arp_reply
+
+    # Packet to Expect at ext1 chassis, outside1 port
+    src_mac="000002010203"
+    dst_mac="f00000010204"
+    src_ip=`ip_to_hex 192 168 1 2`
+    dst_ip=`ip_to_hex 172 16 1 3`
+    expected=${dst_mac}${src_mac}08004500001c000000003f110100${src_ip}${dst_ip}0035111100080000
+    echo $expected > ext1-vif1.expected
+
+    as $active_gw reset_pcap_file br-phys_n1 $active_gw/br-phys_n1
+    as $backup_gw reset_pcap_file br-phys_n1 $backup_gw/br-phys_n1
+    as ext1 reset_pcap_file ext1-vif1 ext1/vif1
+
+    # Resend packet from foo1 to outside1
+    as hv1 ovs-appctl netdev-dummy/receive hv1-vif1 $packet
+
+    sleep 1
+
+    OVN_CHECK_PACKETS([ext1/vif1-tx.pcap], [ext1-vif1.expected])
+    $PYTHON "$top_srcdir/utilities/ovs-pcap.in" $active_gw/br-phys_n1-tx.pcap  > packets
+    AT_CHECK([grep $expected packets | sort], [0], [expout])
+    $PYTHON "$top_srcdir/utilities/ovs-pcap.in" $backup_gw/br-phys_n1-tx.pcap  > packets
+    AT_CHECK([grep $expected packets | sort], [0], [])
+}
+
+test_ip_packet gw1 gw2
+
+ovn-nbctl --wait=hv \
+    --id=@gc0 create Gateway_Chassis name=alice_gw1 \
+                                     chassis_name=gw1 \
+                                     priority=10 -- \
+    --id=@gc1 create Gateway_Chassis name=alice_gw2 \
+                                     chassis_name=gw2 \
+                                     priority=20 -- \
+    set Logical_Router_Port alice 'gateway_chassis=[@gc0, at gc1]'
+
+test_ip_packet gw2 gw1
+
+OVN_CLEANUP([hv1],[gw1],[gw2],[ext1])
+AT_CLEANUP
+
 AT_SETUP([ovn -- 1 LR with distributed router gateway port])
 AT_SKIP_IF([test $HAVE_PYTHON = no])
 ovn_start
@@ -6937,7 +7120,11 @@ ovn-sbctl dump-flows
 echo "---------------------"
 ovn-sbctl list chassis
 ovn-sbctl list encap
-echo "---------------------"
+echo "------ Gateway_Chassis dump (SBDB) -------"
+ovn-sbctl list Gateway_Chassis
+echo "------ Port_Binding chassisredirect -------"
+ovn-sbctl find Port_Binding type=chassisredirect
+echo "-------------------------------------------"
 
 echo "------ hv1 dump ----------"
 as hv1 ovs-ofctl show br-int
@@ -6950,6 +7137,7 @@ as hv3 ovs-ofctl show br-int
 as hv3 ovs-ofctl dump-flows br-int
 echo "--------------------------"
 
+
 # Check that redirect mapping is programmed only on hv2
 AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=33 | grep =0x3,metadata=0x1 | wc -l], [0], [0
 ])
@@ -7546,5 +7734,145 @@ AT_CHECK([ovn-sbctl --bare --columns gateway_chassis find port_binding logical_p
 ])
 AT_CHECK([ovn-sbctl --bare --columns options find port_binding logical_port="cr-alice" | grep 'redirect-chassis=gw1' | wc -l], [0], [1
 ])
+
+AT_CLEANUP
+
+AT_SETUP([ovn -- 1 LR with HA distributed router gateway port])
+AT_SKIP_IF([test $HAVE_PYTHON = no])
+ovn_start
+
+net_add n1
+
+# create gateways with external network connectivity
+
+for i in 1 2; do
+    sim_add gw$i
+    as gw$i
+    ovs-vsctl add-br br-phys
+    ovn_attach n1 br-phys 192.168.0.$i
+    ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys
+done
+
+ovn-nbctl ls-add inside
+ovn-nbctl ls-add outside
+
+# create hypervisors with a vif port each to an internal network
+
+for i in 1 2; do
+    sim_add hv$i
+    as hv$i
+    ovs-vsctl add-br br-phys
+    ovn_attach n1 br-phys 192.168.0.1$i
+    ovs-vsctl -- add-port br-int hv$i-vif1 -- \
+        set interface hv$i-vif1 external-ids:iface-id=inside$i \
+        options:tx_pcap=hv$i/vif1-tx.pcap \
+        options:rxq_pcap=hv$i/vif1-rx.pcap \
+        ofport-request=1
+
+        ovn-nbctl lsp-add inside inside$i \
+            -- lsp-set-addresses inside$i "f0:00:00:01:22:$i 192.168.1.10$i"
+
+done
+
+ovn_populate_arp
+
+ovn-nbctl create Logical_Router name=R1
+
+# Connect inside to R1
+ovn-nbctl lrp-add R1 inside 00:00:01:01:02:03 192.168.1.1/24
+ovn-nbctl lsp-add inside rp-inside -- set Logical_Switch_Port rp-inside \
+    type=router options:router-port=inside \
+    -- lsp-set-addresses rp-inside router
+
+# Connect outside to R1 as distributed router gateway port on gw1+gw2
+ovn-nbctl lrp-add R1 outside 00:00:02:01:02:04 192.168.0.101/24
+
+ovn-nbctl --id=@gc0 create Gateway_Chassis \
+                    name=outside_gw1 chassis_name=gw1 priority=20 -- \
+          --id=@gc1 create Gateway_Chassis \
+                    name=outside_gw2 chassis_name=gw2 priority=10 -- \
+          set Logical_Router_Port outside 'gateway_chassis=[@gc0, at gc1]'
+
+ovn-nbctl lsp-add outside rp-outside -- set Logical_Switch_Port rp-outside \
+    type=router options:router-port=outside \
+    -- lsp-set-addresses rp-outside router
+
+# Create localnet port in outside
+ovn-nbctl lsp-add outside ln-outside
+ovn-nbctl lsp-set-addresses ln-outside unknown
+ovn-nbctl lsp-set-type ln-outside localnet
+ovn-nbctl lsp-set-options ln-outside network_name=phys
+
+# Allow some time for ovn-northd and ovn-controller to catch up.
+# XXX This should be more systematic.
+sleep 2
+
+echo "---------NB dump-----"
+ovn-nbctl show
+echo "---------------------"
+ovn-nbctl list logical_router
+echo "---------------------"
+ovn-nbctl list logical_router_port
+echo "---------------------"
+
+echo "---------SB dump-----"
+ovn-sbctl list datapath_binding
+echo "---------------------"
+ovn-sbctl list port_binding
+echo "---------------------"
+ovn-sbctl dump-flows
+echo "---------------------"
+ovn-sbctl list chassis
+ovn-sbctl list encap
+echo "---------------------"
+echo "------ Gateway_Chassis dump (SBDB) -------"
+ovn-sbctl list Gateway_Chassis
+echo "------ Port_Binding chassisredirect -------"
+ovn-sbctl find Port_Binding type=chassisredirect
+echo "-------------------------------------------"
+
+
+hv1_gw1_ofport=$(as hv1 ovs-vsctl --bare --columns ofport find Interface name=ovn-gw1-0)
+hv1_gw2_ofport=$(as hv1 ovs-vsctl --bare --columns ofport find Interface name=ovn-gw2-0)
+hv2_gw1_ofport=$(as hv2 ovs-vsctl --bare --columns ofport find Interface name=ovn-gw1-0)
+hv2_gw2_ofport=$(as hv2 ovs-vsctl --bare --columns ofport find Interface name=ovn-gw2-0)
+
+echo $hv1_gw1_ofport
+echo $hv1_gw2_ofport
+echo $hv2_gw1_ofport
+echo $hv2_gw2_ofport
+
+echo "--- hv1 ---"
+as hv1 ovs-ofctl dump-flows br-int table=32
+
+echo "--- hv2 ---"
+as hv2 ovs-ofctl dump-flows br-int table=32
+
+AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=32 | grep active_backup | grep slaves:$hv1_gw1_ofport,$hv1_gw2_ofport | wc -l], [0], [1
+])
+
+AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=32 | grep active_backup | grep slaves:$hv2_gw1_ofport,$hv2_gw2_ofport | wc -l], [0], [1
+])
+
+# set higher priority to gw2 instead of gw1, and check for changes
+
+ovn-nbctl --id=@gc0 create Gateway_Chassis \
+                    name=outside_gw1 chassis_name=gw1 priority=10 -- \
+          --id=@gc1 create Gateway_Chassis \
+                    name=outside_gw2 chassis_name=gw2 priority=20 -- \
+          set Logical_Router_Port outside 'gateway_chassis=[@gc0, at gc1]'
+
+# XXX: Let the change propagate down to the ovn-controllers
+sleep 2
+
+AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=32 | grep active_backup | grep slaves:$hv1_gw2_ofport,$hv1_gw1_ofport | wc -l], [0], [1
+])
+
+AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=32 | grep active_backup | grep slaves:$hv2_gw2_ofport,$hv2_gw1_ofport | wc -l], [0], [1
+])
+
+
+OVN_CLEANUP([gw1],[gw2],[hv1],[hv2])
+
 AT_CLEANUP
 
-- 
1.8.3.1



More information about the dev mailing list