[ovs-dev] [PATCH v1 1/4] ovn: l3ha, handling of multiple gateways

majopela at redhat.com majopela at redhat.com
Fri Jun 2 12:31:41 UTC 2017


From: Miguel Angel Ajo <majopela at redhat.com>

This patch handles multiple gateways with priorities in chassisredirect
ports, any gateway with a chassis redirect port will implement the
rules to de-encapsulate incomming packets for such port.

And hosts targetting a remote chassisredirect port will setup a
bundle(active_backup, ..) action to each tunnel port, in the given
priority order.

Signed-off-by: Miguel Angel Ajo <majopela at redhat.com>
---
 ovn/controller/binding.c        |   9 +--
 ovn/controller/lflow.c          |   6 +-
 ovn/controller/lport.c          | 119 ++++++++++++++++++++++++++++++++++++++++
 ovn/controller/lport.h          |  28 ++++++++++
 ovn/controller/ovn-controller.c |   5 +-
 ovn/controller/physical.c       | 114 ++++++++++++++++++++++++++++++++------
 6 files changed, 255 insertions(+), 26 deletions(-)

diff --git a/ovn/controller/binding.c b/ovn/controller/binding.c
index bb76608..d45c5df 100644
--- a/ovn/controller/binding.c
+++ b/ovn/controller/binding.c
@@ -394,12 +394,13 @@ consider_local_datapath(struct controller_ctx *ctx,
                                false, local_datapaths);
         }
     } else if (!strcmp(binding_rec->type, "chassisredirect")) {
-        const char *chassis_id = smap_get(&binding_rec->options,
-                                          "redirect-chassis");
-        our_chassis = chassis_id && !strcmp(chassis_id, chassis_rec->name);
-        if (our_chassis) {
+        if (pb_redirect_chassis_contains(binding_rec, chassis_rec)) {
             add_local_datapath(ldatapaths, lports, binding_rec->datapath,
                                false, local_datapaths);
+            // XXX this should only be set to true if our chassis
+            // (chassis_rec) is the master for this chassisredirect port
+            // but for now we'll bind it only when not bound
+            our_chassis = !binding_rec->chassis;
         }
     } else if (!strcmp(binding_rec->type, "l3gateway")) {
         const char *chassis_id = smap_get(&binding_rec->options,
diff --git a/ovn/controller/lflow.c b/ovn/controller/lflow.c
index b1b4b23..b7d1bcb 100644
--- a/ovn/controller/lflow.c
+++ b/ovn/controller/lflow.c
@@ -96,7 +96,11 @@ is_chassis_resident_cb(const void *c_aux_, const char *port_name)
 
     const struct sbrec_port_binding *pb
         = lport_lookup_by_name(c_aux->lports, port_name);
-    return pb && pb->chassis && pb->chassis == c_aux->chassis;
+    if (pb && pb->chassis && pb->chassis == c_aux->chassis) {
+        return true;
+    } else {
+        return pb_redirect_chassis_contains(pb, c_aux->chassis);
+    }
 }
 
 static bool
diff --git a/ovn/controller/lport.c b/ovn/controller/lport.c
index 906fda2..52608a1 100644
--- a/ovn/controller/lport.c
+++ b/ovn/controller/lport.c
@@ -237,3 +237,122 @@ mcgroup_lookup_by_dp_name(const struct mcgroup_index *mcgroups,
     }
     return NULL;
 }
+
+
+/* redirect-chassis option parsing
+ */
+static int
+compare_chassis_prio_(const void *a_, const void *b_)
+{
+    const struct redirect_chassis *chassis_a = a_;
+    const struct redirect_chassis *chassis_b = b_;
+    int prio_diff = chassis_b->prio - chassis_a->prio;
+    if (!prio_diff) {
+        return strcmp(chassis_a->chassis_id, chassis_b->chassis_id);
+    }
+    return prio_diff;
+}
+
+struct ovs_list*
+parse_redirect_chassis(const struct sbrec_port_binding *binding)
+{
+
+    const char *redir_chassis_const;
+    char *redir_chassis_str;
+    char *save_ptr1 = NULL;
+    char *chassis_prio;
+
+    struct redirect_chassis *redirect_chassis =
+        xmalloc(sizeof *redirect_chassis);
+
+    int n=0;
+
+    redir_chassis_const = smap_get(&binding->options, "redirect-chassis");
+
+    if (!redir_chassis_const) {
+        free(redirect_chassis);
+        return NULL;
+    }
+
+    redir_chassis_str = xstrdup(redir_chassis_const);
+
+    for (chassis_prio = strtok_r(redir_chassis_str, ", ", &save_ptr1);
+         chassis_prio; chassis_prio = strtok_r(NULL, ", ", &save_ptr1)) {
+
+        char *save_ptr2 = NULL;
+        char *chassis_name = strtok_r(chassis_prio, ":", &save_ptr2);
+        char *prio = strtok_r(NULL, ":", &save_ptr2);
+
+        if (strlen(chassis_name) > UUID_LEN) {
+            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+            VLOG_WARN_RL(&rl, "chassis name (%s) in redirect-chassis option "
+                              "of logical port %s is too long, ignoring.",
+                              chassis_name, binding->logical_port);
+            continue;
+        }
+
+        ovs_strzcpy(redirect_chassis[n].chassis_id, chassis_name,
+                    UUID_LEN + 1);
+
+        /* chassis with no priority get lowest priority: 0 */
+        redirect_chassis[n].prio = prio ? atoi(prio):0;
+
+        redirect_chassis = xrealloc(redirect_chassis,
+                                    sizeof *redirect_chassis * (++n + 1));
+
+    }
+
+    free(redir_chassis_str);
+
+    qsort(redirect_chassis, n, sizeof *redirect_chassis,
+          compare_chassis_prio_);
+
+    struct ovs_list *list = NULL;
+    if (n) {
+        list = xmalloc(sizeof *list);
+        ovs_list_init(list);
+
+        int i;
+        for (i=0; i<n; i++) {
+            ovs_list_push_back(list, &redirect_chassis[i].node);
+        }
+    }
+
+    return list;
+}
+
+bool
+redirect_chassis_contains(const struct ovs_list *redirect_chassis,
+                          const struct sbrec_chassis *chassis)
+{
+    struct redirect_chassis *chassis_item;
+    if (redirect_chassis) {
+        LIST_FOR_EACH (chassis_item, node, redirect_chassis) {
+            if (!strcmp(chassis_item->chassis_id, chassis->name)) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+void
+redirect_chassis_destroy(struct ovs_list *list)
+{
+    if (!list) {
+        return;
+    }
+    free(ovs_list_front(list));
+    free(list);
+}
+
+bool
+pb_redirect_chassis_contains(const struct sbrec_port_binding *binding,
+                             const struct sbrec_chassis *chassis)
+{
+    bool contained;
+    struct ovs_list *redirect_chassis = parse_redirect_chassis(binding);
+    contained = redirect_chassis_contains(redirect_chassis, chassis);
+    redirect_chassis_destroy(redirect_chassis);
+    return contained;
+}
diff --git a/ovn/controller/lport.h b/ovn/controller/lport.h
index fe0e430..329a71f 100644
--- a/ovn/controller/lport.h
+++ b/ovn/controller/lport.h
@@ -17,10 +17,14 @@
 #define OVN_LPORT_H 1
 
 #include <stdint.h>
+#include "lib/uuid.h"
 #include "openvswitch/hmap.h"
+#include "openvswitch/list.h"
 
 struct ovsdb_idl;
+struct sbrec_chassis;
 struct sbrec_datapath_binding;
+struct sbrec_port_binding;
 
 /* Database indexes.
  * =================
@@ -93,4 +97,28 @@ const struct sbrec_multicast_group *mcgroup_lookup_by_dp_name(
     const struct sbrec_datapath_binding *,
     const char *name);
 
+
+/* Redirect chassis parsing
+ * ========================
+ *
+ * The following structure and methods allow parsing the redirect-chassis
+ * option on chassisredirect ports. A parsed list will always be in order
+ * of priority. */
+
+struct redirect_chassis {
+    struct ovs_list node;
+    char chassis_id[UUID_LEN+1];
+    int prio;
+};
+
+struct ovs_list*
+parse_redirect_chassis(const struct sbrec_port_binding *binding);
+bool redirect_chassis_contains(const struct ovs_list *redirect_chassis,
+                               const struct sbrec_chassis *chassis);
+void redirect_chassis_destroy(struct ovs_list *list);
+
+bool pb_redirect_chassis_contains(
+        const struct sbrec_port_binding *binding,
+        const struct sbrec_chassis *chassis);
+
 #endif /* ovn/lport.h */
diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c
index 8fbe356..21893bc 100644
--- a/ovn/controller/ovn-controller.c
+++ b/ovn/controller/ovn-controller.c
@@ -148,6 +148,7 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
     struct ovsdb_idl_condition mg = OVSDB_IDL_CONDITION_INIT(&mg);
     struct ovsdb_idl_condition dns = OVSDB_IDL_CONDITION_INIT(&dns);
     sbrec_port_binding_add_clause_type(&pb, OVSDB_F_EQ, "patch");
+    sbrec_port_binding_add_clause_type(&pb, OVSDB_F_EQ, "chassisredirect");
     if (chassis) {
         /* This should be mostly redundant with the other clauses for port
          * bindings, but it allows us to catch any ports that are assigned to
@@ -165,10 +166,6 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
         sbrec_port_binding_add_clause_options(&pb, OVSDB_F_INCLUDES, &l2);
         const struct smap l3 = SMAP_CONST1(&l3, "l3gateway-chassis", id);
         sbrec_port_binding_add_clause_options(&pb, OVSDB_F_INCLUDES, &l3);
-        const struct smap redirect = SMAP_CONST1(&redirect,
-                                                 "redirect-chassis", id);
-        sbrec_port_binding_add_clause_options(&pb, OVSDB_F_INCLUDES,
-                                              &redirect);
     }
     if (local_ifaces) {
         const char *name;
diff --git a/ovn/controller/physical.c b/ovn/controller/physical.c
index f2d9676..1c0ff8f 100644
--- a/ovn/controller/physical.c
+++ b/ovn/controller/physical.c
@@ -19,8 +19,11 @@
 #include "flow.h"
 #include "lflow.h"
 #include "lport.h"
+#include "lib/bundle.h"
 #include "lib/poll-loop.h"
+#include "lib/uuid.h"
 #include "ofctrl.h"
+#include "openvswitch/list.h"
 #include "openvswitch/hmap.h"
 #include "openvswitch/match.h"
 #include "openvswitch/ofp-actions.h"
@@ -353,8 +356,12 @@ consider_port_binding(enum mf_field_id mff_ovn_geneve,
         return;
     }
 
+    struct ovs_list *redirect_chassis = NULL;
+    redirect_chassis = parse_redirect_chassis(binding);
+
     if (!strcmp(binding->type, "chassisredirect")
-        && binding->chassis == chassis) {
+        && (binding->chassis == chassis ||
+            redirect_chassis_contains(redirect_chassis, chassis))) {
 
         /* Table 33, priority 100.
          * =======================
@@ -413,7 +420,8 @@ consider_port_binding(enum mf_field_id mff_ovn_geneve,
 
         ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, 100, 0,
                         &match, ofpacts_p);
-        return;
+
+        goto out;
     }
 
     /* Find the OpenFlow port for the logical port, as 'ofport'.  This is
@@ -442,7 +450,7 @@ consider_port_binding(enum mf_field_id mff_ovn_geneve,
     bool is_remote = false;
     if (binding->parent_port && *binding->parent_port) {
         if (!binding->tag) {
-            return;
+            goto out;
         }
         ofport = u16_to_ofp(simap_get(&localvif_to_ofport,
                                       binding->parent_port));
@@ -460,27 +468,34 @@ consider_port_binding(enum mf_field_id mff_ovn_geneve,
         }
     }
 
+    bool is_ha_remote = false;
     const struct chassis_tunnel *tun = NULL;
     const struct sbrec_port_binding *localnet_port =
         get_localnet_port(local_datapaths, dp_key);
     if (!ofport) {
         /* It is remote port, may be reached by tunnel or localnet port */
         is_remote = true;
-        if (!binding->chassis) {
-            return;
-        }
         if (localnet_port) {
             ofport = u16_to_ofp(simap_get(&localvif_to_ofport,
                                           localnet_port->logical_port));
             if (!ofport) {
-                return;
+                goto out;
             }
         } else {
-            tun = chassis_tunnel_find(binding->chassis->name);
-            if (!tun) {
-                return;
+            if (!redirect_chassis || ovs_list_is_short(redirect_chassis)) {
+                /* It's on a single remote chassis */
+                if (!binding->chassis) {
+                    goto out;
+                }
+                tun = chassis_tunnel_find(binding->chassis->name);
+                if (!tun) {
+                    goto out;
+                }
+                ofport = tun->ofport;
+            } else {
+                /* It's distributed across the "redirect_chassis" list */
+                is_ha_remote = true;
             }
-            ofport = tun->ofport;
         }
     }
 
@@ -575,7 +590,7 @@ consider_port_binding(enum mf_field_id mff_ovn_geneve,
         }
         ofctrl_add_flow(flow_table, OFTABLE_LOG_TO_PHY, 100, 0,
                         &match, ofpacts_p);
-    } else if (!tun) {
+    } else if (!tun && !is_ha_remote) {
         /* Remote port connected by localnet port */
         /* Table 33, priority 100.
          * =======================
@@ -598,7 +613,7 @@ consider_port_binding(enum mf_field_id mff_ovn_geneve,
         put_resubmit(OFTABLE_LOCAL_OUTPUT, ofpacts_p);
         ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, 100, 0,
                         &match, ofpacts_p);
-    } else {
+    } else  {
         /* Remote port connected by tunnel */
 
         /* Table 32, priority 100.
@@ -615,14 +630,79 @@ consider_port_binding(enum mf_field_id mff_ovn_geneve,
         match_set_metadata(&match, htonll(dp_key));
         match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0, port_key);
 
-        put_encapsulation(mff_ovn_geneve, tun, binding->datapath,
-                          port_key, ofpacts_p);
+        if (!is_ha_remote) {
+            /* Setup encapsulation */
+            put_encapsulation(mff_ovn_geneve, tun, binding->datapath,
+                              port_key, ofpacts_p);
+            /* Output to tunnel. */
+            ofpact_put_OUTPUT(ofpacts_p)->port = ofport;
+        } else {
+            struct redirect_chassis *chassis;
+            /* Make sure all tunnel endpoints use the same encapsulation,
+             * and set it up */
+            LIST_FOR_EACH (chassis, node, redirect_chassis) {
+                if (!tun) {
+                    tun = chassis_tunnel_find(chassis->chassis_id);
+                } else {
+                    struct chassis_tunnel *chassis_tunnel;
+                    chassis_tunnel = chassis_tunnel_find(chassis->chassis_id);
+                    if (chassis_tunnel &&
+                        tun->type != chassis_tunnel->type) {
+                        static struct vlog_rate_limit rl =
+                            VLOG_RATE_LIMIT_INIT(1, 1);
+                        VLOG_ERR_RL(&rl, "Port %s has redirect-chassis with "
+                                         "mixed encapsulations, only uniform "
+                                         "encapsulations are supported.",
+                                    binding->logical_port);
+                        goto out;
+                    }
+                }
+            }
+            if (!tun) {
+                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1,1);
+                VLOG_ERR_RL(&rl, "No tunnel endpoint found for gateways in "
+                                 "redirect-chassis of port %s",
+                            binding->logical_port);
+                goto out;
+            }
+
+            put_encapsulation(mff_ovn_geneve, tun, binding->datapath,
+                              port_key, ofpacts_p);
 
-        /* Output to tunnel. */
-        ofpact_put_OUTPUT(ofpacts_p)->port = ofport;
+            /* Output to tunnels with active/backup */
+            struct ofpact_bundle *bundle = ofpact_put_BUNDLE(ofpacts_p);
+
+            LIST_FOR_EACH (chassis, node, redirect_chassis) {
+                tun = chassis_tunnel_find(chassis->chassis_id);
+                if (!tun) {
+                    continue;
+                }
+                if (bundle->n_slaves >= BUNDLE_MAX_SLAVES) {
+                    static struct vlog_rate_limit rl =
+                            VLOG_RATE_LIMIT_INIT(1, 1);
+                    VLOG_WARN_RL(&rl, "Remote endpoints for port beyond "
+                                      "BUNDLE_MAX_SLAVES");
+                    break;
+                }
+                ofpbuf_put(ofpacts_p, &tun->ofport,
+                           sizeof tun->ofport);
+                bundle->n_slaves++;
+            }
+
+            ofpact_finish_BUNDLE(ofpacts_p, &bundle);
+            bundle->algorithm = NX_BD_ALG_ACTIVE_BACKUP;
+            /* Although ACTIVE_BACKUP bundle algorithm seems to ignore
+             * the next two fields, those are always set */
+            bundle->basis = 0;
+            bundle->fields = NX_HASH_FIELDS_ETH_SRC;
+        }
         ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 100, 0,
                         &match, ofpacts_p);
     }
+out:
+    if (redirect_chassis) {
+        redirect_chassis_destroy(redirect_chassis);
+    }
 }
 
 static void
-- 
1.8.3.1



More information about the dev mailing list