[ovs-dev] [PATCH v6] ovn: Connect to remote lports through localnet port.

Han Zhou zhouhan at gmail.com
Thu Feb 25 18:12:21 UTC 2016


Before this patch, inter-chassis communication between VIFs of same
lswitch will always go through tunnel, which end up of modeling a
single physical network with many lswitches and pairs of lports, and
complexity in CMS like OpenStack neutron to manage the lswitches and
lports.

With this patch, inter-chassis communication can go through physical
networks via localnet port with a 1:1 mapping between lswitches and
physical networks. The pipeline becomes:

Ingress -> Egress (local) -> Ingress (remote) -> Egress

The original tunneling mechanism will still be used if there is no
localnet port configured on the lswitch.

Signed-off-by: Han Zhou <zhouhan at gmail.com>
Acked-by: Russell Bryant <russell at ovn.org>
---

Notes:
    v1->v2: rebase on master, and more updates on documents
    v2->v3: updated based on Russell's comments
    v3->v4: rebase on master, and updated ovn-nb.xml document
    v4->v5: rebase and documents updates
        - update ovn-nb.xml on the restriction for "unknown" port
        - update architecture.7.xml for the flow change in table 33
        - add notes in TODO for packet duplication problem of "unknown" ports
    v5->v6: remove the incorrect notes on restrictions and TODO

 ovn/controller/binding.c        | 14 ++++-----
 ovn/controller/ovn-controller.c | 18 +++++------
 ovn/controller/ovn-controller.h | 10 ++++++
 ovn/controller/patch.c          | 17 +++++++++--
 ovn/controller/physical.c       | 67 ++++++++++++++++++++++++++++++++++++-----
 ovn/controller/physical.h       |  3 +-
 ovn/ovn-architecture.7.xml      |  9 ++++++
 ovn/ovn-nb.xml                  | 26 ++++++++++++++--
 ovn/ovn-sb.xml                  |  5 ++-
 9 files changed, 134 insertions(+), 35 deletions(-)

diff --git a/ovn/controller/binding.c b/ovn/controller/binding.c
index cb12cea..9087052 100644
--- a/ovn/controller/binding.c
+++ b/ovn/controller/binding.c
@@ -125,14 +125,14 @@ static void
 add_local_datapath(struct hmap *local_datapaths,
         const struct sbrec_port_binding *binding_rec)
 {
-    struct hmap_node *ld;
-    ld = hmap_first_with_hash(local_datapaths,
-                              binding_rec->datapath->tunnel_key);
-    if (!ld) {
-        ld = xmalloc(sizeof *ld);
-        hmap_insert(local_datapaths, ld,
-                    binding_rec->datapath->tunnel_key);
+    if (hmap_first_with_hash(local_datapaths,
+                             binding_rec->datapath->tunnel_key)) {
+        return;
     }
+
+    struct local_datapath *ld = xzalloc(sizeof *ld);
+    hmap_insert(local_datapaths, &ld->hmap_node,
+                binding_rec->datapath->tunnel_key);
 }
 
 static void
diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c
index 3638342..f5769b5 100644
--- a/ovn/controller/ovn-controller.c
+++ b/ovn/controller/ovn-controller.c
@@ -278,8 +278,8 @@ main(int argc, char *argv[])
             .ovnsb_idl_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
         };
 
-        /* Contains bare "struct hmap_node"s whose hash values are the tunnel_key
-         * of datapaths with at least one local port binding. */
+        /* Contains "struct local_datpath" nodes whose hash values are the
+         * tunnel_key of datapaths with at least one local port binding. */
         struct hmap local_datapaths = HMAP_INITIALIZER(&local_datapaths);
 
         const struct ovsrec_bridge *br_int = get_br_int(&ctx);
@@ -303,20 +303,16 @@ main(int argc, char *argv[])
             lflow_run(&ctx, &flow_table, &ct_zones, &local_datapaths);
             if (chassis_id) {
                 physical_run(&ctx, mff_ovn_geneve,
-                             br_int, chassis_id, &ct_zones, &flow_table);
+                             br_int, chassis_id, &ct_zones, &flow_table,
+                             &local_datapaths);
             }
             ofctrl_put(&flow_table);
             hmap_destroy(&flow_table);
         }
 
-        /* local_datapaths contains bare hmap_node instances.
-         * We use this wrapper so that we can make use of
-         * HMAP_FOR_EACH_SAFE to tear down the hmap. */
-        struct {
-            struct hmap_node node;
-        } *cur_node, *next_node;
-        HMAP_FOR_EACH_SAFE (cur_node, next_node, node, &local_datapaths) {
-            hmap_remove(&local_datapaths, &cur_node->node);
+        struct local_datapath *cur_node, *next_node;
+        HMAP_FOR_EACH_SAFE (cur_node, next_node, hmap_node, &local_datapaths) {
+            hmap_remove(&local_datapaths, &cur_node->hmap_node);
             free(cur_node);
         }
         hmap_destroy(&local_datapaths);
diff --git a/ovn/controller/ovn-controller.h b/ovn/controller/ovn-controller.h
index 8c437a7..a3465eb 100644
--- a/ovn/controller/ovn-controller.h
+++ b/ovn/controller/ovn-controller.h
@@ -31,6 +31,16 @@ struct controller_ctx {
     struct ovsdb_idl_txn *ovs_idl_txn;
 };
 
+/* Contains hmap_node whose hash values are the tunnel_key of datapaths
+ * with at least one local port binding. It also stores the port binding of
+ * "localnet" port if such a port exists on the datapath, which indicates
+ * physical network should be used for inter-chassis communication through
+ * the localnet port */
+struct local_datapath {
+    struct hmap_node hmap_node;
+    const struct sbrec_port_binding *localnet_port;
+};
+
 const struct ovsrec_bridge *get_bridge(struct ovsdb_idl *,
                                        const char *br_name);
 
diff --git a/ovn/controller/patch.c b/ovn/controller/patch.c
index 1f981b7..23aa6c5 100644
--- a/ovn/controller/patch.c
+++ b/ovn/controller/patch.c
@@ -180,15 +180,26 @@ add_bridge_mappings(struct controller_ctx *ctx,
             continue;
         }
 
-        struct hmap_node *ld;
-        ld = hmap_first_with_hash(local_datapaths,
-                                  binding->datapath->tunnel_key);
+        struct local_datapath *ld;
+        ld = CONTAINER_OF(hmap_first_with_hash(local_datapaths,
+                          binding->datapath->tunnel_key),
+                          struct local_datapath, hmap_node);
         if (!ld) {
             /* This localnet port is on a datapath with no
              * logical ports bound to this chassis, so there's no need
              * to create patch ports for it. */
             continue;
         }
+        if (ld->localnet_port) {
+            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+            VLOG_WARN_RL(&rl, "localnet port '%s' already set for datapath "
+                         "'%ld', skipping the new port '%s'.",
+                         ld->localnet_port->logical_port,
+                         binding->datapath->tunnel_key,
+                         binding->logical_port);
+            continue;
+        }
+        ld->localnet_port = binding;
 
         const char *network = smap_get(&binding->options, "network_name");
         if (!network) {
diff --git a/ovn/controller/physical.c b/ovn/controller/physical.c
index 8b12769..657c3e2 100644
--- a/ovn/controller/physical.c
+++ b/ovn/controller/physical.c
@@ -135,10 +135,20 @@ put_stack(enum mf_field_id field, struct ofpact_stack *stack)
     stack->subfield.n_bits = stack->subfield.field->n_bits;
 }
 
+static const struct sbrec_port_binding*
+get_localnet_port(struct hmap *local_datapaths, int64_t tunnel_key)
+{
+    struct local_datapath *ld;
+    ld = CONTAINER_OF(hmap_first_with_hash(local_datapaths, tunnel_key),
+                      struct local_datapath, hmap_node);
+    return ld ? ld->localnet_port : NULL;
+}
+
 void
 physical_run(struct controller_ctx *ctx, enum mf_field_id mff_ovn_geneve,
              const struct ovsrec_bridge *br_int, const char *this_chassis_id,
-             const struct simap *ct_zones, struct hmap *flow_table)
+             const struct simap *ct_zones, struct hmap *flow_table,
+             struct hmap *local_datapaths)
 {
     struct simap localvif_to_ofport = SIMAP_INITIALIZER(&localvif_to_ofport);
     struct hmap tunnels = HMAP_INITIALIZER(&tunnels);
@@ -244,6 +254,7 @@ physical_run(struct controller_ctx *ctx, enum mf_field_id mff_ovn_geneve,
 
         int tag = 0;
         ofp_port_t ofport;
+        bool is_remote = false;
         if (binding->parent_port && *binding->parent_port) {
             if (!binding->tag) {
                 continue;
@@ -262,19 +273,32 @@ physical_run(struct controller_ctx *ctx, enum mf_field_id mff_ovn_geneve,
         }
 
         const struct chassis_tunnel *tun = NULL;
+        const struct sbrec_port_binding *localnet_port =
+            get_localnet_port(local_datapaths,
+                              binding->datapath->tunnel_key);
         if (!ofport) {
+            /* It is remote port, may be reached by tunnel or localnet port */
+            is_remote = true;
             if (!binding->chassis) {
                 continue;
             }
-            tun = chassis_tunnel_find(&tunnels, binding->chassis->name);
-            if (!tun) {
-                continue;
+            if (localnet_port) {
+                ofport = u16_to_ofp(simap_get(&localvif_to_ofport,
+                                              localnet_port->logical_port));
+                if (!ofport) {
+                    continue;
+                }
+            } else {
+                tun = chassis_tunnel_find(&tunnels, binding->chassis->name);
+                if (!tun) {
+                    continue;
+                }
+                ofport = tun->ofport;
             }
-            ofport = tun->ofport;
         }
 
         struct match match;
-        if (!tun) {
+        if (!is_remote) {
             int zone_id = simap_get(ct_zones, binding->logical_port);
             /* Packets that arrive from a vif can belong to a VM or
              * to a container located inside that VM. Packets that
@@ -395,7 +419,32 @@ physical_run(struct controller_ctx *ctx, enum mf_field_id mff_ovn_geneve,
             }
             ofctrl_add_flow(flow_table, OFTABLE_LOG_TO_PHY, 100,
                             &match, &ofpacts);
+        } else if (!tun) {
+            /* Remote port connected by localnet port */
+            /* Table 33, priority 100.
+             * =======================
+             *
+             * Implements switching to localnet port. Each flow matches a
+             * logical output port on remote hypervisor, switch the output port
+             * to connected localnet port and resubmits to same table.
+             */
+
+            match_init_catchall(&match);
+            ofpbuf_clear(&ofpacts);
+
+            /* Match MFF_LOG_DATAPATH, MFF_LOG_OUTPORT. */
+            match_set_metadata(&match, htonll(binding->datapath->tunnel_key));
+            match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0,
+                          binding->tunnel_key);
+
+            put_load(localnet_port->tunnel_key, MFF_LOG_OUTPORT, 0, 32, &ofpacts);
+
+            /* Resubmit to table 33. */
+            put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts);
+            ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, 100, &match,
+                            &ofpacts);
         } else {
+            /* Remote port connected by tunnel */
             /* Table 32, priority 100.
              * =======================
              *
@@ -485,7 +534,11 @@ physical_run(struct controller_ctx *ctx, enum mf_field_id mff_ovn_geneve,
                                ? port->parent_port : port->logical_port)) {
                 put_load(port->tunnel_key, MFF_LOG_OUTPORT, 0, 32, &ofpacts);
                 put_resubmit(OFTABLE_DROP_LOOPBACK, &ofpacts);
-            } else if (port->chassis) {
+            } else if (port->chassis && !get_localnet_port(local_datapaths,
+                                             mc->datapath->tunnel_key)) {
+                /* Add remote chassis only when localnet port not exist,
+                 * otherwise multicast will reach remote ports through localnet
+                 * port. */
                 sset_add(&remote_chassis, port->chassis->name);
             }
         }
diff --git a/ovn/controller/physical.h b/ovn/controller/physical.h
index 2906937..826b99b 100644
--- a/ovn/controller/physical.h
+++ b/ovn/controller/physical.h
@@ -43,6 +43,7 @@ struct simap;
 void physical_register_ovs_idl(struct ovsdb_idl *);
 void physical_run(struct controller_ctx *, enum mf_field_id mff_ovn_geneve,
                   const struct ovsrec_bridge *br_int, const char *chassis_id,
-                  const struct simap *ct_zones, struct hmap *flow_table);
+                  const struct simap *ct_zones, struct hmap *flow_table,
+                  struct hmap *local_datapaths);
 
 #endif /* ovn/physical.h */
diff --git a/ovn/ovn-architecture.7.xml b/ovn/ovn-architecture.7.xml
index d539db8..e0ab650 100644
--- a/ovn/ovn-architecture.7.xml
+++ b/ovn/ovn-architecture.7.xml
@@ -810,6 +810,15 @@
       </p>
 
       <p>
+        A special case is that when a localnet port exists on the datapath,
+        remote port is connected by switching to the localnet port. In this
+        case, instead of adding a flow in table 32 to reach the remote port, a
+        flow is added in table 33 to switch the logical outport to the localnet
+        port, and resubmit to table 33 as if it were unicasted to a logical
+        port on the local hypervisor.
+      </p>
+
+      <p>
         Table 34 matches and drops packets for which the logical input and
         output ports are the same.  It resubmits other packets to table 48.
       </p>
diff --git a/ovn/ovn-nb.xml b/ovn/ovn-nb.xml
index c3b4934..73c4428 100644
--- a/ovn/ovn-nb.xml
+++ b/ovn/ovn-nb.xml
@@ -35,6 +35,20 @@
       Each row represents one L2 logical switch.
     </p>
 
+    <p>
+      There are two kinds of logical switches, that is, ones that fully
+      virtualize the network (overlay logical switches) and ones that provide
+      simple connectivity to a physical network (bridged logical switches).
+      They work in the same way when providing connectivity between logical
+      ports on same chasis, but differently when connecting remote logical
+      ports.  Overlay logical switches connect remote logical ports by tunnels,
+      while bridged logical switches provide connectivity to remote ports by
+      bridging the packets to directly connected physical L2 segment with the
+      help of <code>localnet</code> ports.  Each bridged logical switch has
+      one and only one <code>localnet</code> port, which has only one special
+      address <code>unknown</code>.
+    </p>
+
     <column name="name">
       <p>
         A name for the logical switch.  This name has no special meaning or purpose
@@ -116,9 +130,8 @@
           <dd>
             A connection to a locally accessible network from each
             <code>ovn-controller</code> instance.  A logical switch can only
-            have a single <code>localnet</code> port attached and at most one
-            regular logical port.  This is used to model direct connectivity to
-            an existing network.
+            have a single <code>localnet</code> port attached.  This is used
+            to model direct connectivity to an existing network.
           </dd>
 
           <dt><code>vtep</code></dt>
@@ -410,6 +423,13 @@
         Note that you can not create an ACL matching on a port with
         type=router.
       </p>
+
+      <p>
+        Note that when <code>localnet</code> port exists in a lswitch, for
+        <code>to-lport</code> direction, the <code>inport</code> works only if
+        the <code>to-lport</code> is located on the same chassis as the
+        <code>inport</code>.
+      </p>
     </column>
 
     <column name="action">
diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml
index 1ea35d5..a49a63e 100644
--- a/ovn/ovn-sb.xml
+++ b/ovn/ovn-sb.xml
@@ -1217,9 +1217,8 @@ tcp.flags = RST;
           <dd>
             A connection to a locally accessible network from each
             <code>ovn-controller</code> instance.  A logical switch can only
-            have a single <code>localnet</code> port attached and at most one
-            regular logical port.  This is used to model direct connectivity to
-            an existing network.
+            have a single <code>localnet</code> port attached.  This is used
+            to model direct connectivity to an existing network.
           </dd>
 
           <dt><code>vtep</code></dt>
-- 
2.1.0




More information about the dev mailing list