[ovs-dev] [PATCH RFC] OVN: Openstack floating ip support

Chandra S Vejendla csvejend at us.ibm.com
Tue Mar 22 21:19:07 UTC 2016


This patch adds distributed floating ip support for ovn. The assumption made
here is that the external network is a single L2 broadcast domain and all the
chassis have connectivity to the external network.

Two new tables are added to the LROUTER pipeline: IN_IP_DNAT and IN_IP_SNAT.
IN_IP_DNAT will modify the dst ip of the packet from floating ip to vm ip.
IN_IP_SNAT will modify the src ip of the packet from vm ip to floating ip.

Rules in IN_IP_DNAT:
- Priority 100 rule to set the reg2 to 0x1 if dst & src networks are
  connected via a router and both the networks are private.
- Priority 90 rule to modify the dst ip from floating ip to vm ip.
- Priority 0 rule to go to next table.

Rules in IN_IP_SNAT:
- Priority 100 rule to skip modifying the src ip when reg2 is set to 0x1
- Priority 90 rule to modify the src ip from vm ip to floating ip and dst mac
  to floating ip port mac if the packet is egressing via the gateway port
- Priority 50 rule to modify the src ip from vm ip to floating ip
- Priority 0 rule to go to next table.

Priority 100 rules in IN_IP_DNAT and IN_IP_SNAT serve 2 purposes.
- Avoid NAT when vms in different LSWITCHES connected via a LROUTER talk to
  each other using private ips.
- When 2 VMs connected to the same LSWITCH or different LSWITCHES connected
  via a router try to talk to each other, the dst ip of the packet should
  first be DNATed and then the src ip should be SNATed.

The initial design was to stage DNAT in the ingress pipeline and the SNAT in
the egress pipeline, but now both the stages are in the ingress pipeline. This
was done to solve the cases highlighted above [Priority 100 rules]. There is a
need to use information from DNAT stage when SNAT is being processed. This
would require an explicit register to be burnt to store the information.

Flows modified in the LSWITCH pipeline

Rules in IN_PORT_SEC:
- Priority 50 rule to allow packets ingressing the LSWITCH router port
  with a src mac of floating ip port

Rules in ARP_RSP:
- Priority 150 rule to respond to ARP requests for a floating ip. To prevent
  all the chassis from answering ARP requests for the same floating ip, an
  "lport" key is set in the external_ids column of the lflow table. lport
  points to the vif-id of the vm that is associated with the floating ip.
  When ovn-controller is processing the flows, if it sees an lport key set in
  the external_ids column, it will install this lflow only if the lport is a
  local port on the chassis.

Rules in L2_LKUP:
- Priority 50 rule to set the outport to the lrouter port when the dst mac
  matches the floating ip mac

Rules in OUT_PORT_SEC:
- Priority 50 rule to allow packet egressing the lrouter port with a mac of a
  floating ip port.

Had to increase MAX_RESUBMIT_RECURSION from 64 to 96. When 2 VMs connected
via vm1->LS->LR->LS->LR->LS->vm2 try to talk to each other, the number of
resubmits exceeds the existing limit of 64.

When a floating ip is associated with a VM ip, NB will set the options of the
floating ip lport to "fixed-ip-port=<lport of vif>, router-port=<lport of the
logical router port>".

If you want to try out this patch with openstack, add the following patch [1]
to networking-ovn.

[1] https://review.openstack.org/#/c/295547/
---
 ofproto/ofproto-dpif-xlate.c    |   2 +-
 ovn/controller/binding.c        |  24 ++-
 ovn/controller/binding.h        |   4 +-
 ovn/controller/lflow.c          |  21 ++-
 ovn/controller/lflow.h          |   3 +-
 ovn/controller/ovn-controller.c |   7 +-
 ovn/northd/ovn-northd.c         | 360 +++++++++++++++++++++++++++++++++++++---
 7 files changed, 378 insertions(+), 43 deletions(-)

diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 67504e8..4a5aae2 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -68,7 +68,7 @@ VLOG_DEFINE_THIS_MODULE(ofproto_dpif_xlate);
 
 /* Maximum depth of flow table recursion (due to resubmit actions) in a
  * flow translation. */
-#define MAX_RESUBMIT_RECURSION 64
+#define MAX_RESUBMIT_RECURSION 96
 #define MAX_INTERNAL_RESUBMITS 1   /* Max resbmits allowed using rules in
                                       internal table. */
 
diff --git a/ovn/controller/binding.c b/ovn/controller/binding.c
index d3ca9c9..f4e0f4a 100644
--- a/ovn/controller/binding.c
+++ b/ovn/controller/binding.c
@@ -49,7 +49,7 @@ binding_register_ovs_idl(struct ovsdb_idl *ovs_idl)
                          &ovsrec_interface_col_ingress_policing_burst);
 }
 
-static void
+void
 get_local_iface_ids(const struct ovsrec_bridge *br_int, struct shash *lports)
 {
     int i;
@@ -149,7 +149,8 @@ update_qos(const struct ovsrec_interface *iface_rec,
 void
 binding_run(struct controller_ctx *ctx, const struct ovsrec_bridge *br_int,
             const char *chassis_id, struct simap *ct_zones,
-            unsigned long *ct_zone_bitmap, struct hmap *local_datapaths)
+            unsigned long *ct_zone_bitmap, struct hmap *local_datapaths,
+            struct sset *all_lports)
 {
     const struct sbrec_chassis *chassis_rec;
     const struct sbrec_port_binding *binding_rec;
@@ -167,10 +168,9 @@ binding_run(struct controller_ctx *ctx, const struct ovsrec_bridge *br_int,
          * We'll remove our chassis from all port binding records below. */
     }
 
-    struct sset all_lports = SSET_INITIALIZER(&all_lports);
     struct shash_node *node;
     SHASH_FOR_EACH (node, &lports) {
-        sset_add(&all_lports, node->name);
+        sset_add(all_lports, node->name);
     }
 
     /* Run through each binding record to see if it is resident on this
@@ -181,10 +181,10 @@ binding_run(struct controller_ctx *ctx, const struct ovsrec_bridge *br_int,
             = shash_find_and_delete(&lports, binding_rec->logical_port);
         if (iface_rec
             || (binding_rec->parent_port && binding_rec->parent_port[0] &&
-                sset_contains(&all_lports, binding_rec->parent_port))) {
+                sset_contains(all_lports, binding_rec->parent_port))) {
             if (binding_rec->parent_port && binding_rec->parent_port[0]) {
                 /* Add child logical port to the set of all local ports. */
-                sset_add(&all_lports, binding_rec->logical_port);
+                sset_add(all_lports, binding_rec->logical_port);
             }
             add_local_datapath(local_datapaths, binding_rec);
             if (iface_rec && ctx->ovs_idl_txn) {
@@ -217,7 +217,14 @@ binding_run(struct controller_ctx *ctx, const struct ovsrec_bridge *br_int,
              * to list them in all_lports because we want to allocate
              * a conntrack zone ID for each one, as we'll be creating
              * a patch port for each one. */
-            sset_add(&all_lports, binding_rec->logical_port);
+            sset_add(all_lports, binding_rec->logical_port);
+        }
+        else if (!binding_rec->chassis
+                           && !strcmp(binding_rec->type, "floating-ip")) {
+            const char *peer = smap_get(&binding_rec->options, "peer");
+            if (peer && sset_contains(all_lports, peer)) {
+                    add_local_datapath(local_datapaths, binding_rec);
+            }
         }
     }
 
@@ -225,10 +232,9 @@ binding_run(struct controller_ctx *ctx, const struct ovsrec_bridge *br_int,
         VLOG_DBG("No port binding record for lport %s", node->name);
     }
 
-    update_ct_zones(&all_lports, ct_zones, ct_zone_bitmap);
+    update_ct_zones(all_lports, ct_zones, ct_zone_bitmap);
 
     shash_destroy(&lports);
-    sset_destroy(&all_lports);
 }
 
 /* Returns true if the database is all cleaned up, false if more work is
diff --git a/ovn/controller/binding.h b/ovn/controller/binding.h
index 6e19c10..73e6b0c 100644
--- a/ovn/controller/binding.h
+++ b/ovn/controller/binding.h
@@ -24,11 +24,13 @@ struct hmap;
 struct ovsdb_idl;
 struct ovsrec_bridge;
 struct simap;
+struct sset;
 
 void binding_register_ovs_idl(struct ovsdb_idl *);
 void binding_run(struct controller_ctx *, const struct ovsrec_bridge *br_int,
                  const char *chassis_id, struct simap *ct_zones,
-                 unsigned long *ct_zone_bitmap, struct hmap *local_datapaths);
+                 unsigned long *ct_zone_bitmap, struct hmap *local_datapaths,
+                 struct sset *all_lports);
 bool binding_cleanup(struct controller_ctx *, const char *chassis_id);
 
 #endif /* ovn/binding.h */
diff --git a/ovn/controller/lflow.c b/ovn/controller/lflow.c
index 0614a54..a59d26f 100644
--- a/ovn/controller/lflow.c
+++ b/ovn/controller/lflow.c
@@ -16,6 +16,7 @@
 #include <config.h>
 #include "lflow.h"
 #include "lport.h"
+#include "lib/sset.h"
 #include "openvswitch/dynamic-string.h"
 #include "ofctrl.h"
 #include "ofp-actions.h"
@@ -198,7 +199,8 @@ static void
 add_logical_flows(struct controller_ctx *ctx, const struct lport_index *lports,
                   const struct mcgroup_index *mcgroups,
                   const struct hmap *local_datapaths,
-                  const struct simap *ct_zones, struct hmap *flow_table)
+                  const struct simap *ct_zones, struct hmap *flow_table,
+                  struct sset *local_ports)
 {
     uint32_t conj_id_ofs = 1;
 
@@ -240,6 +242,18 @@ add_logical_flows(struct controller_ctx *ctx, const struct lport_index *lports,
             }
         }
 
+        /* The following check is specifically for floating-ip ports.
+         * It prevents the ARP responder rule for a floating ip from being
+         * installed unless the lport named in the flow is a local port,
+         * i.e. one that is resident on this chassis. */
+        const char *lport = smap_get(&lflow->external_ids, "lport");
+        if (lport) {
+            if (!sset_contains(local_ports, lport)) {
+                continue;
+            }
+        }
+
+
         /* Determine translation of logical table IDs to physical table IDs. */
         uint8_t first_ptable = (ingress
                                 ? OFTABLE_LOG_INGRESS_PIPELINE
@@ -416,10 +430,11 @@ void
 lflow_run(struct controller_ctx *ctx, const struct lport_index *lports,
           const struct mcgroup_index *mcgroups,
           const struct hmap *local_datapaths,
-          const struct simap *ct_zones, struct hmap *flow_table)
+          const struct simap *ct_zones, struct hmap *flow_table,
+          struct sset *local_ports)
 {
     add_logical_flows(ctx, lports, mcgroups, local_datapaths,
-                      ct_zones, flow_table);
+                      ct_zones, flow_table, local_ports);
     add_neighbor_flows(ctx, lports, flow_table);
 }
 
diff --git a/ovn/controller/lflow.h b/ovn/controller/lflow.h
index ff823d4..3147e5c 100644
--- a/ovn/controller/lflow.h
+++ b/ovn/controller/lflow.h
@@ -41,6 +41,7 @@ struct lport_index;
 struct mcgroup_index;
 struct simap;
 struct uuid;
+struct sset;
 
 /* OpenFlow table numbers.
  *
@@ -63,7 +64,7 @@ void lflow_run(struct controller_ctx *, const struct lport_index *,
                const struct mcgroup_index *,
                const struct hmap *local_datapaths, 
                const struct simap *ct_zones,
-               struct hmap *flow_table);
+               struct hmap *flow_table, struct sset *local_ports);
 void lflow_destroy(void);
 
 #endif /* ovn/lflow.h */
diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c
index e52b731..3e0b8e3 100644
--- a/ovn/controller/ovn-controller.c
+++ b/ovn/controller/ovn-controller.c
@@ -33,6 +33,7 @@
 #include "encaps.h"
 #include "fatal-signal.h"
 #include "hmap.h"
+#include "sset.h"
 #include "lflow.h"
 #include "lib/vswitch-idl.h"
 #include "lport.h"
@@ -284,12 +285,13 @@ main(int argc, char *argv[])
 
         const struct ovsrec_bridge *br_int = get_br_int(&ctx);
         const char *chassis_id = get_chassis_id(ctx.ovs_idl);
+        struct sset local_ports = SSET_INITIALIZER(&local_ports);
 
         if (chassis_id) {
             chassis_run(&ctx, chassis_id);
             encaps_run(&ctx, br_int, chassis_id);
             binding_run(&ctx, br_int, chassis_id, &ct_zones, ct_zone_bitmap,
-                    &local_datapaths);
+                    &local_datapaths, &local_ports);
         }
 
         if (br_int) {
@@ -306,7 +308,8 @@ main(int argc, char *argv[])
 
             struct hmap flow_table = HMAP_INITIALIZER(&flow_table);
             lflow_run(&ctx, &lports, &mcgroups, &local_datapaths,
-                      &ct_zones, &flow_table);
+                      &ct_zones, &flow_table, &local_ports);
+            sset_destroy(&local_ports);
             if (chassis_id) {
                 physical_run(&ctx, mff_ovn_geneve,
                              br_int, chassis_id, &ct_zones, &flow_table,
diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
index 598bbe3..12e7ebd 100644
--- a/ovn/northd/ovn-northd.c
+++ b/ovn/northd/ovn-northd.c
@@ -102,9 +102,11 @@ enum ovn_stage {
     /* Logical router ingress stages. */                              \
     PIPELINE_STAGE(ROUTER, IN,  ADMISSION,   0, "lr_in_admission")    \
     PIPELINE_STAGE(ROUTER, IN,  IP_INPUT,    1, "lr_in_ip_input")     \
-    PIPELINE_STAGE(ROUTER, IN,  IP_ROUTING,  2, "lr_in_ip_routing")   \
-    PIPELINE_STAGE(ROUTER, IN,  ARP_RESOLVE, 3, "lr_in_arp_resolve")  \
-    PIPELINE_STAGE(ROUTER, IN,  ARP_REQUEST, 4, "lr_in_arp_request")  \
+    PIPELINE_STAGE(ROUTER, IN,  IP_DNAT,     2, "lr_in_ip_dnat")      \
+    PIPELINE_STAGE(ROUTER, IN,  IP_ROUTING,  3, "lr_in_ip_routing")   \
+    PIPELINE_STAGE(ROUTER, IN,  IP_SNAT,     4, "lr_in_ip_snat")      \
+    PIPELINE_STAGE(ROUTER, IN,  ARP_RESOLVE, 5, "lr_in_arp_resolve")  \
+    PIPELINE_STAGE(ROUTER, IN,  ARP_REQUEST, 6, "lr_in_arp_request")  \
                                                                       \
     /* Logical router egress stages. */                               \
     PIPELINE_STAGE(ROUTER, OUT, DELIVERY,    0, "lr_out_delivery")
@@ -479,6 +481,7 @@ struct ovn_port {
     ovs_be32 ip, mask;          /* 192.168.10.123/24. */
     ovs_be32 network;           /* 192.168.10.0. */
     ovs_be32 bcast;             /* 192.168.10.255. */
+    ovs_be32 fixed_ip;          /* fixed-ip for floating-ip */
     struct eth_addr mac;
     struct ovn_port *peer;
 
@@ -541,6 +544,20 @@ ovn_port_allocate_key(struct ovn_datapath *od)
                           (1u << 15) - 1, &od->port_key_hint);
 }
 
+/* Returns the JSON key of the port that floating-ip port 'op' names in its
+ * "router-port" option, provided that port exists in 'ports' and has 'nbs'
+ * set.  Otherwise returns 'op''s own JSON key. */
+static const char *
+get_router_port_for_floating_ip(struct ovn_port *op, struct hmap *ports)
+{
+    const char *name = smap_get(&op->nbs->options, "router-port");
+    struct ovn_port *target = name ? ovn_port_find(ports, name) : NULL;
+
+    /* A missing option, an unknown port name, or a port without 'nbs' all
+     * fall back to 'op' itself. */
+    return target && target->nbs ? target->json_key : op->json_key;
+}
+
 static void
 join_logical_ports(struct northd_context *ctx,
                    struct hmap *datapaths, struct hmap *ports,
@@ -671,10 +688,35 @@ join_logical_ports(struct northd_context *ctx,
             op->peer = ovn_port_find(ports, op->nbr->name);
         }
     }
+
+    HMAP_FOR_EACH (op, key_node, ports) {
+        if (op->nbs && !strcmp(op->nbs->type, "floating-ip")) {
+            const char *peer_name = smap_get(&op->nbs->options,
+                                             "fixed-ip-port");
+            if (!peer_name) {
+                continue;
+            }
+
+            struct ovn_port *peer = ovn_port_find(ports, peer_name);
+            if (!peer || !peer->nbs) {
+                continue;
+            }
+            struct eth_addr mac;
+            ovs_be32 ip;
+
+            /* Not sure if a port with multiple IP addresses can be 
+             * mapped to a floating-ip. For now, just using first ip */
+            if (ovs_scan(peer->nbs->addresses[0],
+                     ETH_ADDR_SCAN_FMT" "IP_SCAN_FMT,
+                     ETH_ADDR_SCAN_ARGS(mac), IP_SCAN_ARGS(&ip))) {
+                op->fixed_ip = ip;
+            }
+        }
+    }
 }
 
 static void
-ovn_port_update_sbrec(const struct ovn_port *op)
+ovn_port_update_sbrec(const struct ovn_port *op, struct hmap *ports)
 {
     sbrec_port_binding_set_datapath(op->sb, op->od->sb);
     if (op->nbr) {
@@ -688,7 +730,20 @@ ovn_port_update_sbrec(const struct ovn_port *op)
         sbrec_port_binding_set_tag(op->sb, NULL, 0);
         sbrec_port_binding_set_mac(op->sb, NULL, 0);
     } else {
-        if (strcmp(op->nbs->type, "router")) {
+        if (op->nbs && !strcmp(op->nbs->type, "floating-ip")) {
+            const char *peer_name = smap_get(&op->nbs->options,
+                                             "fixed-ip-port");
+            if (peer_name) {
+                struct ovn_port *peer = ovn_port_find(ports, peer_name);
+                if (peer) {
+                    const struct smap ids = SMAP_CONST1(&ids, "peer",
+                                             peer_name);
+                    sbrec_port_binding_set_options(op->sb, &ids);
+                }
+            }
+            sbrec_port_binding_set_type(op->sb, op->nbs->type);
+        }
+        else if (strcmp(op->nbs->type, "router")) {
             sbrec_port_binding_set_type(op->sb, op->nbs->type);
             sbrec_port_binding_set_options(op->sb, &op->nbs->options);
         } else {
@@ -727,7 +782,7 @@ build_ports(struct northd_context *ctx, struct hmap *datapaths,
      * record based on northbound data.  Also index the in-use tunnel_keys. */
     struct ovn_port *op, *next;
     LIST_FOR_EACH_SAFE (op, next, list, &both) {
-        ovn_port_update_sbrec(op);
+        ovn_port_update_sbrec(op, ports);
 
         add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
         if (op->sb->tunnel_key > op->od->port_key_hint) {
@@ -743,7 +798,7 @@ build_ports(struct northd_context *ctx, struct hmap *datapaths,
         }
 
         op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
-        ovn_port_update_sbrec(op);
+        ovn_port_update_sbrec(op, ports);
 
         sbrec_port_binding_set_logical_port(op->sb, op->key);
         sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);
@@ -869,6 +924,8 @@ struct ovn_lflow {
     uint16_t priority;
     char *match;
     char *actions;
+    char *lport; /* is not null, indicates that the flow should be installed
+                    on a chassis if the lport is local to that chassis */
 };
 
 static size_t
@@ -900,6 +957,18 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
     lflow->priority = priority;
     lflow->match = match;
     lflow->actions = actions;
+    lflow->lport = NULL;
+}
+
+/* Stores a copy of 'lport_name' in 'lflow', marking the flow for installation
+ * only on a chassis where that lport is local.  NULL clears the marking. */
+static void
+ovn_lflow_lport_set(struct ovn_lflow *lflow, const char *lport_name)
+{
+    /* Release any earlier copy first so that repeated calls do not leak.
+     * ovn_lflow_init() starts the field at NULL, which free() accepts. */
+    free(lflow->lport);
+    lflow->lport = lport_name ? xstrdup(lport_name) : NULL;
 }
 
 /* Adds a row with the specified contents to the Logical_Flow table. */
@@ -1155,7 +1224,8 @@ build_port_security_ipv6_flow(
  *   - Priority 80 flow to drop ARP and IPv6 ND packets.
  */
 static void
-build_port_security_nd(struct ovn_port *op, struct hmap *lflows)
+build_port_security_nd(struct ovn_port *op, struct hmap *lflows,
+                       struct hmap *ports)
 {
     for (size_t i = 0; i < op->nbs->n_port_security; i++) {
         struct lport_addresses ps;
@@ -1168,11 +1238,19 @@ build_port_security_nd(struct ovn_port *op, struct hmap *lflows)
 
         bool no_ip = !(ps.n_ipv4_addrs || ps.n_ipv6_addrs);
         struct ds match = DS_EMPTY_INITIALIZER;
+ 
+        const char *inport = NULL;
+        if (!strcmp(op->nbs->type, "floating-ip")) {
+            inport = get_router_port_for_floating_ip(op, ports);
+        }
+        else {
+            inport = op->json_key;
+        }
 
         if (ps.n_ipv4_addrs || no_ip) {
             ds_put_format(
                 &match, "inport == %s && eth.src == "ETH_ADDR_FMT" && arp.sha == "
-                ETH_ADDR_FMT, op->json_key, ETH_ADDR_ARGS(ps.ea),
+                ETH_ADDR_FMT, inport, ETH_ADDR_ARGS(ps.ea),
                 ETH_ADDR_ARGS(ps.ea));
 
             if (ps.n_ipv4_addrs) {
@@ -1228,7 +1306,7 @@ build_port_security_nd(struct ovn_port *op, struct hmap *lflows)
  */
 static void
 build_port_security_ip(enum ovn_pipeline pipeline, struct ovn_port *op,
-                       struct hmap *lflows)
+                       struct hmap *lflows, struct hmap *ports)
 {
     char *port_direction;
     enum ovn_stage stage;
@@ -1250,16 +1328,25 @@ build_port_security_ip(enum ovn_pipeline pipeline, struct ovn_port *op,
             continue;
         }
 
+        const char *port = NULL;
+        if (!strcmp(op->nbs->type, "floating-ip")) {
+            port = get_router_port_for_floating_ip(op, ports);
+        }
+        else {
+            port = op->json_key;
+        }
+
+
         if (ps.n_ipv4_addrs) {
             struct ds match = DS_EMPTY_INITIALIZER;
             if (pipeline == P_IN) {
                 ds_put_format(&match, "inport == %s && eth.src == "ETH_ADDR_FMT
-                              " && ip4.src == {0.0.0.0, ", op->json_key,
+                              " && ip4.src == {0.0.0.0, ", port,
                               ETH_ADDR_ARGS(ps.ea));
             } else {
                 ds_put_format(&match, "outport == %s && eth.dst == "ETH_ADDR_FMT
                               " && ip4.dst == {255.255.255.255, 224.0.0.0/4, ",
-                              op->json_key, ETH_ADDR_ARGS(ps.ea));
+                              port, ETH_ADDR_ARGS(ps.ea));
             }
 
             for (int i = 0; i < ps.n_ipv4_addrs; i++) {
@@ -1525,18 +1612,26 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
             continue;
         }
 
+        const char *inport = NULL;
+        if (!strcmp(op->nbs->type, "floating-ip")) {
+            inport = get_router_port_for_floating_ip(op, ports);
+        }
+        else {
+            inport = op->json_key;
+        }
+
         struct ds match = DS_EMPTY_INITIALIZER;
-        ds_put_format(&match, "inport == %s", op->json_key);
+        ds_put_format(&match, "inport == %s", inport);
         build_port_security_l2(
-            "eth.src", op->nbs->port_security, op->nbs->n_port_security,
+			"eth.src", op->nbs->port_security, op->nbs->n_port_security,
             &match);
         ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_L2, 50,
                       ds_cstr(&match), "next;");
         ds_destroy(&match);
 
         if (op->nbs->n_port_security) {
-            build_port_security_ip(P_IN, op, lflows);
-            build_port_security_nd(op, lflows);
+            build_port_security_ip(P_IN, op, lflows, ports);
+            build_port_security_nd(op, lflows, ports);
         }
     }
 
@@ -1578,10 +1673,26 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
          *  - port is up or
          *  - port type is router
          */
-        if (!lport_is_up(op->nbs) && strcmp(op->nbs->type, "router")) {
+        if (!lport_is_up(op->nbs) && strcmp(op->nbs->type, "router") &&
+                                     strcmp(op->nbs->type, "floating-ip")) {
             continue;
         }
 
+        uint16_t priority = 0;
+        if (!strcmp(op->nbs->type, "floating-ip")) {
+            const char *peer_name = smap_get(&op->nbs->options,
+                                             "fixed-ip-port");
+            if (peer_name) {
+                priority = 150;
+            }
+            else {
+                priority = 50;
+            }
+        }
+        else {
+            priority = 50;
+        }
+
         for (size_t i = 0; i < op->nbs->n_addresses; i++) {
             struct lport_addresses laddrs;
             if (!extract_lport_addresses(op->nbs->addresses[i], &laddrs,
@@ -1606,8 +1717,20 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
                     ETH_ADDR_ARGS(laddrs.ea),
                     ETH_ADDR_ARGS(laddrs.ea),
                     IP_ARGS(laddrs.ipv4_addrs[j].addr));
-                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_RSP, 50,
+                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_RSP, priority,
                               match, actions);
+                if (!strcmp(op->nbs->type, "floating-ip")) {
+                    const char *peer_name = smap_get(&op->nbs->options,
+                                                     "fixed-ip-port");
+                    struct ovn_lflow *lflow = ovn_lflow_find(lflows, op->od,
+                            S_SWITCH_IN_ARP_RSP, priority, match, actions);
+                    /* Setting the lport option in external_ids of lflow, so
+                     * that the controller will pick up this flow only if the
+                     * lport is a local port on the chassis */
+                    if (lflow) {
+                        ovn_lflow_lport_set(lflow, peer_name);
+                    }
+                }
                 free(match);
                 free(actions);
             }
@@ -1662,8 +1785,15 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
                 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
                               ETH_ADDR_ARGS(mac));
 
+                const char *outport = NULL;
+                if (!strcmp(op->nbs->type, "floating-ip")) {
+                    outport = get_router_port_for_floating_ip(op, ports);
+                }
+                else {
+                    outport = op->json_key;
+                }
                 ds_init(&actions);
-                ds_put_format(&actions, "outport = %s; output;", op->json_key);
+                ds_put_format(&actions, "outport = %s; output;", outport);
                 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
                               ds_cstr(&match), ds_cstr(&actions));
                 ds_destroy(&actions);
@@ -1722,8 +1852,15 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
             continue;
         }
 
+        const char *outport = NULL;
+        if (!strcmp(op->nbs->type, "floating-ip")) {
+            outport = get_router_port_for_floating_ip(op, ports);
+        }
+        else {
+            outport = op->json_key;
+        }
         struct ds match = DS_EMPTY_INITIALIZER;
-        ds_put_format(&match, "outport == %s", op->json_key);
+        ds_put_format(&match, "outport == %s", outport);
         if (lport_is_enabled(op->nbs)) {
             build_port_security_l2("eth.dst", op->nbs->port_security,
                                    op->nbs->n_port_security, &match);
@@ -1737,7 +1874,7 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
         ds_destroy(&match);
 
         if (op->nbs->n_port_security) {
-            build_port_security_ip(P_OUT, op, lflows);
+            build_port_security_ip(P_OUT, op, lflows, ports);
         }
     }
 }
@@ -1819,6 +1956,48 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
         free(match);
     }
 
+    /* Logical router ingress table 0: match (priority 50).
+     * The following rules admit packets addressed to the MAC address of a
+     * floating ip port when they ingress on the router's gateway port. */
+    HMAP_FOR_EACH (od, key_node, datapaths) {
+        if (!(od->nbr && od->gateway_port)) {
+            continue;
+        }
+        struct ovn_port *lrp = od->gateway_port->peer;  
+        if (!lrp) {
+            VLOG_ERR("No peer port for logical router port %s",
+                        od->gateway_port->key);
+            continue;
+        }
+        const struct nbrec_logical_switch *nbs = lrp->od->nbs;
+        for (size_t i = 0 ; i < nbs->n_ports ; i++) {
+            if (nbs->ports[i] && !strcmp(nbs->ports[i]->type, "floating-ip")) {
+                const char *peer_name = smap_get(&nbs->ports[i]->options,
+                                                 "fixed-ip-port");
+                const char *lrp_name = smap_get(&nbs->ports[i]->options,
+                                                 "router-port");
+                if (!peer_name || !lrp_name) {
+                    continue;
+                }
+                if (strcmp(lrp_name, lrp->key)) {
+                    continue;
+                }
+                for (size_t j = 0; j < nbs->ports[i]->n_addresses; j++) {
+                    struct eth_addr mac;
+                    char *match;
+                    if (eth_addr_from_string(nbs->ports[i]->addresses[j], &mac)) {
+                        match = xasprintf("(eth.mcast || eth.dst == "
+                           ETH_ADDR_FMT") && inport == %s",
+                           ETH_ADDR_ARGS(mac), od->gateway_port->json_key);
+                        ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION,
+                                      50, match, "next;");
+                        free(match);
+                    }
+                }
+            }
+        }
+    }
+
     /* Logical router ingress table 1: IP Input. */
     HMAP_FOR_EACH (od, key_node, datapaths) {
         if (!od->nbr) {
@@ -1928,7 +2107,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
         free(match);
     }
 
-    /* Logical router ingress table 2: IP Routing.
+    /* Logical router ingress table 3: IP Routing.
      *
      * A packet that arrives at this table is an IP packet that should be
      * routed to the address in ip4.dst. This table sets outport to the correct
@@ -1953,7 +2132,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
     }
     /* XXX destination unreachable */
 
-    /* Local router ingress table 3: ARP Resolution.
+    /* Local router ingress table 5: ARP Resolution.
      *
      * Any packet that reaches this table is an IP packet whose next-hop IP
      * address is in reg0. (ip4.dst is the final destination.) This table
@@ -2021,7 +2200,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
                       "get_arp(outport, reg0); next;");
     }
 
-    /* Local router ingress table 4: ARP request.
+    /* Local router ingress table 6: ARP request.
      *
      * In the common case where the Ethernet destination has been resolved,
      * this table outputs the packet (priority 100).  Otherwise, it composes
@@ -2042,6 +2221,131 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
         ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;");
     }
 
+    /* DNAT & SNAT tables.
+     *
+     * Priority 100 rule in IN_IP_DNAT to set reg2 to 0x1 if dst ip &
+     * src ip networks are connected via the same router.
+     *
+     * Priority 100 rule in IN_IP_SNAT to skip modifying the src ip when
+     * reg2 is set to 0x1.
+     *
+     * Priority 90 rule in IN_IP_DNAT to modify dst ip from floating-ip to
+     * vm-ip.
+     *
+     * Priority 90 rule in IN_IP_SNAT to modify src ip from vm ip to
+     * floating ip and dst mac to floating ip port mac if the packet is
+     * egressing via the gateway port.
+     *
+     * Priority 50 rule in IN_IP_SNAT to modify src ip from vm ip to
+     * floating ip.
+     *
+     * Priority 0 rule to go to next table if none of the above rules match.
+     */
+
+    HMAP_FOR_EACH (op, key_node, ports) {
+        if (!(op->nbs && !strcmp(op->nbs->type, "floating-ip"))) {
+            continue;
+        }
+        const char *peer_name = smap_get(&op->nbs->options, "fixed-ip-port");
+        const char *lrp_name = smap_get(&op->nbs->options, "router-port");
+        if (!peer_name || !lrp_name) {
+            continue;
+        }
+        struct ovn_port *lrp = ovn_port_find(ports, lrp_name);
+        if (!lrp) {
+            continue;
+        }
+        for (size_t i = 0; i < op->nbs->n_addresses; i++) {
+            char *match;
+            char *actions;
+            struct eth_addr mac;
+            ovs_be32 ip;
+            if (ovs_scan(op->nbs->addresses[i],
+                     ETH_ADDR_SCAN_FMT" "IP_SCAN_FMT,
+                     ETH_ADDR_SCAN_ARGS(mac), IP_SCAN_ARGS(&ip))) {
+                match = xasprintf("ip4.dst == "IP_FMT"", IP_ARGS(ip));
+                actions = xasprintf("ip4.dst = "IP_FMT"; inport = \"\"; next;",
+                                     IP_ARGS(op->fixed_ip));
+                ovn_lflow_add(lflows, lrp->peer->od, S_ROUTER_IN_IP_DNAT,
+                                90, match, actions);
+                free(match);
+                free(actions);
+
+                match = xasprintf("(ip4.src == "IP_FMT") && outport == %s",
+                        IP_ARGS(op->fixed_ip), lrp->peer->json_key);
+                actions = xasprintf("eth.src = "ETH_ADDR_FMT";"
+                                    " ip4.src = "IP_FMT"; next;",
+                                    ETH_ADDR_ARGS(mac), IP_ARGS(ip));
+                ovn_lflow_add(lflows, lrp->peer->od,
+                              S_ROUTER_IN_IP_SNAT, 90, match, actions);
+                free(match);
+                free(actions);
+
+                match = xasprintf("ip4.src == "IP_FMT"", 
+                                   IP_ARGS(op->fixed_ip));
+                actions = xasprintf("ip4.src = "IP_FMT"; next;",IP_ARGS(ip));
+                ovn_lflow_add(lflows, lrp->peer->od,
+                              S_ROUTER_IN_IP_SNAT, 50, match, actions);
+                free(match);
+                free(actions);
+            }
+        }
+    }
+
+    HMAP_FOR_EACH(od, key_node, datapaths) {
+        if (!od->nbr) {
+            continue;
+        }
+
+        /* Default rules for DNAT & SNAT tables with priority 0. */
+        if (od->gateway_port) {
+            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_DNAT, 0, "1", "next;");
+            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_SNAT, 0, "1", "next;");
+        }
+
+        /* The following rules in DNAT & SNAT tables will prevent NAT when the
+         * src & dst ips belong to private networks that are connected via a
+         * router */
+        bool add_snat_flow = false;
+        for (size_t j = 0; j < od->nbr->n_ports;j++) {
+            if (od->gateway_port && !strcmp(od->nbr->ports[j]->name,
+                                            od->gateway_port->key)) {
+                continue;
+            }
+            ovs_be32 ip1, ip2, mask1, mask2;
+            char *error = ip_parse_masked(od->nbr->ports[j]->network, &ip1, &mask1);
+            if (error || mask1 == OVS_BE32_MAX || !ip_is_cidr(mask1)) {
+                free(error);
+                continue;
+            }
+            for (size_t l = 0; l < od->nbr->n_ports;l++) {
+                if ((l == j) || (od->gateway_port && 
+                                    !strcmp(od->nbr->ports[l]->name,
+                                            od->gateway_port->key))) {
+                    continue;
+                }
+                char *error = ip_parse_masked(od->nbr->ports[l]->network, &ip2, &mask2);
+                if (error || mask2 == OVS_BE32_MAX || !ip_is_cidr(mask2)) {
+                    free(error);
+                    continue;
+                }
+                char *match = xasprintf("(ip4.src == "IP_FMT"/"IP_FMT") && "
+                                   "(ip4.dst == "IP_FMT"/"IP_FMT")",
+                                   IP_ARGS(ip1 & mask1), IP_ARGS(mask1),
+                                   IP_ARGS(ip2 & mask2), IP_ARGS(mask2));
+                ovn_lflow_add(lflows, od,
+                              S_ROUTER_IN_IP_DNAT, 100, match,
+                              "reg2 = 1; next;"); 
+                free(match);
+                add_snat_flow = true;
+            }
+        }
+        if (add_snat_flow) {
+            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_SNAT, 100,
+                          "reg2 == 1", "next;"); 
+        }
+    }
+
     /* Logical router egress table 0: Delivery (priority 100).
      *
      * Priority 100 rules deliver packets to enabled logical ports. */
@@ -2111,8 +2415,12 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths,
         sbrec_logical_flow_set_match(sbflow, lflow->match);
         sbrec_logical_flow_set_actions(sbflow, lflow->actions);
 
-        const struct smap ids = SMAP_CONST1(&ids, "stage-name",
-                                            ovn_stage_to_str(lflow->stage));
+        struct smap ids;
+        smap_init(&ids);
+        if (lflow->lport) {
+            smap_add(&ids, "lport", lflow->lport);
+        }
+        smap_add(&ids, "stage-name", ovn_stage_to_str(lflow->stage));
         sbrec_logical_flow_set_external_ids(sbflow, &ids);
 
         ovn_lflow_destroy(&lflows, lflow);
-- 
2.6.1




More information about the dev mailing list