[ovs-dev] [RFC 5/5 v2] ovn: DNAT and SNAT on a gateway router.

Gurucharan Shetty guru at ovn.org
Wed May 11 02:51:21 UTC 2016


For traffic from physical space to virtual space we need DNAT.
The DNAT happens in the gateway router, and the packet then reaches
the logical port. The return traffic should be SNATed.

Traffic originating in virtual space heading to physical space
should be SNATed.

East-west traffic with the public destination IP address needs
a DNAT. This traffic is punted to the l3 gateway. This traffic
is also SNATed and eventually reaches its destination. The SNAT
is needed because we need the reverse traffic to go back to
the l3 gateway and not short-circuit directly to the source.

This commit introduces 4 new logical actions.
1. ct_snat_next: To send the packet through SNAT zone to get its
status and to resubmit to the next table. (Similar to ct_next).
2. ct_snat(IP): To SNAT to the provided IP address.
3. ct_dnat_next: To send the packet through DNAT zone to get its
status and to resubmit to the next table.
4. ct_dnat(IP): To DNAT to the provided IP.

The above format is used as packets need to be sent through 2
different zones. There are probably better ideas.

The current implementation does 4 recirculations for east-west
DNAT and there is potential to limit it to 2 (which I will work
on for the non-RFC version).

Command hints:

Consider a distributed router "R1" that has switch foo (192.168.1.0/24)
and bar (192.168.2.0/24) connected to it. You connect "R1" to
a gateway router "R2" via a switch "join" in the 20.0.0.0/24 network.

R2 has a switch "alice" (172.16.1.0/24) connected to it (to simulate
external network).

case1 : Add pure DNAT (north-south)

Add a DNAT rule in R2:
ovn-nbctl set logical_router R2 dnat:"30.0.0.2"=192.168.1.2

Now alice1 should be able to ping 192.168.1.2 via 30.0.0.2.

case2 : Add pure SNAT (south-north)

Add a SNAT rule in R2:

ovn-nbctl set logical_router R2 snat:"192.168.1.0/24"=30.0.0.1

(You need a static route in R1 so that packets destined to the outside
world go through R2.)

When foo1 pings alice1, alice1 receives traffic from 30.0.0.1

case3 : SNAT and DNAT (east-west traffic)

You do both case1 and case2.

Signed-off-by: Gurucharan Shetty <guru at ovn.org>
---
No v1.
---
 ovn/lib/actions.c       |  103 ++++++++++++++++++++++++
 ovn/northd/ovn-northd.c |  200 +++++++++++++++++++++++++++++++++++++++++++++--
 ovn/ovn-nb.ovsschema    |   12 ++-
 ovn/ovn-nb.xml          |   14 ++++
 4 files changed, 322 insertions(+), 7 deletions(-)

diff --git a/ovn/lib/actions.c b/ovn/lib/actions.c
index 5f0bf19..47ac344 100644
--- a/ovn/lib/actions.c
+++ b/ovn/lib/actions.c
@@ -442,6 +442,101 @@ emit_ct(struct action_context *ctx, bool recirc_next, bool commit)
     add_prerequisite(ctx, "ip");
 }
 
+static void
+parse_ct_nat_next(struct action_context *ctx, bool snat)
+{
+    struct ofpact_conntrack *ct = ofpact_put_CT(ctx->ofpacts);
+
+    if (ctx->ap->cur_ltable < ctx->ap->n_tables) {
+        ct->recirc_table = ctx->ap->first_ptable + ctx->ap->cur_ltable + 1;
+    } else {
+        action_error(ctx,
+                     "\"ct_[ds]nat_next\" action not allowed in last table.");
+        return;
+    }
+
+    if (snat) {
+        ct->zone_src.field = mf_from_id(MFF_LOG_SNAT_ZONE);
+    } else {
+        ct->zone_src.field = mf_from_id(MFF_LOG_DNAT_ZONE);
+    }
+    ct->zone_src.ofs = 0;
+    ct->zone_src.n_bits = 16;
+    ct->flags = 0;
+    ct->alg = 0;
+
+    add_prerequisite(ctx, "ip");
+}
+
+static void
+parse_ct_nat(struct action_context *ctx, bool snat)
+{
+    const size_t ct_offset = ctx->ofpacts->size;
+    ofpbuf_pull(ctx->ofpacts, ct_offset);
+
+    struct ofpact_conntrack *ct = ofpact_put_CT(ctx->ofpacts);
+    if (ctx->ap->cur_ltable < ctx->ap->n_tables) {
+        ct->recirc_table = ctx->ap->first_ptable + ctx->ap->cur_ltable + 1;
+    } else {
+        action_error(ctx,
+                     "\"ct_[ds]nat\" action not allowed in last table.");
+        return;
+    }
+
+    if (snat) {
+        ct->zone_src.field = mf_from_id(MFF_LOG_SNAT_ZONE);
+    } else {
+        ct->zone_src.field = mf_from_id(MFF_LOG_DNAT_ZONE);
+    }
+    ct->zone_src.ofs = 0;
+    ct->zone_src.n_bits = 16;
+    ct->flags = 0;
+    ct->alg = 0;
+
+    add_prerequisite(ctx, "ip");
+
+    struct ofpact_nat *nat;
+    size_t nat_offset;
+    nat_offset = ctx->ofpacts->size;
+    ofpbuf_pull(ctx->ofpacts, nat_offset);
+
+    nat = ofpact_put_NAT(ctx->ofpacts);
+    nat->flags = 0;
+    nat->range_af = AF_UNSPEC;
+
+    int commit = 0;
+    if (lexer_match(ctx->lexer, LEX_T_LPAREN)) {
+        ovs_be32 ip;
+        if (ctx->lexer->token.type == LEX_T_INTEGER
+            && ctx->lexer->token.format == LEX_F_IPV4) {
+            ip = ctx->lexer->token.value.ipv4;
+        } else {
+            action_syntax_error(ctx, "invalid ip");
+            return;
+        }
+
+        nat->range_af = AF_INET;
+        nat->range.addr.ipv4.min = ip;
+        if (snat) {
+            nat->flags |= NX_NAT_F_SRC;
+        } else {
+            nat->flags |= NX_NAT_F_DST;
+        }
+        commit = NX_CT_F_COMMIT;
+        lexer_get(ctx->lexer);
+        if (!lexer_match(ctx->lexer, LEX_T_RPAREN)) {
+            action_syntax_error(ctx, "expecting `)'");
+            return;
+        }
+    }
+
+    ctx->ofpacts->header = ofpbuf_push_uninit(ctx->ofpacts, nat_offset);
+    ct = ctx->ofpacts->header;
+    ct->flags |= commit;
+    ofpact_finish(ctx->ofpacts, &ct->ofpact);
+    ofpbuf_push_uninit(ctx->ofpacts, ct_offset);
+}
+
 static bool
 parse_action(struct action_context *ctx)
 {
@@ -469,6 +564,14 @@ parse_action(struct action_context *ctx)
         emit_ct(ctx, true, false);
     } else if (lexer_match_id(ctx->lexer, "ct_commit")) {
         emit_ct(ctx, false, true);
+    } else if (lexer_match_id(ctx->lexer, "ct_dnat_next")) {
+        parse_ct_nat_next(ctx, false);
+    } else if (lexer_match_id(ctx->lexer, "ct_dnat")) {
+        parse_ct_nat(ctx, false);
+    } else if (lexer_match_id(ctx->lexer, "ct_snat_next")) {
+        parse_ct_nat_next(ctx, true);
+    } else if (lexer_match_id(ctx->lexer, "ct_snat")) {
+        parse_ct_nat(ctx, true);
     } else if (lexer_match_id(ctx->lexer, "arp")) {
         parse_arp_action(ctx);
     } else if (lexer_match_id(ctx->lexer, "get_arp")) {
diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
index 2cfaf95..f4c166d 100644
--- a/ovn/northd/ovn-northd.c
+++ b/ovn/northd/ovn-northd.c
@@ -104,12 +104,18 @@ enum ovn_stage {
     /* Logical router ingress stages. */                              \
     PIPELINE_STAGE(ROUTER, IN,  ADMISSION,   0, "lr_in_admission")    \
     PIPELINE_STAGE(ROUTER, IN,  IP_INPUT,    1, "lr_in_ip_input")     \
-    PIPELINE_STAGE(ROUTER, IN,  IP_ROUTING,  2, "lr_in_ip_routing")   \
-    PIPELINE_STAGE(ROUTER, IN,  ARP_RESOLVE, 3, "lr_in_arp_resolve")  \
-    PIPELINE_STAGE(ROUTER, IN,  ARP_REQUEST, 4, "lr_in_arp_request")  \
+    PIPELINE_STAGE(ROUTER, IN,  PRE_SNAT,    2, "lr_in_pre_snat")     \
+    PIPELINE_STAGE(ROUTER, IN,  SNAT,        3, "lr_in_snat")         \
+    PIPELINE_STAGE(ROUTER, IN,  PRE_DNAT,    4, "lr_in_pre_dnat")     \
+    PIPELINE_STAGE(ROUTER, IN,  DNAT,        5, "lr_in_dnat")         \
+    PIPELINE_STAGE(ROUTER, IN,  IP_ROUTING,  6, "lr_in_ip_routing")   \
+    PIPELINE_STAGE(ROUTER, IN,  ARP_RESOLVE, 7, "lr_in_arp_resolve")  \
+    PIPELINE_STAGE(ROUTER, IN,  ARP_REQUEST, 8, "lr_in_arp_request")  \
                                                                       \
     /* Logical router egress stages. */                               \
-    PIPELINE_STAGE(ROUTER, OUT, DELIVERY,    0, "lr_out_delivery")
+    PIPELINE_STAGE(ROUTER, OUT, PRE_SNAT,  0, "lr_out_pre_snat")      \
+    PIPELINE_STAGE(ROUTER, OUT, SNAT,      1, "lr_out_snat")          \
+    PIPELINE_STAGE(ROUTER, OUT, DELIVERY,  2, "lr_out_delivery")
 
 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME)   \
     S_##DP_TYPE##_##PIPELINE##_##STAGE                          \
@@ -2067,6 +2073,44 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
         free(match);
         free(actions);
 
+        /* ARP handling for virtual IP addresses.
+         *
+         * DNAT IP addresses are virtual IP addresses that need ARP
+         * handling. */
+        struct smap_node *node;
+        SMAP_FOR_EACH(node, &op->od->nbr->dnat) {
+            ovs_be32 ip;
+            if (!ip_parse(node->key, &ip) || !ip) {
+                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+                VLOG_WARN_RL(&rl, "bad ip address %s in dnat configuration"
+                             "for router %s", node->key, op->key);
+                continue;
+            }
+
+            match = xasprintf(
+                "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
+                op->json_key, IP_ARGS(ip));
+            actions = xasprintf(
+                "eth.dst = eth.src; "
+                "eth.src = "ETH_ADDR_FMT"; "
+                "arp.op = 2; /* ARP reply */ "
+                "arp.tha = arp.sha; "
+                "arp.sha = "ETH_ADDR_FMT"; "
+                "arp.tpa = arp.spa; "
+                "arp.spa = "IP_FMT"; "
+                "outport = %s; "
+                "inport = \"\"; /* Allow sending out inport. */ "
+                "output;",
+                ETH_ADDR_ARGS(op->mac),
+                ETH_ADDR_ARGS(op->mac),
+                IP_ARGS(ip),
+                op->json_key);
+            ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
+                          match, actions);
+            free(match);
+            free(actions);
+        }
+
         /* Drop IP traffic to this router. */
         match = xasprintf("ip4.dst == "IP_FMT, IP_ARGS(op->ip));
         ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
@@ -2074,6 +2118,95 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
         free(match);
     }
 
+    /* Ingress SNAT. This is for already established connections' reverse
+     * traffic. i.e., SNAT has already been done in egress pipeline and now
+     * the packet has entered the ingress pipeline as part of a reply.
+     * We undo the SNAT here. */
+    HMAP_FOR_EACH (od, key_node, datapaths) {
+        if (!od->nbr) {
+            continue;
+        }
+
+        /* Packets are allowed by default. */
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_PRE_SNAT, 0, "1", "next;");
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_SNAT, 0, "1", "next;");
+
+        /* SNAT rules are only valid on non-distributed routers. */
+        if (!smap_get(&od->nbr->options, "chassis")) {
+            continue;
+        }
+
+        /* All connections go through connection tracker to get
+         * their state. */
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_PRE_SNAT, 100,
+                      "ip", "ct_snat_next;");
+
+        /* Established and related connections are sent through nat(). */
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_SNAT, 100,
+                      "ct.est && ip", "ct_snat;");
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_SNAT, 100,
+                      "ct.rel && ip", "ct_snat;");
+    }
+
+    /* Ingress DNAT. Packets enter the pipeline with destination ip address
+     * that needs to be DNATted from a virtual ip address to a real
+     * ip address. */
+    HMAP_FOR_EACH (od, key_node, datapaths) {
+        if (!od->nbr) {
+            continue;
+        }
+
+        /* Packets are allowed by default. */
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_PRE_DNAT, 0, "1", "next;");
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;");
+
+        /* DNAT rules are only valid on non-distributed routers. */
+        if (!smap_get(&od->nbr->options, "chassis")) {
+            continue;
+        }
+
+        /* All connections go through connection tracker to get
+         * their state. */
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_PRE_DNAT, 100,
+                      "ip", "ct_dnat_next;");
+
+        /* Established and related connections are sent through nat(). */
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
+                      "ct.est && ip", "inport = \"\"; ct_dnat;");
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
+                      "ct.rel && ip", "inport = \"\"; ct_dnat;");
+
+        struct smap_node *node;
+        SMAP_FOR_EACH(node, &od->nbr->dnat) {
+            char *match, *actions;
+            ovs_be32 ip, mask;
+
+            char *error = ip_parse_masked(node->key, &ip, &mask);
+            if (error || mask != OVS_BE32_MAX) {
+                static struct vlog_rate_limit rl
+                        = VLOG_RATE_LIMIT_INIT(5, 1);
+                VLOG_WARN_RL(&rl, "bad ip address %s for dnat", node->key);
+                continue;
+            }
+
+            error = ip_parse_masked(node->value, &ip, &mask);
+            if (error || mask != OVS_BE32_MAX) {
+                static struct vlog_rate_limit rl
+                        = VLOG_RATE_LIMIT_INIT(5, 1);
+                VLOG_WARN_RL(&rl, "bad ip address %s for dnat", node->value);
+                continue;
+            }
+
+            /* New connections are DNATed. */
+            match = xasprintf("ct.new && ip && ip4.dst == %s", node->key);
+            actions = xasprintf("inport = \"\"; ct_dnat(%s);", node->value);
+            ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
+                          match, actions);
+            free(match);
+            free(actions);
+        }
+    }
+
     /* Logical router ingress table 2: IP Routing.
      *
      * A packet that arrives at this table is an IP packet that should be
@@ -2274,7 +2407,64 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
         ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;");
     }
 
-    /* Logical router egress table 0: Delivery (priority 100).
+    /* Egress SNAT. Packets enter the pipeline with source ip address
+     * that needs to be SNATted to a virtual ip address. */
+    HMAP_FOR_EACH (od, key_node, datapaths) {
+        if (!od->nbr) {
+            continue;
+        }
+
+        /* Packets are allowed by default. */
+        ovn_lflow_add(lflows, od, S_ROUTER_OUT_PRE_SNAT, 0, "1", "next;");
+        ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;");
+
+        /* SNAT rules are only valid on non-distributed routers. */
+        if (!smap_get(&od->nbr->options, "chassis")) {
+            continue;
+        }
+
+        /* All connections go through connection tracker to get
+         * their state. */
+        ovn_lflow_add(lflows, od, S_ROUTER_OUT_PRE_SNAT, 100,
+                      "ip", "ct_snat_next;");
+
+        /* Established and related connections are sent through nat(). */
+        ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 100,
+                      "ct.est && ip", "ct_snat;");
+        ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 100,
+                      "ct.rel && ip", "ct_snat;");
+
+        struct smap_node *node;
+        SMAP_FOR_EACH(node, &od->nbr->snat) {
+            char *match, *actions;
+            ovs_be32 ip, mask;
+
+            char *error = ip_parse_masked(node->key, &ip, &mask);
+            if (error || !ip_is_cidr(mask)) {
+                static struct vlog_rate_limit rl
+                        = VLOG_RATE_LIMIT_INIT(5, 1);
+                VLOG_WARN_RL(&rl, "bad ip network %s for snat", node->key);
+                continue;
+            }
+
+            error = ip_parse_masked(node->value, &ip, &mask);
+            if (error || mask != OVS_BE32_MAX) {
+                static struct vlog_rate_limit rl
+                        = VLOG_RATE_LIMIT_INIT(5, 1);
+                VLOG_WARN_RL(&rl, "bad ip address %s for snat", node->value);
+                continue;
+            }
+            /* New connections are SNATed. */
+            match = xasprintf("ct.new && ip && ip4.src == %s", node->key);
+            actions = xasprintf("ct_snat(%s);", node->value);
+            ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 100,
+                          match, actions);
+            free(match);
+            free(actions);
+        }
+    }
+
+    /* Logical router egress table 2: Delivery (priority 100).
      *
      * Priority 100 rules deliver packets to enabled logical ports. */
     HMAP_FOR_EACH (op, key_node, ports) {
diff --git a/ovn/ovn-nb.ovsschema b/ovn/ovn-nb.ovsschema
index fa21b30..4d9c613 100644
--- a/ovn/ovn-nb.ovsschema
+++ b/ovn/ovn-nb.ovsschema
@@ -1,7 +1,7 @@
 {
     "name": "OVN_Northbound",
-    "version": "2.1.2",
-    "cksum": "429668869 5325",
+    "version": "2.1.3",
+    "cksum": "400575131 5731",
     "tables": {
         "Logical_Switch": {
             "columns": {
@@ -78,6 +78,14 @@
                                    "max": "unlimited"}},
                 "default_gw": {"type": {"key": "string", "min": 0, "max": 1}},
                 "enabled": {"type": {"key": "boolean", "min": 0, "max": 1}},
+                "dnat" : {
+                    "type": {"key": {"type": "string"},
+                             "value": {"type" : "string"},
+                             "min": 0, "max": "unlimited"}},
+                "snat" : {
+                    "type": {"key": {"type": "string"},
+                             "value": {"type" : "string"},
+                             "min": 0, "max": "unlimited"}},
                 "options": {
                      "type": {"key": "string",
                               "value": "string",
diff --git a/ovn/ovn-nb.xml b/ovn/ovn-nb.xml
index d239499..69c79e1 100644
--- a/ovn/ovn-nb.xml
+++ b/ovn/ovn-nb.xml
@@ -631,6 +631,20 @@
       router has all ingress and egress traffic dropped.
     </column>
 
+    <column name="dnat">
+      A map of externally visible IP addresses to their corresponding IP
+      addresses in the logical space.  Each externally visible IP address
+      is DNATed to the IP address in the logical space.  DNAT only works
+      on routers that are non-distributed.
+    </column>
+
+    <column name="snat">
+      A map of IP network (e.g. 192.168.1.0/24) to an IP address.  Any IP
+      packet with its source IP address in the provided network (key) is
+      SNATed into the IP address in the value.  SNAT only works on routers
+      that are non-distributed.
+    </column>
+
     <group title="Options">
       <p>
         Additional options for the logical router.
-- 
1.7.9.5




More information about the dev mailing list