[ovs-dev] [IPv6 v2 07/10] ovn-northd: Implement basic IPv6 routing.

Justin Pettit jpettit at ovn.org
Fri Jul 29 06:26:17 UTC 2016


This commit only supports static MAC bindings.  A future commit will add
support for dynamic IPv6/MAC bindings.  It has a few other limitations
described in "ovn/TODO".

Signed-off-by: Justin Pettit <jpettit at ovn.org>
---
 ovn/TODO                    |  19 +-
 ovn/northd/ovn-northd.8.xml | 196 +++++++++++++++---
 ovn/northd/ovn-northd.c     | 476 +++++++++++++++++++++++++++++++++-----------
 3 files changed, 536 insertions(+), 155 deletions(-)

diff --git a/ovn/TODO b/ovn/TODO
index fd15f5d..dab8648 100644
--- a/ovn/TODO
+++ b/ovn/TODO
@@ -40,11 +40,24 @@ the "arp" action, and an action for generating
 
 ** IPv6
 
-*** ND versus ARP
+*** Drop invalid source IPv6 addresses
 
-*** IPv6 routing
+*** Don't forward non-routable addresses
 
-*** ICMPv6
+*** ICMPv6 action
+
+Similar to the ICMPv4 action, ICMPv6 messages should be generated.
+
+*** Neighbor Discovery
+
+**** ND Router Advertisements
+
+The router ports should periodically send out ND Router Advertisements
+and respond to Router Solicitations.
+
+**** Learn MAC bindings on ND Solicitations
+
+**** Properly set RSO flags
 
 ** Dynamic IP to MAC bindings
 
diff --git a/ovn/northd/ovn-northd.8.xml b/ovn/northd/ovn-northd.8.xml
index 85e0e66..6d9b6ca 100644
--- a/ovn/northd/ovn-northd.8.xml
+++ b/ovn/northd/ovn-northd.8.xml
@@ -412,10 +412,10 @@
       </li>
     </ul>
 
-    <h3>Ingress Table 9: ARP responder</h3>
+    <h3>Ingress Table 9: ARP/ND responder</h3>
 
     <p>
-      This table implements ARP responder for known IPs.  It contains these
+      This table implements ARP/ND responder for known IPs.  It contains these
       logical flows:
     </p>
 
@@ -427,8 +427,8 @@
 
       <li>
         <p>
-          Priority-50 flows that matches ARP requests to each known IP address
-          <var>A</var> of logical port <var>P</var>, and respond with ARP
+          Priority-50 flows that match ARP requests to each known IP address
+          <var>A</var> of every logical router port, and respond with ARP
           replies directly with corresponding Ethernet address <var>E</var>:
         </p>
 
@@ -440,7 +440,7 @@ arp.tha = arp.sha;
 arp.sha = <var>E</var>;
 arp.tpa = arp.spa;
 arp.spa = <var>A</var>;
-outport = <var>P</var>;
+outport = inport;
 inport = ""; /* Allow sending out inport. */
 output;
         </pre>
@@ -452,6 +452,33 @@ output;
       </li>
 
       <li>
+        <p>
+          Priority-50 flows that match IPv6 ND neighbor solicitations to
+          each known IP address <var>A</var> (and <var>A</var>'s
+          solicited node address) of every logical router port, and
+          respond with neighbor advertisements directly with
+          corresponding Ethernet address <var>E</var>:
+        </p>
+
+        <pre>
+nd_na {
+    eth.src = <var>E</var>;
+    ip6.src = <var>A</var>;
+    nd.target = <var>A</var>;
+    nd.tll = <var>E</var>;
+    outport = inport;
+    inport = ""; /* Allow sending out inport. */
+    output;
+};
+        </pre>
+
+        <p>
+          These flows are omitted for logical ports (other than router ports)
+          that are down.
+        </p>
+      </li>
+
+      <li>
         One priority-0 fallback flow that matches all packets and advances to
         the next table.
       </li>
@@ -712,7 +739,8 @@ output;
             network source or destination)
           </li>
           <li>
-            <code>ip4.src</code> is any IP address owned by the router.
+            <code>ip4.src</code> or <code>ip6.src</code> is any IP
+            address owned by the router.
           </li>
           <li>
             <code>ip4.src</code> is the broadcast address of any IP network
@@ -725,13 +753,19 @@ output;
         <p>
           ICMP echo reply.  These flows reply to ICMP echo requests received
           for the router's IP address.  Let <var>A</var> be an IP address
-          owned by a router port.  Then, for each <var>A</var>, a priority-90
-          flow matches on <code>ip4.dst == <var>A</var></code> and <code>
-          icmp4.type == 8 &amp;&amp; icmp4.code == 0</code> (ICMP echo
-          request).  The port of the router that receives the echo request
-          does not matter. Also, the ip.ttl of the echo request packet is not
-          checked, so it complies with RFC 1812, section 4.2.2.9. These flows
-          use the following actions:
+          owned by a router port.  Then, for each <var>A</var> that is
+          an IPv4 address, a priority-90 flow matches on
+          <code>ip4.dst == <var>A</var></code> and
+          <code>icmp4.type == 8 &amp;&amp; icmp4.code == 0</code>
+          (ICMP echo request).  For each <var>A</var> that is an IPv6
+          address, a priority-90 flow matches on
+          <code>ip6.dst == <var>A</var></code> and
+          <code>icmp6.type == 128 &amp;&amp; icmp6.code == 0</code>
+          (ICMPv6 echo request).The port of the router that receives the
+          echo request does not matter. Also, the ip.ttl of the echo
+          request packet is not checked, so it complies with RFC 1812,
+          section 4.2.2.9. Flows for ICMPv4 echo requests use the
+          following actions:
         </p>
 
         <pre>
@@ -741,6 +775,18 @@ icmp4.type = 0;
 inport = ""; /* Allow sending out inport. */
 next;
         </pre>
+
+        <p>
+          Flows for ICMPv6 echo requests use the following actions:
+        </p>
+
+        <pre>
+ip6.dst &lt;-&gt; ip6.src;
+ip.ttl = 255;
+icmp6.type = 129;
+inport = ""; /* Allow sending out inport. */
+next;
+        </pre>
       </li>
 
       <li>
@@ -805,6 +851,35 @@ output;
 
       <li>
         <p>
+          Reply to IPv6 Neighbor Solicitations.
+        </p>
+
+        <p>
+          These flows reply to Neighbor Solictation requests for the
+          router's own IPv6 address.  For each router port <var>P</var>
+          that owns IPv6 address <var>A</var>, solicited node address
+          <var>S</var>, and Ethernet address
+          <var>E</var>, a priority-90 flow matches <code>inport ==
+          <var>P</var> &amp;&amp; nd_ns &amp;&amp; ip6.dst == {<var>A</var>,
+          <var>E</var>} &amp;&amp; nd.target == <var>A</var></code>
+          with the following actions:
+        </p>
+
+        <pre>
+nd_na {
+    eth.src = <var>E</var>;
+    ip6.src = <var>A</var>;
+    nd.target = <var>A</var>;
+    nd.tll = <var>E</var>;
+    outport = inport;
+    inport = \"\"; /* Allow sending out inport. */
+    output;
+};
+        </pre>
+      </li>
+
+      <li>
+        <p>
           UDP port unreachable.  Priority-80 flows generate ICMP port
           unreachable messages in reply to UDP datagrams directed to the
           router's IP address.  The logical router doesn't accept any UDP
@@ -976,14 +1051,16 @@ icmp4 {
     <h3>Ingress Table 4: IP Routing</h3>
 
     <p>
-      A packet that arrives at this table is an IP packet that should be routed
-      to the address in <code>ip4.dst</code>.  This table implements IP
-      routing, setting <code>reg0</code> to the next-hop IP address (leaving
-      <code>ip4.dst</code>, the packet's final destination, unchanged) and
-      advances to the next table for ARP resolution.  It also sets
-      <code>reg1</code> to the IP address owned by the selected router port
-      (which is used later in table 6 as the IP source address for an ARP
-      request, if needed).
+      A packet that arrives at this table is an IP packet that should be
+      routed to the address in <code>ip4.dst</code> or
+      <code>ip6.dst</code>.  This table implements IP routing, setting
+      <code>reg0</code> (or <code>xxreg0</code> for IPv6) to the next-hop IP
+      address (leaving <code>ip4.dst</code> or <code>ip6.dst</code>, the
+      packet's final destination, unchanged) and advances to the next
+      table for ARP resolution.  It also sets <code>reg1</code> (or
+      <code>xxreg1</code> to the IP address owned by the selected router
+      port (which is used later in table 6 as the IP source address for
+      an ARP request, if needed).
     </p>
 
     <p>
@@ -993,7 +1070,7 @@ icmp4 {
     <ul>
       <li>
         <p>
-          Routing table.  For each route to IPv4 network <var>N</var> with
+          IPv4 routing table.  For each route to IPv4 network <var>N</var> with
           netmask <var>M</var>, on router port <var>P</var> with IP address
           <var>A</var> and Ethernet
           address <var>E</var>, a logical flow with match <code>ip4.dst ==
@@ -1023,6 +1100,39 @@ next;
         </p>
       </li>
 
+      <li>
+        <p>
+          IPv6 routing table.  For each route to IPv6 network
+          <var>N</var> with netmask <var>M</var>, on router port
+          <var>P</var> with IP address <var>A</var> and Ethernet address
+          <var>E</var>, a logical flow with match in CIDR notation
+          <code>ip6.dst == <var>N</var>/<var>M</var></code>,
+          whose priority is the integer value of <var>M</var>, has the
+          following actions:
+        </p>
+
+        <pre>
+ip.ttl--;
+xxreg0 = <var>G</var>;
+xxreg1 = <var>A</var>;
+eth.src = <var>E</var>;
+outport = <var>P</var>;
+inport = ""; /* Allow sending out inport. */
+next;
+        </pre>
+
+        <p>
+          (Ingress table 1 already verified that <code>ip.ttl--;</code> will
+          not yield a TTL exceeded error.)
+        </p>
+
+        <p>
+          If the route has a gateway, <var>G</var> is the gateway IP address.
+          Instead, if the route is from a configured static route, <var>G</var>
+          is the next hop IP address.  Else it is <code>ip6.dst</code>.
+        </p>
+      </li>
+
 <!-- ICMP errors are not yet supported.
       <li>
         <p>
@@ -1056,14 +1166,17 @@ icmp4 {
 -->
     </ul>
 
-    <h3>Ingress Table 5: ARP Resolution</h3>
+    <h3>Ingress Table 5: ARP/ND Resolution</h3>
 
     <p>
-      Any packet that reaches this table is an IP packet whose next-hop IP
-      address is in <code>reg0</code>.  (<code>ip4.dst</code> is the final
-      destination.)  This table resolves the IP address in <code>reg0</code>
-      into an output port in <code>outport</code> and an Ethernet address in
-      <code>eth.dst</code>, using the following flows:
+      Any packet that reaches this table is an IP packet whose next-hop
+      IPv4 address is in <code>reg0</code> or IPv6 address is in
+      <code>xxreg0</code>.  (<code>ip4.dst</code> or
+      <code>ip6.dst</code> contains the final destination.)  This table
+      resolves the IP address in <code>reg0</code> (or
+      <code>xxreg0</code>) into an output port in <code>outport</code>
+      and an Ethernet address in <code>eth.dst</code>, using the
+      following flows:
     </p>
 
     <ul>
@@ -1080,18 +1193,35 @@ icmp4 {
         </p>
 
         <p>
-          For each IP address <var>A</var> whose host is known to have Ethernet
-          address <var>E</var> on router port <var>P</var>, a priority-100 flow
-          with match <code>outport === <var>P</var> &amp;&amp; reg0 ==
+          For each IPv4 address <var>A</var> whose host is known to have
+          Ethernet address <var>E</var> on router port <var>P</var>, a
+          priority-100 flow with match <code>outport === <var>P</var>
+          &amp;&amp; reg0 == <var>A</var></code> has actions
+          <code>eth.dst = <var>E</var>; next;</code>.
+        </p>
+
+        <p>
+          For each IPv6 address <var>A</var> whose host is known to have
+          Ethernet address <var>E</var> on router port <var>P</var>, a
+          priority-100 flow with match <code>outport === <var>P</var>
+          &amp;&amp; xxreg0 == <var>A</var></code> has actions
+          <code>eth.dst = <var>E</var>; next;</code>.
+        </p>
+
+        <p>
+          For each logical router port with an IPv4 address <var>A</var> and
+          a mac address of <var>E</var> that is reachable via a different
+          logical router port <var>P</var>, a priority-100 flow with
+          match <code>outport === <var>P</var> &amp;&amp; reg0 ==
           <var>A</var></code> has actions <code>eth.dst = <var>E</var>;
           next;</code>.
         </p>
 
         <p>
-          For each logical router port with an IP address <var>A</var> and
+          For each logical router port with an IPv6 address <var>A</var> and
           a mac address of <var>E</var> that is reachable via a different
           logical router port <var>P</var>, a priority-100 flow with
-          match <code>outport === <var>P</var> &amp;&amp; reg0 ==
+          match <code>outport === <var>P</var> &amp;&amp; xxreg0 ==
           <var>A</var></code> has actions <code>eth.dst = <var>E</var>;
           next;</code>.
         </p>
diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
index af13ec2..1fe73bf 100644
--- a/ovn/northd/ovn-northd.c
+++ b/ovn/northd/ovn-northd.c
@@ -2478,35 +2478,34 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
                               ds_cstr(&match), ds_cstr(&actions));
             }
 
-            if (op->lsp_addrs[i].n_ipv6_addrs > 0) {
+            /* For ND solicitations, we need to listen for both the
+             * unicast IPv6 address and its all-nodes multicast address,
+             * but always respond with the unicast IPv6 address. */
+            for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
                 ds_clear(&match);
-                ds_put_cstr(&match, "icmp6 && icmp6.type == 135 && ");
-                if (op->lsp_addrs[i].n_ipv6_addrs == 1) {
-                    ds_put_format(&match, "nd.target == %s",
-                                  op->lsp_addrs[i].ipv6_addrs[0].addr_s);
-                } else {
-                    ds_put_format(&match, "nd.target == {");
-                    for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
-                        ds_put_format(&match, "%s, ",
-                                      op->lsp_addrs[i].ipv6_addrs[j].addr_s);
-                    }
-                    ds_chomp(&match, ' ');
-                    ds_chomp(&match, ',');
-                    ds_put_cstr(&match, "}");
-                }
+                ds_put_format(&match,
+                        "nd_ns && ip6.dst == {%s, %s} && nd.target == %s",
+                        op->lsp_addrs[i].ipv6_addrs[j].addr_s,
+                        op->lsp_addrs[i].ipv6_addrs[j].sn_addr_s,
+                        op->lsp_addrs[i].ipv6_addrs[j].addr_s);
+
                 ds_clear(&actions);
                 ds_put_format(&actions,
-                    "nd_na { eth.src = %s; "
-                    "nd.tll = %s; "
-                    "outport = inport; "
-                    "inport = \"\"; /* Allow sending out inport. */ "
-                    "output; };",
-                    op->lsp_addrs[i].ea_s,
-                    op->lsp_addrs[i].ea_s);
-
+                        "nd_na { "
+                        "eth.src = %s; "
+                        "ip6.src = %s; "
+                        "nd.target = %s; "
+                        "nd.tll = %s; "
+                        "outport = inport; "
+                        "inport = \"\"; /* Allow sending out inport. */ "
+                        "output; "
+                        "};",
+                        op->lsp_addrs[i].ea_s,
+                        op->lsp_addrs[i].ipv6_addrs[j].addr_s,
+                        op->lsp_addrs[i].ipv6_addrs[j].addr_s,
+                        op->lsp_addrs[i].ea_s);
                 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
                               ds_cstr(&match), ds_cstr(&actions));
-
             }
         }
     }
@@ -2723,23 +2722,49 @@ lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
 static const char *
 find_lrp_member_ip(const struct ovn_port *op, const char *ip_s)
 {
-    ovs_be32 ip;
+    bool is_ipv4 = strchr(ip_s, '.') ? true : false;
 
-    if (!ip_parse(ip_s, &ip)) {
-        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
-        VLOG_WARN_RL(&rl, "bad ip address %s", ip_s);
-        return NULL;
-    }
+    if (is_ipv4) {
+        ovs_be32 ip;
 
-    for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
-        const struct ipv4_netaddr *na = &op->lrp_networks.ipv4_addrs[i];
+        if (!ip_parse(ip_s, &ip)) {
+            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+            VLOG_WARN_RL(&rl, "bad ip address %s", ip_s);
+            return NULL;
+        }
+
+        for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
+            const struct ipv4_netaddr *na = &op->lrp_networks.ipv4_addrs[i];
+
+            if (!((na->network ^ ip) & na->mask)) {
+                /* There should be only 1 interface that matches the
+                 * supplied IP.  Otherwise, it's a configuration error,
+                 * because subnets of a router's interfaces should NOT
+                 * overlap. */
+                return na->addr_s;
+            }
+        }
+    } else {
+        struct in6_addr ip6;
+
+        if (!ipv6_parse(ip_s, &ip6)) {
+            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+            VLOG_WARN_RL(&rl, "bad ipv6 address %s", ip_s);
+            return NULL;
+        }
 
-        if (!((na->network ^ ip) & na->mask)) {
-            /* There should be only 1 interface that matches the
-             * next hop.  Otherwise, it's a configuration error,
-             * because subnets of router's interfaces should NOT
-             * overlap. */
-            return na->addr_s;
+        for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
+            const struct ipv6_netaddr *na = &op->lrp_networks.ipv6_addrs[i];
+            struct in6_addr xor_addr = ipv6_addr_bitxor(&na->network, &ip6);
+            struct in6_addr and_addr = ipv6_addr_bitand(&xor_addr, &na->mask);
+
+            if (ipv6_is_zero(&and_addr)) {
+                /* There should be only 1 interface that matches the
+                 * supplied IP.  Otherwise, it's a configuration error,
+                 * because subnets of a router's interfaces should NOT
+                 * overlap. */
+                return na->addr_s;
+            }
         }
     }
 
@@ -2751,21 +2776,26 @@ add_route(struct hmap *lflows, const struct ovn_port *op,
           const char *lrp_addr_s, const char *network_s, int plen,
           const char *gateway)
 {
-    char *match = xasprintf("ip4.dst == %s/%d", network_s, plen);
+    bool is_ipv4 = strchr(network_s, '.') ? true : false;
+
+    char *match = xasprintf("ip%s.dst == %s/%d", is_ipv4 ? "4" : "6",
+                            network_s, plen);
 
     struct ds actions = DS_EMPTY_INITIALIZER;
-    ds_put_cstr(&actions, "ip.ttl--; reg0 = ");
+    ds_put_format(&actions, "ip.ttl--; %sreg0 = ", is_ipv4 ? "" : "xx");
+
     if (gateway) {
         ds_put_cstr(&actions, gateway);
     } else {
-        ds_put_cstr(&actions, "ip4.dst");
+        ds_put_format(&actions, "ip%s.dst", is_ipv4 ? "4" : "6");
     }
     ds_put_format(&actions, "; "
-                  "reg1 = %s; "
+                  "%sreg1 = %s; "
                   "eth.src = %s; "
                   "outport = %s; "
                   "inport = \"\"; /* Allow sending out inport. */ "
                   "next;",
+                  is_ipv4 ? "" : "xx",
                   lrp_addr_s,
                   op->lrp_networks.ea_s,
                   op->json_key);
@@ -2783,26 +2813,68 @@ build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
                         struct hmap *ports,
                         const struct nbrec_logical_router_static_route *route)
 {
-    ovs_be32 prefix, nexthop, mask;
+    ovs_be32 nexthop;
     const char *lrp_addr_s;
+    unsigned int plen;
+    bool is_ipv4;
 
-    /* Verify that next hop is an IP address with 32 bits mask. */
-    char *error = ip_parse_masked(route->nexthop, &nexthop, &mask);
-    if (error || mask != OVS_BE32_MAX) {
-        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
-        VLOG_WARN_RL(&rl, "bad next hop ip address %s", route->nexthop);
+    /* Verify that the next hop is an IP address with an all-ones mask. */
+    char *error = ip_parse_cidr(route->nexthop, &nexthop, &plen);
+    if (!error) {
+        if (plen != 32) {
+            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+            VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop);
+            return;
+        }
+        is_ipv4 = true;
+    } else {
         free(error);
-        return;
+
+        struct in6_addr ip6;
+        char *error = ipv6_parse_cidr(route->nexthop, &ip6, &plen);
+        if (!error) {
+            if (plen != 128) {
+                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+                VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop);
+                return;
+            }
+            is_ipv4 = false;
+        } else {
+            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+            VLOG_WARN_RL(&rl, "bad next hop ip address %s", route->nexthop);
+            free(error);
+            return;
+        }
     }
 
-    /* Verify that ip prefix is a valid CIDR address. */
-    error = ip_parse_masked(route->ip_prefix, &prefix, &mask);
-    if (error || !ip_is_cidr(mask)) {
-        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
-        VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
-                     route->ip_prefix);
-        free(error);
-        return;
+    char *prefix_s;
+    if (is_ipv4) {
+        ovs_be32 prefix;
+        /* Verify that ip prefix is a valid IPv4 address. */
+        error = ip_parse_cidr(route->ip_prefix, &prefix, &plen);
+        if (error) {
+            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+            VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
+                         route->ip_prefix);
+            free(error);
+            return;
+        }
+        prefix_s = xasprintf(IP_FMT, IP_ARGS(prefix & be32_prefix_mask(plen)));
+    } else {
+        /* Verify that ip prefix is a valid IPv6 address. */
+        struct in6_addr prefix;
+        error = ipv6_parse_cidr(route->ip_prefix, &prefix, &plen);
+        if (error) {
+            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+            VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
+                         route->ip_prefix);
+            free(error);
+            return;
+        }
+        struct in6_addr mask = ipv6_create_mask(plen);
+        struct in6_addr network = ipv6_addr_bitand(&prefix, &mask);
+        prefix_s = xmalloc(INET6_ADDRSTRLEN);
+        inet_ntop(AF_INET6, &network, prefix_s, INET6_ADDRSTRLEN);
     }
 
     /* Find the outgoing port. */
@@ -2813,7 +2885,7 @@ build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
             VLOG_WARN_RL(&rl, "Bad out port %s for static route %s",
                          route->output_port, route->ip_prefix);
-            return;
+            goto free_prefix_s;
         }
         lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop);
     } else {
@@ -2840,17 +2912,17 @@ build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
         VLOG_WARN_RL(&rl, "No path for static route %s; next hop %s",
                      route->ip_prefix, route->nexthop);
-        return;
+        goto free_prefix_s;
     }
 
-    char *prefix_s = xasprintf(IP_FMT, IP_ARGS(prefix & mask));
-    add_route(lflows, out_port, lrp_addr_s, prefix_s,
-              ip_count_cidr_bits(mask), route->nexthop);
+    add_route(lflows, out_port, lrp_addr_s, prefix_s, plen, route->nexthop);
+
+free_prefix_s:
     free(prefix_s);
 }
 
 static void
-op_put_networks(struct ds *ds, const struct ovn_port *op, bool add_bcast)
+op_put_v4_networks(struct ds *ds, const struct ovn_port *op, bool add_bcast)
 {
     if (!add_bcast && op->lrp_networks.n_ipv4_addrs == 1) {
         ds_put_format(ds, "%s", op->lrp_networks.ipv4_addrs[0].addr_s);
@@ -2870,6 +2942,23 @@ op_put_networks(struct ds *ds, const struct ovn_port *op, bool add_bcast)
 }
 
 static void
+op_put_v6_networks(struct ds *ds, const struct ovn_port *op)
+{
+    if (op->lrp_networks.n_ipv6_addrs == 1) {
+        ds_put_format(ds, "%s", op->lrp_networks.ipv6_addrs[0].addr_s);
+        return;
+    }
+
+    ds_put_cstr(ds, "{");
+    for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
+        ds_put_format(ds, "%s, ", op->lrp_networks.ipv6_addrs[i].addr_s);
+    }
+    ds_chomp(ds, ' ');
+    ds_chomp(ds, ',');
+    ds_put_cstr(ds, "}");
+}
+
+static void
 build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
                     struct hmap *lflows)
 {
@@ -2953,39 +3042,43 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
         ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
     }
 
+    /* Logical router ingress table 1: IP Input for IPv4. */
     HMAP_FOR_EACH (op, key_node, ports) {
         if (!op->nbrp) {
             continue;
         }
 
-        /* L3 admission control: drop packets that originate from an IP address
-         * owned by the router or a broadcast address known to the router
-         * (priority 100). */
-        ds_clear(&match);
-        ds_put_cstr(&match, "ip4.src == ");
-        op_put_networks(&match, op, true);
-        ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
-                      ds_cstr(&match), "drop;");
 
-        /* ICMP echo reply.  These flows reply to ICMP echo requests
-         * received for the router's IP address. Since packets only
-         * get here as part of the logical router datapath, the inport
-         * (i.e. the incoming locally attached net) does not matter.
-         * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */
-        ds_clear(&match);
-        ds_put_cstr(&match, "ip4.dst == ");
-        op_put_networks(&match, op, false);
-        ds_put_cstr(&match, " && icmp4.type == 8 && icmp4.code == 0");
-
-        ds_clear(&actions);
-        ds_put_format(&actions,
-            "ip4.dst <-> ip4.src; "
-            "ip.ttl = 255; "
-            "icmp4.type = 0; "
-            "inport = \"\"; /* Allow sending out inport. */ "
-            "next; ");
-        ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
-                      ds_cstr(&match), ds_cstr(&actions));
+        if (op->lrp_networks.n_ipv4_addrs) {
+            /* L3 admission control: drop packets that originate from an
+             * IPv4 address owned by the router or a broadcast address
+             * known to the router (priority 100). */
+            ds_clear(&match);
+            ds_put_cstr(&match, "ip4.src == ");
+            op_put_v4_networks(&match, op, true);
+            ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
+                          ds_cstr(&match), "drop;");
+
+            /* ICMP echo reply.  These flows reply to ICMP echo requests
+             * received for the router's IP address. Since packets only
+             * get here as part of the logical router datapath, the inport
+             * (i.e. the incoming locally attached net) does not matter.
+             * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */
+            ds_clear(&match);
+            ds_put_cstr(&match, "ip4.dst == ");
+            op_put_v4_networks(&match, op, false);
+            ds_put_cstr(&match, " && icmp4.type == 8 && icmp4.code == 0");
+
+            ds_clear(&actions);
+            ds_put_format(&actions,
+                "ip4.dst <-> ip4.src; "
+                "ip.ttl = 255; "
+                "icmp4.type = 0; "
+                "inport = \"\"; /* Allow sending out inport. */ "
+                "next; ");
+            ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
+                          ds_cstr(&match), ds_cstr(&actions));
+        }
 
         /* ARP reply.  These flows reply to ARP requests for the router's own
          * IP address. */
@@ -3096,6 +3189,78 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
         free(snat_ips);
     }
 
+    /* Logical router ingress table 1: IP Input for IPv6. */
+    HMAP_FOR_EACH (op, key_node, ports) {
+        if (!op->nbrp) {
+            continue;
+        }
+
+        if (op->lrp_networks.n_ipv6_addrs) {
+            /* L3 admission control: drop packets that originate from an
+             * IPv6 address owned by the router (priority 100). */
+            ds_clear(&match);
+            ds_put_cstr(&match, "ip6.src == ");
+            op_put_v6_networks(&match, op);
+            ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
+                          ds_cstr(&match), "drop;");
+
+            /* ICMPv6 echo reply.  These flows reply to echo requests
+             * received for the router's IP address. */
+            ds_clear(&match);
+            ds_put_cstr(&match, "ip6.dst == ");
+            op_put_v6_networks(&match, op);
+            ds_put_cstr(&match, " && icmp6.type == 128 && icmp6.code == 0");
+
+            ds_clear(&actions);
+            ds_put_cstr(&actions,
+                        "ip6.dst <-> ip6.src; "
+                        "ip.ttl = 255; "
+                        "icmp6.type = 129; "
+                        "inport = \"\"; /* Allow sending out inport. */ "
+                        "next; ");
+            ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
+                          ds_cstr(&match), ds_cstr(&actions));
+
+            /* Drop IPv6 traffic to this router. */
+            ds_clear(&match);
+            ds_put_cstr(&match, "ip6.dst == ");
+            op_put_v6_networks(&match, op);
+            ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
+                          ds_cstr(&match), "drop;");
+        }
+
+        /* ND reply.  These flows reply to ND solicitations for the
+         * router's own IP address. */
+        for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
+            ds_clear(&match);
+            ds_put_format(&match,
+                    "inport == %s && nd_ns && ip6.dst == {%s, %s} "
+                    "&& nd.target == %s",
+                    op->json_key,
+                    op->lrp_networks.ipv6_addrs[i].addr_s,
+                    op->lrp_networks.ipv6_addrs[i].sn_addr_s,
+                    op->lrp_networks.ipv6_addrs[i].addr_s);
+
+            ds_clear(&actions);
+            ds_put_format(&actions,
+                          "nd_na { "
+                          "eth.src = %s; "
+                          "ip6.src = %s; "
+                          "nd.target = %s; "
+                          "nd.tll = %s; "
+                          "outport = inport; "
+                          "inport = \"\"; /* Allow sending out inport. */ "
+                          "output; "
+                          "};",
+                          op->lrp_networks.ea_s,
+                          op->lrp_networks.ipv6_addrs[i].addr_s,
+                          op->lrp_networks.ipv6_addrs[i].addr_s,
+                          op->lrp_networks.ea_s);
+            ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
+                          ds_cstr(&match), ds_cstr(&actions));
+        }
+    }
+
     /* NAT in Gateway routers. */
     HMAP_FOR_EACH (od, key_node, datapaths) {
         if (!od->nbr) {
@@ -3226,10 +3391,11 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
     /* Logical router ingress table 4: IP Routing.
      *
      * A packet that arrives at this table is an IP packet that should be
-     * routed to the address in ip4.dst. This table sets outport to the correct
-     * output port, eth.src to the output port's MAC address, and reg0 to the
-     * next-hop IP address (leaving ip4.dst, the packet’s final destination,
-     * unchanged), and advances to the next table for ARP resolution. */
+     * routed to the address in 'ip[46].dst'. This table sets outport to
+     * the correct output port, eth.src to the output port's MAC
+     * address, and '[xx]reg0' to the next-hop IP address (leaving
+     * 'ip[46].dst', the packet’s final destination, unchanged), and
+     * advances to the next table for ARP/ND resolution. */
     HMAP_FOR_EACH (op, key_node, ports) {
         if (!op->nbrp) {
             continue;
@@ -3240,14 +3406,20 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
                       op->lrp_networks.ipv4_addrs[i].network_s,
                       op->lrp_networks.ipv4_addrs[i].plen, NULL);
         }
+
+        for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
+            add_route(lflows, op, op->lrp_networks.ipv6_addrs[i].addr_s,
+                      op->lrp_networks.ipv6_addrs[i].network_s,
+                      op->lrp_networks.ipv6_addrs[i].plen, NULL);
+        }
     }
 
+    /* Convert the static routes to flows. */
     HMAP_FOR_EACH (od, key_node, datapaths) {
         if (!od->nbr) {
             continue;
         }
 
-        /* Convert the static routes to flows. */
         for (int i = 0; i < od->nbr->n_static_routes; i++) {
             const struct nbrec_logical_router_static_route *route;
 
@@ -3255,6 +3427,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
             build_static_route_flow(lflows, od, ports, route);
         }
     }
+
     /* XXX destination unreachable */
 
     /* Local router ingress table 5: ARP Resolution.
@@ -3265,31 +3438,47 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
      * Ethernet address in eth.dst. */
     HMAP_FOR_EACH (op, key_node, ports) {
         if (op->nbrp) {
-            /* This is a logical router port. If next-hop IP address in 'reg0'
-             * matches ip address of this router port, then the packet is
-             * intended to eventually be sent to this logical port. Set the
-             * destination mac address using this port's mac address.
+            /* This is a logical router port. If next-hop IP address in
+             * '[xx]reg0' matches IP address of this router port, then
+             * the packet is intended to eventually be sent to this
+             * logical port. Set the destination mac address using this
+             * port's mac address.
              *
              * The packet is still in peer's logical pipeline. So the match
              * should be on peer's outport. */
-            if (op->peer && op->peer->nbrp) {
-                ds_clear(&match);
-                ds_put_format(&match, "outport == %s && reg0 == ",
-                              op->peer->json_key);
-                op_put_networks(&match, op, false);
+            if (op->peer && op->nbrp->peer) {
+                if (op->lrp_networks.n_ipv4_addrs) {
+                    ds_clear(&match);
+                    ds_put_format(&match, "outport == %s && reg0 == ",
+                                  op->peer->json_key);
+                    op_put_v4_networks(&match, op, false);
+
+                    ds_clear(&actions);
+                    ds_put_format(&actions, "eth.dst = %s; next;",
+                                  op->lrp_networks.ea_s);
+                    ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE,
+                                  100, ds_cstr(&match), ds_cstr(&actions));
+                }
 
-                ds_clear(&actions);
-                ds_put_format(&actions, "eth.dst = %s; next;",
-                              op->lrp_networks.ea_s);
-                ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE,
-                              100, ds_cstr(&match), ds_cstr(&actions));
+                if (op->lrp_networks.n_ipv6_addrs) {
+                    ds_clear(&match);
+                    ds_put_format(&match, "outport == %s && xxreg0 == ",
+                                  op->peer->json_key);
+                    op_put_v6_networks(&match, op);
+
+                    ds_clear(&actions);
+                    ds_put_format(&actions, "eth.dst = %s; next;",
+                                  op->lrp_networks.ea_s);
+                    ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE,
+                                  100, ds_cstr(&match), ds_cstr(&actions));
+                }
             }
         } else if (op->od->n_router_ports && strcmp(op->nbsp->type, "router")) {
             /* This is a logical switch port that backs a VM or a container.
              * Extract its addresses. For each of the address, go through all
              * the router ports attached to the switch (to which this port
              * connects) and if the address in question is reachable from the
-             * router port, add an ARP entry in that router's pipeline. */
+             * router port, add an ARP/ND entry in that router's pipeline. */
 
             for (size_t i = 0; i < op->n_lsp_addrs; i++) {
                 const char *ea_s = op->lsp_addrs[i].ea_s;
@@ -3326,6 +3515,40 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
                                       ds_cstr(&match), ds_cstr(&actions));
                     }
                 }
+
+                for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
+                    const char *ip_s = op->lsp_addrs[i].ipv6_addrs[j].addr_s;
+                    for (size_t k = 0; k < op->od->n_router_ports; k++) {
+                        /* Get the Logical_Router_Port that the
+                         * Logical_Switch_Port is connected to, as
+                         * 'peer'. */
+                        const char *peer_name = smap_get(
+                            &op->od->router_ports[k]->nbsp->options,
+                            "router-port");
+                        if (!peer_name) {
+                            continue;
+                        }
+
+                        struct ovn_port *peer = ovn_port_find(ports, peer_name);
+                        if (!peer || !peer->nbrp) {
+                            continue;
+                        }
+
+                        if (!find_lrp_member_ip(peer, ip_s)) {
+                            continue;
+                        }
+
+                        ds_clear(&match);
+                        ds_put_format(&match, "outport == %s && xxreg0 == %s",
+                                      peer->json_key, ip_s);
+
+                        ds_clear(&actions);
+                        ds_put_format(&actions, "eth.dst = %s; next;", ea_s);
+                        ovn_lflow_add(lflows, peer->od,
+                                      S_ROUTER_IN_ARP_RESOLVE, 100,
+                                      ds_cstr(&match), ds_cstr(&actions));
+                    }
+                }
             }
         } else if (!strcmp(op->nbsp->type, "router")) {
             /* This is a logical switch port that connects to a router. */
@@ -3361,16 +3584,31 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
                    continue;
                 }
 
-                ds_clear(&match);
-                ds_put_format(&match, "outport == %s && reg0 == ",
-                              peer->json_key);
-                op_put_networks(&match, router_port, false);
+                if (router_port->lrp_networks.n_ipv4_addrs) {
+                    ds_clear(&match);
+                    ds_put_format(&match, "outport == %s && reg0 == ",
+                                  peer->json_key);
+                    op_put_v4_networks(&match, router_port, false);
+
+                    ds_clear(&actions);
+                    ds_put_format(&actions, "eth.dst = %s; next;",
+                                              router_port->lrp_networks.ea_s);
+                    ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
+                                  100, ds_cstr(&match), ds_cstr(&actions));
+                }
 
-                ds_clear(&actions);
-                ds_put_format(&actions, "eth.dst = %s; next;",
-                              router_port->lrp_networks.ea_s);
-                ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
-                              100, ds_cstr(&match), ds_cstr(&actions));
+                if (router_port->lrp_networks.n_ipv6_addrs) {
+                    ds_clear(&match);
+                    ds_put_format(&match, "outport == %s && xxreg0 == ",
+                                  peer->json_key);
+                    op_put_v6_networks(&match, router_port);
+
+                    ds_clear(&actions);
+                    ds_put_format(&actions, "eth.dst = %s; next;",
+                                  router_port->lrp_networks.ea_s);
+                    ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
+                                  100, ds_cstr(&match), ds_cstr(&actions));
+                }
             }
         }
     }
-- 
1.9.1





More information about the dev mailing list